1 /*
2 * By downloading, copying, installing or using the software you agree to this license.
3 * If you do not agree to this license, do not download, install,
4 * copy or use the software.
5 *
6 *
7 * License Agreement
8 * For Open Source Computer Vision Library
9 * (3-clause BSD License)
10 *
11 * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
12 * Third party copyrights are property of their respective owners.
13 *
14 * Redistribution and use in source and binary forms, with or without modification,
15 * are permitted provided that the following conditions are met:
16 *
17 * * Redistributions of source code must retain the above copyright notice,
18 * this list of conditions and the following disclaimer.
19 *
20 * * Redistributions in binary form must reproduce the above copyright notice,
21 * this list of conditions and the following disclaimer in the documentation
22 * and/or other materials provided with the distribution.
23 *
24 * * Neither the names of the copyright holders nor the names of the contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * This software is provided by the copyright holders and contributors "as is" and
29 * any express or implied warranties, including, but not limited to, the implied
30 * warranties of merchantability and fitness for a particular purpose are disclaimed.
31 * In no event shall copyright holders or contributors be liable for any direct,
32 * indirect, incidental, special, exemplary, or consequential damages
33 * (including, but not limited to, procurement of substitute goods or services;
34 * loss of use, data, or profits; or business interruption) however caused
35 * and on any theory of liability, whether in contract, strict liability,
36 * or tort (including negligence or otherwise) arising in any way out of
37 * the use of this software, even if advised of the possibility of such damage.
38 */
39
40 #include "common.hpp"
41
42 #include "saturate_cast.hpp"
43 #include <vector>
44 #include <cstring>
45
46 namespace CAROTENE_NS {
47
48 #ifdef CAROTENE_NEON
49 namespace {
50 struct RowFilter3x3Canny
51 {
RowFilter3x3CannyCAROTENE_NS::__anon266eb4810111::RowFilter3x3Canny52 inline RowFilter3x3Canny(const ptrdiff_t borderxl, const ptrdiff_t borderxr)
53 {
54 vfmask = vreinterpret_u8_u64(vmov_n_u64(borderxl ? 0x0000FFffFFffFFffULL : 0x0100FFffFFffFFffULL));
55 vtmask = vreinterpret_u8_u64(vmov_n_u64(borderxr ? 0x0707060504030201ULL : 0x0706050403020100ULL));
56 lookLeft = offsetk - borderxl;
57 lookRight = offsetk - borderxr;
58 }
59
operator ()CAROTENE_NS::__anon266eb4810111::RowFilter3x3Canny60 inline void operator()(const u8* src, s16* dstx, s16* dsty, ptrdiff_t width)
61 {
62 uint8x8_t l = vtbl1_u8(vld1_u8(src - lookLeft), vfmask);
63 ptrdiff_t i = 0;
64 for (; i < width - 8 + lookRight; i += 8)
65 {
66 internal::prefetch(src + i);
67 uint8x8_t l18u = vld1_u8(src + i + 1);
68
69 uint8x8_t l2 = l18u;
70 uint8x8_t l0 = vext_u8(l, l18u, 6);
71 int16x8_t l1x2 = vreinterpretq_s16_u16(vshll_n_u8(vext_u8(l, l18u, 7), 1));
72
73 l = l18u;
74
75 int16x8_t l02 = vreinterpretq_s16_u16(vaddl_u8(l2, l0));
76 int16x8_t ldx = vreinterpretq_s16_u16(vsubl_u8(l2, l0));
77 int16x8_t ldy = vaddq_s16(l02, l1x2);
78
79 vst1q_s16(dstx + i, ldx);
80 vst1q_s16(dsty + i, ldy);
81 }
82
83 //tail
84 if (lookRight == 0 || i != width)
85 {
86 uint8x8_t tail0 = vld1_u8(src + (width - 9));//can't get left 1 pixel another way if width==8*k+1
87 uint8x8_t tail2 = vtbl1_u8(vld1_u8(src + (width - 8 + lookRight)), vtmask);
88 uint8x8_t tail1 = vext_u8(vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(tail0), 8*6)), tail2, 7);
89
90 int16x8_t tail02 = vreinterpretq_s16_u16(vaddl_u8(tail2, tail0));
91 int16x8_t tail1x2 = vreinterpretq_s16_u16(vshll_n_u8(tail1, 1));
92 int16x8_t taildx = vreinterpretq_s16_u16(vsubl_u8(tail2, tail0));
93 int16x8_t taildy = vqaddq_s16(tail02, tail1x2);
94
95 vst1q_s16(dstx + (width - 8), taildx);
96 vst1q_s16(dsty + (width - 8), taildy);
97 }
98 }
99
100 uint8x8_t vfmask;
101 uint8x8_t vtmask;
102 enum { offsetk = 1};
103 ptrdiff_t lookLeft;
104 ptrdiff_t lookRight;
105 };
106
107 template <bool L2gradient>
ColFilter3x3Canny(const s16 * src0,const s16 * src1,const s16 * src2,s16 * dstx,s16 * dsty,s32 * mag,ptrdiff_t width)108 inline void ColFilter3x3Canny(const s16* src0, const s16* src1, const s16* src2, s16* dstx, s16* dsty, s32* mag, ptrdiff_t width)
109 {
110 ptrdiff_t j = 0;
111 for (; j <= width - 8; j += 8)
112 {
113 ColFilter3x3CannyL1Loop:
114 int16x8_t line0x = vld1q_s16(src0 + j);
115 int16x8_t line1x = vld1q_s16(src1 + j);
116 int16x8_t line2x = vld1q_s16(src2 + j);
117 int16x8_t line0y = vld1q_s16(src0 + j + width);
118 int16x8_t line2y = vld1q_s16(src2 + j + width);
119
120 int16x8_t l02 = vaddq_s16(line0x, line2x);
121 int16x8_t l1x2 = vshlq_n_s16(line1x, 1);
122 int16x8_t dy = vsubq_s16(line2y, line0y);
123 int16x8_t dx = vaddq_s16(l1x2, l02);
124
125 int16x8_t dya = vabsq_s16(dy);
126 int16x8_t dxa = vabsq_s16(dx);
127 int16x8_t norm = vaddq_s16(dya, dxa);
128
129 int32x4_t normh = vmovl_s16(vget_high_s16(norm));
130 int32x4_t norml = vmovl_s16(vget_low_s16(norm));
131
132 vst1q_s16(dsty + j, dy);
133 vst1q_s16(dstx + j, dx);
134 vst1q_s32(mag + j + 4, normh);
135 vst1q_s32(mag + j, norml);
136 }
137 if (j != width)
138 {
139 j = width - 8;
140 goto ColFilter3x3CannyL1Loop;
141 }
142 }
143 template <>
ColFilter3x3Canny(const s16 * src0,const s16 * src1,const s16 * src2,s16 * dstx,s16 * dsty,s32 * mag,ptrdiff_t width)144 inline void ColFilter3x3Canny<true>(const s16* src0, const s16* src1, const s16* src2, s16* dstx, s16* dsty, s32* mag, ptrdiff_t width)
145 {
146 ptrdiff_t j = 0;
147 for (; j <= width - 8; j += 8)
148 {
149 ColFilter3x3CannyL2Loop:
150 int16x8_t line0x = vld1q_s16(src0 + j);
151 int16x8_t line1x = vld1q_s16(src1 + j);
152 int16x8_t line2x = vld1q_s16(src2 + j);
153 int16x8_t line0y = vld1q_s16(src0 + j + width);
154 int16x8_t line2y = vld1q_s16(src2 + j + width);
155
156 int16x8_t l02 = vaddq_s16(line0x, line2x);
157 int16x8_t l1x2 = vshlq_n_s16(line1x, 1);
158 int16x8_t dy = vsubq_s16(line2y, line0y);
159 int16x8_t dx = vaddq_s16(l1x2, l02);
160
161 int32x4_t norml = vmull_s16(vget_low_s16(dx), vget_low_s16(dx));
162 int32x4_t normh = vmull_s16(vget_high_s16(dy), vget_high_s16(dy));
163
164 norml = vmlal_s16(norml, vget_low_s16(dy), vget_low_s16(dy));
165 normh = vmlal_s16(normh, vget_high_s16(dx), vget_high_s16(dx));
166
167 vst1q_s16(dsty + j, dy);
168 vst1q_s16(dstx + j, dx);
169 vst1q_s32(mag + j, norml);
170 vst1q_s32(mag + j + 4, normh);
171 }
172 if (j != width)
173 {
174 j = width - 8;
175 goto ColFilter3x3CannyL2Loop;
176 }
177 }
178
179 template <bool L2gradient>
NormCanny(const ptrdiff_t colscn,s16 * _dx,s16 * _dy,s32 * _norm)180 inline void NormCanny(const ptrdiff_t colscn, s16* _dx, s16* _dy, s32* _norm)
181 {
182 ptrdiff_t j = 0;
183 if (colscn >= 8)
184 {
185 int16x8_t vx = vld1q_s16(_dx);
186 int16x8_t vy = vld1q_s16(_dy);
187 for (; j <= colscn - 16; j+=8)
188 {
189 internal::prefetch(_dx);
190 internal::prefetch(_dy);
191
192 int16x8_t vx2 = vld1q_s16(_dx + j + 8);
193 int16x8_t vy2 = vld1q_s16(_dy + j + 8);
194
195 int16x8_t vabsx = vabsq_s16(vx);
196 int16x8_t vabsy = vabsq_s16(vy);
197
198 int16x8_t norm = vaddq_s16(vabsx, vabsy);
199
200 int32x4_t normh = vmovl_s16(vget_high_s16(norm));
201 int32x4_t norml = vmovl_s16(vget_low_s16(norm));
202
203 vst1q_s32(_norm + j + 4, normh);
204 vst1q_s32(_norm + j + 0, norml);
205
206 vx = vx2;
207 vy = vy2;
208 }
209 int16x8_t vabsx = vabsq_s16(vx);
210 int16x8_t vabsy = vabsq_s16(vy);
211
212 int16x8_t norm = vaddq_s16(vabsx, vabsy);
213
214 int32x4_t normh = vmovl_s16(vget_high_s16(norm));
215 int32x4_t norml = vmovl_s16(vget_low_s16(norm));
216
217 vst1q_s32(_norm + j + 4, normh);
218 vst1q_s32(_norm + j + 0, norml);
219 }
220 for (; j < colscn; j++)
221 _norm[j] = std::abs(s32(_dx[j])) + std::abs(s32(_dy[j]));
222 }
223
224 template <>
NormCanny(const ptrdiff_t colscn,s16 * _dx,s16 * _dy,s32 * _norm)225 inline void NormCanny<true>(const ptrdiff_t colscn, s16* _dx, s16* _dy, s32* _norm)
226 {
227 ptrdiff_t j = 0;
228 if (colscn >= 8)
229 {
230 int16x8_t vx = vld1q_s16(_dx);
231 int16x8_t vy = vld1q_s16(_dy);
232
233 for (; j <= colscn - 16; j+=8)
234 {
235 internal::prefetch(_dx);
236 internal::prefetch(_dy);
237
238 int16x8_t vxnext = vld1q_s16(_dx + j + 8);
239 int16x8_t vynext = vld1q_s16(_dy + j + 8);
240
241 int32x4_t norml = vmull_s16(vget_low_s16(vx), vget_low_s16(vx));
242 int32x4_t normh = vmull_s16(vget_high_s16(vy), vget_high_s16(vy));
243
244 norml = vmlal_s16(norml, vget_low_s16(vy), vget_low_s16(vy));
245 normh = vmlal_s16(normh, vget_high_s16(vx), vget_high_s16(vx));
246
247 vst1q_s32(_norm + j + 0, norml);
248 vst1q_s32(_norm + j + 4, normh);
249
250 vx = vxnext;
251 vy = vynext;
252 }
253 int32x4_t norml = vmull_s16(vget_low_s16(vx), vget_low_s16(vx));
254 int32x4_t normh = vmull_s16(vget_high_s16(vy), vget_high_s16(vy));
255
256 norml = vmlal_s16(norml, vget_low_s16(vy), vget_low_s16(vy));
257 normh = vmlal_s16(normh, vget_high_s16(vx), vget_high_s16(vx));
258
259 vst1q_s32(_norm + j + 0, norml);
260 vst1q_s32(_norm + j + 4, normh);
261 }
262 for (; j < colscn; j++)
263 _norm[j] = s32(_dx[j])*_dx[j] + s32(_dy[j])*_dy[j];
264 }
265
266 template <bool L2gradient>
prepareThresh(f64 low_thresh,f64 high_thresh,s32 & low,s32 & high)267 inline void prepareThresh(f64 low_thresh, f64 high_thresh,
268 s32 &low, s32 &high)
269 {
270 if (low_thresh > high_thresh)
271 std::swap(low_thresh, high_thresh);
272 #if defined __GNUC__
273 low = (s32)low_thresh;
274 high = (s32)high_thresh;
275 low -= (low > low_thresh);
276 high -= (high > high_thresh);
277 #else
278 low = internal::round(low_thresh);
279 high = internal::round(high_thresh);
280 f32 ldiff = (f32)(low_thresh - low);
281 f32 hdiff = (f32)(high_thresh - high);
282 low -= (ldiff < 0);
283 high -= (hdiff < 0);
284 #endif
285 }
286 template <>
prepareThresh(f64 low_thresh,f64 high_thresh,s32 & low,s32 & high)287 inline void prepareThresh<true>(f64 low_thresh, f64 high_thresh,
288 s32 &low, s32 &high)
289 {
290 if (low_thresh > high_thresh)
291 std::swap(low_thresh, high_thresh);
292 if (low_thresh > 0) low_thresh *= low_thresh;
293 if (high_thresh > 0) high_thresh *= high_thresh;
294 #if defined __GNUC__
295 low = (s32)low_thresh;
296 high = (s32)high_thresh;
297 low -= (low > low_thresh);
298 high -= (high > high_thresh);
299 #else
300 low = internal::round(low_thresh);
301 high = internal::round(high_thresh);
302 f32 ldiff = (f32)(low_thresh - low);
303 f32 hdiff = (f32)(high_thresh - high);
304 low -= (ldiff < 0);
305 high -= (hdiff < 0);
306 #endif
307 }
308
309 template <bool L2gradient, bool externalSobel>
310 struct _normEstimator
311 {
312 ptrdiff_t magstep;
313 ptrdiff_t dxOffset;
314 ptrdiff_t dyOffset;
315 ptrdiff_t shxOffset;
316 ptrdiff_t shyOffset;
317 std::vector<u8> buffer;
318 const ptrdiff_t offsetk;
319 ptrdiff_t borderyt, borderyb;
320 RowFilter3x3Canny sobelRow;
321
_normEstimatorCAROTENE_NS::__anon266eb4810111::_normEstimator322 inline _normEstimator(const Size2D &size, s32, Margin borderMargin,
323 ptrdiff_t &mapstep, s32** mag_buf, u8* &map):
324 offsetk(1),
325 sobelRow(std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.left),
326 std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.right))
327 {
328 mapstep = size.width + 2;
329 magstep = size.width + 2 + size.width * (4 * sizeof(s16)/sizeof(s32));
330 dxOffset = mapstep * sizeof(s32)/sizeof(s16);
331 dyOffset = dxOffset + size.width * 1;
332 shxOffset = dxOffset + size.width * 2;
333 shyOffset = dxOffset + size.width * 3;
334 buffer.resize( (size.width+2)*(size.height+2) + magstep*3*sizeof(s32) );
335 mag_buf[0] = (s32*)&buffer[0];
336 mag_buf[1] = mag_buf[0] + magstep;
337 mag_buf[2] = mag_buf[1] + magstep;
338 memset(mag_buf[0], 0, mapstep * sizeof(s32));
339
340 map = (u8*)(mag_buf[2] + magstep);
341 memset(map, 1, mapstep);
342 memset(map + mapstep*(size.height + 1), 1, mapstep);
343 borderyt = std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.top);
344 borderyb = std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.bottom);
345 }
firstRowCAROTENE_NS::__anon266eb4810111::_normEstimator346 inline void firstRow(const Size2D &size, s32,
347 const u8 *srcBase, ptrdiff_t srcStride,
348 s16*, ptrdiff_t,
349 s16*, ptrdiff_t,
350 s32** mag_buf)
351 {
352 //sobelH row #0
353 const u8* _src = internal::getRowPtr(srcBase, srcStride, 0);
354 sobelRow(_src, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[0]) + shyOffset, size.width);
355 //sobelH row #1
356 _src = internal::getRowPtr(srcBase, srcStride, 1);
357 sobelRow(_src, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[1]) + shyOffset, size.width);
358
359 mag_buf[1][0] = mag_buf[1][size.width+1] = 0;
360 if (borderyt == 0)
361 {
362 //sobelH row #-1
363 _src = internal::getRowPtr(srcBase, srcStride, -1);
364 sobelRow(_src, ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[2]) + shyOffset, size.width);
365
366 ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset,
367 ((s16*)mag_buf[1]) + dxOffset, ((s16*)mag_buf[1]) + dyOffset, mag_buf[1] + 1, size.width);
368 }
369 else
370 {
371 ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset,
372 ((s16*)mag_buf[1]) + dxOffset, ((s16*)mag_buf[1]) + dyOffset, mag_buf[1] + 1, size.width);
373 }
374 }
nextRowCAROTENE_NS::__anon266eb4810111::_normEstimator375 inline void nextRow(const Size2D &size, s32,
376 const u8 *srcBase, ptrdiff_t srcStride,
377 s16*, ptrdiff_t,
378 s16*, ptrdiff_t,
379 const ptrdiff_t &mapstep, s32** mag_buf,
380 size_t i, const s16* &_x, const s16* &_y)
381 {
382 mag_buf[2][0] = mag_buf[2][size.width+1] = 0;
383 if (i < size.height - borderyb)
384 {
385 const u8* _src = internal::getRowPtr(srcBase, srcStride, i+1);
386 //sobelH row #i+1
387 sobelRow(_src, ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[2]) + shyOffset, size.width);
388
389 ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[2]) + shxOffset,
390 ((s16*)mag_buf[2]) + dxOffset, ((s16*)mag_buf[2]) + dyOffset, mag_buf[2] + 1, size.width);
391 }
392 else if (i < size.height)
393 {
394 ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset,
395 ((s16*)mag_buf[2]) + dxOffset, ((s16*)mag_buf[2]) + dyOffset, mag_buf[2] + 1, size.width);
396 }
397 else
398 memset(mag_buf[2], 0, mapstep*sizeof(s32));
399 _x = ((s16*)mag_buf[1]) + dxOffset;
400 _y = ((s16*)mag_buf[1]) + dyOffset;
401 }
402 };
403 template <bool L2gradient>
404 struct _normEstimator<L2gradient, true>
405 {
406 std::vector<u8> buffer;
407
_normEstimatorCAROTENE_NS::__anon266eb4810111::_normEstimator408 inline _normEstimator(const Size2D &size, s32 cn, Margin,
409 ptrdiff_t &mapstep, s32** mag_buf, u8* &map)
410 {
411 mapstep = size.width + 2;
412 buffer.resize( (size.width+2)*(size.height+2) + cn*mapstep*3*sizeof(s32) );
413 mag_buf[0] = (s32*)&buffer[0];
414 mag_buf[1] = mag_buf[0] + mapstep*cn;
415 mag_buf[2] = mag_buf[1] + mapstep*cn;
416 memset(mag_buf[0], 0, /* cn* */mapstep * sizeof(s32));
417
418 map = (u8*)(mag_buf[2] + mapstep*cn);
419 memset(map, 1, mapstep);
420 memset(map + mapstep*(size.height + 1), 1, mapstep);
421 }
firstRowCAROTENE_NS::__anon266eb4810111::_normEstimator422 inline void firstRow(const Size2D &size, s32 cn,
423 const u8 *, ptrdiff_t,
424 s16* dxBase, ptrdiff_t dxStride,
425 s16* dyBase, ptrdiff_t dyStride,
426 s32** mag_buf)
427 {
428 s32* _norm = mag_buf[1] + 1;
429
430 s16* _dx = internal::getRowPtr(dxBase, dxStride, 0);
431 s16* _dy = internal::getRowPtr(dyBase, dyStride, 0);
432
433 NormCanny<L2gradient>(size.width*cn, _dx, _dy, _norm);
434
435 if(cn > 1)
436 {
437 for(size_t j = 0, jn = 0; j < size.width; ++j, jn += cn)
438 {
439 size_t maxIdx = jn;
440 for(s32 k = 1; k < cn; ++k)
441 if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
442 _norm[j] = _norm[maxIdx];
443 _dx[j] = _dx[maxIdx];
444 _dy[j] = _dy[maxIdx];
445 }
446 }
447
448 _norm[-1] = _norm[size.width] = 0;
449 }
nextRowCAROTENE_NS::__anon266eb4810111::_normEstimator450 inline void nextRow(const Size2D &size, s32 cn,
451 const u8 *, ptrdiff_t,
452 s16* dxBase, ptrdiff_t dxStride,
453 s16* dyBase, ptrdiff_t dyStride,
454 const ptrdiff_t &mapstep, s32** mag_buf,
455 size_t i, const s16* &_x, const s16* &_y)
456 {
457 s32* _norm = mag_buf[(i > 0) + 1] + 1;
458 if (i < size.height)
459 {
460 s16* _dx = internal::getRowPtr(dxBase, dxStride, i);
461 s16* _dy = internal::getRowPtr(dyBase, dyStride, i);
462
463 NormCanny<L2gradient>(size.width*cn, _dx, _dy, _norm);
464
465 if(cn > 1)
466 {
467 for(size_t j = 0, jn = 0; j < size.width; ++j, jn += cn)
468 {
469 size_t maxIdx = jn;
470 for(s32 k = 1; k < cn; ++k)
471 if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
472 _norm[j] = _norm[maxIdx];
473 _dx[j] = _dx[maxIdx];
474 _dy[j] = _dy[maxIdx];
475 }
476 }
477
478 _norm[-1] = _norm[size.width] = 0;
479 }
480 else
481 memset(_norm-1, 0, /* cn* */mapstep*sizeof(s32));
482
483 _x = internal::getRowPtr(dxBase, dxStride, i-1);
484 _y = internal::getRowPtr(dyBase, dyStride, i-1);
485 }
486 };
487
488 template <bool L2gradient, bool externalSobel>
Canny3x3(const Size2D & size,s32 cn,const u8 * srcBase,ptrdiff_t srcStride,u8 * dstBase,ptrdiff_t dstStride,s16 * dxBase,ptrdiff_t dxStride,s16 * dyBase,ptrdiff_t dyStride,f64 low_thresh,f64 high_thresh,Margin borderMargin)489 inline void Canny3x3(const Size2D &size, s32 cn,
490 const u8 * srcBase, ptrdiff_t srcStride,
491 u8 * dstBase, ptrdiff_t dstStride,
492 s16 * dxBase, ptrdiff_t dxStride,
493 s16 * dyBase, ptrdiff_t dyStride,
494 f64 low_thresh, f64 high_thresh,
495 Margin borderMargin)
496 {
497 s32 low, high;
498 prepareThresh<L2gradient>(low_thresh, high_thresh, low, high);
499
500 ptrdiff_t mapstep;
501 s32* mag_buf[3];
502 u8* map;
503 _normEstimator<L2gradient, externalSobel> normEstimator(size, cn, borderMargin, mapstep, mag_buf, map);
504
505 size_t maxsize = std::max<size_t>( 1u << 10, size.width * size.height / 10 );
506 std::vector<u8*> stack( maxsize );
507 u8 **stack_top = &stack[0];
508 u8 **stack_bottom = &stack[0];
509
510 /* sector numbers
511 (Top-Left Origin)
512
513 1 2 3
514 * * *
515 * * *
516 0*******0
517 * * *
518 * * *
519 3 2 1
520 */
521
522 #define CANNY_PUSH(d) *(d) = u8(2), *stack_top++ = (d)
523 #define CANNY_POP(d) (d) = *--stack_top
524
525 //i == 0
526 normEstimator.firstRow(size, cn, srcBase, srcStride, dxBase, dxStride, dyBase, dyStride, mag_buf);
527 // calculate magnitude and angle of gradient, perform non-maxima supression.
528 // fill the map with one of the following values:
529 // 0 - the pixel might belong to an edge
530 // 1 - the pixel can not belong to an edge
531 // 2 - the pixel does belong to an edge
532 for (size_t i = 1; i <= size.height; i++)
533 {
534 const s16 *_x, *_y;
535 normEstimator.nextRow(size, cn, srcBase, srcStride, dxBase, dxStride, dyBase, dyStride, mapstep, mag_buf, i, _x, _y);
536
537 u8* _map = map + mapstep*i + 1;
538 _map[-1] = _map[size.width] = 1;
539
540 s32* _mag = mag_buf[1] + 1; // take the central row
541 ptrdiff_t magstep1 = mag_buf[2] - mag_buf[1];
542 ptrdiff_t magstep2 = mag_buf[0] - mag_buf[1];
543
544 if ((stack_top - stack_bottom) + size.width > maxsize)
545 {
546 ptrdiff_t sz = (ptrdiff_t)(stack_top - stack_bottom);
547 maxsize = maxsize * 3/2;
548 stack.resize(maxsize);
549 stack_bottom = &stack[0];
550 stack_top = stack_bottom + sz;
551 }
552
553 s32 prev_flag = 0;
554 for (ptrdiff_t j = 0; j < (ptrdiff_t)size.width; j++)
555 {
556 #define CANNY_SHIFT 15
557 const s32 TG22 = (s32)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
558
559 s32 m = _mag[j];
560
561 if (m > low)
562 {
563 s32 xs = _x[j];
564 s32 ys = _y[j];
565 s32 x = abs(xs);
566 s32 y = abs(ys) << CANNY_SHIFT;
567
568 s32 tg22x = x * TG22;
569
570 if (y < tg22x)
571 {
572 if (m > _mag[j-1] && m >= _mag[j+1]) goto __push;
573 }
574 else
575 {
576 s32 tg67x = tg22x + (x << (CANNY_SHIFT+1));
577 if (y > tg67x)
578 {
579 if (m > _mag[j+magstep2] && m >= _mag[j+magstep1]) goto __push;
580 }
581 else
582 {
583 s32 s = (xs ^ ys) < 0 ? -1 : 1;
584 if(m > _mag[j+magstep2-s] && m > _mag[j+magstep1+s]) goto __push;
585 }
586 }
587 }
588 prev_flag = 0;
589 _map[j] = u8(1);
590 continue;
591 __push:
592 if (!prev_flag && m > high && _map[j-mapstep] != 2)
593 {
594 CANNY_PUSH(_map + j);
595 prev_flag = 1;
596 }
597 else
598 _map[j] = 0;
599 }
600
601 // scroll the ring buffer
602 _mag = mag_buf[0];
603 mag_buf[0] = mag_buf[1];
604 mag_buf[1] = mag_buf[2];
605 mag_buf[2] = _mag;
606 }
607
608 // now track the edges (hysteresis thresholding)
609 while (stack_top > stack_bottom)
610 {
611 u8* m;
612 if ((size_t)(stack_top - stack_bottom) + 8u > maxsize)
613 {
614 ptrdiff_t sz = (ptrdiff_t)(stack_top - stack_bottom);
615 maxsize = maxsize * 3/2;
616 stack.resize(maxsize);
617 stack_bottom = &stack[0];
618 stack_top = stack_bottom + sz;
619 }
620
621 CANNY_POP(m);
622
623 if (!m[-1]) CANNY_PUSH(m - 1);
624 if (!m[1]) CANNY_PUSH(m + 1);
625 if (!m[-mapstep-1]) CANNY_PUSH(m - mapstep - 1);
626 if (!m[-mapstep]) CANNY_PUSH(m - mapstep);
627 if (!m[-mapstep+1]) CANNY_PUSH(m - mapstep + 1);
628 if (!m[mapstep-1]) CANNY_PUSH(m + mapstep - 1);
629 if (!m[mapstep]) CANNY_PUSH(m + mapstep);
630 if (!m[mapstep+1]) CANNY_PUSH(m + mapstep + 1);
631 }
632
633 // the final pass, form the final image
634 uint8x16_t v2 = vmovq_n_u8(2);
635 const u8* ptrmap = map + mapstep + 1;
636 for (size_t i = 0; i < size.height; i++, ptrmap += mapstep)
637 {
638 u8* _dst = internal::getRowPtr(dstBase, dstStride, i);
639 ptrdiff_t j = 0;
640 for (; j < (ptrdiff_t)size.width - 16; j += 16)
641 {
642 internal::prefetch(ptrmap);
643 uint8x16_t vmap = vld1q_u8(ptrmap + j);
644 uint8x16_t vdst = vceqq_u8(vmap, v2);
645 vst1q_u8(_dst+j, vdst);
646 }
647 for (; j < (ptrdiff_t)size.width; j++)
648 _dst[j] = (u8)-(ptrmap[j] >> 1);
649 }
650 }
651
652 } // namespace
653 #endif
654
isCanny3x3Supported(const Size2D & size)655 bool isCanny3x3Supported(const Size2D &size)
656 {
657 return isSupportedConfiguration() &&
658 size.height >= 2 && size.width >= 9;
659 }
660
Canny3x3L1(const Size2D & size,const u8 * srcBase,ptrdiff_t srcStride,u8 * dstBase,ptrdiff_t dstStride,f64 low_thresh,f64 high_thresh,Margin borderMargin)661 void Canny3x3L1(const Size2D &size,
662 const u8 * srcBase, ptrdiff_t srcStride,
663 u8 * dstBase, ptrdiff_t dstStride,
664 f64 low_thresh, f64 high_thresh,
665 Margin borderMargin)
666 {
667 internal::assertSupportedConfiguration(isCanny3x3Supported(size));
668 #ifdef CAROTENE_NEON
669 Canny3x3<false, false>(size, 1,
670 srcBase, srcStride,
671 dstBase, dstStride,
672 NULL, 0,
673 NULL, 0,
674 low_thresh, high_thresh,
675 borderMargin);
676 #else
677 (void)size;
678 (void)srcBase;
679 (void)srcStride;
680 (void)dstBase;
681 (void)dstStride;
682 (void)low_thresh;
683 (void)high_thresh;
684 (void)borderMargin;
685 #endif
686 }
687
Canny3x3L2(const Size2D & size,const u8 * srcBase,ptrdiff_t srcStride,u8 * dstBase,ptrdiff_t dstStride,f64 low_thresh,f64 high_thresh,Margin borderMargin)688 void Canny3x3L2(const Size2D &size,
689 const u8 * srcBase, ptrdiff_t srcStride,
690 u8 * dstBase, ptrdiff_t dstStride,
691 f64 low_thresh, f64 high_thresh,
692 Margin borderMargin)
693 {
694 internal::assertSupportedConfiguration(isCanny3x3Supported(size));
695 #ifdef CAROTENE_NEON
696 Canny3x3<true, false>(size, 1,
697 srcBase, srcStride,
698 dstBase, dstStride,
699 NULL, 0,
700 NULL, 0,
701 low_thresh, high_thresh,
702 borderMargin);
703 #else
704 (void)size;
705 (void)srcBase;
706 (void)srcStride;
707 (void)dstBase;
708 (void)dstStride;
709 (void)low_thresh;
710 (void)high_thresh;
711 (void)borderMargin;
712 #endif
713 }
714
Canny3x3L1(const Size2D & size,s32 cn,s16 * dxBase,ptrdiff_t dxStride,s16 * dyBase,ptrdiff_t dyStride,u8 * dstBase,ptrdiff_t dstStride,f64 low_thresh,f64 high_thresh)715 void Canny3x3L1(const Size2D &size, s32 cn,
716 s16 * dxBase, ptrdiff_t dxStride,
717 s16 * dyBase, ptrdiff_t dyStride,
718 u8 * dstBase, ptrdiff_t dstStride,
719 f64 low_thresh, f64 high_thresh)
720 {
721 internal::assertSupportedConfiguration();
722 #ifdef CAROTENE_NEON
723 Canny3x3<false, true>(size, cn,
724 NULL, 0,
725 dstBase, dstStride,
726 dxBase, dxStride,
727 dyBase, dyStride,
728 low_thresh, high_thresh,
729 Margin());
730 #else
731 (void)size;
732 (void)cn;
733 (void)dstBase;
734 (void)dstStride;
735 (void)dxBase;
736 (void)dxStride;
737 (void)dyBase;
738 (void)dyStride;
739 (void)low_thresh;
740 (void)high_thresh;
741 #endif
742 }
743
Canny3x3L2(const Size2D & size,s32 cn,s16 * dxBase,ptrdiff_t dxStride,s16 * dyBase,ptrdiff_t dyStride,u8 * dstBase,ptrdiff_t dstStride,f64 low_thresh,f64 high_thresh)744 void Canny3x3L2(const Size2D &size, s32 cn,
745 s16 * dxBase, ptrdiff_t dxStride,
746 s16 * dyBase, ptrdiff_t dyStride,
747 u8 * dstBase, ptrdiff_t dstStride,
748 f64 low_thresh, f64 high_thresh)
749 {
750 internal::assertSupportedConfiguration();
751 #ifdef CAROTENE_NEON
752 Canny3x3<true, true>(size, cn,
753 NULL, 0,
754 dstBase, dstStride,
755 dxBase, dxStride,
756 dyBase, dyStride,
757 low_thresh, high_thresh,
758 Margin());
759 #else
760 (void)size;
761 (void)cn;
762 (void)dstBase;
763 (void)dstStride;
764 (void)dxBase;
765 (void)dxStride;
766 (void)dyBase;
767 (void)dyStride;
768 (void)low_thresh;
769 (void)high_thresh;
770 #endif
771 }
772
773 } // namespace CAROTENE_NS
774