• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * By downloading, copying, installing or using the software you agree to this license.
3  * If you do not agree to this license, do not download, install,
4  * copy or use the software.
5  *
6  *
7  *                           License Agreement
8  *                For Open Source Computer Vision Library
9  *                        (3-clause BSD License)
10  *
11  * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
12  * Third party copyrights are property of their respective owners.
13  *
14  * Redistribution and use in source and binary forms, with or without modification,
15  * are permitted provided that the following conditions are met:
16  *
17  *   * Redistributions of source code must retain the above copyright notice,
18  *     this list of conditions and the following disclaimer.
19  *
20  *   * Redistributions in binary form must reproduce the above copyright notice,
21  *     this list of conditions and the following disclaimer in the documentation
22  *     and/or other materials provided with the distribution.
23  *
24  *   * Neither the names of the copyright holders nor the names of the contributors
25  *     may be used to endorse or promote products derived from this software
26  *     without specific prior written permission.
27  *
28  * This software is provided by the copyright holders and contributors "as is" and
29  * any express or implied warranties, including, but not limited to, the implied
30  * warranties of merchantability and fitness for a particular purpose are disclaimed.
31  * In no event shall copyright holders or contributors be liable for any direct,
32  * indirect, incidental, special, exemplary, or consequential damages
33  * (including, but not limited to, procurement of substitute goods or services;
34  * loss of use, data, or profits; or business interruption) however caused
35  * and on any theory of liability, whether in contract, strict liability,
36  * or tort (including negligence or otherwise) arising in any way out of
37  * the use of this software, even if advised of the possibility of such damage.
38  */
39 
40 #include "common.hpp"
41 
42 #include "saturate_cast.hpp"
43 #include <vector>
44 #include <cstring>
45 
46 namespace CAROTENE_NS {
47 
48 #ifdef CAROTENE_NEON
49 namespace {
50 struct RowFilter3x3Canny
51 {
RowFilter3x3CannyCAROTENE_NS::__anon266eb4810111::RowFilter3x3Canny52     inline RowFilter3x3Canny(const ptrdiff_t borderxl, const ptrdiff_t borderxr)
53     {
54         vfmask = vreinterpret_u8_u64(vmov_n_u64(borderxl ? 0x0000FFffFFffFFffULL : 0x0100FFffFFffFFffULL));
55         vtmask = vreinterpret_u8_u64(vmov_n_u64(borderxr ? 0x0707060504030201ULL : 0x0706050403020100ULL));
56         lookLeft = offsetk - borderxl;
57         lookRight = offsetk - borderxr;
58     }
59 
operator ()CAROTENE_NS::__anon266eb4810111::RowFilter3x3Canny60     inline void operator()(const u8* src, s16* dstx, s16* dsty, ptrdiff_t width)
61     {
62         uint8x8_t l = vtbl1_u8(vld1_u8(src - lookLeft), vfmask);
63         ptrdiff_t i = 0;
64         for (; i < width - 8 + lookRight; i += 8)
65         {
66             internal::prefetch(src + i);
67             uint8x8_t l18u = vld1_u8(src + i + 1);
68 
69             uint8x8_t l2 = l18u;
70             uint8x8_t l0 = vext_u8(l, l18u, 6);
71             int16x8_t l1x2 = vreinterpretq_s16_u16(vshll_n_u8(vext_u8(l, l18u, 7), 1));
72 
73             l = l18u;
74 
75             int16x8_t l02 = vreinterpretq_s16_u16(vaddl_u8(l2, l0));
76             int16x8_t ldx = vreinterpretq_s16_u16(vsubl_u8(l2, l0));
77             int16x8_t ldy = vaddq_s16(l02, l1x2);
78 
79             vst1q_s16(dstx + i, ldx);
80             vst1q_s16(dsty + i, ldy);
81         }
82 
83         //tail
84         if (lookRight == 0 || i != width)
85         {
86             uint8x8_t tail0 = vld1_u8(src + (width - 9));//can't get left 1 pixel another way if width==8*k+1
87             uint8x8_t tail2 = vtbl1_u8(vld1_u8(src + (width - 8 + lookRight)), vtmask);
88             uint8x8_t tail1 = vext_u8(vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(tail0), 8*6)), tail2, 7);
89 
90             int16x8_t tail02 = vreinterpretq_s16_u16(vaddl_u8(tail2, tail0));
91             int16x8_t tail1x2 = vreinterpretq_s16_u16(vshll_n_u8(tail1, 1));
92             int16x8_t taildx = vreinterpretq_s16_u16(vsubl_u8(tail2, tail0));
93             int16x8_t taildy = vqaddq_s16(tail02, tail1x2);
94 
95             vst1q_s16(dstx + (width - 8), taildx);
96             vst1q_s16(dsty + (width - 8), taildy);
97         }
98     }
99 
100     uint8x8_t vfmask;
101     uint8x8_t vtmask;
102     enum { offsetk = 1};
103     ptrdiff_t lookLeft;
104     ptrdiff_t lookRight;
105 };
106 
107 template <bool L2gradient>
ColFilter3x3Canny(const s16 * src0,const s16 * src1,const s16 * src2,s16 * dstx,s16 * dsty,s32 * mag,ptrdiff_t width)108 inline void ColFilter3x3Canny(const s16* src0, const s16* src1, const s16* src2, s16* dstx, s16* dsty, s32* mag, ptrdiff_t width)
109 {
110     ptrdiff_t j = 0;
111     for (; j <= width - 8; j += 8)
112     {
113         ColFilter3x3CannyL1Loop:
114         int16x8_t line0x = vld1q_s16(src0 + j);
115         int16x8_t line1x = vld1q_s16(src1 + j);
116         int16x8_t line2x = vld1q_s16(src2 + j);
117         int16x8_t line0y = vld1q_s16(src0 + j + width);
118         int16x8_t line2y = vld1q_s16(src2 + j + width);
119 
120         int16x8_t l02 = vaddq_s16(line0x, line2x);
121         int16x8_t l1x2 = vshlq_n_s16(line1x, 1);
122         int16x8_t dy = vsubq_s16(line2y, line0y);
123         int16x8_t dx = vaddq_s16(l1x2, l02);
124 
125         int16x8_t dya = vabsq_s16(dy);
126         int16x8_t dxa = vabsq_s16(dx);
127         int16x8_t norm = vaddq_s16(dya, dxa);
128 
129         int32x4_t normh = vmovl_s16(vget_high_s16(norm));
130         int32x4_t norml = vmovl_s16(vget_low_s16(norm));
131 
132         vst1q_s16(dsty + j, dy);
133         vst1q_s16(dstx + j, dx);
134         vst1q_s32(mag + j + 4, normh);
135         vst1q_s32(mag + j, norml);
136     }
137     if (j != width)
138     {
139         j = width - 8;
140         goto ColFilter3x3CannyL1Loop;
141     }
142 }
143 template <>
ColFilter3x3Canny(const s16 * src0,const s16 * src1,const s16 * src2,s16 * dstx,s16 * dsty,s32 * mag,ptrdiff_t width)144 inline void ColFilter3x3Canny<true>(const s16* src0, const s16* src1, const s16* src2, s16* dstx, s16* dsty, s32* mag, ptrdiff_t width)
145 {
146     ptrdiff_t j = 0;
147     for (; j <= width - 8; j += 8)
148     {
149         ColFilter3x3CannyL2Loop:
150         int16x8_t line0x = vld1q_s16(src0 + j);
151         int16x8_t line1x = vld1q_s16(src1 + j);
152         int16x8_t line2x = vld1q_s16(src2 + j);
153         int16x8_t line0y = vld1q_s16(src0 + j + width);
154         int16x8_t line2y = vld1q_s16(src2 + j + width);
155 
156         int16x8_t l02 = vaddq_s16(line0x, line2x);
157         int16x8_t l1x2 = vshlq_n_s16(line1x, 1);
158         int16x8_t dy = vsubq_s16(line2y, line0y);
159         int16x8_t dx = vaddq_s16(l1x2, l02);
160 
161         int32x4_t norml = vmull_s16(vget_low_s16(dx), vget_low_s16(dx));
162         int32x4_t normh = vmull_s16(vget_high_s16(dy), vget_high_s16(dy));
163 
164         norml = vmlal_s16(norml, vget_low_s16(dy), vget_low_s16(dy));
165         normh = vmlal_s16(normh, vget_high_s16(dx), vget_high_s16(dx));
166 
167         vst1q_s16(dsty + j, dy);
168         vst1q_s16(dstx + j, dx);
169         vst1q_s32(mag + j, norml);
170         vst1q_s32(mag + j + 4, normh);
171     }
172     if (j != width)
173     {
174         j = width - 8;
175         goto ColFilter3x3CannyL2Loop;
176     }
177 }
178 
179 template <bool L2gradient>
NormCanny(const ptrdiff_t colscn,s16 * _dx,s16 * _dy,s32 * _norm)180 inline void NormCanny(const ptrdiff_t colscn, s16* _dx, s16* _dy, s32* _norm)
181 {
182     ptrdiff_t j = 0;
183     if (colscn >= 8)
184     {
185         int16x8_t vx = vld1q_s16(_dx);
186         int16x8_t vy = vld1q_s16(_dy);
187         for (; j <= colscn - 16; j+=8)
188         {
189             internal::prefetch(_dx);
190             internal::prefetch(_dy);
191 
192             int16x8_t vx2 = vld1q_s16(_dx + j + 8);
193             int16x8_t vy2 = vld1q_s16(_dy + j + 8);
194 
195             int16x8_t vabsx = vabsq_s16(vx);
196             int16x8_t vabsy = vabsq_s16(vy);
197 
198             int16x8_t norm = vaddq_s16(vabsx, vabsy);
199 
200             int32x4_t normh = vmovl_s16(vget_high_s16(norm));
201             int32x4_t norml = vmovl_s16(vget_low_s16(norm));
202 
203             vst1q_s32(_norm + j + 4, normh);
204             vst1q_s32(_norm + j + 0, norml);
205 
206             vx = vx2;
207             vy = vy2;
208         }
209         int16x8_t vabsx = vabsq_s16(vx);
210         int16x8_t vabsy = vabsq_s16(vy);
211 
212         int16x8_t norm = vaddq_s16(vabsx, vabsy);
213 
214         int32x4_t normh = vmovl_s16(vget_high_s16(norm));
215         int32x4_t norml = vmovl_s16(vget_low_s16(norm));
216 
217         vst1q_s32(_norm + j + 4, normh);
218         vst1q_s32(_norm + j + 0, norml);
219     }
220     for (; j < colscn; j++)
221         _norm[j] = std::abs(s32(_dx[j])) + std::abs(s32(_dy[j]));
222 }
223 
224 template <>
NormCanny(const ptrdiff_t colscn,s16 * _dx,s16 * _dy,s32 * _norm)225 inline void NormCanny<true>(const ptrdiff_t colscn, s16* _dx, s16* _dy, s32* _norm)
226 {
227     ptrdiff_t j = 0;
228     if (colscn >= 8)
229     {
230         int16x8_t vx = vld1q_s16(_dx);
231         int16x8_t vy = vld1q_s16(_dy);
232 
233         for (; j <= colscn - 16; j+=8)
234         {
235             internal::prefetch(_dx);
236             internal::prefetch(_dy);
237 
238             int16x8_t vxnext = vld1q_s16(_dx + j + 8);
239             int16x8_t vynext = vld1q_s16(_dy + j + 8);
240 
241             int32x4_t norml = vmull_s16(vget_low_s16(vx), vget_low_s16(vx));
242             int32x4_t normh = vmull_s16(vget_high_s16(vy), vget_high_s16(vy));
243 
244             norml = vmlal_s16(norml, vget_low_s16(vy), vget_low_s16(vy));
245             normh = vmlal_s16(normh, vget_high_s16(vx), vget_high_s16(vx));
246 
247             vst1q_s32(_norm + j + 0, norml);
248             vst1q_s32(_norm + j + 4, normh);
249 
250             vx = vxnext;
251             vy = vynext;
252         }
253         int32x4_t norml = vmull_s16(vget_low_s16(vx), vget_low_s16(vx));
254         int32x4_t normh = vmull_s16(vget_high_s16(vy), vget_high_s16(vy));
255 
256         norml = vmlal_s16(norml, vget_low_s16(vy), vget_low_s16(vy));
257         normh = vmlal_s16(normh, vget_high_s16(vx), vget_high_s16(vx));
258 
259         vst1q_s32(_norm + j + 0, norml);
260         vst1q_s32(_norm + j + 4, normh);
261     }
262     for (; j < colscn; j++)
263         _norm[j] = s32(_dx[j])*_dx[j] + s32(_dy[j])*_dy[j];
264 }
265 
266 template <bool L2gradient>
prepareThresh(f64 low_thresh,f64 high_thresh,s32 & low,s32 & high)267 inline void prepareThresh(f64 low_thresh, f64 high_thresh,
268                           s32 &low, s32 &high)
269 {
270     if (low_thresh > high_thresh)
271         std::swap(low_thresh, high_thresh);
272 #if defined __GNUC__
273     low = (s32)low_thresh;
274     high = (s32)high_thresh;
275     low -= (low > low_thresh);
276     high -= (high > high_thresh);
277 #else
278     low = internal::round(low_thresh);
279     high = internal::round(high_thresh);
280     f32 ldiff = (f32)(low_thresh - low);
281     f32 hdiff = (f32)(high_thresh - high);
282     low -= (ldiff < 0);
283     high -= (hdiff < 0);
284 #endif
285 }
286 template <>
prepareThresh(f64 low_thresh,f64 high_thresh,s32 & low,s32 & high)287 inline void prepareThresh<true>(f64 low_thresh, f64 high_thresh,
288                                 s32 &low, s32 &high)
289 {
290     if (low_thresh > high_thresh)
291         std::swap(low_thresh, high_thresh);
292     if (low_thresh > 0) low_thresh *= low_thresh;
293     if (high_thresh > 0) high_thresh *= high_thresh;
294 #if defined __GNUC__
295     low = (s32)low_thresh;
296     high = (s32)high_thresh;
297     low -= (low > low_thresh);
298     high -= (high > high_thresh);
299 #else
300     low = internal::round(low_thresh);
301     high = internal::round(high_thresh);
302     f32 ldiff = (f32)(low_thresh - low);
303     f32 hdiff = (f32)(high_thresh - high);
304     low -= (ldiff < 0);
305     high -= (hdiff < 0);
306 #endif
307 }
308 
309 template <bool L2gradient, bool externalSobel>
310 struct _normEstimator
311 {
312     ptrdiff_t magstep;
313     ptrdiff_t dxOffset;
314     ptrdiff_t dyOffset;
315     ptrdiff_t shxOffset;
316     ptrdiff_t shyOffset;
317     std::vector<u8> buffer;
318     const ptrdiff_t offsetk;
319     ptrdiff_t borderyt, borderyb;
320     RowFilter3x3Canny sobelRow;
321 
_normEstimatorCAROTENE_NS::__anon266eb4810111::_normEstimator322     inline _normEstimator(const Size2D &size, s32, Margin borderMargin,
323                           ptrdiff_t &mapstep, s32** mag_buf, u8* &map):
324                           offsetk(1),
325                           sobelRow(std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.left),
326                                    std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.right))
327     {
328         mapstep = size.width + 2;
329         magstep = size.width + 2 + size.width * (4 * sizeof(s16)/sizeof(s32));
330         dxOffset = mapstep * sizeof(s32)/sizeof(s16);
331         dyOffset = dxOffset + size.width * 1;
332         shxOffset = dxOffset + size.width * 2;
333         shyOffset = dxOffset + size.width * 3;
334         buffer.resize( (size.width+2)*(size.height+2) + magstep*3*sizeof(s32) );
335         mag_buf[0] = (s32*)&buffer[0];
336         mag_buf[1] = mag_buf[0] + magstep;
337         mag_buf[2] = mag_buf[1] + magstep;
338         memset(mag_buf[0], 0, mapstep * sizeof(s32));
339 
340         map = (u8*)(mag_buf[2] + magstep);
341         memset(map, 1, mapstep);
342         memset(map + mapstep*(size.height + 1), 1, mapstep);
343         borderyt = std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.top);
344         borderyb = std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.bottom);
345     }
firstRowCAROTENE_NS::__anon266eb4810111::_normEstimator346     inline void firstRow(const Size2D &size, s32,
347                          const u8 *srcBase, ptrdiff_t srcStride,
348                          s16*, ptrdiff_t,
349                          s16*, ptrdiff_t,
350                          s32** mag_buf)
351     {
352         //sobelH row #0
353         const u8* _src = internal::getRowPtr(srcBase, srcStride, 0);
354         sobelRow(_src, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[0]) + shyOffset, size.width);
355         //sobelH row #1
356         _src = internal::getRowPtr(srcBase, srcStride, 1);
357         sobelRow(_src, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[1]) + shyOffset, size.width);
358 
359         mag_buf[1][0] = mag_buf[1][size.width+1] = 0;
360         if (borderyt == 0)
361         {
362             //sobelH row #-1
363             _src = internal::getRowPtr(srcBase, srcStride, -1);
364             sobelRow(_src, ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[2]) + shyOffset, size.width);
365 
366             ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset,
367                                            ((s16*)mag_buf[1]) + dxOffset,  ((s16*)mag_buf[1]) + dyOffset, mag_buf[1] + 1, size.width);
368         }
369         else
370         {
371             ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset,
372                                            ((s16*)mag_buf[1]) + dxOffset,  ((s16*)mag_buf[1]) + dyOffset, mag_buf[1] + 1, size.width);
373         }
374     }
nextRowCAROTENE_NS::__anon266eb4810111::_normEstimator375     inline void nextRow(const Size2D &size, s32,
376                         const u8 *srcBase, ptrdiff_t srcStride,
377                         s16*, ptrdiff_t,
378                         s16*, ptrdiff_t,
379                         const ptrdiff_t &mapstep, s32** mag_buf,
380                         size_t i, const s16* &_x, const s16* &_y)
381     {
382         mag_buf[2][0] = mag_buf[2][size.width+1] = 0;
383         if (i < size.height - borderyb)
384         {
385             const u8* _src = internal::getRowPtr(srcBase, srcStride, i+1);
386             //sobelH row #i+1
387             sobelRow(_src, ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[2]) + shyOffset, size.width);
388 
389             ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[2]) + shxOffset,
390                                            ((s16*)mag_buf[2]) + dxOffset,  ((s16*)mag_buf[2]) + dyOffset, mag_buf[2] + 1, size.width);
391         }
392         else if (i < size.height)
393         {
394             ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset,
395                                            ((s16*)mag_buf[2]) + dxOffset,  ((s16*)mag_buf[2]) + dyOffset, mag_buf[2] + 1, size.width);
396         }
397         else
398             memset(mag_buf[2], 0, mapstep*sizeof(s32));
399         _x = ((s16*)mag_buf[1]) + dxOffset;
400         _y = ((s16*)mag_buf[1]) + dyOffset;
401     }
402 };
403 template <bool L2gradient>
404 struct _normEstimator<L2gradient, true>
405 {
406     std::vector<u8> buffer;
407 
_normEstimatorCAROTENE_NS::__anon266eb4810111::_normEstimator408     inline _normEstimator(const Size2D &size, s32 cn, Margin,
409                           ptrdiff_t &mapstep, s32** mag_buf, u8* &map)
410     {
411         mapstep = size.width + 2;
412         buffer.resize( (size.width+2)*(size.height+2) + cn*mapstep*3*sizeof(s32) );
413         mag_buf[0] = (s32*)&buffer[0];
414         mag_buf[1] = mag_buf[0] + mapstep*cn;
415         mag_buf[2] = mag_buf[1] + mapstep*cn;
416         memset(mag_buf[0], 0, /* cn* */mapstep * sizeof(s32));
417 
418         map = (u8*)(mag_buf[2] + mapstep*cn);
419         memset(map, 1, mapstep);
420         memset(map + mapstep*(size.height + 1), 1, mapstep);
421     }
firstRowCAROTENE_NS::__anon266eb4810111::_normEstimator422     inline void firstRow(const Size2D &size, s32 cn,
423                          const u8 *, ptrdiff_t,
424                          s16* dxBase, ptrdiff_t dxStride,
425                          s16* dyBase, ptrdiff_t dyStride,
426                          s32** mag_buf)
427     {
428         s32* _norm = mag_buf[1] + 1;
429 
430         s16* _dx = internal::getRowPtr(dxBase, dxStride, 0);
431         s16* _dy = internal::getRowPtr(dyBase, dyStride, 0);
432 
433         NormCanny<L2gradient>(size.width*cn, _dx, _dy, _norm);
434 
435         if(cn > 1)
436         {
437             for(size_t j = 0, jn = 0; j < size.width; ++j, jn += cn)
438             {
439                 size_t maxIdx = jn;
440                 for(s32 k = 1; k < cn; ++k)
441                     if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
442                 _norm[j] = _norm[maxIdx];
443                 _dx[j] = _dx[maxIdx];
444                 _dy[j] = _dy[maxIdx];
445             }
446         }
447 
448         _norm[-1] = _norm[size.width] = 0;
449     }
nextRowCAROTENE_NS::__anon266eb4810111::_normEstimator450     inline void nextRow(const Size2D &size, s32 cn,
451                         const u8 *, ptrdiff_t,
452                         s16* dxBase, ptrdiff_t dxStride,
453                         s16* dyBase, ptrdiff_t dyStride,
454                         const ptrdiff_t &mapstep, s32** mag_buf,
455                         size_t i, const s16* &_x, const s16* &_y)
456     {
457         s32* _norm = mag_buf[(i > 0) + 1] + 1;
458         if (i < size.height)
459         {
460             s16* _dx = internal::getRowPtr(dxBase, dxStride, i);
461             s16* _dy = internal::getRowPtr(dyBase, dyStride, i);
462 
463             NormCanny<L2gradient>(size.width*cn, _dx, _dy, _norm);
464 
465             if(cn > 1)
466             {
467                 for(size_t j = 0, jn = 0; j < size.width; ++j, jn += cn)
468                 {
469                     size_t maxIdx = jn;
470                     for(s32 k = 1; k < cn; ++k)
471                         if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
472                     _norm[j] = _norm[maxIdx];
473                     _dx[j] = _dx[maxIdx];
474                     _dy[j] = _dy[maxIdx];
475                 }
476             }
477 
478             _norm[-1] = _norm[size.width] = 0;
479         }
480         else
481             memset(_norm-1, 0, /* cn* */mapstep*sizeof(s32));
482 
483         _x = internal::getRowPtr(dxBase, dxStride, i-1);
484         _y = internal::getRowPtr(dyBase, dyStride, i-1);
485     }
486 };
487 
488 template <bool L2gradient, bool externalSobel>
Canny3x3(const Size2D & size,s32 cn,const u8 * srcBase,ptrdiff_t srcStride,u8 * dstBase,ptrdiff_t dstStride,s16 * dxBase,ptrdiff_t dxStride,s16 * dyBase,ptrdiff_t dyStride,f64 low_thresh,f64 high_thresh,Margin borderMargin)489 inline void Canny3x3(const Size2D &size, s32 cn,
490                      const u8 * srcBase, ptrdiff_t srcStride,
491                      u8 * dstBase, ptrdiff_t dstStride,
492                      s16 * dxBase, ptrdiff_t dxStride,
493                      s16 * dyBase, ptrdiff_t dyStride,
494                      f64 low_thresh, f64 high_thresh,
495                      Margin borderMargin)
496 {
497     s32 low, high;
498     prepareThresh<L2gradient>(low_thresh, high_thresh, low, high);
499 
500     ptrdiff_t mapstep;
501     s32* mag_buf[3];
502     u8* map;
503     _normEstimator<L2gradient, externalSobel> normEstimator(size, cn, borderMargin, mapstep, mag_buf, map);
504 
505     size_t maxsize = std::max<size_t>( 1u << 10, size.width * size.height / 10 );
506     std::vector<u8*> stack( maxsize );
507     u8 **stack_top = &stack[0];
508     u8 **stack_bottom = &stack[0];
509 
510     /* sector numbers
511        (Top-Left Origin)
512 
513         1   2   3
514          *  *  *
515           * * *
516         0*******0
517           * * *
518          *  *  *
519         3   2   1
520     */
521 
522     #define CANNY_PUSH(d)    *(d) = u8(2), *stack_top++ = (d)
523     #define CANNY_POP(d)     (d) = *--stack_top
524 
525     //i == 0
526     normEstimator.firstRow(size, cn, srcBase, srcStride, dxBase, dxStride, dyBase, dyStride, mag_buf);
527     // calculate magnitude and angle of gradient, perform non-maxima supression.
528     // fill the map with one of the following values:
529     //   0 - the pixel might belong to an edge
530     //   1 - the pixel can not belong to an edge
531     //   2 - the pixel does belong to an edge
532     for (size_t i = 1; i <= size.height; i++)
533     {
534         const s16 *_x, *_y;
535         normEstimator.nextRow(size, cn, srcBase, srcStride, dxBase, dxStride, dyBase, dyStride, mapstep, mag_buf, i, _x, _y);
536 
537         u8* _map = map + mapstep*i + 1;
538         _map[-1] = _map[size.width] = 1;
539 
540         s32* _mag = mag_buf[1] + 1; // take the central row
541         ptrdiff_t magstep1 = mag_buf[2] - mag_buf[1];
542         ptrdiff_t magstep2 = mag_buf[0] - mag_buf[1];
543 
544         if ((stack_top - stack_bottom) + size.width > maxsize)
545         {
546             ptrdiff_t sz = (ptrdiff_t)(stack_top - stack_bottom);
547             maxsize = maxsize * 3/2;
548             stack.resize(maxsize);
549             stack_bottom = &stack[0];
550             stack_top = stack_bottom + sz;
551         }
552 
553         s32 prev_flag = 0;
554         for (ptrdiff_t j = 0; j < (ptrdiff_t)size.width; j++)
555         {
556             #define CANNY_SHIFT 15
557             const s32 TG22 = (s32)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
558 
559             s32 m = _mag[j];
560 
561             if (m > low)
562             {
563                 s32 xs = _x[j];
564                 s32 ys = _y[j];
565                 s32 x = abs(xs);
566                 s32 y = abs(ys) << CANNY_SHIFT;
567 
568                 s32 tg22x = x * TG22;
569 
570                 if (y < tg22x)
571                 {
572                     if (m > _mag[j-1] && m >= _mag[j+1]) goto __push;
573                 }
574                 else
575                 {
576                     s32 tg67x = tg22x + (x << (CANNY_SHIFT+1));
577                     if (y > tg67x)
578                     {
579                         if (m > _mag[j+magstep2] && m >= _mag[j+magstep1]) goto __push;
580                     }
581                     else
582                     {
583                         s32 s = (xs ^ ys) < 0 ? -1 : 1;
584                         if(m > _mag[j+magstep2-s] && m > _mag[j+magstep1+s]) goto __push;
585                     }
586                 }
587             }
588             prev_flag = 0;
589             _map[j] = u8(1);
590             continue;
591             __push:
592             if (!prev_flag && m > high && _map[j-mapstep] != 2)
593             {
594                 CANNY_PUSH(_map + j);
595                 prev_flag = 1;
596             }
597             else
598                 _map[j] = 0;
599         }
600 
601         // scroll the ring buffer
602         _mag = mag_buf[0];
603         mag_buf[0] = mag_buf[1];
604         mag_buf[1] = mag_buf[2];
605         mag_buf[2] = _mag;
606     }
607 
608     // now track the edges (hysteresis thresholding)
609     while (stack_top > stack_bottom)
610     {
611         u8* m;
612         if ((size_t)(stack_top - stack_bottom) + 8u > maxsize)
613         {
614             ptrdiff_t sz = (ptrdiff_t)(stack_top - stack_bottom);
615             maxsize = maxsize * 3/2;
616             stack.resize(maxsize);
617             stack_bottom = &stack[0];
618             stack_top = stack_bottom + sz;
619         }
620 
621         CANNY_POP(m);
622 
623         if (!m[-1])         CANNY_PUSH(m - 1);
624         if (!m[1])          CANNY_PUSH(m + 1);
625         if (!m[-mapstep-1]) CANNY_PUSH(m - mapstep - 1);
626         if (!m[-mapstep])   CANNY_PUSH(m - mapstep);
627         if (!m[-mapstep+1]) CANNY_PUSH(m - mapstep + 1);
628         if (!m[mapstep-1])  CANNY_PUSH(m + mapstep - 1);
629         if (!m[mapstep])    CANNY_PUSH(m + mapstep);
630         if (!m[mapstep+1])  CANNY_PUSH(m + mapstep + 1);
631     }
632 
633     // the final pass, form the final image
634     uint8x16_t v2 = vmovq_n_u8(2);
635     const u8* ptrmap = map + mapstep + 1;
636     for (size_t i = 0; i < size.height; i++, ptrmap += mapstep)
637     {
638         u8* _dst = internal::getRowPtr(dstBase, dstStride, i);
639         ptrdiff_t j = 0;
640         for (; j < (ptrdiff_t)size.width - 16; j += 16)
641         {
642             internal::prefetch(ptrmap);
643             uint8x16_t vmap = vld1q_u8(ptrmap + j);
644             uint8x16_t vdst = vceqq_u8(vmap, v2);
645             vst1q_u8(_dst+j, vdst);
646         }
647         for (; j < (ptrdiff_t)size.width; j++)
648             _dst[j] = (u8)-(ptrmap[j] >> 1);
649     }
650 }
651 
652 } // namespace
653 #endif
654 
isCanny3x3Supported(const Size2D & size)655 bool isCanny3x3Supported(const Size2D &size)
656 {
657     return isSupportedConfiguration() &&
658            size.height >= 2 && size.width >= 9;
659 }
660 
Canny3x3L1(const Size2D & size,const u8 * srcBase,ptrdiff_t srcStride,u8 * dstBase,ptrdiff_t dstStride,f64 low_thresh,f64 high_thresh,Margin borderMargin)661 void Canny3x3L1(const Size2D &size,
662                 const u8 * srcBase, ptrdiff_t srcStride,
663                 u8 * dstBase, ptrdiff_t dstStride,
664                 f64 low_thresh, f64 high_thresh,
665                 Margin borderMargin)
666 {
667     internal::assertSupportedConfiguration(isCanny3x3Supported(size));
668 #ifdef CAROTENE_NEON
669     Canny3x3<false, false>(size, 1,
670                            srcBase, srcStride,
671                            dstBase, dstStride,
672                            NULL, 0,
673                            NULL, 0,
674                            low_thresh, high_thresh,
675                            borderMargin);
676 #else
677     (void)size;
678     (void)srcBase;
679     (void)srcStride;
680     (void)dstBase;
681     (void)dstStride;
682     (void)low_thresh;
683     (void)high_thresh;
684     (void)borderMargin;
685 #endif
686 }
687 
Canny3x3L2(const Size2D & size,const u8 * srcBase,ptrdiff_t srcStride,u8 * dstBase,ptrdiff_t dstStride,f64 low_thresh,f64 high_thresh,Margin borderMargin)688 void Canny3x3L2(const Size2D &size,
689                 const u8 * srcBase, ptrdiff_t srcStride,
690                 u8 * dstBase, ptrdiff_t dstStride,
691                 f64 low_thresh, f64 high_thresh,
692                 Margin borderMargin)
693 {
694     internal::assertSupportedConfiguration(isCanny3x3Supported(size));
695 #ifdef CAROTENE_NEON
696     Canny3x3<true, false>(size, 1,
697                           srcBase, srcStride,
698                           dstBase, dstStride,
699                           NULL, 0,
700                           NULL, 0,
701                           low_thresh, high_thresh,
702                           borderMargin);
703 #else
704     (void)size;
705     (void)srcBase;
706     (void)srcStride;
707     (void)dstBase;
708     (void)dstStride;
709     (void)low_thresh;
710     (void)high_thresh;
711     (void)borderMargin;
712 #endif
713 }
714 
Canny3x3L1(const Size2D & size,s32 cn,s16 * dxBase,ptrdiff_t dxStride,s16 * dyBase,ptrdiff_t dyStride,u8 * dstBase,ptrdiff_t dstStride,f64 low_thresh,f64 high_thresh)715 void Canny3x3L1(const Size2D &size, s32 cn,
716                      s16 * dxBase, ptrdiff_t dxStride,
717                      s16 * dyBase, ptrdiff_t dyStride,
718                      u8 * dstBase, ptrdiff_t dstStride,
719                      f64 low_thresh, f64 high_thresh)
720 {
721     internal::assertSupportedConfiguration();
722 #ifdef CAROTENE_NEON
723     Canny3x3<false, true>(size, cn,
724                           NULL, 0,
725                           dstBase, dstStride,
726                           dxBase, dxStride,
727                           dyBase, dyStride,
728                           low_thresh, high_thresh,
729                           Margin());
730 #else
731     (void)size;
732     (void)cn;
733     (void)dstBase;
734     (void)dstStride;
735     (void)dxBase;
736     (void)dxStride;
737     (void)dyBase;
738     (void)dyStride;
739     (void)low_thresh;
740     (void)high_thresh;
741 #endif
742 }
743 
Canny3x3L2(const Size2D & size,s32 cn,s16 * dxBase,ptrdiff_t dxStride,s16 * dyBase,ptrdiff_t dyStride,u8 * dstBase,ptrdiff_t dstStride,f64 low_thresh,f64 high_thresh)744 void Canny3x3L2(const Size2D &size, s32 cn,
745                      s16 * dxBase, ptrdiff_t dxStride,
746                      s16 * dyBase, ptrdiff_t dyStride,
747                      u8 * dstBase, ptrdiff_t dstStride,
748                      f64 low_thresh, f64 high_thresh)
749 {
750     internal::assertSupportedConfiguration();
751 #ifdef CAROTENE_NEON
752     Canny3x3<true, true>(size, cn,
753                          NULL, 0,
754                          dstBase, dstStride,
755                          dxBase, dxStride,
756                          dyBase, dyStride,
757                          low_thresh, high_thresh,
758                          Margin());
759 #else
760     (void)size;
761     (void)cn;
762     (void)dstBase;
763     (void)dstStride;
764     (void)dxBase;
765     (void)dxStride;
766     (void)dyBase;
767     (void)dyStride;
768     (void)low_thresh;
769     (void)high_thresh;
770 #endif
771 }
772 
773 } // namespace CAROTENE_NS
774