• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                        Intel License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
14 // Copyright (C) 2014, Itseez Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of Intel Corporation may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #include "precomp.hpp"
44 #include "opencl_kernels_imgproc.hpp"
45 
46 
47 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
48 #define USE_IPP_CANNY 1
49 #else
50 #undef USE_IPP_CANNY
51 #endif
52 
53 
54 namespace cv
55 {
56 
57 #ifdef USE_IPP_CANNY
ippCanny(const Mat & _src,Mat & _dst,float low,float high)58 static bool ippCanny(const Mat& _src, Mat& _dst, float low,  float high)
59 {
60     int size = 0, size1 = 0;
61     IppiSize roi = { _src.cols, _src.rows };
62 
63     if (ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size) < 0)
64         return false;
65     if (ippiFilterSobelHorizGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size1) < 0)
66         return false;
67     size = std::max(size, size1);
68 
69     if (ippiCannyGetSize(roi, &size1) < 0)
70         return false;
71     size = std::max(size, size1);
72 
73     AutoBuffer<uchar> buf(size + 64);
74     uchar* buffer = alignPtr((uchar*)buf, 32);
75 
76     Mat _dx(_src.rows, _src.cols, CV_16S);
77     if( ippiFilterSobelNegVertBorder_8u16s_C1R(_src.ptr(), (int)_src.step,
78                     _dx.ptr<short>(), (int)_dx.step, roi,
79                     ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 )
80         return false;
81 
82     Mat _dy(_src.rows, _src.cols, CV_16S);
83     if( ippiFilterSobelHorizBorder_8u16s_C1R(_src.ptr(), (int)_src.step,
84                     _dy.ptr<short>(), (int)_dy.step, roi,
85                     ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 )
86         return false;
87 
88     if( ippiCanny_16s8u_C1R(_dx.ptr<short>(), (int)_dx.step,
89                                _dy.ptr<short>(), (int)_dy.step,
90                               _dst.ptr(), (int)_dst.step, roi, low, high, buffer) < 0 )
91         return false;
92     return true;
93 }
94 #endif
95 
96 #ifdef HAVE_OPENCL
97 
ocl_Canny(InputArray _src,OutputArray _dst,float low_thresh,float high_thresh,int aperture_size,bool L2gradient,int cn,const Size & size)98 static bool ocl_Canny(InputArray _src, OutputArray _dst, float low_thresh, float high_thresh,
99                       int aperture_size, bool L2gradient, int cn, const Size & size)
100 {
101     UMat map;
102 
103     const ocl::Device &dev = ocl::Device::getDefault();
104     int max_wg_size = (int)dev.maxWorkGroupSize();
105 
106     int lSizeX = 32;
107     int lSizeY = max_wg_size / 32;
108 
109     if (lSizeY == 0)
110     {
111         lSizeX = 16;
112         lSizeY = max_wg_size / 16;
113     }
114     if (lSizeY == 0)
115     {
116         lSizeY = 1;
117     }
118 
119     if (L2gradient)
120     {
121         low_thresh = std::min(32767.0f, low_thresh);
122         high_thresh = std::min(32767.0f, high_thresh);
123 
124         if (low_thresh > 0)
125             low_thresh *= low_thresh;
126         if (high_thresh > 0)
127             high_thresh *= high_thresh;
128     }
129     int low = cvFloor(low_thresh), high = cvFloor(high_thresh);
130 
131     if (aperture_size == 3 && !_src.isSubmatrix())
132     {
133         /*
134             stage1_with_sobel:
135                 Sobel operator
136                 Calc magnitudes
137                 Non maxima suppression
138                 Double thresholding
139         */
140         char cvt[40];
141         ocl::Kernel with_sobel("stage1_with_sobel", ocl::imgproc::canny_oclsrc,
142                                format("-D WITH_SOBEL -D cn=%d -D TYPE=%s -D convert_floatN=%s -D floatN=%s -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
143                                       cn, ocl::memopTypeToStr(_src.depth()),
144                                       ocl::convertTypeStr(_src.depth(), CV_32F, cn, cvt),
145                                       ocl::typeToStr(CV_MAKE_TYPE(CV_32F, cn)),
146                                       lSizeX, lSizeY,
147                                       L2gradient ? " -D L2GRAD" : ""));
148         if (with_sobel.empty())
149             return false;
150 
151         UMat src = _src.getUMat();
152         map.create(size, CV_32S);
153         with_sobel.args(ocl::KernelArg::ReadOnly(src),
154                         ocl::KernelArg::WriteOnlyNoSize(map),
155                         (float) low, (float) high);
156 
157         size_t globalsize[2] = { size.width, size.height },
158                 localsize[2] = { lSizeX, lSizeY };
159 
160         if (!with_sobel.run(2, globalsize, localsize, false))
161             return false;
162     }
163     else
164     {
165         /*
166             stage1_without_sobel:
167                 Calc magnitudes
168                 Non maxima suppression
169                 Double thresholding
170         */
171         UMat dx, dy;
172         Sobel(_src, dx, CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
173         Sobel(_src, dy, CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
174 
175         ocl::Kernel without_sobel("stage1_without_sobel", ocl::imgproc::canny_oclsrc,
176                                     format("-D WITHOUT_SOBEL -D cn=%d -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
177                                            cn, lSizeX, lSizeY, L2gradient ? " -D L2GRAD" : ""));
178         if (without_sobel.empty())
179             return false;
180 
181         map.create(size, CV_32S);
182         without_sobel.args(ocl::KernelArg::ReadOnlyNoSize(dx), ocl::KernelArg::ReadOnlyNoSize(dy),
183                            ocl::KernelArg::WriteOnly(map),
184                            low, high);
185 
186         size_t globalsize[2] = { size.width, size.height },
187                 localsize[2] = { lSizeX, lSizeY };
188 
189         if (!without_sobel.run(2, globalsize, localsize, false))
190             return false;
191     }
192 
193     int PIX_PER_WI = 8;
194     /*
195         stage2:
196             hysteresis (add weak edges if they are connected with strong edges)
197     */
198 
199     int sizey = lSizeY / PIX_PER_WI;
200     if (sizey == 0)
201         sizey = 1;
202 
203     size_t globalsize[2] = { size.width, (size.height + PIX_PER_WI - 1) / PIX_PER_WI }, localsize[2] = { lSizeX, sizey };
204 
205     ocl::Kernel edgesHysteresis("stage2_hysteresis", ocl::imgproc::canny_oclsrc,
206                                 format("-D STAGE2 -D PIX_PER_WI=%d -D LOCAL_X=%d -D LOCAL_Y=%d",
207                                 PIX_PER_WI, lSizeX, sizey));
208 
209     if (edgesHysteresis.empty())
210         return false;
211 
212     edgesHysteresis.args(ocl::KernelArg::ReadWrite(map));
213     if (!edgesHysteresis.run(2, globalsize, localsize, false))
214         return false;
215 
216     // get edges
217 
218     ocl::Kernel getEdgesKernel("getEdges", ocl::imgproc::canny_oclsrc,
219                                 format("-D GET_EDGES -D PIX_PER_WI=%d", PIX_PER_WI));
220     if (getEdgesKernel.empty())
221         return false;
222 
223     _dst.create(size, CV_8UC1);
224     UMat dst = _dst.getUMat();
225 
226     getEdgesKernel.args(ocl::KernelArg::ReadOnly(map), ocl::KernelArg::WriteOnlyNoSize(dst));
227 
228     return getEdgesKernel.run(2, globalsize, NULL, false);
229 }
230 
231 #endif
232 
233 #ifdef HAVE_TBB
234 
235 // Queue with peaks that will processed serially.
236 static tbb::concurrent_queue<uchar*> borderPeaks;
237 
238 class tbbCanny
239 {
240 public:
tbbCanny(const Range _boundaries,const Mat & _src,uchar * _map,int _low,int _high,int _aperture_size,bool _L2gradient)241     tbbCanny(const Range _boundaries, const Mat& _src, uchar* _map, int _low,
242             int _high, int _aperture_size, bool _L2gradient)
243         : boundaries(_boundaries), src(_src), map(_map), low(_low), high(_high),
244           aperture_size(_aperture_size), L2gradient(_L2gradient)
245     {}
246 
247     // This parallel version of Canny algorithm splits the src image in threadsNumber horizontal slices.
248     // The first row of each slice contains the last row of the previous slice and
249     // the last row of each slice contains the first row of the next slice
250     // so that each slice is independent and no mutexes are required.
operator ()() const251     void operator()() const
252     {
253 #if CV_SSE2
254         bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
255 #endif
256 
257         const int type = src.type(), cn = CV_MAT_CN(type);
258 
259         Mat dx, dy;
260 
261         ptrdiff_t mapstep = src.cols + 2;
262 
263         // In sobel transform we calculate ksize2 extra lines for the first and last rows of each slice
264         // because IPPDerivSobel expects only isolated ROIs, in contrast with the opencv version which
265         // uses the pixels outside of the ROI to form a border.
266         uchar ksize2 = aperture_size / 2;
267 
268         if (boundaries.start == 0 && boundaries.end == src.rows)
269         {
270             Mat tempdx(boundaries.end - boundaries.start + 2, src.cols, CV_16SC(cn));
271             Mat tempdy(boundaries.end - boundaries.start + 2, src.cols, CV_16SC(cn));
272 
273             memset(tempdx.ptr<short>(0), 0, cn * src.cols*sizeof(short));
274             memset(tempdy.ptr<short>(0), 0, cn * src.cols*sizeof(short));
275             memset(tempdx.ptr<short>(tempdx.rows - 1), 0, cn * src.cols*sizeof(short));
276             memset(tempdy.ptr<short>(tempdy.rows - 1), 0, cn * src.cols*sizeof(short));
277 
278             Sobel(src, tempdx.rowRange(1, tempdx.rows - 1), CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
279             Sobel(src, tempdy.rowRange(1, tempdy.rows - 1), CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
280 
281             dx = tempdx;
282             dy = tempdy;
283         }
284         else if (boundaries.start == 0)
285         {
286             Mat tempdx(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn));
287             Mat tempdy(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn));
288 
289             memset(tempdx.ptr<short>(0), 0, cn * src.cols*sizeof(short));
290             memset(tempdy.ptr<short>(0), 0, cn * src.cols*sizeof(short));
291 
292             Sobel(src.rowRange(boundaries.start, boundaries.end + 1 + ksize2), tempdx.rowRange(1, tempdx.rows),
293                     CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
294             Sobel(src.rowRange(boundaries.start, boundaries.end + 1 + ksize2), tempdy.rowRange(1, tempdy.rows),
295                     CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
296 
297             dx = tempdx.rowRange(0, tempdx.rows - ksize2);
298             dy = tempdy.rowRange(0, tempdy.rows - ksize2);
299         }
300         else if (boundaries.end == src.rows)
301         {
302             Mat tempdx(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn));
303             Mat tempdy(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn));
304 
305             memset(tempdx.ptr<short>(tempdx.rows - 1), 0, cn * src.cols*sizeof(short));
306             memset(tempdy.ptr<short>(tempdy.rows - 1), 0, cn * src.cols*sizeof(short));
307 
308             Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end), tempdx.rowRange(0, tempdx.rows - 1),
309                     CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
310             Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end), tempdy.rowRange(0, tempdy.rows - 1),
311                     CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
312 
313             dx = tempdx.rowRange(ksize2, tempdx.rows);
314             dy = tempdy.rowRange(ksize2, tempdy.rows);
315         }
316         else
317         {
318             Mat tempdx(boundaries.end - boundaries.start + 2 + 2*ksize2, src.cols, CV_16SC(cn));
319             Mat tempdy(boundaries.end - boundaries.start + 2 + 2*ksize2, src.cols, CV_16SC(cn));
320 
321             Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end + 1 + ksize2), tempdx,
322                     CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
323             Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end + 1 + ksize2), tempdy,
324                     CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
325 
326             dx = tempdx.rowRange(ksize2, tempdx.rows - ksize2);
327             dy = tempdy.rowRange(ksize2, tempdy.rows - ksize2);
328         }
329 
330         int maxsize = std::max(1 << 10, src.cols * (boundaries.end - boundaries.start) / 10);
331         std::vector<uchar*> stack(maxsize);
332         uchar **stack_top = &stack[0];
333         uchar **stack_bottom = &stack[0];
334 
335         AutoBuffer<uchar> buffer(cn * mapstep * 3 * sizeof(int));
336 
337         int* mag_buf[3];
338         mag_buf[0] = (int*)(uchar*)buffer;
339         mag_buf[1] = mag_buf[0] + mapstep*cn;
340         mag_buf[2] = mag_buf[1] + mapstep*cn;
341 
342         // calculate magnitude and angle of gradient, perform non-maxima suppression.
343         // fill the map with one of the following values:
344         //   0 - the pixel might belong to an edge
345         //   1 - the pixel can not belong to an edge
346         //   2 - the pixel does belong to an edge
347         for (int i = boundaries.start - 1; i <= boundaries.end; i++)
348         {
349             int* _norm = mag_buf[(i > boundaries.start) - (i == boundaries.start - 1) + 1] + 1;
350 
351             short* _dx = dx.ptr<short>(i - boundaries.start + 1);
352             short* _dy = dy.ptr<short>(i - boundaries.start + 1);
353 
354             if (!L2gradient)
355             {
356                 int j = 0, width = src.cols * cn;
357 #if CV_SSE2
358                 if (haveSSE2)
359                 {
360                     __m128i v_zero = _mm_setzero_si128();
361                     for ( ; j <= width - 8; j += 8)
362                     {
363                         __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j));
364                         __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j));
365                         v_dx = _mm_max_epi16(v_dx, _mm_sub_epi16(v_zero, v_dx));
366                         v_dy = _mm_max_epi16(v_dy, _mm_sub_epi16(v_zero, v_dy));
367 
368                         __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx, v_zero), _mm_unpacklo_epi16(v_dy, v_zero));
369                         _mm_storeu_si128((__m128i *)(_norm + j), v_norm);
370 
371                         v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx, v_zero), _mm_unpackhi_epi16(v_dy, v_zero));
372                         _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm);
373                     }
374                 }
375 #elif CV_NEON
376                 for ( ; j <= width - 8; j += 8)
377                 {
378                     int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
379                     vst1q_s32(_norm + j, vaddq_s32(vabsq_s32(vmovl_s16(vget_low_s16(v_dx))),
380                                                    vabsq_s32(vmovl_s16(vget_low_s16(v_dy)))));
381                     vst1q_s32(_norm + j + 4, vaddq_s32(vabsq_s32(vmovl_s16(vget_high_s16(v_dx))),
382                                                        vabsq_s32(vmovl_s16(vget_high_s16(v_dy)))));
383                 }
384 #endif
385                 for ( ; j < width; ++j)
386                     _norm[j] = std::abs(int(_dx[j])) + std::abs(int(_dy[j]));
387             }
388             else
389             {
390                 int j = 0, width = src.cols * cn;
391 #if CV_SSE2
392                 if (haveSSE2)
393                 {
394                     for ( ; j <= width - 8; j += 8)
395                     {
396                         __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j));
397                         __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j));
398 
399                         __m128i v_dx_ml = _mm_mullo_epi16(v_dx, v_dx), v_dx_mh = _mm_mulhi_epi16(v_dx, v_dx);
400                         __m128i v_dy_ml = _mm_mullo_epi16(v_dy, v_dy), v_dy_mh = _mm_mulhi_epi16(v_dy, v_dy);
401 
402                         __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx_ml, v_dx_mh), _mm_unpacklo_epi16(v_dy_ml, v_dy_mh));
403                         _mm_storeu_si128((__m128i *)(_norm + j), v_norm);
404 
405                         v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx_ml, v_dx_mh), _mm_unpackhi_epi16(v_dy_ml, v_dy_mh));
406                         _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm);
407                     }
408                 }
409 #elif CV_NEON
410                 for ( ; j <= width - 8; j += 8)
411                 {
412                     int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
413                     int16x4_t v_dxp = vget_low_s16(v_dx), v_dyp = vget_low_s16(v_dy);
414                     int32x4_t v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
415                     vst1q_s32(_norm + j, v_dst);
416 
417                     v_dxp = vget_high_s16(v_dx), v_dyp = vget_high_s16(v_dy);
418                     v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
419                     vst1q_s32(_norm + j + 4, v_dst);
420                 }
421 #endif
422                 for ( ; j < width; ++j)
423                     _norm[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j];
424             }
425 
426             if (cn > 1)
427             {
428                 for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn)
429                 {
430                     int maxIdx = jn;
431                     for(int k = 1; k < cn; ++k)
432                         if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
433                     _norm[j] = _norm[maxIdx];
434                     _dx[j] = _dx[maxIdx];
435                     _dy[j] = _dy[maxIdx];
436                 }
437             }
438             _norm[-1] = _norm[src.cols] = 0;
439 
440             // at the very beginning we do not have a complete ring
441             // buffer of 3 magnitude rows for non-maxima suppression
442             if (i <= boundaries.start)
443                 continue;
444 
445             uchar* _map = map + mapstep*i + 1;
446             _map[-1] = _map[src.cols] = 1;
447 
448             int* _mag = mag_buf[1] + 1; // take the central row
449             ptrdiff_t magstep1 = mag_buf[2] - mag_buf[1];
450             ptrdiff_t magstep2 = mag_buf[0] - mag_buf[1];
451 
452             const short* _x = dx.ptr<short>(i - boundaries.start);
453             const short* _y = dy.ptr<short>(i - boundaries.start);
454 
455             if ((stack_top - stack_bottom) + src.cols > maxsize)
456             {
457                 int sz = (int)(stack_top - stack_bottom);
458                 maxsize = std::max(maxsize * 3/2, sz + src.cols);
459                 stack.resize(maxsize);
460                 stack_bottom = &stack[0];
461                 stack_top = stack_bottom + sz;
462             }
463 
464 #define CANNY_PUSH(d)    *(d) = uchar(2), *stack_top++ = (d)
465 #define CANNY_POP(d)     (d) = *--stack_top
466 
467             int prev_flag = 0;
468             bool canny_push = false;
469             for (int j = 0; j < src.cols; j++)
470             {
471                 #define CANNY_SHIFT 15
472                 const int TG22 = (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
473 
474                 int m = _mag[j];
475 
476                 if (m > low)
477                 {
478                     int xs = _x[j];
479                     int ys = _y[j];
480                     int x = std::abs(xs);
481                     int y = std::abs(ys) << CANNY_SHIFT;
482 
483                     int tg22x = x * TG22;
484 
485                     if (y < tg22x)
486                     {
487                         if (m > _mag[j-1] && m >= _mag[j+1]) canny_push = true;
488                     }
489                     else
490                     {
491                         int tg67x = tg22x + (x << (CANNY_SHIFT+1));
492                         if (y > tg67x)
493                         {
494                             if (m > _mag[j+magstep2] && m >= _mag[j+magstep1]) canny_push = true;
495                         }
496                         else
497                         {
498                             int s = (xs ^ ys) < 0 ? -1 : 1;
499                             if (m > _mag[j+magstep2-s] && m > _mag[j+magstep1+s]) canny_push = true;
500                         }
501                     }
502                 }
503                 if (!canny_push)
504                 {
505                     prev_flag = 0;
506                     _map[j] = uchar(1);
507                     continue;
508                 }
509                 else
510                 {
511                     // _map[j-mapstep] is short-circuited at the start because previous thread is
512                     // responsible for initializing it.
513                     if (!prev_flag && m > high && (i <= boundaries.start+1 || _map[j-mapstep] != 2) )
514                     {
515                         CANNY_PUSH(_map + j);
516                         prev_flag = 1;
517                     }
518                     else
519                         _map[j] = 0;
520 
521                     canny_push = false;
522                 }
523             }
524 
525             // scroll the ring buffer
526             _mag = mag_buf[0];
527             mag_buf[0] = mag_buf[1];
528             mag_buf[1] = mag_buf[2];
529             mag_buf[2] = _mag;
530         }
531 
532         // now track the edges (hysteresis thresholding)
533         while (stack_top > stack_bottom)
534         {
535             if ((stack_top - stack_bottom) + 8 > maxsize)
536             {
537                 int sz = (int)(stack_top - stack_bottom);
538                 maxsize = maxsize * 3/2;
539                 stack.resize(maxsize);
540                 stack_bottom = &stack[0];
541                 stack_top = stack_bottom + sz;
542             }
543 
544             uchar* m;
545             CANNY_POP(m);
546 
547             // Stops thresholding from expanding to other slices by sending pixels in the borders of each
548             // slice in a queue to be serially processed later.
549             if ( (m < map + (boundaries.start + 2) * mapstep) || (m >= map + boundaries.end * mapstep) )
550             {
551                 borderPeaks.push(m);
552                 continue;
553             }
554 
555             if (!m[-1])         CANNY_PUSH(m - 1);
556             if (!m[1])          CANNY_PUSH(m + 1);
557             if (!m[-mapstep-1]) CANNY_PUSH(m - mapstep - 1);
558             if (!m[-mapstep])   CANNY_PUSH(m - mapstep);
559             if (!m[-mapstep+1]) CANNY_PUSH(m - mapstep + 1);
560             if (!m[mapstep-1])  CANNY_PUSH(m + mapstep - 1);
561             if (!m[mapstep])    CANNY_PUSH(m + mapstep);
562             if (!m[mapstep+1])  CANNY_PUSH(m + mapstep + 1);
563         }
564     }
565 
566 private:
567     const Range boundaries;
568     const Mat& src;
569     uchar* map;
570     int low;
571     int high;
572     int aperture_size;
573     bool L2gradient;
574 };
575 
576 #endif
577 
578 } // namespace cv
579 
Canny(InputArray _src,OutputArray _dst,double low_thresh,double high_thresh,int aperture_size,bool L2gradient)580 void cv::Canny( InputArray _src, OutputArray _dst,
581                 double low_thresh, double high_thresh,
582                 int aperture_size, bool L2gradient )
583 {
584     const int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
585     const Size size = _src.size();
586 
587     CV_Assert( depth == CV_8U );
588     _dst.create(size, CV_8U);
589 
590     if (!L2gradient && (aperture_size & CV_CANNY_L2_GRADIENT) == CV_CANNY_L2_GRADIENT)
591     {
592         // backward compatibility
593         aperture_size &= ~CV_CANNY_L2_GRADIENT;
594         L2gradient = true;
595     }
596 
597     if ((aperture_size & 1) == 0 || (aperture_size != -1 && (aperture_size < 3 || aperture_size > 7)))
598         CV_Error(CV_StsBadFlag, "Aperture size should be odd");
599 
600     if (low_thresh > high_thresh)
601         std::swap(low_thresh, high_thresh);
602 
603     CV_OCL_RUN(_dst.isUMat() && (cn == 1 || cn == 3),
604                ocl_Canny(_src, _dst, (float)low_thresh, (float)high_thresh, aperture_size, L2gradient, cn, size))
605 
606     Mat src = _src.getMat(), dst = _dst.getMat();
607 
608 #ifdef HAVE_TEGRA_OPTIMIZATION
609     if (tegra::useTegra() && tegra::canny(src, dst, low_thresh, high_thresh, aperture_size, L2gradient))
610         return;
611 #endif
612 
613 #ifdef USE_IPP_CANNY
614     CV_IPP_CHECK()
615     {
616         if( aperture_size == 3 && !L2gradient && 1 == cn )
617         {
618             if (ippCanny(src, dst, (float)low_thresh, (float)high_thresh))
619             {
620                 CV_IMPL_ADD(CV_IMPL_IPP);
621                 return;
622             }
623             setIppErrorStatus();
624         }
625     }
626 #endif
627 
628 #ifdef HAVE_TBB
629 
630 if (L2gradient)
631 {
632     low_thresh = std::min(32767.0, low_thresh);
633     high_thresh = std::min(32767.0, high_thresh);
634 
635     if (low_thresh > 0) low_thresh *= low_thresh;
636     if (high_thresh > 0) high_thresh *= high_thresh;
637 }
638 int low = cvFloor(low_thresh);
639 int high = cvFloor(high_thresh);
640 
641 ptrdiff_t mapstep = src.cols + 2;
642 AutoBuffer<uchar> buffer((src.cols+2)*(src.rows+2));
643 
644 uchar* map = (uchar*)buffer;
645 memset(map, 1, mapstep);
646 memset(map + mapstep*(src.rows + 1), 1, mapstep);
647 
648 int threadsNumber = tbb::task_scheduler_init::default_num_threads();
649 int grainSize = src.rows / threadsNumber;
650 
651 // Make a fallback for pictures with too few rows.
652 uchar ksize2 = aperture_size / 2;
653 int minGrainSize = 1 + ksize2;
654 int maxGrainSize = src.rows - 2 - 2*ksize2;
655 if ( !( minGrainSize <= grainSize && grainSize <= maxGrainSize ) )
656 {
657     threadsNumber = 1;
658     grainSize = src.rows;
659 }
660 
661 tbb::task_group g;
662 
663 for (int i = 0; i < threadsNumber; ++i)
664 {
665     if (i < threadsNumber - 1)
666         g.run(tbbCanny(Range(i * grainSize, (i + 1) * grainSize), src, map, low, high, aperture_size, L2gradient));
667     else
668         g.run(tbbCanny(Range(i * grainSize, src.rows), src, map, low, high, aperture_size, L2gradient));
669 }
670 
671 g.wait();
672 
673 #define CANNY_PUSH_SERIAL(d)    *(d) = uchar(2), borderPeaks.push(d)
674 
675 // now track the edges (hysteresis thresholding)
676 uchar* m;
677 while (borderPeaks.try_pop(m))
678 {
679     if (!m[-1])         CANNY_PUSH_SERIAL(m - 1);
680     if (!m[1])          CANNY_PUSH_SERIAL(m + 1);
681     if (!m[-mapstep-1]) CANNY_PUSH_SERIAL(m - mapstep - 1);
682     if (!m[-mapstep])   CANNY_PUSH_SERIAL(m - mapstep);
683     if (!m[-mapstep+1]) CANNY_PUSH_SERIAL(m - mapstep + 1);
684     if (!m[mapstep-1])  CANNY_PUSH_SERIAL(m + mapstep - 1);
685     if (!m[mapstep])    CANNY_PUSH_SERIAL(m + mapstep);
686     if (!m[mapstep+1])  CANNY_PUSH_SERIAL(m + mapstep + 1);
687 }
688 
689 #else
690 
691     Mat dx(src.rows, src.cols, CV_16SC(cn));
692     Mat dy(src.rows, src.cols, CV_16SC(cn));
693 
694     Sobel(src, dx, CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
695     Sobel(src, dy, CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
696 
697     if (L2gradient)
698     {
699         low_thresh = std::min(32767.0, low_thresh);
700         high_thresh = std::min(32767.0, high_thresh);
701 
702         if (low_thresh > 0) low_thresh *= low_thresh;
703         if (high_thresh > 0) high_thresh *= high_thresh;
704     }
705     int low = cvFloor(low_thresh);
706     int high = cvFloor(high_thresh);
707 
708     ptrdiff_t mapstep = src.cols + 2;
709     AutoBuffer<uchar> buffer((src.cols+2)*(src.rows+2) + cn * mapstep * 3 * sizeof(int));
710 
711     int* mag_buf[3];
712     mag_buf[0] = (int*)(uchar*)buffer;
713     mag_buf[1] = mag_buf[0] + mapstep*cn;
714     mag_buf[2] = mag_buf[1] + mapstep*cn;
715     memset(mag_buf[0], 0, /* cn* */mapstep*sizeof(int));
716 
717     uchar* map = (uchar*)(mag_buf[2] + mapstep*cn);
718     memset(map, 1, mapstep);
719     memset(map + mapstep*(src.rows + 1), 1, mapstep);
720 
721     int maxsize = std::max(1 << 10, src.cols * src.rows / 10);
722     std::vector<uchar*> stack(maxsize);
723     uchar **stack_top = &stack[0];
724     uchar **stack_bottom = &stack[0];
725 
726     /* sector numbers
727        (Top-Left Origin)
728 
729         1   2   3
730          *  *  *
731           * * *
732         0*******0
733           * * *
734          *  *  *
735         3   2   1
736     */
737 
738     #define CANNY_PUSH(d)    *(d) = uchar(2), *stack_top++ = (d)
739     #define CANNY_POP(d)     (d) = *--stack_top
740 
741 #if CV_SSE2
742     bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
743 #endif
744 
745     // calculate magnitude and angle of gradient, perform non-maxima suppression.
746     // fill the map with one of the following values:
747     //   0 - the pixel might belong to an edge
748     //   1 - the pixel can not belong to an edge
749     //   2 - the pixel does belong to an edge
750     for (int i = 0; i <= src.rows; i++)
751     {
752         int* _norm = mag_buf[(i > 0) + 1] + 1;
753         if (i < src.rows)
754         {
755             short* _dx = dx.ptr<short>(i);
756             short* _dy = dy.ptr<short>(i);
757 
758             if (!L2gradient)
759             {
760                 int j = 0, width = src.cols * cn;
761 #if CV_SSE2
762                 if (haveSSE2)
763                 {
764                     __m128i v_zero = _mm_setzero_si128();
765                     for ( ; j <= width - 8; j += 8)
766                     {
767                         __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j));
768                         __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j));
769                         v_dx = _mm_max_epi16(v_dx, _mm_sub_epi16(v_zero, v_dx));
770                         v_dy = _mm_max_epi16(v_dy, _mm_sub_epi16(v_zero, v_dy));
771 
772                         __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx, v_zero), _mm_unpacklo_epi16(v_dy, v_zero));
773                         _mm_storeu_si128((__m128i *)(_norm + j), v_norm);
774 
775                         v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx, v_zero), _mm_unpackhi_epi16(v_dy, v_zero));
776                         _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm);
777                     }
778                 }
779 #elif CV_NEON
780                 for ( ; j <= width - 8; j += 8)
781                 {
782                     int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
783                     vst1q_s32(_norm + j, vaddq_s32(vabsq_s32(vmovl_s16(vget_low_s16(v_dx))),
784                                                    vabsq_s32(vmovl_s16(vget_low_s16(v_dy)))));
785                     vst1q_s32(_norm + j + 4, vaddq_s32(vabsq_s32(vmovl_s16(vget_high_s16(v_dx))),
786                                                        vabsq_s32(vmovl_s16(vget_high_s16(v_dy)))));
787                 }
788 #endif
789                 for ( ; j < width; ++j)
790                     _norm[j] = std::abs(int(_dx[j])) + std::abs(int(_dy[j]));
791             }
792             else
793             {
794                 int j = 0, width = src.cols * cn;
795 #if CV_SSE2
796                 if (haveSSE2)
797                 {
798                     for ( ; j <= width - 8; j += 8)
799                     {
800                         __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j));
801                         __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j));
802 
803                         __m128i v_dx_ml = _mm_mullo_epi16(v_dx, v_dx), v_dx_mh = _mm_mulhi_epi16(v_dx, v_dx);
804                         __m128i v_dy_ml = _mm_mullo_epi16(v_dy, v_dy), v_dy_mh = _mm_mulhi_epi16(v_dy, v_dy);
805 
806                         __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx_ml, v_dx_mh), _mm_unpacklo_epi16(v_dy_ml, v_dy_mh));
807                         _mm_storeu_si128((__m128i *)(_norm + j), v_norm);
808 
809                         v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx_ml, v_dx_mh), _mm_unpackhi_epi16(v_dy_ml, v_dy_mh));
810                         _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm);
811                     }
812                 }
813 #elif CV_NEON
814                 for ( ; j <= width - 8; j += 8)
815                 {
816                     int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
817                     int16x4_t v_dxp = vget_low_s16(v_dx), v_dyp = vget_low_s16(v_dy);
818                     int32x4_t v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
819                     vst1q_s32(_norm + j, v_dst);
820 
821                     v_dxp = vget_high_s16(v_dx), v_dyp = vget_high_s16(v_dy);
822                     v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
823                     vst1q_s32(_norm + j + 4, v_dst);
824                 }
825 #endif
826                 for ( ; j < width; ++j)
827                     _norm[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j];
828             }
829 
830             if (cn > 1)
831             {
832                 for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn)
833                 {
834                     int maxIdx = jn;
835                     for(int k = 1; k < cn; ++k)
836                         if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
837                     _norm[j] = _norm[maxIdx];
838                     _dx[j] = _dx[maxIdx];
839                     _dy[j] = _dy[maxIdx];
840                 }
841             }
842             _norm[-1] = _norm[src.cols] = 0;
843         }
844         else
845             memset(_norm-1, 0, /* cn* */mapstep*sizeof(int));
846 
847         // at the very beginning we do not have a complete ring
848         // buffer of 3 magnitude rows for non-maxima suppression
849         if (i == 0)
850             continue;
851 
852         uchar* _map = map + mapstep*i + 1;
853         _map[-1] = _map[src.cols] = 1;
854 
855         int* _mag = mag_buf[1] + 1; // take the central row
856         ptrdiff_t magstep1 = mag_buf[2] - mag_buf[1];
857         ptrdiff_t magstep2 = mag_buf[0] - mag_buf[1];
858 
859         const short* _x = dx.ptr<short>(i-1);
860         const short* _y = dy.ptr<short>(i-1);
861 
862         if ((stack_top - stack_bottom) + src.cols > maxsize)
863         {
864             int sz = (int)(stack_top - stack_bottom);
865             maxsize = std::max(maxsize * 3/2, sz + src.cols);
866             stack.resize(maxsize);
867             stack_bottom = &stack[0];
868             stack_top = stack_bottom + sz;
869         }
870 
871         int prev_flag = 0;
872         for (int j = 0; j < src.cols; j++)
873         {
874             #define CANNY_SHIFT 15
875             const int TG22 = (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
876 
877             int m = _mag[j];
878 
879             if (m > low)
880             {
881                 int xs = _x[j];
882                 int ys = _y[j];
883                 int x = std::abs(xs);
884                 int y = std::abs(ys) << CANNY_SHIFT;
885 
886                 int tg22x = x * TG22;
887 
888                 if (y < tg22x)
889                 {
890                     if (m > _mag[j-1] && m >= _mag[j+1]) goto __ocv_canny_push;
891                 }
892                 else
893                 {
894                     int tg67x = tg22x + (x << (CANNY_SHIFT+1));
895                     if (y > tg67x)
896                     {
897                         if (m > _mag[j+magstep2] && m >= _mag[j+magstep1]) goto __ocv_canny_push;
898                     }
899                     else
900                     {
901                         int s = (xs ^ ys) < 0 ? -1 : 1;
902                         if (m > _mag[j+magstep2-s] && m > _mag[j+magstep1+s]) goto __ocv_canny_push;
903                     }
904                 }
905             }
906             prev_flag = 0;
907             _map[j] = uchar(1);
908             continue;
909 __ocv_canny_push:
910             if (!prev_flag && m > high && _map[j-mapstep] != 2)
911             {
912                 CANNY_PUSH(_map + j);
913                 prev_flag = 1;
914             }
915             else
916                 _map[j] = 0;
917         }
918 
919         // scroll the ring buffer
920         _mag = mag_buf[0];
921         mag_buf[0] = mag_buf[1];
922         mag_buf[1] = mag_buf[2];
923         mag_buf[2] = _mag;
924     }
925 
926     // now track the edges (hysteresis thresholding)
927     while (stack_top > stack_bottom)
928     {
929         uchar* m;
930         if ((stack_top - stack_bottom) + 8 > maxsize)
931         {
932             int sz = (int)(stack_top - stack_bottom);
933             maxsize = maxsize * 3/2;
934             stack.resize(maxsize);
935             stack_bottom = &stack[0];
936             stack_top = stack_bottom + sz;
937         }
938 
939         CANNY_POP(m);
940 
941         if (!m[-1])         CANNY_PUSH(m - 1);
942         if (!m[1])          CANNY_PUSH(m + 1);
943         if (!m[-mapstep-1]) CANNY_PUSH(m - mapstep - 1);
944         if (!m[-mapstep])   CANNY_PUSH(m - mapstep);
945         if (!m[-mapstep+1]) CANNY_PUSH(m - mapstep + 1);
946         if (!m[mapstep-1])  CANNY_PUSH(m + mapstep - 1);
947         if (!m[mapstep])    CANNY_PUSH(m + mapstep);
948         if (!m[mapstep+1])  CANNY_PUSH(m + mapstep + 1);
949     }
950 
951 #endif
952 
953     // the final pass, form the final image
954     const uchar* pmap = map + mapstep + 1;
955     uchar* pdst = dst.ptr();
956     for (int i = 0; i < src.rows; i++, pmap += mapstep, pdst += dst.step)
957     {
958         for (int j = 0; j < src.cols; j++)
959             pdst[j] = (uchar)-(pmap[j] >> 1);
960     }
961 }
962 
cvCanny(const CvArr * image,CvArr * edges,double threshold1,double threshold2,int aperture_size)963 void cvCanny( const CvArr* image, CvArr* edges, double threshold1,
964               double threshold2, int aperture_size )
965 {
966     cv::Mat src = cv::cvarrToMat(image), dst = cv::cvarrToMat(edges);
967     CV_Assert( src.size == dst.size && src.depth() == CV_8U && dst.type() == CV_8U );
968 
969     cv::Canny(src, dst, threshold1, threshold2, aperture_size & 255,
970               (aperture_size & CV_CANNY_L2_GRADIENT) != 0);
971 }
972 
973 /* End of file. */
974