1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // Intel License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
14 // Copyright (C) 2014, Itseez Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
22 //
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
26 //
27 // * The name of Intel Corporation may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 #include "precomp.hpp"
44 #include "opencl_kernels_imgproc.hpp"
45
46
47 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
48 #define USE_IPP_CANNY 1
49 #else
50 #undef USE_IPP_CANNY
51 #endif
52
53
54 namespace cv
55 {
56
57 #ifdef USE_IPP_CANNY
ippCanny(const Mat & _src,Mat & _dst,float low,float high)58 static bool ippCanny(const Mat& _src, Mat& _dst, float low, float high)
59 {
60 int size = 0, size1 = 0;
61 IppiSize roi = { _src.cols, _src.rows };
62
63 if (ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size) < 0)
64 return false;
65 if (ippiFilterSobelHorizGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size1) < 0)
66 return false;
67 size = std::max(size, size1);
68
69 if (ippiCannyGetSize(roi, &size1) < 0)
70 return false;
71 size = std::max(size, size1);
72
73 AutoBuffer<uchar> buf(size + 64);
74 uchar* buffer = alignPtr((uchar*)buf, 32);
75
76 Mat _dx(_src.rows, _src.cols, CV_16S);
77 if( ippiFilterSobelNegVertBorder_8u16s_C1R(_src.ptr(), (int)_src.step,
78 _dx.ptr<short>(), (int)_dx.step, roi,
79 ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 )
80 return false;
81
82 Mat _dy(_src.rows, _src.cols, CV_16S);
83 if( ippiFilterSobelHorizBorder_8u16s_C1R(_src.ptr(), (int)_src.step,
84 _dy.ptr<short>(), (int)_dy.step, roi,
85 ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 )
86 return false;
87
88 if( ippiCanny_16s8u_C1R(_dx.ptr<short>(), (int)_dx.step,
89 _dy.ptr<short>(), (int)_dy.step,
90 _dst.ptr(), (int)_dst.step, roi, low, high, buffer) < 0 )
91 return false;
92 return true;
93 }
94 #endif
95
96 #ifdef HAVE_OPENCL
97
ocl_Canny(InputArray _src,OutputArray _dst,float low_thresh,float high_thresh,int aperture_size,bool L2gradient,int cn,const Size & size)98 static bool ocl_Canny(InputArray _src, OutputArray _dst, float low_thresh, float high_thresh,
99 int aperture_size, bool L2gradient, int cn, const Size & size)
100 {
101 UMat map;
102
103 const ocl::Device &dev = ocl::Device::getDefault();
104 int max_wg_size = (int)dev.maxWorkGroupSize();
105
106 int lSizeX = 32;
107 int lSizeY = max_wg_size / 32;
108
109 if (lSizeY == 0)
110 {
111 lSizeX = 16;
112 lSizeY = max_wg_size / 16;
113 }
114 if (lSizeY == 0)
115 {
116 lSizeY = 1;
117 }
118
119 if (L2gradient)
120 {
121 low_thresh = std::min(32767.0f, low_thresh);
122 high_thresh = std::min(32767.0f, high_thresh);
123
124 if (low_thresh > 0)
125 low_thresh *= low_thresh;
126 if (high_thresh > 0)
127 high_thresh *= high_thresh;
128 }
129 int low = cvFloor(low_thresh), high = cvFloor(high_thresh);
130
131 if (aperture_size == 3 && !_src.isSubmatrix())
132 {
133 /*
134 stage1_with_sobel:
135 Sobel operator
136 Calc magnitudes
137 Non maxima suppression
138 Double thresholding
139 */
140 char cvt[40];
141 ocl::Kernel with_sobel("stage1_with_sobel", ocl::imgproc::canny_oclsrc,
142 format("-D WITH_SOBEL -D cn=%d -D TYPE=%s -D convert_floatN=%s -D floatN=%s -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
143 cn, ocl::memopTypeToStr(_src.depth()),
144 ocl::convertTypeStr(_src.depth(), CV_32F, cn, cvt),
145 ocl::typeToStr(CV_MAKE_TYPE(CV_32F, cn)),
146 lSizeX, lSizeY,
147 L2gradient ? " -D L2GRAD" : ""));
148 if (with_sobel.empty())
149 return false;
150
151 UMat src = _src.getUMat();
152 map.create(size, CV_32S);
153 with_sobel.args(ocl::KernelArg::ReadOnly(src),
154 ocl::KernelArg::WriteOnlyNoSize(map),
155 (float) low, (float) high);
156
157 size_t globalsize[2] = { size.width, size.height },
158 localsize[2] = { lSizeX, lSizeY };
159
160 if (!with_sobel.run(2, globalsize, localsize, false))
161 return false;
162 }
163 else
164 {
165 /*
166 stage1_without_sobel:
167 Calc magnitudes
168 Non maxima suppression
169 Double thresholding
170 */
171 UMat dx, dy;
172 Sobel(_src, dx, CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
173 Sobel(_src, dy, CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
174
175 ocl::Kernel without_sobel("stage1_without_sobel", ocl::imgproc::canny_oclsrc,
176 format("-D WITHOUT_SOBEL -D cn=%d -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
177 cn, lSizeX, lSizeY, L2gradient ? " -D L2GRAD" : ""));
178 if (without_sobel.empty())
179 return false;
180
181 map.create(size, CV_32S);
182 without_sobel.args(ocl::KernelArg::ReadOnlyNoSize(dx), ocl::KernelArg::ReadOnlyNoSize(dy),
183 ocl::KernelArg::WriteOnly(map),
184 low, high);
185
186 size_t globalsize[2] = { size.width, size.height },
187 localsize[2] = { lSizeX, lSizeY };
188
189 if (!without_sobel.run(2, globalsize, localsize, false))
190 return false;
191 }
192
193 int PIX_PER_WI = 8;
194 /*
195 stage2:
196 hysteresis (add weak edges if they are connected with strong edges)
197 */
198
199 int sizey = lSizeY / PIX_PER_WI;
200 if (sizey == 0)
201 sizey = 1;
202
203 size_t globalsize[2] = { size.width, (size.height + PIX_PER_WI - 1) / PIX_PER_WI }, localsize[2] = { lSizeX, sizey };
204
205 ocl::Kernel edgesHysteresis("stage2_hysteresis", ocl::imgproc::canny_oclsrc,
206 format("-D STAGE2 -D PIX_PER_WI=%d -D LOCAL_X=%d -D LOCAL_Y=%d",
207 PIX_PER_WI, lSizeX, sizey));
208
209 if (edgesHysteresis.empty())
210 return false;
211
212 edgesHysteresis.args(ocl::KernelArg::ReadWrite(map));
213 if (!edgesHysteresis.run(2, globalsize, localsize, false))
214 return false;
215
216 // get edges
217
218 ocl::Kernel getEdgesKernel("getEdges", ocl::imgproc::canny_oclsrc,
219 format("-D GET_EDGES -D PIX_PER_WI=%d", PIX_PER_WI));
220 if (getEdgesKernel.empty())
221 return false;
222
223 _dst.create(size, CV_8UC1);
224 UMat dst = _dst.getUMat();
225
226 getEdgesKernel.args(ocl::KernelArg::ReadOnly(map), ocl::KernelArg::WriteOnlyNoSize(dst));
227
228 return getEdgesKernel.run(2, globalsize, NULL, false);
229 }
230
231 #endif
232
233 #ifdef HAVE_TBB
234
235 // Queue with peaks that will processed serially.
236 static tbb::concurrent_queue<uchar*> borderPeaks;
237
238 class tbbCanny
239 {
240 public:
tbbCanny(const Range _boundaries,const Mat & _src,uchar * _map,int _low,int _high,int _aperture_size,bool _L2gradient)241 tbbCanny(const Range _boundaries, const Mat& _src, uchar* _map, int _low,
242 int _high, int _aperture_size, bool _L2gradient)
243 : boundaries(_boundaries), src(_src), map(_map), low(_low), high(_high),
244 aperture_size(_aperture_size), L2gradient(_L2gradient)
245 {}
246
247 // This parallel version of Canny algorithm splits the src image in threadsNumber horizontal slices.
248 // The first row of each slice contains the last row of the previous slice and
249 // the last row of each slice contains the first row of the next slice
250 // so that each slice is independent and no mutexes are required.
operator ()() const251 void operator()() const
252 {
253 #if CV_SSE2
254 bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
255 #endif
256
257 const int type = src.type(), cn = CV_MAT_CN(type);
258
259 Mat dx, dy;
260
261 ptrdiff_t mapstep = src.cols + 2;
262
263 // In sobel transform we calculate ksize2 extra lines for the first and last rows of each slice
264 // because IPPDerivSobel expects only isolated ROIs, in contrast with the opencv version which
265 // uses the pixels outside of the ROI to form a border.
266 uchar ksize2 = aperture_size / 2;
267
268 if (boundaries.start == 0 && boundaries.end == src.rows)
269 {
270 Mat tempdx(boundaries.end - boundaries.start + 2, src.cols, CV_16SC(cn));
271 Mat tempdy(boundaries.end - boundaries.start + 2, src.cols, CV_16SC(cn));
272
273 memset(tempdx.ptr<short>(0), 0, cn * src.cols*sizeof(short));
274 memset(tempdy.ptr<short>(0), 0, cn * src.cols*sizeof(short));
275 memset(tempdx.ptr<short>(tempdx.rows - 1), 0, cn * src.cols*sizeof(short));
276 memset(tempdy.ptr<short>(tempdy.rows - 1), 0, cn * src.cols*sizeof(short));
277
278 Sobel(src, tempdx.rowRange(1, tempdx.rows - 1), CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
279 Sobel(src, tempdy.rowRange(1, tempdy.rows - 1), CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
280
281 dx = tempdx;
282 dy = tempdy;
283 }
284 else if (boundaries.start == 0)
285 {
286 Mat tempdx(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn));
287 Mat tempdy(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn));
288
289 memset(tempdx.ptr<short>(0), 0, cn * src.cols*sizeof(short));
290 memset(tempdy.ptr<short>(0), 0, cn * src.cols*sizeof(short));
291
292 Sobel(src.rowRange(boundaries.start, boundaries.end + 1 + ksize2), tempdx.rowRange(1, tempdx.rows),
293 CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
294 Sobel(src.rowRange(boundaries.start, boundaries.end + 1 + ksize2), tempdy.rowRange(1, tempdy.rows),
295 CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
296
297 dx = tempdx.rowRange(0, tempdx.rows - ksize2);
298 dy = tempdy.rowRange(0, tempdy.rows - ksize2);
299 }
300 else if (boundaries.end == src.rows)
301 {
302 Mat tempdx(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn));
303 Mat tempdy(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn));
304
305 memset(tempdx.ptr<short>(tempdx.rows - 1), 0, cn * src.cols*sizeof(short));
306 memset(tempdy.ptr<short>(tempdy.rows - 1), 0, cn * src.cols*sizeof(short));
307
308 Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end), tempdx.rowRange(0, tempdx.rows - 1),
309 CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
310 Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end), tempdy.rowRange(0, tempdy.rows - 1),
311 CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
312
313 dx = tempdx.rowRange(ksize2, tempdx.rows);
314 dy = tempdy.rowRange(ksize2, tempdy.rows);
315 }
316 else
317 {
318 Mat tempdx(boundaries.end - boundaries.start + 2 + 2*ksize2, src.cols, CV_16SC(cn));
319 Mat tempdy(boundaries.end - boundaries.start + 2 + 2*ksize2, src.cols, CV_16SC(cn));
320
321 Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end + 1 + ksize2), tempdx,
322 CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
323 Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end + 1 + ksize2), tempdy,
324 CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
325
326 dx = tempdx.rowRange(ksize2, tempdx.rows - ksize2);
327 dy = tempdy.rowRange(ksize2, tempdy.rows - ksize2);
328 }
329
330 int maxsize = std::max(1 << 10, src.cols * (boundaries.end - boundaries.start) / 10);
331 std::vector<uchar*> stack(maxsize);
332 uchar **stack_top = &stack[0];
333 uchar **stack_bottom = &stack[0];
334
335 AutoBuffer<uchar> buffer(cn * mapstep * 3 * sizeof(int));
336
337 int* mag_buf[3];
338 mag_buf[0] = (int*)(uchar*)buffer;
339 mag_buf[1] = mag_buf[0] + mapstep*cn;
340 mag_buf[2] = mag_buf[1] + mapstep*cn;
341
342 // calculate magnitude and angle of gradient, perform non-maxima suppression.
343 // fill the map with one of the following values:
344 // 0 - the pixel might belong to an edge
345 // 1 - the pixel can not belong to an edge
346 // 2 - the pixel does belong to an edge
347 for (int i = boundaries.start - 1; i <= boundaries.end; i++)
348 {
349 int* _norm = mag_buf[(i > boundaries.start) - (i == boundaries.start - 1) + 1] + 1;
350
351 short* _dx = dx.ptr<short>(i - boundaries.start + 1);
352 short* _dy = dy.ptr<short>(i - boundaries.start + 1);
353
354 if (!L2gradient)
355 {
356 int j = 0, width = src.cols * cn;
357 #if CV_SSE2
358 if (haveSSE2)
359 {
360 __m128i v_zero = _mm_setzero_si128();
361 for ( ; j <= width - 8; j += 8)
362 {
363 __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j));
364 __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j));
365 v_dx = _mm_max_epi16(v_dx, _mm_sub_epi16(v_zero, v_dx));
366 v_dy = _mm_max_epi16(v_dy, _mm_sub_epi16(v_zero, v_dy));
367
368 __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx, v_zero), _mm_unpacklo_epi16(v_dy, v_zero));
369 _mm_storeu_si128((__m128i *)(_norm + j), v_norm);
370
371 v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx, v_zero), _mm_unpackhi_epi16(v_dy, v_zero));
372 _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm);
373 }
374 }
375 #elif CV_NEON
376 for ( ; j <= width - 8; j += 8)
377 {
378 int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
379 vst1q_s32(_norm + j, vaddq_s32(vabsq_s32(vmovl_s16(vget_low_s16(v_dx))),
380 vabsq_s32(vmovl_s16(vget_low_s16(v_dy)))));
381 vst1q_s32(_norm + j + 4, vaddq_s32(vabsq_s32(vmovl_s16(vget_high_s16(v_dx))),
382 vabsq_s32(vmovl_s16(vget_high_s16(v_dy)))));
383 }
384 #endif
385 for ( ; j < width; ++j)
386 _norm[j] = std::abs(int(_dx[j])) + std::abs(int(_dy[j]));
387 }
388 else
389 {
390 int j = 0, width = src.cols * cn;
391 #if CV_SSE2
392 if (haveSSE2)
393 {
394 for ( ; j <= width - 8; j += 8)
395 {
396 __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j));
397 __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j));
398
399 __m128i v_dx_ml = _mm_mullo_epi16(v_dx, v_dx), v_dx_mh = _mm_mulhi_epi16(v_dx, v_dx);
400 __m128i v_dy_ml = _mm_mullo_epi16(v_dy, v_dy), v_dy_mh = _mm_mulhi_epi16(v_dy, v_dy);
401
402 __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx_ml, v_dx_mh), _mm_unpacklo_epi16(v_dy_ml, v_dy_mh));
403 _mm_storeu_si128((__m128i *)(_norm + j), v_norm);
404
405 v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx_ml, v_dx_mh), _mm_unpackhi_epi16(v_dy_ml, v_dy_mh));
406 _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm);
407 }
408 }
409 #elif CV_NEON
410 for ( ; j <= width - 8; j += 8)
411 {
412 int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
413 int16x4_t v_dxp = vget_low_s16(v_dx), v_dyp = vget_low_s16(v_dy);
414 int32x4_t v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
415 vst1q_s32(_norm + j, v_dst);
416
417 v_dxp = vget_high_s16(v_dx), v_dyp = vget_high_s16(v_dy);
418 v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
419 vst1q_s32(_norm + j + 4, v_dst);
420 }
421 #endif
422 for ( ; j < width; ++j)
423 _norm[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j];
424 }
425
426 if (cn > 1)
427 {
428 for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn)
429 {
430 int maxIdx = jn;
431 for(int k = 1; k < cn; ++k)
432 if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
433 _norm[j] = _norm[maxIdx];
434 _dx[j] = _dx[maxIdx];
435 _dy[j] = _dy[maxIdx];
436 }
437 }
438 _norm[-1] = _norm[src.cols] = 0;
439
440 // at the very beginning we do not have a complete ring
441 // buffer of 3 magnitude rows for non-maxima suppression
442 if (i <= boundaries.start)
443 continue;
444
445 uchar* _map = map + mapstep*i + 1;
446 _map[-1] = _map[src.cols] = 1;
447
448 int* _mag = mag_buf[1] + 1; // take the central row
449 ptrdiff_t magstep1 = mag_buf[2] - mag_buf[1];
450 ptrdiff_t magstep2 = mag_buf[0] - mag_buf[1];
451
452 const short* _x = dx.ptr<short>(i - boundaries.start);
453 const short* _y = dy.ptr<short>(i - boundaries.start);
454
455 if ((stack_top - stack_bottom) + src.cols > maxsize)
456 {
457 int sz = (int)(stack_top - stack_bottom);
458 maxsize = std::max(maxsize * 3/2, sz + src.cols);
459 stack.resize(maxsize);
460 stack_bottom = &stack[0];
461 stack_top = stack_bottom + sz;
462 }
463
464 #define CANNY_PUSH(d) *(d) = uchar(2), *stack_top++ = (d)
465 #define CANNY_POP(d) (d) = *--stack_top
466
467 int prev_flag = 0;
468 bool canny_push = false;
469 for (int j = 0; j < src.cols; j++)
470 {
471 #define CANNY_SHIFT 15
472 const int TG22 = (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
473
474 int m = _mag[j];
475
476 if (m > low)
477 {
478 int xs = _x[j];
479 int ys = _y[j];
480 int x = std::abs(xs);
481 int y = std::abs(ys) << CANNY_SHIFT;
482
483 int tg22x = x * TG22;
484
485 if (y < tg22x)
486 {
487 if (m > _mag[j-1] && m >= _mag[j+1]) canny_push = true;
488 }
489 else
490 {
491 int tg67x = tg22x + (x << (CANNY_SHIFT+1));
492 if (y > tg67x)
493 {
494 if (m > _mag[j+magstep2] && m >= _mag[j+magstep1]) canny_push = true;
495 }
496 else
497 {
498 int s = (xs ^ ys) < 0 ? -1 : 1;
499 if (m > _mag[j+magstep2-s] && m > _mag[j+magstep1+s]) canny_push = true;
500 }
501 }
502 }
503 if (!canny_push)
504 {
505 prev_flag = 0;
506 _map[j] = uchar(1);
507 continue;
508 }
509 else
510 {
511 // _map[j-mapstep] is short-circuited at the start because previous thread is
512 // responsible for initializing it.
513 if (!prev_flag && m > high && (i <= boundaries.start+1 || _map[j-mapstep] != 2) )
514 {
515 CANNY_PUSH(_map + j);
516 prev_flag = 1;
517 }
518 else
519 _map[j] = 0;
520
521 canny_push = false;
522 }
523 }
524
525 // scroll the ring buffer
526 _mag = mag_buf[0];
527 mag_buf[0] = mag_buf[1];
528 mag_buf[1] = mag_buf[2];
529 mag_buf[2] = _mag;
530 }
531
532 // now track the edges (hysteresis thresholding)
533 while (stack_top > stack_bottom)
534 {
535 if ((stack_top - stack_bottom) + 8 > maxsize)
536 {
537 int sz = (int)(stack_top - stack_bottom);
538 maxsize = maxsize * 3/2;
539 stack.resize(maxsize);
540 stack_bottom = &stack[0];
541 stack_top = stack_bottom + sz;
542 }
543
544 uchar* m;
545 CANNY_POP(m);
546
547 // Stops thresholding from expanding to other slices by sending pixels in the borders of each
548 // slice in a queue to be serially processed later.
549 if ( (m < map + (boundaries.start + 2) * mapstep) || (m >= map + boundaries.end * mapstep) )
550 {
551 borderPeaks.push(m);
552 continue;
553 }
554
555 if (!m[-1]) CANNY_PUSH(m - 1);
556 if (!m[1]) CANNY_PUSH(m + 1);
557 if (!m[-mapstep-1]) CANNY_PUSH(m - mapstep - 1);
558 if (!m[-mapstep]) CANNY_PUSH(m - mapstep);
559 if (!m[-mapstep+1]) CANNY_PUSH(m - mapstep + 1);
560 if (!m[mapstep-1]) CANNY_PUSH(m + mapstep - 1);
561 if (!m[mapstep]) CANNY_PUSH(m + mapstep);
562 if (!m[mapstep+1]) CANNY_PUSH(m + mapstep + 1);
563 }
564 }
565
566 private:
567 const Range boundaries;
568 const Mat& src;
569 uchar* map;
570 int low;
571 int high;
572 int aperture_size;
573 bool L2gradient;
574 };
575
576 #endif
577
578 } // namespace cv
579
Canny(InputArray _src,OutputArray _dst,double low_thresh,double high_thresh,int aperture_size,bool L2gradient)580 void cv::Canny( InputArray _src, OutputArray _dst,
581 double low_thresh, double high_thresh,
582 int aperture_size, bool L2gradient )
583 {
584 const int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
585 const Size size = _src.size();
586
587 CV_Assert( depth == CV_8U );
588 _dst.create(size, CV_8U);
589
590 if (!L2gradient && (aperture_size & CV_CANNY_L2_GRADIENT) == CV_CANNY_L2_GRADIENT)
591 {
592 // backward compatibility
593 aperture_size &= ~CV_CANNY_L2_GRADIENT;
594 L2gradient = true;
595 }
596
597 if ((aperture_size & 1) == 0 || (aperture_size != -1 && (aperture_size < 3 || aperture_size > 7)))
598 CV_Error(CV_StsBadFlag, "Aperture size should be odd");
599
600 if (low_thresh > high_thresh)
601 std::swap(low_thresh, high_thresh);
602
603 CV_OCL_RUN(_dst.isUMat() && (cn == 1 || cn == 3),
604 ocl_Canny(_src, _dst, (float)low_thresh, (float)high_thresh, aperture_size, L2gradient, cn, size))
605
606 Mat src = _src.getMat(), dst = _dst.getMat();
607
608 #ifdef HAVE_TEGRA_OPTIMIZATION
609 if (tegra::useTegra() && tegra::canny(src, dst, low_thresh, high_thresh, aperture_size, L2gradient))
610 return;
611 #endif
612
613 #ifdef USE_IPP_CANNY
614 CV_IPP_CHECK()
615 {
616 if( aperture_size == 3 && !L2gradient && 1 == cn )
617 {
618 if (ippCanny(src, dst, (float)low_thresh, (float)high_thresh))
619 {
620 CV_IMPL_ADD(CV_IMPL_IPP);
621 return;
622 }
623 setIppErrorStatus();
624 }
625 }
626 #endif
627
628 #ifdef HAVE_TBB
629
630 if (L2gradient)
631 {
632 low_thresh = std::min(32767.0, low_thresh);
633 high_thresh = std::min(32767.0, high_thresh);
634
635 if (low_thresh > 0) low_thresh *= low_thresh;
636 if (high_thresh > 0) high_thresh *= high_thresh;
637 }
638 int low = cvFloor(low_thresh);
639 int high = cvFloor(high_thresh);
640
641 ptrdiff_t mapstep = src.cols + 2;
642 AutoBuffer<uchar> buffer((src.cols+2)*(src.rows+2));
643
644 uchar* map = (uchar*)buffer;
645 memset(map, 1, mapstep);
646 memset(map + mapstep*(src.rows + 1), 1, mapstep);
647
648 int threadsNumber = tbb::task_scheduler_init::default_num_threads();
649 int grainSize = src.rows / threadsNumber;
650
651 // Make a fallback for pictures with too few rows.
652 uchar ksize2 = aperture_size / 2;
653 int minGrainSize = 1 + ksize2;
654 int maxGrainSize = src.rows - 2 - 2*ksize2;
655 if ( !( minGrainSize <= grainSize && grainSize <= maxGrainSize ) )
656 {
657 threadsNumber = 1;
658 grainSize = src.rows;
659 }
660
661 tbb::task_group g;
662
663 for (int i = 0; i < threadsNumber; ++i)
664 {
665 if (i < threadsNumber - 1)
666 g.run(tbbCanny(Range(i * grainSize, (i + 1) * grainSize), src, map, low, high, aperture_size, L2gradient));
667 else
668 g.run(tbbCanny(Range(i * grainSize, src.rows), src, map, low, high, aperture_size, L2gradient));
669 }
670
671 g.wait();
672
673 #define CANNY_PUSH_SERIAL(d) *(d) = uchar(2), borderPeaks.push(d)
674
675 // now track the edges (hysteresis thresholding)
676 uchar* m;
677 while (borderPeaks.try_pop(m))
678 {
679 if (!m[-1]) CANNY_PUSH_SERIAL(m - 1);
680 if (!m[1]) CANNY_PUSH_SERIAL(m + 1);
681 if (!m[-mapstep-1]) CANNY_PUSH_SERIAL(m - mapstep - 1);
682 if (!m[-mapstep]) CANNY_PUSH_SERIAL(m - mapstep);
683 if (!m[-mapstep+1]) CANNY_PUSH_SERIAL(m - mapstep + 1);
684 if (!m[mapstep-1]) CANNY_PUSH_SERIAL(m + mapstep - 1);
685 if (!m[mapstep]) CANNY_PUSH_SERIAL(m + mapstep);
686 if (!m[mapstep+1]) CANNY_PUSH_SERIAL(m + mapstep + 1);
687 }
688
689 #else
690
691 Mat dx(src.rows, src.cols, CV_16SC(cn));
692 Mat dy(src.rows, src.cols, CV_16SC(cn));
693
694 Sobel(src, dx, CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
695 Sobel(src, dy, CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
696
697 if (L2gradient)
698 {
699 low_thresh = std::min(32767.0, low_thresh);
700 high_thresh = std::min(32767.0, high_thresh);
701
702 if (low_thresh > 0) low_thresh *= low_thresh;
703 if (high_thresh > 0) high_thresh *= high_thresh;
704 }
705 int low = cvFloor(low_thresh);
706 int high = cvFloor(high_thresh);
707
708 ptrdiff_t mapstep = src.cols + 2;
709 AutoBuffer<uchar> buffer((src.cols+2)*(src.rows+2) + cn * mapstep * 3 * sizeof(int));
710
711 int* mag_buf[3];
712 mag_buf[0] = (int*)(uchar*)buffer;
713 mag_buf[1] = mag_buf[0] + mapstep*cn;
714 mag_buf[2] = mag_buf[1] + mapstep*cn;
715 memset(mag_buf[0], 0, /* cn* */mapstep*sizeof(int));
716
717 uchar* map = (uchar*)(mag_buf[2] + mapstep*cn);
718 memset(map, 1, mapstep);
719 memset(map + mapstep*(src.rows + 1), 1, mapstep);
720
721 int maxsize = std::max(1 << 10, src.cols * src.rows / 10);
722 std::vector<uchar*> stack(maxsize);
723 uchar **stack_top = &stack[0];
724 uchar **stack_bottom = &stack[0];
725
726 /* sector numbers
727 (Top-Left Origin)
728
729 1 2 3
730 * * *
731 * * *
732 0*******0
733 * * *
734 * * *
735 3 2 1
736 */
737
738 #define CANNY_PUSH(d) *(d) = uchar(2), *stack_top++ = (d)
739 #define CANNY_POP(d) (d) = *--stack_top
740
741 #if CV_SSE2
742 bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
743 #endif
744
745 // calculate magnitude and angle of gradient, perform non-maxima suppression.
746 // fill the map with one of the following values:
747 // 0 - the pixel might belong to an edge
748 // 1 - the pixel can not belong to an edge
749 // 2 - the pixel does belong to an edge
750 for (int i = 0; i <= src.rows; i++)
751 {
752 int* _norm = mag_buf[(i > 0) + 1] + 1;
753 if (i < src.rows)
754 {
755 short* _dx = dx.ptr<short>(i);
756 short* _dy = dy.ptr<short>(i);
757
758 if (!L2gradient)
759 {
760 int j = 0, width = src.cols * cn;
761 #if CV_SSE2
762 if (haveSSE2)
763 {
764 __m128i v_zero = _mm_setzero_si128();
765 for ( ; j <= width - 8; j += 8)
766 {
767 __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j));
768 __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j));
769 v_dx = _mm_max_epi16(v_dx, _mm_sub_epi16(v_zero, v_dx));
770 v_dy = _mm_max_epi16(v_dy, _mm_sub_epi16(v_zero, v_dy));
771
772 __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx, v_zero), _mm_unpacklo_epi16(v_dy, v_zero));
773 _mm_storeu_si128((__m128i *)(_norm + j), v_norm);
774
775 v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx, v_zero), _mm_unpackhi_epi16(v_dy, v_zero));
776 _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm);
777 }
778 }
779 #elif CV_NEON
780 for ( ; j <= width - 8; j += 8)
781 {
782 int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
783 vst1q_s32(_norm + j, vaddq_s32(vabsq_s32(vmovl_s16(vget_low_s16(v_dx))),
784 vabsq_s32(vmovl_s16(vget_low_s16(v_dy)))));
785 vst1q_s32(_norm + j + 4, vaddq_s32(vabsq_s32(vmovl_s16(vget_high_s16(v_dx))),
786 vabsq_s32(vmovl_s16(vget_high_s16(v_dy)))));
787 }
788 #endif
789 for ( ; j < width; ++j)
790 _norm[j] = std::abs(int(_dx[j])) + std::abs(int(_dy[j]));
791 }
792 else
793 {
794 int j = 0, width = src.cols * cn;
795 #if CV_SSE2
796 if (haveSSE2)
797 {
798 for ( ; j <= width - 8; j += 8)
799 {
800 __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j));
801 __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j));
802
803 __m128i v_dx_ml = _mm_mullo_epi16(v_dx, v_dx), v_dx_mh = _mm_mulhi_epi16(v_dx, v_dx);
804 __m128i v_dy_ml = _mm_mullo_epi16(v_dy, v_dy), v_dy_mh = _mm_mulhi_epi16(v_dy, v_dy);
805
806 __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx_ml, v_dx_mh), _mm_unpacklo_epi16(v_dy_ml, v_dy_mh));
807 _mm_storeu_si128((__m128i *)(_norm + j), v_norm);
808
809 v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx_ml, v_dx_mh), _mm_unpackhi_epi16(v_dy_ml, v_dy_mh));
810 _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm);
811 }
812 }
813 #elif CV_NEON
814 for ( ; j <= width - 8; j += 8)
815 {
816 int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
817 int16x4_t v_dxp = vget_low_s16(v_dx), v_dyp = vget_low_s16(v_dy);
818 int32x4_t v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
819 vst1q_s32(_norm + j, v_dst);
820
821 v_dxp = vget_high_s16(v_dx), v_dyp = vget_high_s16(v_dy);
822 v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
823 vst1q_s32(_norm + j + 4, v_dst);
824 }
825 #endif
826 for ( ; j < width; ++j)
827 _norm[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j];
828 }
829
830 if (cn > 1)
831 {
832 for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn)
833 {
834 int maxIdx = jn;
835 for(int k = 1; k < cn; ++k)
836 if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
837 _norm[j] = _norm[maxIdx];
838 _dx[j] = _dx[maxIdx];
839 _dy[j] = _dy[maxIdx];
840 }
841 }
842 _norm[-1] = _norm[src.cols] = 0;
843 }
844 else
845 memset(_norm-1, 0, /* cn* */mapstep*sizeof(int));
846
847 // at the very beginning we do not have a complete ring
848 // buffer of 3 magnitude rows for non-maxima suppression
849 if (i == 0)
850 continue;
851
852 uchar* _map = map + mapstep*i + 1;
853 _map[-1] = _map[src.cols] = 1;
854
855 int* _mag = mag_buf[1] + 1; // take the central row
856 ptrdiff_t magstep1 = mag_buf[2] - mag_buf[1];
857 ptrdiff_t magstep2 = mag_buf[0] - mag_buf[1];
858
859 const short* _x = dx.ptr<short>(i-1);
860 const short* _y = dy.ptr<short>(i-1);
861
862 if ((stack_top - stack_bottom) + src.cols > maxsize)
863 {
864 int sz = (int)(stack_top - stack_bottom);
865 maxsize = std::max(maxsize * 3/2, sz + src.cols);
866 stack.resize(maxsize);
867 stack_bottom = &stack[0];
868 stack_top = stack_bottom + sz;
869 }
870
871 int prev_flag = 0;
872 for (int j = 0; j < src.cols; j++)
873 {
874 #define CANNY_SHIFT 15
875 const int TG22 = (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
876
877 int m = _mag[j];
878
879 if (m > low)
880 {
881 int xs = _x[j];
882 int ys = _y[j];
883 int x = std::abs(xs);
884 int y = std::abs(ys) << CANNY_SHIFT;
885
886 int tg22x = x * TG22;
887
888 if (y < tg22x)
889 {
890 if (m > _mag[j-1] && m >= _mag[j+1]) goto __ocv_canny_push;
891 }
892 else
893 {
894 int tg67x = tg22x + (x << (CANNY_SHIFT+1));
895 if (y > tg67x)
896 {
897 if (m > _mag[j+magstep2] && m >= _mag[j+magstep1]) goto __ocv_canny_push;
898 }
899 else
900 {
901 int s = (xs ^ ys) < 0 ? -1 : 1;
902 if (m > _mag[j+magstep2-s] && m > _mag[j+magstep1+s]) goto __ocv_canny_push;
903 }
904 }
905 }
906 prev_flag = 0;
907 _map[j] = uchar(1);
908 continue;
909 __ocv_canny_push:
910 if (!prev_flag && m > high && _map[j-mapstep] != 2)
911 {
912 CANNY_PUSH(_map + j);
913 prev_flag = 1;
914 }
915 else
916 _map[j] = 0;
917 }
918
919 // scroll the ring buffer
920 _mag = mag_buf[0];
921 mag_buf[0] = mag_buf[1];
922 mag_buf[1] = mag_buf[2];
923 mag_buf[2] = _mag;
924 }
925
926 // now track the edges (hysteresis thresholding)
927 while (stack_top > stack_bottom)
928 {
929 uchar* m;
930 if ((stack_top - stack_bottom) + 8 > maxsize)
931 {
932 int sz = (int)(stack_top - stack_bottom);
933 maxsize = maxsize * 3/2;
934 stack.resize(maxsize);
935 stack_bottom = &stack[0];
936 stack_top = stack_bottom + sz;
937 }
938
939 CANNY_POP(m);
940
941 if (!m[-1]) CANNY_PUSH(m - 1);
942 if (!m[1]) CANNY_PUSH(m + 1);
943 if (!m[-mapstep-1]) CANNY_PUSH(m - mapstep - 1);
944 if (!m[-mapstep]) CANNY_PUSH(m - mapstep);
945 if (!m[-mapstep+1]) CANNY_PUSH(m - mapstep + 1);
946 if (!m[mapstep-1]) CANNY_PUSH(m + mapstep - 1);
947 if (!m[mapstep]) CANNY_PUSH(m + mapstep);
948 if (!m[mapstep+1]) CANNY_PUSH(m + mapstep + 1);
949 }
950
951 #endif
952
953 // the final pass, form the final image
954 const uchar* pmap = map + mapstep + 1;
955 uchar* pdst = dst.ptr();
956 for (int i = 0; i < src.rows; i++, pmap += mapstep, pdst += dst.step)
957 {
958 for (int j = 0; j < src.cols; j++)
959 pdst[j] = (uchar)-(pmap[j] >> 1);
960 }
961 }
962
cvCanny(const CvArr * image,CvArr * edges,double threshold1,double threshold2,int aperture_size)963 void cvCanny( const CvArr* image, CvArr* edges, double threshold1,
964 double threshold2, int aperture_size )
965 {
966 cv::Mat src = cv::cvarrToMat(image), dst = cv::cvarrToMat(edges);
967 CV_Assert( src.size == dst.size && src.depth() == CV_8U && dst.type() == CV_8U );
968
969 cv::Canny(src, dst, threshold1, threshold2, aperture_size & 255,
970 (aperture_size & CV_CANNY_L2_GRADIENT) != 0);
971 }
972
973 /* End of file. */
974