1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // Intel License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
14 // Copyright (C) 2014, Itseez, Inc, all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
22 //
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
26 //
27 // * The name of Intel Corporation may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 #include "precomp.hpp"
44 #include "opencl_kernels_imgproc.hpp"
45
46 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
47 static IppStatus sts = ippInit();
48 #endif
49
50 /****************************************************************************************\
51 Sobel & Scharr Derivative Filters
52 \****************************************************************************************/
53
54 namespace cv
55 {
56
getScharrKernels(OutputArray _kx,OutputArray _ky,int dx,int dy,bool normalize,int ktype)57 static void getScharrKernels( OutputArray _kx, OutputArray _ky,
58 int dx, int dy, bool normalize, int ktype )
59 {
60 const int ksize = 3;
61
62 CV_Assert( ktype == CV_32F || ktype == CV_64F );
63 _kx.create(ksize, 1, ktype, -1, true);
64 _ky.create(ksize, 1, ktype, -1, true);
65 Mat kx = _kx.getMat();
66 Mat ky = _ky.getMat();
67
68 CV_Assert( dx >= 0 && dy >= 0 && dx+dy == 1 );
69
70 for( int k = 0; k < 2; k++ )
71 {
72 Mat* kernel = k == 0 ? &kx : &ky;
73 int order = k == 0 ? dx : dy;
74 int kerI[3];
75
76 if( order == 0 )
77 kerI[0] = 3, kerI[1] = 10, kerI[2] = 3;
78 else if( order == 1 )
79 kerI[0] = -1, kerI[1] = 0, kerI[2] = 1;
80
81 Mat temp(kernel->rows, kernel->cols, CV_32S, &kerI[0]);
82 double scale = !normalize || order == 1 ? 1. : 1./32;
83 temp.convertTo(*kernel, ktype, scale);
84 }
85 }
86
87
/* Computes the two 1D kernels (kx applied along x, ky along y) of a separable
 * Sobel derivative filter with aperture _ksize and derivative orders dx / dy.
 * An aperture of 1 is promoted to 3 taps for any differentiated direction,
 * since differencing needs at least 3 samples. When 'normalize' is set each
 * kernel is scaled by 2^-(ksize-order-1) so the gain is aperture-independent.
 * ktype must be CV_32F or CV_64F.
 */
static void getSobelKernels( OutputArray _kx, OutputArray _ky,
                             int dx, int dy, int _ksize, bool normalize, int ktype )
{
    int i, j, ksizeX = _ksize, ksizeY = _ksize;
    // ksize == 1 is only meaningful for a pure smoothing direction.
    if( ksizeX == 1 && dx > 0 )
        ksizeX = 3;
    if( ksizeY == 1 && dy > 0 )
        ksizeY = 3;

    CV_Assert( ktype == CV_32F || ktype == CV_64F );

    _kx.create(ksizeX, 1, ktype, -1, true);
    _ky.create(ksizeY, 1, ktype, -1, true);
    Mat kx = _kx.getMat();
    Mat ky = _ky.getMat();

    if( _ksize % 2 == 0 || _ksize > 31 )
        CV_Error( CV_StsOutOfRange, "The kernel size must be odd and not larger than 31" );
    // One extra slot: the recurrences below shift values through index j,
    // reading kerI[j] one past the final tap.
    std::vector<int> kerI(std::max(ksizeX, ksizeY) + 1);

    CV_Assert( dx >= 0 && dy >= 0 && dx+dy > 0 );

    for( int k = 0; k < 2; k++ )
    {
        Mat* kernel = k == 0 ? &kx : &ky;
        int order = k == 0 ? dx : dy;
        int ksize = k == 0 ? ksizeX : ksizeY;

        CV_Assert( ksize > order );

        if( ksize == 1 )
            kerI[0] = 1;
        else if( ksize == 3 )
        {
            // Classical hard-coded 3-tap kernels.
            if( order == 0 )
                kerI[0] = 1, kerI[1] = 2, kerI[2] = 1;      // smoothing
            else if( order == 1 )
                kerI[0] = -1, kerI[1] = 0, kerI[2] = 1;     // 1st derivative
            else
                kerI[0] = 1, kerI[1] = -2, kerI[2] = 1;     // 2nd derivative
        }
        else
        {
            // General case: start from [1], build a binomial (Pascal's
            // triangle) row by repeated convolution with [1 1], then apply
            // 'order' discrete differentiations by convolving with [-1 1].
            int oldval, newval;
            kerI[0] = 1;
            for( i = 0; i < ksize; i++ )
                kerI[i+1] = 0;

            // (ksize - order - 1) smoothing steps: kerI <- kerI * [1 1]
            for( i = 0; i < ksize - order - 1; i++ )
            {
                oldval = kerI[0];
                for( j = 1; j <= ksize; j++ )
                {
                    newval = kerI[j]+kerI[j-1];
                    kerI[j-1] = oldval;
                    oldval = newval;
                }
            }

            // 'order' differencing steps: kerI <- kerI * [-1 1]
            for( i = 0; i < order; i++ )
            {
                oldval = -kerI[0];
                for( j = 1; j <= ksize; j++ )
                {
                    newval = kerI[j-1] - kerI[j];
                    kerI[j-1] = oldval;
                    oldval = newval;
                }
            }
        }

        // Convert the integer taps into the requested float type, applying
        // the optional power-of-two normalization factor.
        Mat temp(kernel->rows, kernel->cols, CV_32S, &kerI[0]);
        double scale = !normalize ? 1. : 1./(1 << (ksize-order-1));
        temp.convertTo(*kernel, ktype, scale);
    }
}
164
165 }
166
getDerivKernels(OutputArray kx,OutputArray ky,int dx,int dy,int ksize,bool normalize,int ktype)167 void cv::getDerivKernels( OutputArray kx, OutputArray ky, int dx, int dy,
168 int ksize, bool normalize, int ktype )
169 {
170 if( ksize <= 0 )
171 getScharrKernels( kx, ky, dx, dy, normalize, ktype );
172 else
173 getSobelKernels( kx, ky, dx, dy, ksize, normalize, ktype );
174 }
175
176
createDerivFilter(int srcType,int dstType,int dx,int dy,int ksize,int borderType)177 cv::Ptr<cv::FilterEngine> cv::createDerivFilter(int srcType, int dstType,
178 int dx, int dy, int ksize, int borderType )
179 {
180 Mat kx, ky;
181 getDerivKernels( kx, ky, dx, dy, ksize, false, CV_32F );
182 return createSeparableLinearFilter(srcType, dstType,
183 kx, ky, Point(-1,-1), 0, borderType );
184 }
185
186 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
187
188 #define IPP_RETURN_ERROR {setIppErrorStatus(); return false;}
189
190 namespace cv
191 {
192 #if IPP_VERSION_X100 >= 801
IPPDerivScharr(InputArray _src,OutputArray _dst,int ddepth,int dx,int dy,double scale,double delta,int borderType)193 static bool IPPDerivScharr(InputArray _src, OutputArray _dst, int ddepth, int dx, int dy, double scale, double delta, int borderType)
194 {
195 if ((0 > dx) || (0 > dy) || (1 != dx + dy))
196 return false;
197 if (fabs(delta) > FLT_EPSILON)
198 return false;
199
200 IppiBorderType ippiBorderType = ippiGetBorderType(borderType & (~BORDER_ISOLATED));
201 if ((int)ippiBorderType < 0)
202 return false;
203
204 int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype);
205 if (ddepth < 0)
206 ddepth = sdepth;
207 int dtype = CV_MAKETYPE(ddepth, cn);
208
209 Mat src = _src.getMat();
210 if (0 == (BORDER_ISOLATED & borderType))
211 {
212 Size size; Point offset;
213 src.locateROI(size, offset);
214 if (0 < offset.x)
215 ippiBorderType = (IppiBorderType)(ippiBorderType | ippBorderInMemLeft);
216 if (0 < offset.y)
217 ippiBorderType = (IppiBorderType)(ippiBorderType | ippBorderInMemTop);
218 if (offset.x + src.cols < size.width)
219 ippiBorderType = (IppiBorderType)(ippiBorderType | ippBorderInMemRight);
220 if (offset.y + src.rows < size.height)
221 ippiBorderType = (IppiBorderType)(ippiBorderType | ippBorderInMemBottom);
222 }
223
224 bool horz = (0 == dx) && (1 == dy);
225 IppiSize roiSize = {src.cols, src.rows};
226
227 _dst.create( _src.size(), dtype);
228 Mat dst = _dst.getMat();
229 IppStatus sts = ippStsErr;
230 if ((CV_8U == stype) && (CV_16S == dtype))
231 {
232 int bufferSize = 0; Ipp8u *pBuffer;
233 if (horz)
234 {
235 if (0 > ippiFilterScharrHorizMaskBorderGetBufferSize(roiSize, ippMskSize3x3, ipp8u, ipp16s, 1, &bufferSize))
236 IPP_RETURN_ERROR
237 pBuffer = ippsMalloc_8u(bufferSize);
238 if (NULL == pBuffer)
239 IPP_RETURN_ERROR
240 sts = ippiFilterScharrHorizMaskBorder_8u16s_C1R(src.ptr(), (int)src.step, dst.ptr<Ipp16s>(), (int)dst.step, roiSize, ippMskSize3x3, ippiBorderType, 0, pBuffer);
241 }
242 else
243 {
244 if (0 > ippiFilterScharrVertMaskBorderGetBufferSize(roiSize, ippMskSize3x3, ipp8u, ipp16s, 1, &bufferSize))
245 IPP_RETURN_ERROR
246 pBuffer = ippsMalloc_8u(bufferSize);
247 if (NULL == pBuffer)
248 IPP_RETURN_ERROR
249 sts = ippiFilterScharrVertMaskBorder_8u16s_C1R(src.ptr(), (int)src.step, dst.ptr<Ipp16s>(), (int)dst.step, roiSize, ippMskSize3x3, ippiBorderType, 0, pBuffer);
250 }
251 ippsFree(pBuffer);
252 }
253 else if ((CV_16S == stype) && (CV_16S == dtype))
254 {
255 int bufferSize = 0; Ipp8u *pBuffer;
256 if (horz)
257 {
258 if (0 > ippiFilterScharrHorizMaskBorderGetBufferSize(roiSize, ippMskSize3x3, ipp16s, ipp16s, 1, &bufferSize))
259 IPP_RETURN_ERROR
260 pBuffer = ippsMalloc_8u(bufferSize);
261 if (NULL == pBuffer)
262 IPP_RETURN_ERROR
263 sts = ippiFilterScharrHorizMaskBorder_16s_C1R(src.ptr<Ipp16s>(), (int)src.step, dst.ptr<Ipp16s>(), (int)dst.step, roiSize, ippMskSize3x3, ippiBorderType, 0, pBuffer);
264 }
265 else
266 {
267 if (0 > ippiFilterScharrVertMaskBorderGetBufferSize(roiSize, ippMskSize3x3, ipp16s, ipp16s, 1, &bufferSize))
268 IPP_RETURN_ERROR
269 pBuffer = ippsMalloc_8u(bufferSize);
270 if (NULL == pBuffer)
271 IPP_RETURN_ERROR
272 sts = ippiFilterScharrVertMaskBorder_16s_C1R(src.ptr<Ipp16s>(), (int)src.step, dst.ptr<Ipp16s>(), (int)dst.step, roiSize, ippMskSize3x3, ippiBorderType, 0, pBuffer);
273 }
274 ippsFree(pBuffer);
275 }
276 else if ((CV_32F == stype) && (CV_32F == dtype))
277 {
278 int bufferSize = 0; Ipp8u *pBuffer;
279 if (horz)
280 {
281 if (0 > ippiFilterScharrHorizMaskBorderGetBufferSize(roiSize, ippMskSize3x3, ipp32f, ipp32f, 1, &bufferSize))
282 IPP_RETURN_ERROR
283 pBuffer = ippsMalloc_8u(bufferSize);
284 if (NULL == pBuffer)
285 IPP_RETURN_ERROR
286 sts = ippiFilterScharrHorizMaskBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step, dst.ptr<Ipp32f>(), (int)dst.step, roiSize, ippMskSize3x3, ippiBorderType, 0, pBuffer);
287 }
288 else
289 {
290 if (0 > ippiFilterScharrVertMaskBorderGetBufferSize(roiSize, ippMskSize3x3, ipp32f, ipp32f, 1, &bufferSize))
291 IPP_RETURN_ERROR
292 pBuffer = ippsMalloc_8u(bufferSize);
293 if (NULL == pBuffer)
294 IPP_RETURN_ERROR
295 sts = ippiFilterScharrVertMaskBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step, dst.ptr<Ipp32f>(), (int)dst.step, roiSize, ippMskSize3x3, ippiBorderType, 0, pBuffer);
296 }
297 ippsFree(pBuffer);
298 if (sts < 0)
299 IPP_RETURN_ERROR;
300
301 if (FLT_EPSILON < fabs(scale - 1.0))
302 sts = ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, roiSize);
303 }
304 return (0 <= sts);
305 }
306 #elif IPP_VERSION_X100 >= 700
// IPP-accelerated 3x3 Scharr first-derivative filter for older IPP
// (>= 7.0, pre-8.0.1 API). Supports only BORDER_REPLICATE, dx+dy == 1
// and delta == 0, on C1 8u->16s (scale == 1 only) or 32f->32f images.
// Returns false for every unsupported combination so the caller falls
// back to the generic separable-filter implementation.
static bool IPPDerivScharr(InputArray _src, OutputArray _dst, int ddepth, int dx, int dy, double scale, double delta, int borderType)
{
    if (BORDER_REPLICATE != borderType)
        return false;
    if ((0 > dx) || (0 > dy) || (1 != dx + dy))
        return false;
    if (fabs(delta) > FLT_EPSILON)
        return false;

    Mat src = _src.getMat(), dst = _dst.getMat();

    int bufSize = 0;
    cv::AutoBuffer<char> buffer;
    IppiSize roi = ippiSize(src.cols, src.rows);

    if( ddepth < 0 )
        ddepth = src.depth();

    // Usually a no-op: the caller has already created _dst with this type.
    dst.create( src.size(), CV_MAKETYPE(ddepth, src.channels()) );

    switch(src.type())
    {
    case CV_8UC1:
        {
            // This API generation has no integer rescaling step.
            if(scale != 1)
                return false;

            switch(dst.type())
            {
            case CV_16S:
                {
                    // NB: IPP's "Vert" filters differentiate along x,
                    // "Horiz" filters differentiate along y.
                    if ((dx == 1) && (dy == 0))
                    {
                        if (0 > ippiFilterScharrVertGetBufferSize_8u16s_C1R(roi,&bufSize))
                            return false;
                        buffer.allocate(bufSize);
                        return (0 <= ippiFilterScharrVertBorder_8u16s_C1R(src.ptr<Ipp8u>(), (int)src.step,
                            dst.ptr<Ipp16s>(), (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
                    }
                    if ((dx == 0) && (dy == 1))
                    {
                        if (0 > ippiFilterScharrHorizGetBufferSize_8u16s_C1R(roi,&bufSize))
                            return false;
                        buffer.allocate(bufSize);
                        return (0 <= ippiFilterScharrHorizBorder_8u16s_C1R(src.ptr<Ipp8u>(), (int)src.step,
                            dst.ptr<Ipp16s>(), (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer));
                    }
                    return false;
                }
            default:
                return false;
            }
        }
    case CV_32FC1:
        {
            switch(dst.type())
            {
            case CV_32FC1:
                {
                    if ((dx == 1) && (dy == 0))
                    {
                        if (0 > ippiFilterScharrVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize))
                            return false;
                        buffer.allocate(bufSize);

                        if (0 > ippiFilterScharrVertBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step,
                            dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(src.cols, src.rows),
                            ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
                        {
                            return false;
                        }

                        // Apply the scale in place on the float result.
                        if (scale != 1)
                            /* IPP is fast, so MulC produce very little perf degradation.*/
                            //ippiMulC_32f_C1IR((Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
                            ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
                        return true;
                    }
                    if ((dx == 0) && (dy == 1))
                    {
                        if (0 > ippiFilterScharrHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize))
                            return false;
                        buffer.allocate(bufSize);

                        if (0 > ippiFilterScharrHorizBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step,
                            dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(src.cols, src.rows),
                            ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
                            return false;

                        if (scale != 1)
                            ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
                        return true;
                    }
                    // Intentional fallthrough to "default: return false"
                    // for any other dx/dy combination.
                }
            default:
                return false;
            }
        }
    default:
        return false;
    }
}
409 #endif
410
// IPP-accelerated Sobel derivative for C1 images with 3x3 or 5x5 aperture,
// BORDER_REPLICATE and delta == 0. Supports 8u->16s (scale == 1 only) and
// 32f->32f (second derivatives only; first-derivative paths are disabled
// via "#if 0" below). Returns false for unsupported parameter combinations
// so the caller falls back to the generic implementation.
// Note: (IppiMaskSize)(ksize*10+ksize) maps 3 -> 33 (ippMskSize3x3) and
// 5 -> 55 (ippMskSize5x5).
static bool IPPDerivSobel(InputArray _src, OutputArray _dst, int ddepth, int dx, int dy, int ksize, double scale, double delta, int borderType)
{
    if ((borderType != BORDER_REPLICATE) || ((3 != ksize) && (5 != ksize)))
        return false;
    if (fabs(delta) > FLT_EPSILON)
        return false;
    if (1 != _src.channels())
        return false;

    int bufSize = 0;
    cv::AutoBuffer<char> buffer;
    Mat src = _src.getMat(), dst = _dst.getMat();
    if ( ddepth < 0 )
        ddepth = src.depth();

    if (src.type() == CV_8U && dst.type() == CV_16S && scale == 1)
    {
        // "NegVert" matches OpenCV's x-derivative sign convention;
        // plain "Vert" would produce the negated result.
        if ((dx == 1) && (dy == 0))
        {
            if (0 > ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
                IPP_RETURN_ERROR
            buffer.allocate(bufSize);

            if (0 > ippiFilterSobelNegVertBorder_8u16s_C1R(src.ptr<Ipp8u>(), (int)src.step,
                dst.ptr<Ipp16s>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
                ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
                IPP_RETURN_ERROR
            return true;
        }

        if ((dx == 0) && (dy == 1))
        {
            if (0 > ippiFilterSobelHorizGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
                IPP_RETURN_ERROR
            buffer.allocate(bufSize);

            if (0 > ippiFilterSobelHorizBorder_8u16s_C1R(src.ptr<Ipp8u>(), (int)src.step,
                dst.ptr<Ipp16s>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
                ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
                IPP_RETURN_ERROR
            return true;
        }

#if !defined(HAVE_IPP_ICV_ONLY)
        // Second derivatives are only available in the full IPP package.
        if ((dx == 2) && (dy == 0))
        {
            if (0 > ippiFilterSobelVertSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
                IPP_RETURN_ERROR
            buffer.allocate(bufSize);

            if (0 > ippiFilterSobelVertSecondBorder_8u16s_C1R(src.ptr<Ipp8u>(), (int)src.step,
                dst.ptr<Ipp16s>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
                ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
                IPP_RETURN_ERROR
            return true;
        }

        if ((dx == 0) && (dy == 2))
        {
            if (0 > ippiFilterSobelHorizSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
                IPP_RETURN_ERROR
            buffer.allocate(bufSize);

            if (0 > ippiFilterSobelHorizSecondBorder_8u16s_C1R(src.ptr<Ipp8u>(), (int)src.step,
                dst.ptr<Ipp16s>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
                ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
                IPP_RETURN_ERROR
            return true;
        }
#endif
    }

    if (src.type() == CV_32F && dst.type() == CV_32F)
    {
        // First-derivative 32f paths are disabled (kept for reference).
#if 0
        if ((dx == 1) && (dy == 0))
        {
            if (0 > ippiFilterSobelNegVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), &bufSize))
                IPP_RETURN_ERROR
            buffer.allocate(bufSize);

            if (0 > ippiFilterSobelNegVertBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step,
                dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
                ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
                IPP_RETURN_ERROR
            if(scale != 1)
                ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
            return true;
        }

        if ((dx == 0) && (dy == 1))
        {
            if (0 > ippiFilterSobelHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
                IPP_RETURN_ERROR
            buffer.allocate(bufSize);
            if (0 > ippiFilterSobelHorizBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step,
                dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
                ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
                IPP_RETURN_ERROR
            if(scale != 1)
                ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
            return true;
        }
#endif
#if !defined(HAVE_IPP_ICV_ONLY)
        if((dx == 2) && (dy == 0))
        {
            if (0 > ippiFilterSobelVertSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
                IPP_RETURN_ERROR
            buffer.allocate(bufSize);

            if (0 > ippiFilterSobelVertSecondBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step,
                dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
                ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
                IPP_RETURN_ERROR
            // Float path applies the scale in place after filtering.
            if(scale != 1)
                ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
            return true;
        }

        if((dx == 0) && (dy == 2))
        {
            if (0 > ippiFilterSobelHorizSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize))
                IPP_RETURN_ERROR
            buffer.allocate(bufSize);

            if (0 > ippiFilterSobelHorizSecondBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step,
                dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),
                ippBorderRepl, 0, (Ipp8u*)(char*)buffer))
                IPP_RETURN_ERROR

            if(scale != 1)
                ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows));
            return true;
        }
#endif
    }
    return false;
}
550
551 }
552
553 #endif
554
Sobel(InputArray _src,OutputArray _dst,int ddepth,int dx,int dy,int ksize,double scale,double delta,int borderType)555 void cv::Sobel( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
556 int ksize, double scale, double delta, int borderType )
557 {
558 int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype);
559 if (ddepth < 0)
560 ddepth = sdepth;
561 int dtype = CV_MAKE_TYPE(ddepth, cn);
562 _dst.create( _src.size(), dtype );
563
564 #ifdef HAVE_TEGRA_OPTIMIZATION
565 if (tegra::useTegra() && scale == 1.0 && delta == 0)
566 {
567 Mat src = _src.getMat(), dst = _dst.getMat();
568 if (ksize == 3 && tegra::sobel3x3(src, dst, dx, dy, borderType))
569 return;
570 if (ksize == -1 && tegra::scharr(src, dst, dx, dy, borderType))
571 return;
572 }
573 #endif
574
575 #ifdef HAVE_IPP
576 CV_IPP_CHECK()
577 {
578 if (ksize < 0)
579 {
580 if (IPPDerivScharr(_src, _dst, ddepth, dx, dy, scale, delta, borderType))
581 {
582 CV_IMPL_ADD(CV_IMPL_IPP);
583 return;
584 }
585 }
586 else if (0 < ksize)
587 {
588 if (IPPDerivSobel(_src, _dst, ddepth, dx, dy, ksize, scale, delta, borderType))
589 {
590 CV_IMPL_ADD(CV_IMPL_IPP);
591 return;
592 }
593 }
594 }
595 #endif
596 int ktype = std::max(CV_32F, std::max(ddepth, sdepth));
597
598 Mat kx, ky;
599 getDerivKernels( kx, ky, dx, dy, ksize, false, ktype );
600 if( scale != 1 )
601 {
602 // usually the smoothing part is the slowest to compute,
603 // so try to scale it instead of the faster differenciating part
604 if( dx == 0 )
605 kx *= scale;
606 else
607 ky *= scale;
608 }
609 sepFilter2D( _src, _dst, ddepth, kx, ky, Point(-1, -1), delta, borderType );
610 }
611
612
Scharr(InputArray _src,OutputArray _dst,int ddepth,int dx,int dy,double scale,double delta,int borderType)613 void cv::Scharr( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
614 double scale, double delta, int borderType )
615 {
616 int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype);
617 if (ddepth < 0)
618 ddepth = sdepth;
619 int dtype = CV_MAKETYPE(ddepth, cn);
620 _dst.create( _src.size(), dtype );
621
622 #ifdef HAVE_TEGRA_OPTIMIZATION
623 if (tegra::useTegra() && scale == 1.0 && delta == 0)
624 {
625 Mat src = _src.getMat(), dst = _dst.getMat();
626 if (tegra::scharr(src, dst, dx, dy, borderType))
627 return;
628 }
629 #endif
630
631 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
632 CV_IPP_CHECK()
633 {
634 if (IPPDerivScharr(_src, _dst, ddepth, dx, dy, scale, delta, borderType))
635 {
636 CV_IMPL_ADD(CV_IMPL_IPP);
637 return;
638 }
639 }
640 #endif
641 int ktype = std::max(CV_32F, std::max(ddepth, sdepth));
642
643 Mat kx, ky;
644 getScharrKernels( kx, ky, dx, dy, false, ktype );
645 if( scale != 1 )
646 {
647 // usually the smoothing part is the slowest to compute,
648 // so try to scale it instead of the faster differenciating part
649 if( dx == 0 )
650 kx *= scale;
651 else
652 ky *= scale;
653 }
654 sepFilter2D( _src, _dst, ddepth, kx, ky, Point(-1, -1), delta, borderType );
655 }
656
657 #ifdef HAVE_OPENCL
658
659 namespace cv {
660
661 #define LAPLACIAN_LOCAL_MEM(tileX, tileY, ksize, elsize) (((tileX) + 2 * (int)((ksize) / 2)) * (3 * (tileY) + 2 * (int)((ksize) / 2)) * elsize)
662
// OpenCL implementation of the Laplacian for apertures > 3: computes
// d2x + d2y from the separable kernel pair (kd = differentiating,
// ks = smoothing). Two strategies:
//   1) a fused, tiled "laplacian" kernel using local memory (preferred,
//      when border mode / sizes / local-memory limits allow it);
//   2) two sepFilter2D passes plus a "sumConvert" kernel that combines
//      d2x + d2y with scale and delta.
// Returns false if neither path can run so the caller falls back to CPU.
static bool ocl_Laplacian5(InputArray _src, OutputArray _dst,
                           const Mat & kd, const Mat & ks, double scale, double delta,
                           int borderType, int depth, int ddepth)
{
    const size_t tileSizeX = 16;
    const size_t tileSizeYmin = 8;

    const ocl::Device dev = ocl::Device::getDefault();

    int stype = _src.type();
    int sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), esz = CV_ELEM_SIZE(stype);

    bool doubleSupport = dev.doubleFPConfig() > 0;
    if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
        return false;

    // Kernels must be odd-length row vectors of equal size.
    Mat kernelX = kd.reshape(1, 1);
    if (kernelX.cols % 2 != 1)
        return false;
    Mat kernelY = ks.reshape(1, 1);
    if (kernelY.cols % 2 != 1)
        return false;
    CV_Assert(kernelX.cols == kernelY.cols);

    size_t wgs = dev.maxWorkGroupSize();
    size_t lmsz = dev.localMemSize();
    size_t src_step = _src.step(), src_offset = _src.offset();
    const size_t tileSizeYmax = wgs / tileSizeX;

    // workaround for Nvidia: 3 channel vector type takes 4*elem_size in local memory
    int loc_mem_cn = dev.vendorID() == ocl::Device::VENDOR_NVIDIA && cn == 3 ? 4 : cn;

    // Fused tiled path: requires an element-aligned ROI, a supported border
    // mode (reflect/wrap modes additionally need the image to be larger
    // than one tile plus kernel), and enough work-group size / local memory
    // for at least the minimum tile height.
    if (((src_offset % src_step) % esz == 0) &&
        (
         (borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE) ||
         ((borderType == BORDER_REFLECT || borderType == BORDER_WRAP || borderType == BORDER_REFLECT_101) &&
          (_src.cols() >= (int) (kernelX.cols + tileSizeX) && _src.rows() >= (int) (kernelY.cols + tileSizeYmax)))
        ) &&
        (tileSizeX * tileSizeYmin <= wgs) &&
        (LAPLACIAN_LOCAL_MEM(tileSizeX, tileSizeYmin, kernelX.cols, loc_mem_cn * 4) <= lmsz)
       )
    {
        Size size = _src.size(), wholeSize;
        Point origin;
        int dtype = CV_MAKE_TYPE(ddepth, cn);
        int wdepth = CV_32F;

        // Shrink the tile height until both the work-group size and the
        // local-memory budget are satisfied.
        size_t tileSizeY = tileSizeYmax;
        while ((tileSizeX * tileSizeY > wgs) || (LAPLACIAN_LOCAL_MEM(tileSizeX, tileSizeY, kernelX.cols, loc_mem_cn * 4) > lmsz))
        {
            tileSizeY /= 2;
        }
        size_t lt2[2] = { tileSizeX, tileSizeY};
        size_t gt2[2] = { lt2[0] * (1 + (size.width - 1) / lt2[0]), lt2[1] };

        char cvt[2][40];
        const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
                                           "BORDER_REFLECT_101" };

        // Bake kernel taps, type conversions and border mode into the
        // program build options.
        String opts = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUS=%d%s%s"
                                 " -D convertToWT=%s -D convertToDT=%s"
                                 " -D %s -D srcT1=%s -D dstT1=%s -D WT1=%s"
                                 " -D srcT=%s -D dstT=%s -D WT=%s"
                                 " -D CN=%d ",
                                 (int)lt2[0], (int)lt2[1], kernelX.cols / 2,
                                 ocl::kernelToStr(kernelX, wdepth, "KERNEL_MATRIX_X").c_str(),
                                 ocl::kernelToStr(kernelY, wdepth, "KERNEL_MATRIX_Y").c_str(),
                                 ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]),
                                 ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]),
                                 borderMap[borderType],
                                 ocl::typeToStr(sdepth), ocl::typeToStr(ddepth), ocl::typeToStr(wdepth),
                                 ocl::typeToStr(CV_MAKETYPE(sdepth, cn)),
                                 ocl::typeToStr(CV_MAKETYPE(ddepth, cn)),
                                 ocl::typeToStr(CV_MAKETYPE(wdepth, cn)),
                                 cn);

        ocl::Kernel k("laplacian", ocl::imgproc::laplacian5_oclsrc, opts);
        if (k.empty())
            return false;
        UMat src = _src.getUMat();
        _dst.create(size, dtype);
        UMat dst = _dst.getUMat();

        int src_offset_x = static_cast<int>((src_offset % src_step) / esz);
        int src_offset_y = static_cast<int>(src_offset / src_step);

        src.locateROI(wholeSize, origin);

        k.args(ocl::KernelArg::PtrReadOnly(src), (int)src_step, src_offset_x, src_offset_y,
               wholeSize.height, wholeSize.width, ocl::KernelArg::WriteOnly(dst),
               static_cast<float>(scale), static_cast<float>(delta));

        return k.run(2, gt2, lt2, false);
    }
    // Fallback path: decide whether scale/delta can stay integer, pick the
    // working depth accordingly.
    int iscale = cvRound(scale), idelta = cvRound(delta);
    bool floatCoeff = std::fabs(delta - idelta) > DBL_EPSILON || std::fabs(scale - iscale) > DBL_EPSILON;
    int wdepth = std::max(depth, floatCoeff ? CV_32F : CV_32S), kercn = 1;

    if (!doubleSupport && wdepth == CV_64F)
        return false;

    char cvt[2][40];
    ocl::Kernel k("sumConvert", ocl::imgproc::laplacian5_oclsrc,
                  format("-D ONLY_SUM_CONVERT "
                         "-D srcT=%s -D WT=%s -D dstT=%s -D coeffT=%s -D wdepth=%d "
                         "-D convertToWT=%s -D convertToDT=%s%s",
                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
                         ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)),
                         ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
                         ocl::typeToStr(wdepth), wdepth,
                         ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]),
                         ocl::convertTypeStr(wdepth, ddepth, kercn, cvt[1]),
                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
    if (k.empty())
        return false;

    // d2x: second derivative along x; d2y: second derivative along y.
    UMat d2x, d2y;
    sepFilter2D(_src, d2x, depth, kd, ks, Point(-1, -1), 0, borderType);
    sepFilter2D(_src, d2y, depth, ks, kd, Point(-1, -1), 0, borderType);

    UMat dst = _dst.getUMat();

    ocl::KernelArg d2xarg = ocl::KernelArg::ReadOnlyNoSize(d2x),
            d2yarg = ocl::KernelArg::ReadOnlyNoSize(d2y),
            dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);

    if (wdepth >= CV_32F)
        k.args(d2xarg, d2yarg, dstarg, (float)scale, (float)delta);
    else
        k.args(d2xarg, d2yarg, dstarg, iscale, idelta);

    size_t globalsize[] = { dst.cols * cn / kercn, dst.rows };
    return k.run(2, globalsize, NULL, false);
}
797
798 }
799
800 #endif
801
// Computes the Laplacian of _src into _dst. For ksize == 1 or 3 a fixed
// 3x3 aggregate kernel is applied via filter2D; for larger apertures the
// result is formed as d2I/dx2 + d2I/dy2 using a pair of separable Sobel
// second-derivative filters, processed stripe by stripe (or via OpenCL).
// IPP and Tegra fast paths are tried first when available.
void cv::Laplacian( InputArray _src, OutputArray _dst, int ddepth, int ksize,
                    double scale, double delta, int borderType )
{
    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype);
    // Negative ddepth means "same depth as the source".
    if (ddepth < 0)
        ddepth = sdepth;
    _dst.create( _src.size(), CV_MAKETYPE(ddepth, cn) );

#ifdef HAVE_IPP
    CV_IPP_CHECK()
    {
        // IPP path: 3x3/5x5 masks, 8u->16s or 32f->32f, full (non-submatrix)
        // images unless BORDER_ISOLATED, and not when OpenCL is preferred.
        if ((ksize == 3 || ksize == 5) && ((borderType & BORDER_ISOLATED) != 0 || !_src.isSubmatrix()) &&
            ((stype == CV_8UC1 && ddepth == CV_16S) || (ddepth == CV_32F && stype == CV_32FC1)) && !ocl::useOpenCL())
        {
            int iscale = saturate_cast<int>(scale), idelta = saturate_cast<int>(delta);
            bool floatScale = std::fabs(scale - iscale) > DBL_EPSILON, needScale = iscale != 1;
            bool floatDelta = std::fabs(delta - idelta) > DBL_EPSILON, needDelta = delta != 0;
            int borderTypeNI = borderType & ~BORDER_ISOLATED;
            Mat src = _src.getMat(), dst = _dst.getMat();

            // IPP cannot filter in place, so require distinct buffers.
            if (src.data != dst.data)
            {
                Ipp32s bufsize;
                IppStatus status = (IppStatus)-1;
                IppiSize roisize = { src.cols, src.rows };
                IppiMaskSize masksize = ksize == 3 ? ippMskSize3x3 : ippMskSize5x5;
                IppiBorderType borderTypeIpp = ippiGetBorderType(borderTypeNI);

// Runs the type-specific IPP Laplacian with a scratch buffer; leaves the
// outcome in 'status' (stays -1 if the border type or buffer query fails).
#define IPP_FILTER_LAPLACIAN(ippsrctype, ippdsttype, ippfavor) \
    do \
    { \
        if (borderTypeIpp >= 0 && ippiFilterLaplacianGetBufferSize_##ippfavor##_C1R(roisize, masksize, &bufsize) >= 0) \
        { \
            Ipp8u * buffer = ippsMalloc_8u(bufsize); \
            status = ippiFilterLaplacianBorder_##ippfavor##_C1R(src.ptr<ippsrctype>(), (int)src.step, dst.ptr<ippdsttype>(), \
                                                                (int)dst.step, roisize, masksize, borderTypeIpp, 0, buffer); \
            ippsFree(buffer); \
        } \
    } while ((void)0, 0)

                CV_SUPPRESS_DEPRECATED_START
                if (sdepth == CV_8U && ddepth == CV_16S && !floatScale && !floatDelta)
                {
                    IPP_FILTER_LAPLACIAN(Ipp8u, Ipp16s, 8u16s);

                    // Apply integer scale/delta as separate in-place passes.
                    if (needScale && status >= 0)
                        status = ippiMulC_16s_C1IRSfs((Ipp16s)iscale, dst.ptr<Ipp16s>(), (int)dst.step, roisize, 0);
                    if (needDelta && status >= 0)
                        status = ippiAddC_16s_C1IRSfs((Ipp16s)idelta, dst.ptr<Ipp16s>(), (int)dst.step, roisize, 0);
                }
                else if (sdepth == CV_32F && ddepth == CV_32F)
                {
                    IPP_FILTER_LAPLACIAN(Ipp32f, Ipp32f, 32f);

                    if (needScale && status >= 0)
                        status = ippiMulC_32f_C1IR((Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, roisize);
                    if (needDelta && status >= 0)
                        status = ippiAddC_32f_C1IR((Ipp32f)delta, dst.ptr<Ipp32f>(), (int)dst.step, roisize);
                }
                CV_SUPPRESS_DEPRECATED_END

                if (status >= 0)
                {
                    CV_IMPL_ADD(CV_IMPL_IPP);
                    return;
                }
                setIppErrorStatus();
            }
        }
#undef IPP_FILTER_LAPLACIAN
    }
#endif

#ifdef HAVE_TEGRA_OPTIMIZATION
    if (tegra::useTegra() && scale == 1.0 && delta == 0)
    {
        Mat src = _src.getMat(), dst = _dst.getMat();
        if (ksize == 1 && tegra::laplace1(src, dst, borderType))
            return;
        if (ksize == 3 && tegra::laplace3(src, dst, borderType))
            return;
        if (ksize == 5 && tegra::laplace5(src, dst, borderType))
            return;
    }
#endif

    if( ksize == 1 || ksize == 3 )
    {
        // Fixed aggregate kernels: the standard 4-neighbor Laplacian (ksize
        // 1) and the 3x3 Sobel-based variant (ksize 3).
        float K[2][9] =
        {
            { 0, 1, 0, 1, -4, 1, 0, 1, 0 },
            { 2, 0, 2, 0, -8, 0, 2, 0, 2 }
        };
        Mat kernel(3, 3, CV_32F, K[ksize == 3]);
        if( scale != 1 )
            kernel *= scale;
        filter2D( _src, _dst, ddepth, kernel, Point(-1, -1), delta, borderType );
    }
    else
    {
        // Large apertures: Laplacian = d2I/dx2 + d2I/dy2 from separable
        // Sobel second-derivative kernels (kd = differencing, ks = smoothing).
        int ktype = std::max(CV_32F, std::max(ddepth, sdepth));
        int wdepth = sdepth == CV_8U && ksize <= 5 ? CV_16S : sdepth <= CV_32F ? CV_32F : CV_64F;
        int wtype = CV_MAKETYPE(wdepth, cn);
        Mat kd, ks;
        getSobelKernels( kd, ks, 2, 0, ksize, false, ktype );

        CV_OCL_RUN(_dst.isUMat(),
                   ocl_Laplacian5(_src, _dst, kd, ks, scale,
                                  delta, borderType, wdepth, ddepth))

        // Process the image in horizontal stripes to bound the working
        // buffers to roughly STRIPE_SIZE bytes.
        const size_t STRIPE_SIZE = 1 << 14;
        Ptr<FilterEngine> fx = createSeparableLinearFilter(stype,
            wtype, kd, ks, Point(-1,-1), 0, borderType, borderType, Scalar() );
        Ptr<FilterEngine> fy = createSeparableLinearFilter(stype,
            wtype, ks, kd, Point(-1,-1), 0, borderType, borderType, Scalar() );

        Mat src = _src.getMat(), dst = _dst.getMat();
        int y = fx->start(src), dsty = 0, dy = 0;
        fy->start(src);
        const uchar* sptr = src.ptr(y);

        int dy0 = std::min(std::max((int)(STRIPE_SIZE/(CV_ELEM_SIZE(stype)*src.cols)), 1), src.rows);
        Mat d2x( dy0 + kd.rows - 1, src.cols, wtype );
        Mat d2y( dy0 + kd.rows - 1, src.cols, wtype );

        for( ; dsty < src.rows; sptr += dy0*src.step, dsty += dy )
        {
            fx->proceed( sptr, (int)src.step, dy0, d2x.ptr(), (int)d2x.step );
            dy = fy->proceed( sptr, (int)src.step, dy0, d2y.ptr(), (int)d2y.step );
            if( dy > 0 )
            {
                Mat dstripe = dst.rowRange(dsty, dsty + dy);
                d2x.rows = d2y.rows = dy; // modify the headers, which should work
                d2x += d2y;
                d2x.convertTo( dstripe, ddepth, scale, delta );
            }
        }
    }
}
941
942 /////////////////////////////////////////////////////////////////////////////////////////
943
944 CV_IMPL void
cvSobel(const void * srcarr,void * dstarr,int dx,int dy,int aperture_size)945 cvSobel( const void* srcarr, void* dstarr, int dx, int dy, int aperture_size )
946 {
947 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
948
949 CV_Assert( src.size() == dst.size() && src.channels() == dst.channels() );
950
951 cv::Sobel( src, dst, dst.depth(), dx, dy, aperture_size, 1, 0, cv::BORDER_REPLICATE );
952 if( CV_IS_IMAGE(srcarr) && ((IplImage*)srcarr)->origin && dy % 2 != 0 )
953 dst *= -1;
954 }
955
956
957 CV_IMPL void
cvLaplace(const void * srcarr,void * dstarr,int aperture_size)958 cvLaplace( const void* srcarr, void* dstarr, int aperture_size )
959 {
960 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
961
962 CV_Assert( src.size() == dst.size() && src.channels() == dst.channels() );
963
964 cv::Laplacian( src, dst, dst.depth(), aperture_size, 1, 0, cv::BORDER_REPLICATE );
965 }
966
967 /* End of file. */
968