1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
22 //
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
26 //
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 #include "precomp.hpp"
44
45 using namespace cv;
46 using namespace cv::cuda;
47
48 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
49
createBoxFilter(int,int,Size,Point,int,Scalar)50 Ptr<Filter> cv::cuda::createBoxFilter(int, int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
51
createLinearFilter(int,int,InputArray,Point,int,Scalar)52 Ptr<Filter> cv::cuda::createLinearFilter(int, int, InputArray, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
53
createLaplacianFilter(int,int,int,double,int,Scalar)54 Ptr<Filter> cv::cuda::createLaplacianFilter(int, int, int, double, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
55
createSeparableLinearFilter(int,int,InputArray,InputArray,Point,int,int)56 Ptr<Filter> cv::cuda::createSeparableLinearFilter(int, int, InputArray, InputArray, Point, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
57
createDerivFilter(int,int,int,int,int,bool,double,int,int)58 Ptr<Filter> cv::cuda::createDerivFilter(int, int, int, int, int, bool, double, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
createSobelFilter(int,int,int,int,int,double,int,int)59 Ptr<Filter> cv::cuda::createSobelFilter(int, int, int, int, int, double, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
createScharrFilter(int,int,int,int,double,int,int)60 Ptr<Filter> cv::cuda::createScharrFilter(int, int, int, int, double, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
61
createGaussianFilter(int,int,Size,double,double,int,int)62 Ptr<Filter> cv::cuda::createGaussianFilter(int, int, Size, double, double, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
63
createMorphologyFilter(int,int,InputArray,Point,int)64 Ptr<Filter> cv::cuda::createMorphologyFilter(int, int, InputArray, Point, int) { throw_no_cuda(); return Ptr<Filter>(); }
65
createBoxMaxFilter(int,Size,Point,int,Scalar)66 Ptr<Filter> cv::cuda::createBoxMaxFilter(int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
createBoxMinFilter(int,Size,Point,int,Scalar)67 Ptr<Filter> cv::cuda::createBoxMinFilter(int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
68
createRowSumFilter(int,int,int,int,int,Scalar)69 Ptr<Filter> cv::cuda::createRowSumFilter(int, int, int, int, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
createColumnSumFilter(int,int,int,int,int,Scalar)70 Ptr<Filter> cv::cuda::createColumnSumFilter(int, int, int, int, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
71
72 #else
73
74 namespace
75 {
normalizeAnchor(int & anchor,int ksize)76 void normalizeAnchor(int& anchor, int ksize)
77 {
78 if (anchor < 0)
79 anchor = ksize >> 1;
80
81 CV_Assert( 0 <= anchor && anchor < ksize );
82 }
83
normalizeAnchor(Point & anchor,Size ksize)84 void normalizeAnchor(Point& anchor, Size ksize)
85 {
86 normalizeAnchor(anchor.x, ksize.width);
87 normalizeAnchor(anchor.y, ksize.height);
88 }
89 }
90
91 ////////////////////////////////////////////////////////////////////////////////////////////////////
92 // Box Filter
93
94 namespace
95 {
96 class NPPBoxFilter : public Filter
97 {
98 public:
99 NPPBoxFilter(int srcType, int dstType, Size ksize, Point anchor, int borderMode, Scalar borderVal);
100
101 void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
102
103 private:
104 typedef NppStatus (*nppFilterBox_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep,
105 NppiSize oSizeROI, NppiSize oMaskSize, NppiPoint oAnchor);
106
107 Size ksize_;
108 Point anchor_;
109 int type_;
110 nppFilterBox_t func_;
111 int borderMode_;
112 Scalar borderVal_;
113 GpuMat srcBorder_;
114 };
115
NPPBoxFilter(int srcType,int dstType,Size ksize,Point anchor,int borderMode,Scalar borderVal)116 NPPBoxFilter::NPPBoxFilter(int srcType, int dstType, Size ksize, Point anchor, int borderMode, Scalar borderVal) :
117 ksize_(ksize), anchor_(anchor), type_(srcType), borderMode_(borderMode), borderVal_(borderVal)
118 {
119 static const nppFilterBox_t funcs[] = {0, nppiFilterBox_8u_C1R, 0, 0, nppiFilterBox_8u_C4R};
120
121 CV_Assert( srcType == CV_8UC1 || srcType == CV_8UC4 );
122 CV_Assert( dstType == srcType );
123
124 normalizeAnchor(anchor_, ksize);
125
126 func_ = funcs[CV_MAT_CN(srcType)];
127 }
128
apply(InputArray _src,OutputArray _dst,Stream & _stream)129 void NPPBoxFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream)
130 {
131 GpuMat src = _src.getGpuMat();
132 CV_Assert( src.type() == type_ );
133
134 cuda::copyMakeBorder(src, srcBorder_, ksize_.height, ksize_.height, ksize_.width, ksize_.width, borderMode_, borderVal_, _stream);
135
136 _dst.create(src.size(), src.type());
137 GpuMat dst = _dst.getGpuMat();
138
139 GpuMat srcRoi = srcBorder_(Rect(ksize_.width, ksize_.height, src.cols, src.rows));
140
141 cudaStream_t stream = StreamAccessor::getStream(_stream);
142 NppStreamHandler h(stream);
143
144 NppiSize oSizeROI;
145 oSizeROI.width = src.cols;
146 oSizeROI.height = src.rows;
147
148 NppiSize oMaskSize;
149 oMaskSize.height = ksize_.height;
150 oMaskSize.width = ksize_.width;
151
152 NppiPoint oAnchor;
153 oAnchor.x = anchor_.x;
154 oAnchor.y = anchor_.y;
155
156 nppSafeCall( func_(srcRoi.ptr<Npp8u>(), static_cast<int>(srcRoi.step),
157 dst.ptr<Npp8u>(), static_cast<int>(dst.step),
158 oSizeROI, oMaskSize, oAnchor) );
159
160 if (stream == 0)
161 cudaSafeCall( cudaDeviceSynchronize() );
162 }
163 }
164
createBoxFilter(int srcType,int dstType,Size ksize,Point anchor,int borderMode,Scalar borderVal)165 Ptr<Filter> cv::cuda::createBoxFilter(int srcType, int dstType, Size ksize, Point anchor, int borderMode, Scalar borderVal)
166 {
167 if (dstType < 0)
168 dstType = srcType;
169
170 dstType = CV_MAKE_TYPE(CV_MAT_DEPTH(dstType), CV_MAT_CN(srcType));
171
172 return makePtr<NPPBoxFilter>(srcType, dstType, ksize, anchor, borderMode, borderVal);
173 }
174
175 ////////////////////////////////////////////////////////////////////////////////////////////////////
176 // Linear Filter
177
178 namespace cv { namespace cuda { namespace device
179 {
180 template <typename T, typename D>
181 void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel,
182 int kWidth, int kHeight, int anchorX, int anchorY,
183 int borderMode, const float* borderValue, cudaStream_t stream);
184 }}}
185
186 namespace
187 {
188 class LinearFilter : public Filter
189 {
190 public:
191 LinearFilter(int srcType, int dstType, InputArray kernel, Point anchor, int borderMode, Scalar borderVal);
192
193 void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
194
195 private:
196 typedef void (*filter2D_t)(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel,
197 int kWidth, int kHeight, int anchorX, int anchorY,
198 int borderMode, const float* borderValue, cudaStream_t stream);
199
200 GpuMat kernel_;
201 Point anchor_;
202 int type_;
203 filter2D_t func_;
204 int borderMode_;
205 Scalar_<float> borderVal_;
206 };
207
LinearFilter(int srcType,int dstType,InputArray _kernel,Point anchor,int borderMode,Scalar borderVal)208 LinearFilter::LinearFilter(int srcType, int dstType, InputArray _kernel, Point anchor, int borderMode, Scalar borderVal) :
209 anchor_(anchor), type_(srcType), borderMode_(borderMode), borderVal_(borderVal)
210 {
211 const int sdepth = CV_MAT_DEPTH(srcType);
212 const int scn = CV_MAT_CN(srcType);
213
214 Mat kernel = _kernel.getMat();
215
216 CV_Assert( sdepth == CV_8U || sdepth == CV_16U || sdepth == CV_32F );
217 CV_Assert( scn == 1 || scn == 4 );
218 CV_Assert( dstType == srcType );
219 CV_Assert( kernel.channels() == 1 );
220 CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );
221
222 Mat kernel32F;
223 kernel.convertTo(kernel32F, CV_32F);
224
225 kernel_ = cuda::createContinuous(kernel.size(), CV_32FC1);
226 kernel_.upload(kernel32F);
227
228 normalizeAnchor(anchor_, kernel.size());
229
230 switch (srcType)
231 {
232 case CV_8UC1:
233 func_ = cv::cuda::device::filter2D<uchar, uchar>;
234 break;
235 case CV_8UC4:
236 func_ = cv::cuda::device::filter2D<uchar4, uchar4>;
237 break;
238 case CV_16UC1:
239 func_ = cv::cuda::device::filter2D<ushort, ushort>;
240 break;
241 case CV_16UC4:
242 func_ = cv::cuda::device::filter2D<ushort4, ushort4>;
243 break;
244 case CV_32FC1:
245 func_ = cv::cuda::device::filter2D<float, float>;
246 break;
247 case CV_32FC4:
248 func_ = cv::cuda::device::filter2D<float4, float4>;
249 break;
250 }
251 }
252
apply(InputArray _src,OutputArray _dst,Stream & _stream)253 void LinearFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream)
254 {
255 GpuMat src = _src.getGpuMat();
256 CV_Assert( src.type() == type_ );
257
258 _dst.create(src.size(), src.type());
259 GpuMat dst = _dst.getGpuMat();
260
261 Point ofs;
262 Size wholeSize;
263 src.locateROI(wholeSize, ofs);
264
265 GpuMat srcWhole(wholeSize, src.type(), src.datastart);
266
267 func_(srcWhole, ofs.x, ofs.y, dst, kernel_.ptr<float>(),
268 kernel_.cols, kernel_.rows, anchor_.x, anchor_.y,
269 borderMode_, borderVal_.val, StreamAccessor::getStream(_stream));
270 }
271 }
272
createLinearFilter(int srcType,int dstType,InputArray kernel,Point anchor,int borderMode,Scalar borderVal)273 Ptr<Filter> cv::cuda::createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor, int borderMode, Scalar borderVal)
274 {
275 if (dstType < 0)
276 dstType = srcType;
277
278 dstType = CV_MAKE_TYPE(CV_MAT_DEPTH(dstType), CV_MAT_CN(srcType));
279
280 return makePtr<LinearFilter>(srcType, dstType, kernel, anchor, borderMode, borderVal);
281 }
282
283 ////////////////////////////////////////////////////////////////////////////////////////////////////
284 // Laplacian Filter
285
createLaplacianFilter(int srcType,int dstType,int ksize,double scale,int borderMode,Scalar borderVal)286 Ptr<Filter> cv::cuda::createLaplacianFilter(int srcType, int dstType, int ksize, double scale, int borderMode, Scalar borderVal)
287 {
288 CV_Assert( ksize == 1 || ksize == 3 );
289
290 static const float K[2][9] =
291 {
292 {0.0f, 1.0f, 0.0f, 1.0f, -4.0f, 1.0f, 0.0f, 1.0f, 0.0f},
293 {2.0f, 0.0f, 2.0f, 0.0f, -8.0f, 0.0f, 2.0f, 0.0f, 2.0f}
294 };
295
296 Mat kernel(3, 3, CV_32FC1, (void*)K[ksize == 3]);
297 if (scale != 1)
298 kernel *= scale;
299
300 return cuda::createLinearFilter(srcType, dstType, kernel, Point(-1,-1), borderMode, borderVal);
301 }
302
303 ////////////////////////////////////////////////////////////////////////////////////////////////////
304 // Separable Linear Filter
305
306 namespace filter
307 {
308 template <typename T, typename D>
309 void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
310
311 template <typename T, typename D>
312 void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
313 }
314
315 namespace
316 {
317 class SeparableLinearFilter : public Filter
318 {
319 public:
320 SeparableLinearFilter(int srcType, int dstType,
321 InputArray rowKernel, InputArray columnKernel,
322 Point anchor, int rowBorderMode, int columnBorderMode);
323
324 void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
325
326 private:
327 typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
328
329 int srcType_, bufType_, dstType_;
330 GpuMat rowKernel_, columnKernel_;
331 func_t rowFilter_, columnFilter_;
332 Point anchor_;
333 int rowBorderMode_, columnBorderMode_;
334
335 GpuMat buf_;
336 };
337
SeparableLinearFilter(int srcType,int dstType,InputArray _rowKernel,InputArray _columnKernel,Point anchor,int rowBorderMode,int columnBorderMode)338 SeparableLinearFilter::SeparableLinearFilter(int srcType, int dstType,
339 InputArray _rowKernel, InputArray _columnKernel,
340 Point anchor, int rowBorderMode, int columnBorderMode) :
341 srcType_(srcType), dstType_(dstType), anchor_(anchor), rowBorderMode_(rowBorderMode), columnBorderMode_(columnBorderMode)
342 {
343 static const func_t rowFilterFuncs[7][4] =
344 {
345 {filter::linearRow<uchar, float>, 0, filter::linearRow<uchar3, float3>, filter::linearRow<uchar4, float4>},
346 {0, 0, 0, 0},
347 {filter::linearRow<ushort, float>, 0, filter::linearRow<ushort3, float3>, filter::linearRow<ushort4, float4>},
348 {filter::linearRow<short, float>, 0, filter::linearRow<short3, float3>, filter::linearRow<short4, float4>},
349 {filter::linearRow<int, float>, 0, filter::linearRow<int3, float3>, filter::linearRow<int4, float4>},
350 {filter::linearRow<float, float>, 0, filter::linearRow<float3, float3>, filter::linearRow<float4, float4>},
351 {0, 0, 0, 0}
352 };
353
354 static const func_t columnFilterFuncs[7][4] =
355 {
356 {filter::linearColumn<float, uchar>, 0, filter::linearColumn<float3, uchar3>, filter::linearColumn<float4, uchar4>},
357 {0, 0, 0, 0},
358 {filter::linearColumn<float, ushort>, 0, filter::linearColumn<float3, ushort3>, filter::linearColumn<float4, ushort4>},
359 {filter::linearColumn<float, short>, 0, filter::linearColumn<float3, short3>, filter::linearColumn<float4, short4>},
360 {filter::linearColumn<float, int>, 0, filter::linearColumn<float3, int3>, filter::linearColumn<float4, int4>},
361 {filter::linearColumn<float, float>, 0, filter::linearColumn<float3, float3>, filter::linearColumn<float4, float4>},
362 {0, 0, 0, 0}
363 };
364
365 const int sdepth = CV_MAT_DEPTH(srcType);
366 const int cn = CV_MAT_CN(srcType);
367 const int ddepth = CV_MAT_DEPTH(dstType);
368
369 Mat rowKernel = _rowKernel.getMat();
370 Mat columnKernel = _columnKernel.getMat();
371
372 CV_Assert( sdepth <= CV_64F && cn <= 4 );
373 CV_Assert( rowKernel.channels() == 1 );
374 CV_Assert( columnKernel.channels() == 1 );
375 CV_Assert( rowBorderMode == BORDER_REFLECT101 || rowBorderMode == BORDER_REPLICATE || rowBorderMode == BORDER_CONSTANT || rowBorderMode == BORDER_REFLECT || rowBorderMode == BORDER_WRAP );
376 CV_Assert( columnBorderMode == BORDER_REFLECT101 || columnBorderMode == BORDER_REPLICATE || columnBorderMode == BORDER_CONSTANT || columnBorderMode == BORDER_REFLECT || columnBorderMode == BORDER_WRAP );
377
378 Mat kernel32F;
379
380 rowKernel.convertTo(kernel32F, CV_32F);
381 rowKernel_.upload(kernel32F.reshape(1, 1));
382
383 columnKernel.convertTo(kernel32F, CV_32F);
384 columnKernel_.upload(kernel32F.reshape(1, 1));
385
386 CV_Assert( rowKernel_.cols > 0 && rowKernel_.cols <= 32 );
387 CV_Assert( columnKernel_.cols > 0 && columnKernel_.cols <= 32 );
388
389 normalizeAnchor(anchor_.x, rowKernel_.cols);
390 normalizeAnchor(anchor_.y, columnKernel_.cols);
391
392 bufType_ = CV_MAKE_TYPE(CV_32F, cn);
393
394 rowFilter_ = rowFilterFuncs[sdepth][cn - 1];
395 CV_Assert( rowFilter_ != 0 );
396
397 columnFilter_ = columnFilterFuncs[ddepth][cn - 1];
398 CV_Assert( columnFilter_ != 0 );
399 }
400
apply(InputArray _src,OutputArray _dst,Stream & _stream)401 void SeparableLinearFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream)
402 {
403 GpuMat src = _src.getGpuMat();
404 CV_Assert( src.type() == srcType_ );
405
406 _dst.create(src.size(), dstType_);
407 GpuMat dst = _dst.getGpuMat();
408
409 ensureSizeIsEnough(src.size(), bufType_, buf_);
410
411 DeviceInfo devInfo;
412 const int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
413
414 cudaStream_t stream = StreamAccessor::getStream(_stream);
415
416 rowFilter_(src, buf_, rowKernel_.ptr<float>(), rowKernel_.cols, anchor_.x, rowBorderMode_, cc, stream);
417 columnFilter_(buf_, dst, columnKernel_.ptr<float>(), columnKernel_.cols, anchor_.y, columnBorderMode_, cc, stream);
418 }
419 }
420
createSeparableLinearFilter(int srcType,int dstType,InputArray rowKernel,InputArray columnKernel,Point anchor,int rowBorderMode,int columnBorderMode)421 Ptr<Filter> cv::cuda::createSeparableLinearFilter(int srcType, int dstType, InputArray rowKernel, InputArray columnKernel, Point anchor, int rowBorderMode, int columnBorderMode)
422 {
423 if (dstType < 0)
424 dstType = srcType;
425
426 dstType = CV_MAKE_TYPE(CV_MAT_DEPTH(dstType), CV_MAT_CN(srcType));
427
428 if (columnBorderMode < 0)
429 columnBorderMode = rowBorderMode;
430
431 return makePtr<SeparableLinearFilter>(srcType, dstType, rowKernel, columnKernel, anchor, rowBorderMode, columnBorderMode);
432 }
433
434 ////////////////////////////////////////////////////////////////////////////////////////////////////
435 // Deriv Filter
436
createDerivFilter(int srcType,int dstType,int dx,int dy,int ksize,bool normalize,double scale,int rowBorderMode,int columnBorderMode)437 Ptr<Filter> cv::cuda::createDerivFilter(int srcType, int dstType, int dx, int dy, int ksize, bool normalize, double scale, int rowBorderMode, int columnBorderMode)
438 {
439 Mat kx, ky;
440 getDerivKernels(kx, ky, dx, dy, ksize, normalize, CV_32F);
441
442 if (scale != 1)
443 {
444 // usually the smoothing part is the slowest to compute,
445 // so try to scale it instead of the faster differenciating part
446 if (dx == 0)
447 kx *= scale;
448 else
449 ky *= scale;
450 }
451
452 return cuda::createSeparableLinearFilter(srcType, dstType, kx, ky, Point(-1, -1), rowBorderMode, columnBorderMode);
453 }
454
createSobelFilter(int srcType,int dstType,int dx,int dy,int ksize,double scale,int rowBorderMode,int columnBorderMode)455 Ptr<Filter> cv::cuda::createSobelFilter(int srcType, int dstType, int dx, int dy, int ksize, double scale, int rowBorderMode, int columnBorderMode)
456 {
457 return cuda::createDerivFilter(srcType, dstType, dx, dy, ksize, false, scale, rowBorderMode, columnBorderMode);
458 }
459
createScharrFilter(int srcType,int dstType,int dx,int dy,double scale,int rowBorderMode,int columnBorderMode)460 Ptr<Filter> cv::cuda::createScharrFilter(int srcType, int dstType, int dx, int dy, double scale, int rowBorderMode, int columnBorderMode)
461 {
462 return cuda::createDerivFilter(srcType, dstType, dx, dy, -1, false, scale, rowBorderMode, columnBorderMode);
463 }
464
465 ////////////////////////////////////////////////////////////////////////////////////////////////////
466 // Gaussian Filter
467
createGaussianFilter(int srcType,int dstType,Size ksize,double sigma1,double sigma2,int rowBorderMode,int columnBorderMode)468 Ptr<Filter> cv::cuda::createGaussianFilter(int srcType, int dstType, Size ksize, double sigma1, double sigma2, int rowBorderMode, int columnBorderMode)
469 {
470 const int depth = CV_MAT_DEPTH(srcType);
471
472 if (sigma2 <= 0)
473 sigma2 = sigma1;
474
475 // automatic detection of kernel size from sigma
476 if (ksize.width <= 0 && sigma1 > 0)
477 ksize.width = cvRound(sigma1 * (depth == CV_8U ? 3 : 4)*2 + 1) | 1;
478 if (ksize.height <= 0 && sigma2 > 0)
479 ksize.height = cvRound(sigma2 * (depth == CV_8U ? 3 : 4)*2 + 1) | 1;
480
481 CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1 );
482
483 sigma1 = std::max(sigma1, 0.0);
484 sigma2 = std::max(sigma2, 0.0);
485
486 Mat kx = getGaussianKernel(ksize.width, sigma1, CV_32F);
487 Mat ky;
488 if (ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON)
489 ky = kx;
490 else
491 ky = getGaussianKernel(ksize.height, sigma2, CV_32F);
492
493 return createSeparableLinearFilter(srcType, dstType, kx, ky, Point(-1,-1), rowBorderMode, columnBorderMode);
494 }
495
496 ////////////////////////////////////////////////////////////////////////////////////////////////////
497 // Morphology Filter
498
499 namespace
500 {
501 class MorphologyFilter : public Filter
502 {
503 public:
504 MorphologyFilter(int op, int srcType, InputArray kernel, Point anchor, int iterations);
505
506 void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
507
508 private:
509 typedef NppStatus (*nppMorfFilter_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep, NppiSize oSizeROI,
510 const Npp8u* pMask, NppiSize oMaskSize, NppiPoint oAnchor);
511
512 int type_;
513 GpuMat kernel_;
514 Point anchor_;
515 int iters_;
516 nppMorfFilter_t func_;
517
518 GpuMat srcBorder_;
519 GpuMat buf_;
520 };
521
MorphologyFilter(int op,int srcType,InputArray _kernel,Point anchor,int iterations)522 MorphologyFilter::MorphologyFilter(int op, int srcType, InputArray _kernel, Point anchor, int iterations) :
523 type_(srcType), anchor_(anchor), iters_(iterations)
524 {
525 static const nppMorfFilter_t funcs[2][5] =
526 {
527 {0, nppiErode_8u_C1R, 0, 0, nppiErode_8u_C4R },
528 {0, nppiDilate_8u_C1R, 0, 0, nppiDilate_8u_C4R }
529 };
530
531 CV_Assert( op == MORPH_ERODE || op == MORPH_DILATE );
532 CV_Assert( srcType == CV_8UC1 || srcType == CV_8UC4 );
533
534 Mat kernel = _kernel.getMat();
535 Size ksize = !kernel.empty() ? _kernel.size() : Size(3, 3);
536
537 normalizeAnchor(anchor_, ksize);
538
539 if (kernel.empty())
540 {
541 kernel = getStructuringElement(MORPH_RECT, Size(1 + iters_ * 2, 1 + iters_ * 2));
542 anchor_ = Point(iters_, iters_);
543 iters_ = 1;
544 }
545 else if (iters_ > 1 && cv::countNonZero(kernel) == (int) kernel.total())
546 {
547 anchor_ = Point(anchor_.x * iters_, anchor_.y * iters_);
548 kernel = getStructuringElement(MORPH_RECT,
549 Size(ksize.width + (iters_ - 1) * (ksize.width - 1),
550 ksize.height + (iters_ - 1) * (ksize.height - 1)),
551 anchor_);
552 iters_ = 1;
553 }
554
555 CV_Assert( kernel.channels() == 1 );
556
557 Mat kernel8U;
558 kernel.convertTo(kernel8U, CV_8U);
559
560 kernel_ = cuda::createContinuous(kernel.size(), CV_8UC1);
561 kernel_.upload(kernel8U);
562
563 func_ = funcs[op][CV_MAT_CN(srcType)];
564 }
565
apply(InputArray _src,OutputArray _dst,Stream & _stream)566 void MorphologyFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream)
567 {
568 GpuMat src = _src.getGpuMat();
569 CV_Assert( src.type() == type_ );
570
571 Size ksize = kernel_.size();
572 cuda::copyMakeBorder(src, srcBorder_, ksize.height, ksize.height, ksize.width, ksize.width, BORDER_DEFAULT, Scalar(), _stream);
573
574 GpuMat srcRoi = srcBorder_(Rect(ksize.width, ksize.height, src.cols, src.rows));
575
576 GpuMat bufRoi;
577 if (iters_ > 1)
578 {
579 ensureSizeIsEnough(srcBorder_.size(), type_, buf_);
580 buf_.setTo(Scalar::all(0), _stream);
581 bufRoi = buf_(Rect(ksize.width, ksize.height, src.cols, src.rows));
582 }
583
584 _dst.create(src.size(), src.type());
585 GpuMat dst = _dst.getGpuMat();
586
587 cudaStream_t stream = StreamAccessor::getStream(_stream);
588 NppStreamHandler h(stream);
589
590 NppiSize oSizeROI;
591 oSizeROI.width = src.cols;
592 oSizeROI.height = src.rows;
593
594 NppiSize oMaskSize;
595 oMaskSize.height = ksize.height;
596 oMaskSize.width = ksize.width;
597
598 NppiPoint oAnchor;
599 oAnchor.x = anchor_.x;
600 oAnchor.y = anchor_.y;
601
602 nppSafeCall( func_(srcRoi.ptr<Npp8u>(), static_cast<int>(srcRoi.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step),
603 oSizeROI, kernel_.ptr<Npp8u>(), oMaskSize, oAnchor) );
604
605 for(int i = 1; i < iters_; ++i)
606 {
607 dst.copyTo(bufRoi, _stream);
608
609 nppSafeCall( func_(bufRoi.ptr<Npp8u>(), static_cast<int>(bufRoi.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step),
610 oSizeROI, kernel_.ptr<Npp8u>(), oMaskSize, oAnchor) );
611 }
612
613 if (stream == 0)
614 cudaSafeCall( cudaDeviceSynchronize() );
615 }
616 }
617
618 namespace
619 {
620 class MorphologyExFilter : public Filter
621 {
622 public:
623 MorphologyExFilter(int srcType, InputArray kernel, Point anchor, int iterations);
624
625 protected:
626 Ptr<cuda::Filter> erodeFilter_, dilateFilter_;
627 GpuMat buf_;
628 };
629
MorphologyExFilter(int srcType,InputArray kernel,Point anchor,int iterations)630 MorphologyExFilter::MorphologyExFilter(int srcType, InputArray kernel, Point anchor, int iterations)
631 {
632 erodeFilter_ = cuda::createMorphologyFilter(MORPH_ERODE, srcType, kernel, anchor, iterations);
633 dilateFilter_ = cuda::createMorphologyFilter(MORPH_DILATE, srcType, kernel, anchor, iterations);
634 }
635
636 // MORPH_OPEN
637
638 class MorphologyOpenFilter : public MorphologyExFilter
639 {
640 public:
641 MorphologyOpenFilter(int srcType, InputArray kernel, Point anchor, int iterations);
642
643 void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
644 };
645
MorphologyOpenFilter(int srcType,InputArray kernel,Point anchor,int iterations)646 MorphologyOpenFilter::MorphologyOpenFilter(int srcType, InputArray kernel, Point anchor, int iterations) :
647 MorphologyExFilter(srcType, kernel, anchor, iterations)
648 {
649 }
650
apply(InputArray src,OutputArray dst,Stream & stream)651 void MorphologyOpenFilter::apply(InputArray src, OutputArray dst, Stream& stream)
652 {
653 erodeFilter_->apply(src, buf_, stream);
654 dilateFilter_->apply(buf_, dst, stream);
655 }
656
657 // MORPH_CLOSE
658
659 class MorphologyCloseFilter : public MorphologyExFilter
660 {
661 public:
662 MorphologyCloseFilter(int srcType, InputArray kernel, Point anchor, int iterations);
663
664 void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
665 };
666
MorphologyCloseFilter(int srcType,InputArray kernel,Point anchor,int iterations)667 MorphologyCloseFilter::MorphologyCloseFilter(int srcType, InputArray kernel, Point anchor, int iterations) :
668 MorphologyExFilter(srcType, kernel, anchor, iterations)
669 {
670 }
671
apply(InputArray src,OutputArray dst,Stream & stream)672 void MorphologyCloseFilter::apply(InputArray src, OutputArray dst, Stream& stream)
673 {
674 dilateFilter_->apply(src, buf_, stream);
675 erodeFilter_->apply(buf_, dst, stream);
676 }
677
678 // MORPH_GRADIENT
679
680 class MorphologyGradientFilter : public MorphologyExFilter
681 {
682 public:
683 MorphologyGradientFilter(int srcType, InputArray kernel, Point anchor, int iterations);
684
685 void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
686 };
687
MorphologyGradientFilter(int srcType,InputArray kernel,Point anchor,int iterations)688 MorphologyGradientFilter::MorphologyGradientFilter(int srcType, InputArray kernel, Point anchor, int iterations) :
689 MorphologyExFilter(srcType, kernel, anchor, iterations)
690 {
691 }
692
apply(InputArray src,OutputArray dst,Stream & stream)693 void MorphologyGradientFilter::apply(InputArray src, OutputArray dst, Stream& stream)
694 {
695 erodeFilter_->apply(src, buf_, stream);
696 dilateFilter_->apply(src, dst, stream);
697 cuda::subtract(dst, buf_, dst, noArray(), -1, stream);
698 }
699
700 // MORPH_TOPHAT
701
702 class MorphologyTophatFilter : public MorphologyExFilter
703 {
704 public:
705 MorphologyTophatFilter(int srcType, InputArray kernel, Point anchor, int iterations);
706
707 void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
708 };
709
MorphologyTophatFilter(int srcType,InputArray kernel,Point anchor,int iterations)710 MorphologyTophatFilter::MorphologyTophatFilter(int srcType, InputArray kernel, Point anchor, int iterations) :
711 MorphologyExFilter(srcType, kernel, anchor, iterations)
712 {
713 }
714
apply(InputArray src,OutputArray dst,Stream & stream)715 void MorphologyTophatFilter::apply(InputArray src, OutputArray dst, Stream& stream)
716 {
717 erodeFilter_->apply(src, dst, stream);
718 dilateFilter_->apply(dst, buf_, stream);
719 cuda::subtract(src, buf_, dst, noArray(), -1, stream);
720 }
721
722 // MORPH_BLACKHAT
723
724 class MorphologyBlackhatFilter : public MorphologyExFilter
725 {
726 public:
727 MorphologyBlackhatFilter(int srcType, InputArray kernel, Point anchor, int iterations);
728
729 void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
730 };
731
MorphologyBlackhatFilter(int srcType,InputArray kernel,Point anchor,int iterations)732 MorphologyBlackhatFilter::MorphologyBlackhatFilter(int srcType, InputArray kernel, Point anchor, int iterations) :
733 MorphologyExFilter(srcType, kernel, anchor, iterations)
734 {
735 }
736
apply(InputArray src,OutputArray dst,Stream & stream)737 void MorphologyBlackhatFilter::apply(InputArray src, OutputArray dst, Stream& stream)
738 {
739 dilateFilter_->apply(src, dst, stream);
740 erodeFilter_->apply(dst, buf_, stream);
741 cuda::subtract(buf_, src, dst, noArray(), -1, stream);
742 }
743 }
744
createMorphologyFilter(int op,int srcType,InputArray kernel,Point anchor,int iterations)745 Ptr<Filter> cv::cuda::createMorphologyFilter(int op, int srcType, InputArray kernel, Point anchor, int iterations)
746 {
747 switch( op )
748 {
749 case MORPH_ERODE:
750 case MORPH_DILATE:
751 return makePtr<MorphologyFilter>(op, srcType, kernel, anchor, iterations);
752 break;
753
754 case MORPH_OPEN:
755 return makePtr<MorphologyOpenFilter>(srcType, kernel, anchor, iterations);
756 break;
757
758 case MORPH_CLOSE:
759 return makePtr<MorphologyCloseFilter>(srcType, kernel, anchor, iterations);
760 break;
761
762 case MORPH_GRADIENT:
763 return makePtr<MorphologyGradientFilter>(srcType, kernel, anchor, iterations);
764 break;
765
766 case MORPH_TOPHAT:
767 return makePtr<MorphologyTophatFilter>(srcType, kernel, anchor, iterations);
768 break;
769
770 case MORPH_BLACKHAT:
771 return makePtr<MorphologyBlackhatFilter>(srcType, kernel, anchor, iterations);
772 break;
773
774 default:
775 CV_Error(Error::StsBadArg, "Unknown morphological operation");
776 return Ptr<Filter>();
777 }
778 }
779
780 ////////////////////////////////////////////////////////////////////////////////////////////////////
781 // Image Rank Filter
782
783 namespace
784 {
785 enum RankType
786 {
787 RANK_MAX,
788 RANK_MIN
789 };
790
791 class NPPRankFilter : public Filter
792 {
793 public:
794 NPPRankFilter(int op, int srcType, Size ksize, Point anchor, int borderMode, Scalar borderVal);
795
796 void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
797
798 private:
799 typedef NppStatus (*nppFilterRank_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep, NppiSize oSizeROI,
800 NppiSize oMaskSize, NppiPoint oAnchor);
801
802 int type_;
803 Size ksize_;
804 Point anchor_;
805 int borderMode_;
806 Scalar borderVal_;
807 nppFilterRank_t func_;
808
809 GpuMat srcBorder_;
810 };
811
NPPRankFilter(int op,int srcType,Size ksize,Point anchor,int borderMode,Scalar borderVal)812 NPPRankFilter::NPPRankFilter(int op, int srcType, Size ksize, Point anchor, int borderMode, Scalar borderVal) :
813 type_(srcType), ksize_(ksize), anchor_(anchor), borderMode_(borderMode), borderVal_(borderVal)
814 {
815 static const nppFilterRank_t maxFuncs[] = {0, nppiFilterMax_8u_C1R, 0, 0, nppiFilterMax_8u_C4R};
816 static const nppFilterRank_t minFuncs[] = {0, nppiFilterMin_8u_C1R, 0, 0, nppiFilterMin_8u_C4R};
817
818 CV_Assert( srcType == CV_8UC1 || srcType == CV_8UC4 );
819
820 normalizeAnchor(anchor_, ksize_);
821
822 if (op == RANK_MAX)
823 func_ = maxFuncs[CV_MAT_CN(srcType)];
824 else
825 func_ = minFuncs[CV_MAT_CN(srcType)];
826 }
827
apply(InputArray _src,OutputArray _dst,Stream & _stream)828 void NPPRankFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream)
829 {
830 GpuMat src = _src.getGpuMat();
831 CV_Assert( src.type() == type_ );
832
833 cuda::copyMakeBorder(src, srcBorder_, ksize_.height, ksize_.height, ksize_.width, ksize_.width, borderMode_, borderVal_, _stream);
834
835 _dst.create(src.size(), src.type());
836 GpuMat dst = _dst.getGpuMat();
837
838 GpuMat srcRoi = srcBorder_(Rect(ksize_.width, ksize_.height, src.cols, src.rows));
839
840 cudaStream_t stream = StreamAccessor::getStream(_stream);
841 NppStreamHandler h(stream);
842
843 NppiSize oSizeROI;
844 oSizeROI.width = src.cols;
845 oSizeROI.height = src.rows;
846
847 NppiSize oMaskSize;
848 oMaskSize.height = ksize_.height;
849 oMaskSize.width = ksize_.width;
850
851 NppiPoint oAnchor;
852 oAnchor.x = anchor_.x;
853 oAnchor.y = anchor_.y;
854
855 nppSafeCall( func_(srcRoi.ptr<Npp8u>(), static_cast<int>(srcRoi.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step),
856 oSizeROI, oMaskSize, oAnchor) );
857
858 if (stream == 0)
859 cudaSafeCall( cudaDeviceSynchronize() );
860 }
861 }
862
createBoxMaxFilter(int srcType,Size ksize,Point anchor,int borderMode,Scalar borderVal)863 Ptr<Filter> cv::cuda::createBoxMaxFilter(int srcType, Size ksize, Point anchor, int borderMode, Scalar borderVal)
864 {
865 return makePtr<NPPRankFilter>(RANK_MAX, srcType, ksize, anchor, borderMode, borderVal);
866 }
867
createBoxMinFilter(int srcType,Size ksize,Point anchor,int borderMode,Scalar borderVal)868 Ptr<Filter> cv::cuda::createBoxMinFilter(int srcType, Size ksize, Point anchor, int borderMode, Scalar borderVal)
869 {
870 return makePtr<NPPRankFilter>(RANK_MIN, srcType, ksize, anchor, borderMode, borderVal);
871 }
872
873 ////////////////////////////////////////////////////////////////////////////////////////////////////
874 // 1D Sum Filter
875
876 namespace
877 {
878 class NppRowSumFilter : public Filter
879 {
880 public:
881 NppRowSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal);
882
883 void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
884
885 private:
886 int srcType_, dstType_;
887 int ksize_;
888 int anchor_;
889 int borderMode_;
890 Scalar borderVal_;
891
892 GpuMat srcBorder_;
893 };
894
NppRowSumFilter(int srcType,int dstType,int ksize,int anchor,int borderMode,Scalar borderVal)895 NppRowSumFilter::NppRowSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal) :
896 srcType_(srcType), dstType_(dstType), ksize_(ksize), anchor_(anchor), borderMode_(borderMode), borderVal_(borderVal)
897 {
898 CV_Assert( srcType_ == CV_8UC1 );
899 CV_Assert( dstType_ == CV_32FC1 );
900
901 normalizeAnchor(anchor_, ksize_);
902 }
903
apply(InputArray _src,OutputArray _dst,Stream & _stream)904 void NppRowSumFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream)
905 {
906 GpuMat src = _src.getGpuMat();
907 CV_Assert( src.type() == srcType_ );
908
909 cuda::copyMakeBorder(src, srcBorder_, 0, 0, ksize_, ksize_, borderMode_, borderVal_, _stream);
910
911 _dst.create(src.size(), dstType_);
912 GpuMat dst = _dst.getGpuMat();
913
914 GpuMat srcRoi = srcBorder_(Rect(ksize_, 0, src.cols, src.rows));
915
916 cudaStream_t stream = StreamAccessor::getStream(_stream);
917 NppStreamHandler h(stream);
918
919 NppiSize oSizeROI;
920 oSizeROI.width = src.cols;
921 oSizeROI.height = src.rows;
922
923 nppSafeCall( nppiSumWindowRow_8u32f_C1R(srcRoi.ptr<Npp8u>(), static_cast<int>(srcRoi.step),
924 dst.ptr<Npp32f>(), static_cast<int>(dst.step),
925 oSizeROI, ksize_, anchor_) );
926
927 if (stream == 0)
928 cudaSafeCall( cudaDeviceSynchronize() );
929 }
930 }
931
createRowSumFilter(int srcType,int dstType,int ksize,int anchor,int borderMode,Scalar borderVal)932 Ptr<Filter> cv::cuda::createRowSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal)
933 {
934 return makePtr<NppRowSumFilter>(srcType, dstType, ksize, anchor, borderMode, borderVal);
935 }
936
937 namespace
938 {
939 class NppColumnSumFilter : public Filter
940 {
941 public:
942 NppColumnSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal);
943
944 void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
945
946 private:
947 int srcType_, dstType_;
948 int ksize_;
949 int anchor_;
950 int borderMode_;
951 Scalar borderVal_;
952
953 GpuMat srcBorder_;
954 };
955
NppColumnSumFilter(int srcType,int dstType,int ksize,int anchor,int borderMode,Scalar borderVal)956 NppColumnSumFilter::NppColumnSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal) :
957 srcType_(srcType), dstType_(dstType), ksize_(ksize), anchor_(anchor), borderMode_(borderMode), borderVal_(borderVal)
958 {
959 CV_Assert( srcType_ == CV_8UC1 );
960 CV_Assert( dstType_ == CV_32FC1 );
961
962 normalizeAnchor(anchor_, ksize_);
963 }
964
apply(InputArray _src,OutputArray _dst,Stream & _stream)965 void NppColumnSumFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream)
966 {
967 GpuMat src = _src.getGpuMat();
968 CV_Assert( src.type() == srcType_ );
969
970 cuda::copyMakeBorder(src, srcBorder_, ksize_, ksize_, 0, 0, borderMode_, borderVal_, _stream);
971
972 _dst.create(src.size(), dstType_);
973 GpuMat dst = _dst.getGpuMat();
974
975 GpuMat srcRoi = srcBorder_(Rect(0, ksize_, src.cols, src.rows));
976
977 cudaStream_t stream = StreamAccessor::getStream(_stream);
978 NppStreamHandler h(stream);
979
980 NppiSize oSizeROI;
981 oSizeROI.width = src.cols;
982 oSizeROI.height = src.rows;
983
984 nppSafeCall( nppiSumWindowColumn_8u32f_C1R(srcRoi.ptr<Npp8u>(), static_cast<int>(srcRoi.step),
985 dst.ptr<Npp32f>(), static_cast<int>(dst.step),
986 oSizeROI, ksize_, anchor_) );
987
988 if (stream == 0)
989 cudaSafeCall( cudaDeviceSynchronize() );
990 }
991 }
992
createColumnSumFilter(int srcType,int dstType,int ksize,int anchor,int borderMode,Scalar borderVal)993 Ptr<Filter> cv::cuda::createColumnSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal)
994 {
995 return makePtr<NppColumnSumFilter>(srcType, dstType, ksize, anchor, borderMode, borderVal);
996 }
997
998 #endif
999