• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #include "precomp.hpp"
44 
45 using namespace cv;
46 using namespace cv::cuda;
47 
48 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
49 
createBoxFilter(int,int,Size,Point,int,Scalar)50 Ptr<Filter> cv::cuda::createBoxFilter(int, int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
51 
createLinearFilter(int,int,InputArray,Point,int,Scalar)52 Ptr<Filter> cv::cuda::createLinearFilter(int, int, InputArray, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
53 
createLaplacianFilter(int,int,int,double,int,Scalar)54 Ptr<Filter> cv::cuda::createLaplacianFilter(int, int, int, double, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
55 
createSeparableLinearFilter(int,int,InputArray,InputArray,Point,int,int)56 Ptr<Filter> cv::cuda::createSeparableLinearFilter(int, int, InputArray, InputArray, Point, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
57 
createDerivFilter(int,int,int,int,int,bool,double,int,int)58 Ptr<Filter> cv::cuda::createDerivFilter(int, int, int, int, int, bool, double, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
createSobelFilter(int,int,int,int,int,double,int,int)59 Ptr<Filter> cv::cuda::createSobelFilter(int, int, int, int, int, double, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
createScharrFilter(int,int,int,int,double,int,int)60 Ptr<Filter> cv::cuda::createScharrFilter(int, int, int, int, double, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
61 
createGaussianFilter(int,int,Size,double,double,int,int)62 Ptr<Filter> cv::cuda::createGaussianFilter(int, int, Size, double, double, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
63 
createMorphologyFilter(int,int,InputArray,Point,int)64 Ptr<Filter> cv::cuda::createMorphologyFilter(int, int, InputArray, Point, int) { throw_no_cuda(); return Ptr<Filter>(); }
65 
createBoxMaxFilter(int,Size,Point,int,Scalar)66 Ptr<Filter> cv::cuda::createBoxMaxFilter(int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
createBoxMinFilter(int,Size,Point,int,Scalar)67 Ptr<Filter> cv::cuda::createBoxMinFilter(int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
68 
createRowSumFilter(int,int,int,int,int,Scalar)69 Ptr<Filter> cv::cuda::createRowSumFilter(int, int, int, int, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
createColumnSumFilter(int,int,int,int,int,Scalar)70 Ptr<Filter> cv::cuda::createColumnSumFilter(int, int, int, int, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
71 
72 #else
73 
74 namespace
75 {
normalizeAnchor(int & anchor,int ksize)76     void normalizeAnchor(int& anchor, int ksize)
77     {
78         if (anchor < 0)
79             anchor = ksize >> 1;
80 
81         CV_Assert( 0 <= anchor && anchor < ksize );
82     }
83 
normalizeAnchor(Point & anchor,Size ksize)84     void normalizeAnchor(Point& anchor, Size ksize)
85     {
86         normalizeAnchor(anchor.x, ksize.width);
87         normalizeAnchor(anchor.y, ksize.height);
88     }
89 }
90 
91 ////////////////////////////////////////////////////////////////////////////////////////////////////
92 // Box Filter
93 
94 namespace
95 {
96     class NPPBoxFilter : public Filter
97     {
98     public:
99         NPPBoxFilter(int srcType, int dstType, Size ksize, Point anchor, int borderMode, Scalar borderVal);
100 
101         void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
102 
103     private:
104         typedef NppStatus (*nppFilterBox_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep,
105                                             NppiSize oSizeROI, NppiSize oMaskSize, NppiPoint oAnchor);
106 
107         Size ksize_;
108         Point anchor_;
109         int type_;
110         nppFilterBox_t func_;
111         int borderMode_;
112         Scalar borderVal_;
113         GpuMat srcBorder_;
114     };
115 
NPPBoxFilter(int srcType,int dstType,Size ksize,Point anchor,int borderMode,Scalar borderVal)116     NPPBoxFilter::NPPBoxFilter(int srcType, int dstType, Size ksize, Point anchor, int borderMode, Scalar borderVal) :
117         ksize_(ksize), anchor_(anchor), type_(srcType), borderMode_(borderMode), borderVal_(borderVal)
118     {
119         static const nppFilterBox_t funcs[] = {0, nppiFilterBox_8u_C1R, 0, 0, nppiFilterBox_8u_C4R};
120 
121         CV_Assert( srcType == CV_8UC1 || srcType == CV_8UC4 );
122         CV_Assert( dstType == srcType );
123 
124         normalizeAnchor(anchor_, ksize);
125 
126         func_ = funcs[CV_MAT_CN(srcType)];
127     }
128 
apply(InputArray _src,OutputArray _dst,Stream & _stream)129     void NPPBoxFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream)
130     {
131         GpuMat src = _src.getGpuMat();
132         CV_Assert( src.type() == type_ );
133 
134         cuda::copyMakeBorder(src, srcBorder_, ksize_.height, ksize_.height, ksize_.width, ksize_.width, borderMode_, borderVal_, _stream);
135 
136         _dst.create(src.size(), src.type());
137         GpuMat dst = _dst.getGpuMat();
138 
139         GpuMat srcRoi = srcBorder_(Rect(ksize_.width, ksize_.height, src.cols, src.rows));
140 
141         cudaStream_t stream = StreamAccessor::getStream(_stream);
142         NppStreamHandler h(stream);
143 
144         NppiSize oSizeROI;
145         oSizeROI.width = src.cols;
146         oSizeROI.height = src.rows;
147 
148         NppiSize oMaskSize;
149         oMaskSize.height = ksize_.height;
150         oMaskSize.width = ksize_.width;
151 
152         NppiPoint oAnchor;
153         oAnchor.x = anchor_.x;
154         oAnchor.y = anchor_.y;
155 
156         nppSafeCall( func_(srcRoi.ptr<Npp8u>(), static_cast<int>(srcRoi.step),
157                            dst.ptr<Npp8u>(), static_cast<int>(dst.step),
158                            oSizeROI, oMaskSize, oAnchor) );
159 
160         if (stream == 0)
161             cudaSafeCall( cudaDeviceSynchronize() );
162     }
163 }
164 
createBoxFilter(int srcType,int dstType,Size ksize,Point anchor,int borderMode,Scalar borderVal)165 Ptr<Filter> cv::cuda::createBoxFilter(int srcType, int dstType, Size ksize, Point anchor, int borderMode, Scalar borderVal)
166 {
167     if (dstType < 0)
168         dstType = srcType;
169 
170     dstType = CV_MAKE_TYPE(CV_MAT_DEPTH(dstType), CV_MAT_CN(srcType));
171 
172     return makePtr<NPPBoxFilter>(srcType, dstType, ksize, anchor, borderMode, borderVal);
173 }
174 
175 ////////////////////////////////////////////////////////////////////////////////////////////////////
176 // Linear Filter
177 
178 namespace cv { namespace cuda { namespace device
179 {
180     template <typename T, typename D>
181     void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel,
182                   int kWidth, int kHeight, int anchorX, int anchorY,
183                   int borderMode, const float* borderValue, cudaStream_t stream);
184 }}}
185 
186 namespace
187 {
188     class LinearFilter : public Filter
189     {
190     public:
191         LinearFilter(int srcType, int dstType, InputArray kernel, Point anchor, int borderMode, Scalar borderVal);
192 
193         void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
194 
195     private:
196         typedef void (*filter2D_t)(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel,
197                                    int kWidth, int kHeight, int anchorX, int anchorY,
198                                    int borderMode, const float* borderValue, cudaStream_t stream);
199 
200         GpuMat kernel_;
201         Point anchor_;
202         int type_;
203         filter2D_t func_;
204         int borderMode_;
205         Scalar_<float> borderVal_;
206     };
207 
LinearFilter(int srcType,int dstType,InputArray _kernel,Point anchor,int borderMode,Scalar borderVal)208     LinearFilter::LinearFilter(int srcType, int dstType, InputArray _kernel, Point anchor, int borderMode, Scalar borderVal) :
209         anchor_(anchor), type_(srcType), borderMode_(borderMode), borderVal_(borderVal)
210     {
211         const int sdepth = CV_MAT_DEPTH(srcType);
212         const int scn = CV_MAT_CN(srcType);
213 
214         Mat kernel = _kernel.getMat();
215 
216         CV_Assert( sdepth == CV_8U || sdepth == CV_16U || sdepth == CV_32F );
217         CV_Assert( scn == 1 || scn == 4 );
218         CV_Assert( dstType == srcType );
219         CV_Assert( kernel.channels() == 1 );
220         CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );
221 
222         Mat kernel32F;
223         kernel.convertTo(kernel32F, CV_32F);
224 
225         kernel_ = cuda::createContinuous(kernel.size(), CV_32FC1);
226         kernel_.upload(kernel32F);
227 
228         normalizeAnchor(anchor_, kernel.size());
229 
230         switch (srcType)
231         {
232         case CV_8UC1:
233             func_ = cv::cuda::device::filter2D<uchar, uchar>;
234             break;
235         case CV_8UC4:
236             func_ = cv::cuda::device::filter2D<uchar4, uchar4>;
237             break;
238         case CV_16UC1:
239             func_ = cv::cuda::device::filter2D<ushort, ushort>;
240             break;
241         case CV_16UC4:
242             func_ = cv::cuda::device::filter2D<ushort4, ushort4>;
243             break;
244         case CV_32FC1:
245             func_ = cv::cuda::device::filter2D<float, float>;
246             break;
247         case CV_32FC4:
248             func_ = cv::cuda::device::filter2D<float4, float4>;
249             break;
250         }
251     }
252 
apply(InputArray _src,OutputArray _dst,Stream & _stream)253     void LinearFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream)
254     {
255         GpuMat src = _src.getGpuMat();
256         CV_Assert( src.type() == type_ );
257 
258         _dst.create(src.size(), src.type());
259         GpuMat dst = _dst.getGpuMat();
260 
261         Point ofs;
262         Size wholeSize;
263         src.locateROI(wholeSize, ofs);
264 
265         GpuMat srcWhole(wholeSize, src.type(), src.datastart);
266 
267         func_(srcWhole, ofs.x, ofs.y, dst, kernel_.ptr<float>(),
268               kernel_.cols, kernel_.rows, anchor_.x, anchor_.y,
269               borderMode_, borderVal_.val, StreamAccessor::getStream(_stream));
270     }
271 }
272 
createLinearFilter(int srcType,int dstType,InputArray kernel,Point anchor,int borderMode,Scalar borderVal)273 Ptr<Filter> cv::cuda::createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor, int borderMode, Scalar borderVal)
274 {
275     if (dstType < 0)
276         dstType = srcType;
277 
278     dstType = CV_MAKE_TYPE(CV_MAT_DEPTH(dstType), CV_MAT_CN(srcType));
279 
280     return makePtr<LinearFilter>(srcType, dstType, kernel, anchor, borderMode, borderVal);
281 }
282 
283 ////////////////////////////////////////////////////////////////////////////////////////////////////
284 // Laplacian Filter
285 
createLaplacianFilter(int srcType,int dstType,int ksize,double scale,int borderMode,Scalar borderVal)286 Ptr<Filter> cv::cuda::createLaplacianFilter(int srcType, int dstType, int ksize, double scale, int borderMode, Scalar borderVal)
287 {
288     CV_Assert( ksize == 1 || ksize == 3 );
289 
290     static const float K[2][9] =
291     {
292         {0.0f, 1.0f, 0.0f, 1.0f, -4.0f, 1.0f, 0.0f, 1.0f, 0.0f},
293         {2.0f, 0.0f, 2.0f, 0.0f, -8.0f, 0.0f, 2.0f, 0.0f, 2.0f}
294     };
295 
296     Mat kernel(3, 3, CV_32FC1, (void*)K[ksize == 3]);
297     if (scale != 1)
298         kernel *= scale;
299 
300     return cuda::createLinearFilter(srcType, dstType, kernel, Point(-1,-1), borderMode, borderVal);
301 }
302 
303 ////////////////////////////////////////////////////////////////////////////////////////////////////
304 // Separable Linear Filter
305 
306 namespace filter
307 {
308     template <typename T, typename D>
309     void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
310 
311     template <typename T, typename D>
312     void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
313 }
314 
315 namespace
316 {
317     class SeparableLinearFilter : public Filter
318     {
319     public:
320         SeparableLinearFilter(int srcType, int dstType,
321                               InputArray rowKernel, InputArray columnKernel,
322                               Point anchor, int rowBorderMode, int columnBorderMode);
323 
324         void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
325 
326     private:
327         typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
328 
329         int srcType_, bufType_, dstType_;
330         GpuMat rowKernel_, columnKernel_;
331         func_t rowFilter_, columnFilter_;
332         Point anchor_;
333         int rowBorderMode_, columnBorderMode_;
334 
335         GpuMat buf_;
336     };
337 
SeparableLinearFilter(int srcType,int dstType,InputArray _rowKernel,InputArray _columnKernel,Point anchor,int rowBorderMode,int columnBorderMode)338     SeparableLinearFilter::SeparableLinearFilter(int srcType, int dstType,
339                                                  InputArray _rowKernel, InputArray _columnKernel,
340                                                  Point anchor, int rowBorderMode, int columnBorderMode) :
341         srcType_(srcType), dstType_(dstType), anchor_(anchor), rowBorderMode_(rowBorderMode), columnBorderMode_(columnBorderMode)
342     {
343         static const func_t rowFilterFuncs[7][4] =
344         {
345             {filter::linearRow<uchar, float>, 0, filter::linearRow<uchar3, float3>, filter::linearRow<uchar4, float4>},
346             {0, 0, 0, 0},
347             {filter::linearRow<ushort, float>, 0, filter::linearRow<ushort3, float3>, filter::linearRow<ushort4, float4>},
348             {filter::linearRow<short, float>, 0, filter::linearRow<short3, float3>, filter::linearRow<short4, float4>},
349             {filter::linearRow<int, float>, 0, filter::linearRow<int3, float3>, filter::linearRow<int4, float4>},
350             {filter::linearRow<float, float>, 0, filter::linearRow<float3, float3>, filter::linearRow<float4, float4>},
351             {0, 0, 0, 0}
352         };
353 
354         static const func_t columnFilterFuncs[7][4] =
355         {
356             {filter::linearColumn<float, uchar>, 0, filter::linearColumn<float3, uchar3>, filter::linearColumn<float4, uchar4>},
357             {0, 0, 0, 0},
358             {filter::linearColumn<float, ushort>, 0, filter::linearColumn<float3, ushort3>, filter::linearColumn<float4, ushort4>},
359             {filter::linearColumn<float, short>, 0, filter::linearColumn<float3, short3>, filter::linearColumn<float4, short4>},
360             {filter::linearColumn<float, int>, 0, filter::linearColumn<float3, int3>, filter::linearColumn<float4, int4>},
361             {filter::linearColumn<float, float>, 0, filter::linearColumn<float3, float3>, filter::linearColumn<float4, float4>},
362             {0, 0, 0, 0}
363         };
364 
365         const int sdepth = CV_MAT_DEPTH(srcType);
366         const int cn = CV_MAT_CN(srcType);
367         const int ddepth = CV_MAT_DEPTH(dstType);
368 
369         Mat rowKernel = _rowKernel.getMat();
370         Mat columnKernel = _columnKernel.getMat();
371 
372         CV_Assert( sdepth <= CV_64F && cn <= 4 );
373         CV_Assert( rowKernel.channels() == 1 );
374         CV_Assert( columnKernel.channels() == 1 );
375         CV_Assert( rowBorderMode == BORDER_REFLECT101 || rowBorderMode == BORDER_REPLICATE || rowBorderMode == BORDER_CONSTANT || rowBorderMode == BORDER_REFLECT || rowBorderMode == BORDER_WRAP );
376         CV_Assert( columnBorderMode == BORDER_REFLECT101 || columnBorderMode == BORDER_REPLICATE || columnBorderMode == BORDER_CONSTANT || columnBorderMode == BORDER_REFLECT || columnBorderMode == BORDER_WRAP );
377 
378         Mat kernel32F;
379 
380         rowKernel.convertTo(kernel32F, CV_32F);
381         rowKernel_.upload(kernel32F.reshape(1, 1));
382 
383         columnKernel.convertTo(kernel32F, CV_32F);
384         columnKernel_.upload(kernel32F.reshape(1, 1));
385 
386         CV_Assert( rowKernel_.cols > 0 && rowKernel_.cols <= 32 );
387         CV_Assert( columnKernel_.cols > 0 && columnKernel_.cols <= 32 );
388 
389         normalizeAnchor(anchor_.x, rowKernel_.cols);
390         normalizeAnchor(anchor_.y, columnKernel_.cols);
391 
392         bufType_ = CV_MAKE_TYPE(CV_32F, cn);
393 
394         rowFilter_ = rowFilterFuncs[sdepth][cn - 1];
395         CV_Assert( rowFilter_ != 0 );
396 
397         columnFilter_ = columnFilterFuncs[ddepth][cn - 1];
398         CV_Assert( columnFilter_ != 0 );
399     }
400 
apply(InputArray _src,OutputArray _dst,Stream & _stream)401     void SeparableLinearFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream)
402     {
403         GpuMat src = _src.getGpuMat();
404         CV_Assert( src.type() == srcType_ );
405 
406         _dst.create(src.size(), dstType_);
407         GpuMat dst = _dst.getGpuMat();
408 
409         ensureSizeIsEnough(src.size(), bufType_, buf_);
410 
411         DeviceInfo devInfo;
412         const int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
413 
414         cudaStream_t stream = StreamAccessor::getStream(_stream);
415 
416         rowFilter_(src, buf_, rowKernel_.ptr<float>(), rowKernel_.cols, anchor_.x, rowBorderMode_, cc, stream);
417         columnFilter_(buf_, dst, columnKernel_.ptr<float>(), columnKernel_.cols, anchor_.y, columnBorderMode_, cc, stream);
418     }
419 }
420 
createSeparableLinearFilter(int srcType,int dstType,InputArray rowKernel,InputArray columnKernel,Point anchor,int rowBorderMode,int columnBorderMode)421 Ptr<Filter> cv::cuda::createSeparableLinearFilter(int srcType, int dstType, InputArray rowKernel, InputArray columnKernel, Point anchor, int rowBorderMode, int columnBorderMode)
422 {
423     if (dstType < 0)
424         dstType = srcType;
425 
426     dstType = CV_MAKE_TYPE(CV_MAT_DEPTH(dstType), CV_MAT_CN(srcType));
427 
428     if (columnBorderMode < 0)
429         columnBorderMode = rowBorderMode;
430 
431     return makePtr<SeparableLinearFilter>(srcType, dstType, rowKernel, columnKernel, anchor, rowBorderMode, columnBorderMode);
432 }
433 
434 ////////////////////////////////////////////////////////////////////////////////////////////////////
435 // Deriv Filter
436 
createDerivFilter(int srcType,int dstType,int dx,int dy,int ksize,bool normalize,double scale,int rowBorderMode,int columnBorderMode)437 Ptr<Filter> cv::cuda::createDerivFilter(int srcType, int dstType, int dx, int dy, int ksize, bool normalize, double scale, int rowBorderMode, int columnBorderMode)
438 {
439     Mat kx, ky;
440     getDerivKernels(kx, ky, dx, dy, ksize, normalize, CV_32F);
441 
442     if (scale != 1)
443     {
444         // usually the smoothing part is the slowest to compute,
445         // so try to scale it instead of the faster differenciating part
446         if (dx == 0)
447             kx *= scale;
448         else
449             ky *= scale;
450     }
451 
452     return cuda::createSeparableLinearFilter(srcType, dstType, kx, ky, Point(-1, -1), rowBorderMode, columnBorderMode);
453 }
454 
createSobelFilter(int srcType,int dstType,int dx,int dy,int ksize,double scale,int rowBorderMode,int columnBorderMode)455 Ptr<Filter> cv::cuda::createSobelFilter(int srcType, int dstType, int dx, int dy, int ksize, double scale, int rowBorderMode, int columnBorderMode)
456 {
457     return cuda::createDerivFilter(srcType, dstType, dx, dy, ksize, false, scale, rowBorderMode, columnBorderMode);
458 }
459 
createScharrFilter(int srcType,int dstType,int dx,int dy,double scale,int rowBorderMode,int columnBorderMode)460 Ptr<Filter> cv::cuda::createScharrFilter(int srcType, int dstType, int dx, int dy, double scale, int rowBorderMode, int columnBorderMode)
461 {
462     return cuda::createDerivFilter(srcType, dstType, dx, dy, -1, false, scale, rowBorderMode, columnBorderMode);
463 }
464 
465 ////////////////////////////////////////////////////////////////////////////////////////////////////
466 // Gaussian Filter
467 
createGaussianFilter(int srcType,int dstType,Size ksize,double sigma1,double sigma2,int rowBorderMode,int columnBorderMode)468 Ptr<Filter> cv::cuda::createGaussianFilter(int srcType, int dstType, Size ksize, double sigma1, double sigma2, int rowBorderMode, int columnBorderMode)
469 {
470     const int depth = CV_MAT_DEPTH(srcType);
471 
472     if (sigma2 <= 0)
473         sigma2 = sigma1;
474 
475     // automatic detection of kernel size from sigma
476     if (ksize.width <= 0 && sigma1 > 0)
477         ksize.width = cvRound(sigma1 * (depth == CV_8U ? 3 : 4)*2 + 1) | 1;
478     if (ksize.height <= 0 && sigma2 > 0)
479         ksize.height = cvRound(sigma2 * (depth == CV_8U ? 3 : 4)*2 + 1) | 1;
480 
481     CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1 );
482 
483     sigma1 = std::max(sigma1, 0.0);
484     sigma2 = std::max(sigma2, 0.0);
485 
486     Mat kx = getGaussianKernel(ksize.width, sigma1, CV_32F);
487     Mat ky;
488     if (ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON)
489         ky = kx;
490     else
491         ky = getGaussianKernel(ksize.height, sigma2, CV_32F);
492 
493     return createSeparableLinearFilter(srcType, dstType, kx, ky, Point(-1,-1), rowBorderMode, columnBorderMode);
494 }
495 
496 ////////////////////////////////////////////////////////////////////////////////////////////////////
497 // Morphology Filter
498 
499 namespace
500 {
501     class MorphologyFilter : public Filter
502     {
503     public:
504         MorphologyFilter(int op, int srcType, InputArray kernel, Point anchor, int iterations);
505 
506         void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
507 
508     private:
509         typedef NppStatus (*nppMorfFilter_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep, NppiSize oSizeROI,
510                                              const Npp8u* pMask, NppiSize oMaskSize, NppiPoint oAnchor);
511 
512         int type_;
513         GpuMat kernel_;
514         Point anchor_;
515         int iters_;
516         nppMorfFilter_t func_;
517 
518         GpuMat srcBorder_;
519         GpuMat buf_;
520     };
521 
MorphologyFilter(int op,int srcType,InputArray _kernel,Point anchor,int iterations)522     MorphologyFilter::MorphologyFilter(int op, int srcType, InputArray _kernel, Point anchor, int iterations) :
523         type_(srcType), anchor_(anchor), iters_(iterations)
524     {
525         static const nppMorfFilter_t funcs[2][5] =
526         {
527             {0, nppiErode_8u_C1R, 0, 0, nppiErode_8u_C4R },
528             {0, nppiDilate_8u_C1R, 0, 0, nppiDilate_8u_C4R }
529         };
530 
531         CV_Assert( op == MORPH_ERODE || op == MORPH_DILATE );
532         CV_Assert( srcType == CV_8UC1 || srcType == CV_8UC4 );
533 
534         Mat kernel = _kernel.getMat();
535         Size ksize = !kernel.empty() ? _kernel.size() : Size(3, 3);
536 
537         normalizeAnchor(anchor_, ksize);
538 
539         if (kernel.empty())
540         {
541             kernel = getStructuringElement(MORPH_RECT, Size(1 + iters_ * 2, 1 + iters_ * 2));
542             anchor_ = Point(iters_, iters_);
543             iters_ = 1;
544         }
545         else if (iters_ > 1 && cv::countNonZero(kernel) == (int) kernel.total())
546         {
547             anchor_ = Point(anchor_.x * iters_, anchor_.y * iters_);
548             kernel = getStructuringElement(MORPH_RECT,
549                                            Size(ksize.width + (iters_ - 1) * (ksize.width - 1),
550                                                 ksize.height + (iters_ - 1) * (ksize.height - 1)),
551                                            anchor_);
552             iters_ = 1;
553         }
554 
555         CV_Assert( kernel.channels() == 1 );
556 
557         Mat kernel8U;
558         kernel.convertTo(kernel8U, CV_8U);
559 
560         kernel_ = cuda::createContinuous(kernel.size(), CV_8UC1);
561         kernel_.upload(kernel8U);
562 
563         func_ = funcs[op][CV_MAT_CN(srcType)];
564     }
565 
apply(InputArray _src,OutputArray _dst,Stream & _stream)566     void MorphologyFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream)
567     {
568         GpuMat src = _src.getGpuMat();
569         CV_Assert( src.type() == type_ );
570 
571         Size ksize = kernel_.size();
572         cuda::copyMakeBorder(src, srcBorder_, ksize.height, ksize.height, ksize.width, ksize.width, BORDER_DEFAULT, Scalar(), _stream);
573 
574         GpuMat srcRoi = srcBorder_(Rect(ksize.width, ksize.height, src.cols, src.rows));
575 
576         GpuMat bufRoi;
577         if (iters_ > 1)
578         {
579             ensureSizeIsEnough(srcBorder_.size(), type_, buf_);
580             buf_.setTo(Scalar::all(0), _stream);
581             bufRoi = buf_(Rect(ksize.width, ksize.height, src.cols, src.rows));
582         }
583 
584         _dst.create(src.size(), src.type());
585         GpuMat dst = _dst.getGpuMat();
586 
587         cudaStream_t stream = StreamAccessor::getStream(_stream);
588         NppStreamHandler h(stream);
589 
590         NppiSize oSizeROI;
591         oSizeROI.width = src.cols;
592         oSizeROI.height = src.rows;
593 
594         NppiSize oMaskSize;
595         oMaskSize.height = ksize.height;
596         oMaskSize.width = ksize.width;
597 
598         NppiPoint oAnchor;
599         oAnchor.x = anchor_.x;
600         oAnchor.y = anchor_.y;
601 
602         nppSafeCall( func_(srcRoi.ptr<Npp8u>(), static_cast<int>(srcRoi.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step),
603                            oSizeROI, kernel_.ptr<Npp8u>(), oMaskSize, oAnchor) );
604 
605         for(int i = 1; i < iters_; ++i)
606         {
607             dst.copyTo(bufRoi, _stream);
608 
609             nppSafeCall( func_(bufRoi.ptr<Npp8u>(), static_cast<int>(bufRoi.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step),
610                                oSizeROI, kernel_.ptr<Npp8u>(), oMaskSize, oAnchor) );
611         }
612 
613         if (stream == 0)
614             cudaSafeCall( cudaDeviceSynchronize() );
615     }
616 }
617 
618 namespace
619 {
620     class MorphologyExFilter : public Filter
621     {
622     public:
623         MorphologyExFilter(int srcType, InputArray kernel, Point anchor, int iterations);
624 
625     protected:
626         Ptr<cuda::Filter> erodeFilter_, dilateFilter_;
627         GpuMat buf_;
628     };
629 
MorphologyExFilter(int srcType,InputArray kernel,Point anchor,int iterations)630     MorphologyExFilter::MorphologyExFilter(int srcType, InputArray kernel, Point anchor, int iterations)
631     {
632         erodeFilter_ = cuda::createMorphologyFilter(MORPH_ERODE, srcType, kernel, anchor, iterations);
633         dilateFilter_ = cuda::createMorphologyFilter(MORPH_DILATE, srcType, kernel, anchor, iterations);
634     }
635 
636     // MORPH_OPEN
637 
638     class MorphologyOpenFilter : public MorphologyExFilter
639     {
640     public:
641         MorphologyOpenFilter(int srcType, InputArray kernel, Point anchor, int iterations);
642 
643         void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
644     };
645 
MorphologyOpenFilter(int srcType,InputArray kernel,Point anchor,int iterations)646     MorphologyOpenFilter::MorphologyOpenFilter(int srcType, InputArray kernel, Point anchor, int iterations) :
647         MorphologyExFilter(srcType, kernel, anchor, iterations)
648     {
649     }
650 
apply(InputArray src,OutputArray dst,Stream & stream)651     void MorphologyOpenFilter::apply(InputArray src, OutputArray dst, Stream& stream)
652     {
653         erodeFilter_->apply(src, buf_, stream);
654         dilateFilter_->apply(buf_, dst, stream);
655     }
656 
657     // MORPH_CLOSE
658 
659     class MorphologyCloseFilter : public MorphologyExFilter
660     {
661     public:
662         MorphologyCloseFilter(int srcType, InputArray kernel, Point anchor, int iterations);
663 
664         void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
665     };
666 
MorphologyCloseFilter(int srcType,InputArray kernel,Point anchor,int iterations)667     MorphologyCloseFilter::MorphologyCloseFilter(int srcType, InputArray kernel, Point anchor, int iterations) :
668         MorphologyExFilter(srcType, kernel, anchor, iterations)
669     {
670     }
671 
apply(InputArray src,OutputArray dst,Stream & stream)672     void MorphologyCloseFilter::apply(InputArray src, OutputArray dst, Stream& stream)
673     {
674         dilateFilter_->apply(src, buf_, stream);
675         erodeFilter_->apply(buf_, dst, stream);
676     }
677 
678     // MORPH_GRADIENT
679 
680     class MorphologyGradientFilter : public MorphologyExFilter
681     {
682     public:
683         MorphologyGradientFilter(int srcType, InputArray kernel, Point anchor, int iterations);
684 
685         void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
686     };
687 
MorphologyGradientFilter(int srcType,InputArray kernel,Point anchor,int iterations)688     MorphologyGradientFilter::MorphologyGradientFilter(int srcType, InputArray kernel, Point anchor, int iterations) :
689         MorphologyExFilter(srcType, kernel, anchor, iterations)
690     {
691     }
692 
apply(InputArray src,OutputArray dst,Stream & stream)693     void MorphologyGradientFilter::apply(InputArray src, OutputArray dst, Stream& stream)
694     {
695         erodeFilter_->apply(src, buf_, stream);
696         dilateFilter_->apply(src, dst, stream);
697         cuda::subtract(dst, buf_, dst, noArray(), -1, stream);
698     }
699 
700     // MORPH_TOPHAT
701 
702     class MorphologyTophatFilter : public MorphologyExFilter
703     {
704     public:
705         MorphologyTophatFilter(int srcType, InputArray kernel, Point anchor, int iterations);
706 
707         void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
708     };
709 
MorphologyTophatFilter(int srcType,InputArray kernel,Point anchor,int iterations)710     MorphologyTophatFilter::MorphologyTophatFilter(int srcType, InputArray kernel, Point anchor, int iterations) :
711         MorphologyExFilter(srcType, kernel, anchor, iterations)
712     {
713     }
714 
apply(InputArray src,OutputArray dst,Stream & stream)715     void MorphologyTophatFilter::apply(InputArray src, OutputArray dst, Stream& stream)
716     {
717         erodeFilter_->apply(src, dst, stream);
718         dilateFilter_->apply(dst, buf_, stream);
719         cuda::subtract(src, buf_, dst, noArray(), -1, stream);
720     }
721 
722     // MORPH_BLACKHAT
723 
724     class MorphologyBlackhatFilter : public MorphologyExFilter
725     {
726     public:
727         MorphologyBlackhatFilter(int srcType, InputArray kernel, Point anchor, int iterations);
728 
729         void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
730     };
731 
MorphologyBlackhatFilter(int srcType,InputArray kernel,Point anchor,int iterations)732     MorphologyBlackhatFilter::MorphologyBlackhatFilter(int srcType, InputArray kernel, Point anchor, int iterations) :
733         MorphologyExFilter(srcType, kernel, anchor, iterations)
734     {
735     }
736 
apply(InputArray src,OutputArray dst,Stream & stream)737     void MorphologyBlackhatFilter::apply(InputArray src, OutputArray dst, Stream& stream)
738     {
739         dilateFilter_->apply(src, dst, stream);
740         erodeFilter_->apply(dst, buf_, stream);
741         cuda::subtract(buf_, src, dst, noArray(), -1, stream);
742     }
743 }
744 
createMorphologyFilter(int op,int srcType,InputArray kernel,Point anchor,int iterations)745 Ptr<Filter> cv::cuda::createMorphologyFilter(int op, int srcType, InputArray kernel, Point anchor, int iterations)
746 {
747     switch( op )
748     {
749     case MORPH_ERODE:
750     case MORPH_DILATE:
751         return makePtr<MorphologyFilter>(op, srcType, kernel, anchor, iterations);
752         break;
753 
754     case MORPH_OPEN:
755         return makePtr<MorphologyOpenFilter>(srcType, kernel, anchor, iterations);
756         break;
757 
758     case MORPH_CLOSE:
759         return makePtr<MorphologyCloseFilter>(srcType, kernel, anchor, iterations);
760         break;
761 
762     case MORPH_GRADIENT:
763         return makePtr<MorphologyGradientFilter>(srcType, kernel, anchor, iterations);
764         break;
765 
766     case MORPH_TOPHAT:
767         return makePtr<MorphologyTophatFilter>(srcType, kernel, anchor, iterations);
768         break;
769 
770     case MORPH_BLACKHAT:
771         return makePtr<MorphologyBlackhatFilter>(srcType, kernel, anchor, iterations);
772         break;
773 
774     default:
775         CV_Error(Error::StsBadArg, "Unknown morphological operation");
776         return Ptr<Filter>();
777     }
778 }
779 
780 ////////////////////////////////////////////////////////////////////////////////////////////////////
781 // Image Rank Filter
782 
783 namespace
784 {
785     enum RankType
786     {
787         RANK_MAX,
788         RANK_MIN
789     };
790 
791     class NPPRankFilter : public Filter
792     {
793     public:
794         NPPRankFilter(int op, int srcType, Size ksize, Point anchor, int borderMode, Scalar borderVal);
795 
796         void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
797 
798     private:
799         typedef NppStatus (*nppFilterRank_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep, NppiSize oSizeROI,
800                                              NppiSize oMaskSize, NppiPoint oAnchor);
801 
802         int type_;
803         Size ksize_;
804         Point anchor_;
805         int borderMode_;
806         Scalar borderVal_;
807         nppFilterRank_t func_;
808 
809         GpuMat srcBorder_;
810     };
811 
NPPRankFilter(int op,int srcType,Size ksize,Point anchor,int borderMode,Scalar borderVal)812     NPPRankFilter::NPPRankFilter(int op, int srcType, Size ksize, Point anchor, int borderMode, Scalar borderVal) :
813         type_(srcType), ksize_(ksize), anchor_(anchor), borderMode_(borderMode), borderVal_(borderVal)
814     {
815         static const nppFilterRank_t maxFuncs[] = {0, nppiFilterMax_8u_C1R, 0, 0, nppiFilterMax_8u_C4R};
816         static const nppFilterRank_t minFuncs[] = {0, nppiFilterMin_8u_C1R, 0, 0, nppiFilterMin_8u_C4R};
817 
818         CV_Assert( srcType == CV_8UC1 || srcType == CV_8UC4 );
819 
820         normalizeAnchor(anchor_, ksize_);
821 
822         if (op == RANK_MAX)
823             func_ = maxFuncs[CV_MAT_CN(srcType)];
824         else
825             func_ = minFuncs[CV_MAT_CN(srcType)];
826     }
827 
apply(InputArray _src,OutputArray _dst,Stream & _stream)828     void NPPRankFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream)
829     {
830         GpuMat src = _src.getGpuMat();
831         CV_Assert( src.type() == type_ );
832 
833         cuda::copyMakeBorder(src, srcBorder_, ksize_.height, ksize_.height, ksize_.width, ksize_.width, borderMode_, borderVal_, _stream);
834 
835         _dst.create(src.size(), src.type());
836         GpuMat dst = _dst.getGpuMat();
837 
838         GpuMat srcRoi = srcBorder_(Rect(ksize_.width, ksize_.height, src.cols, src.rows));
839 
840         cudaStream_t stream = StreamAccessor::getStream(_stream);
841         NppStreamHandler h(stream);
842 
843         NppiSize oSizeROI;
844         oSizeROI.width = src.cols;
845         oSizeROI.height = src.rows;
846 
847         NppiSize oMaskSize;
848         oMaskSize.height = ksize_.height;
849         oMaskSize.width = ksize_.width;
850 
851         NppiPoint oAnchor;
852         oAnchor.x = anchor_.x;
853         oAnchor.y = anchor_.y;
854 
855         nppSafeCall( func_(srcRoi.ptr<Npp8u>(), static_cast<int>(srcRoi.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step),
856                            oSizeROI, oMaskSize, oAnchor) );
857 
858         if (stream == 0)
859             cudaSafeCall( cudaDeviceSynchronize() );
860     }
861 }
862 
createBoxMaxFilter(int srcType,Size ksize,Point anchor,int borderMode,Scalar borderVal)863 Ptr<Filter> cv::cuda::createBoxMaxFilter(int srcType, Size ksize, Point anchor, int borderMode, Scalar borderVal)
864 {
865     return makePtr<NPPRankFilter>(RANK_MAX, srcType, ksize, anchor, borderMode, borderVal);
866 }
867 
createBoxMinFilter(int srcType,Size ksize,Point anchor,int borderMode,Scalar borderVal)868 Ptr<Filter> cv::cuda::createBoxMinFilter(int srcType, Size ksize, Point anchor, int borderMode, Scalar borderVal)
869 {
870     return makePtr<NPPRankFilter>(RANK_MIN, srcType, ksize, anchor, borderMode, borderVal);
871 }
872 
873 ////////////////////////////////////////////////////////////////////////////////////////////////////
874 // 1D Sum Filter
875 
876 namespace
877 {
878     class NppRowSumFilter : public Filter
879     {
880     public:
881         NppRowSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal);
882 
883         void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
884 
885     private:
886         int srcType_, dstType_;
887         int ksize_;
888         int anchor_;
889         int borderMode_;
890         Scalar borderVal_;
891 
892         GpuMat srcBorder_;
893     };
894 
NppRowSumFilter(int srcType,int dstType,int ksize,int anchor,int borderMode,Scalar borderVal)895     NppRowSumFilter::NppRowSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal) :
896         srcType_(srcType), dstType_(dstType), ksize_(ksize), anchor_(anchor), borderMode_(borderMode), borderVal_(borderVal)
897     {
898         CV_Assert( srcType_ == CV_8UC1 );
899         CV_Assert( dstType_ == CV_32FC1 );
900 
901         normalizeAnchor(anchor_, ksize_);
902     }
903 
apply(InputArray _src,OutputArray _dst,Stream & _stream)904     void NppRowSumFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream)
905     {
906         GpuMat src = _src.getGpuMat();
907         CV_Assert( src.type() == srcType_ );
908 
909         cuda::copyMakeBorder(src, srcBorder_, 0, 0, ksize_, ksize_, borderMode_, borderVal_, _stream);
910 
911         _dst.create(src.size(), dstType_);
912         GpuMat dst = _dst.getGpuMat();
913 
914         GpuMat srcRoi = srcBorder_(Rect(ksize_, 0, src.cols, src.rows));
915 
916         cudaStream_t stream = StreamAccessor::getStream(_stream);
917         NppStreamHandler h(stream);
918 
919         NppiSize oSizeROI;
920         oSizeROI.width = src.cols;
921         oSizeROI.height = src.rows;
922 
923         nppSafeCall( nppiSumWindowRow_8u32f_C1R(srcRoi.ptr<Npp8u>(), static_cast<int>(srcRoi.step),
924                                                 dst.ptr<Npp32f>(), static_cast<int>(dst.step),
925                                                 oSizeROI, ksize_, anchor_) );
926 
927         if (stream == 0)
928             cudaSafeCall( cudaDeviceSynchronize() );
929     }
930 }
931 
createRowSumFilter(int srcType,int dstType,int ksize,int anchor,int borderMode,Scalar borderVal)932 Ptr<Filter> cv::cuda::createRowSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal)
933 {
934     return makePtr<NppRowSumFilter>(srcType, dstType, ksize, anchor, borderMode, borderVal);
935 }
936 
937 namespace
938 {
939     class NppColumnSumFilter : public Filter
940     {
941     public:
942         NppColumnSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal);
943 
944         void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
945 
946     private:
947         int srcType_, dstType_;
948         int ksize_;
949         int anchor_;
950         int borderMode_;
951         Scalar borderVal_;
952 
953         GpuMat srcBorder_;
954     };
955 
NppColumnSumFilter(int srcType,int dstType,int ksize,int anchor,int borderMode,Scalar borderVal)956     NppColumnSumFilter::NppColumnSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal) :
957         srcType_(srcType), dstType_(dstType), ksize_(ksize), anchor_(anchor), borderMode_(borderMode), borderVal_(borderVal)
958     {
959         CV_Assert( srcType_ == CV_8UC1 );
960         CV_Assert( dstType_ == CV_32FC1 );
961 
962         normalizeAnchor(anchor_, ksize_);
963     }
964 
apply(InputArray _src,OutputArray _dst,Stream & _stream)965     void NppColumnSumFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream)
966     {
967         GpuMat src = _src.getGpuMat();
968         CV_Assert( src.type() == srcType_ );
969 
970         cuda::copyMakeBorder(src, srcBorder_, ksize_, ksize_, 0, 0, borderMode_, borderVal_, _stream);
971 
972         _dst.create(src.size(), dstType_);
973         GpuMat dst = _dst.getGpuMat();
974 
975         GpuMat srcRoi = srcBorder_(Rect(0, ksize_, src.cols, src.rows));
976 
977         cudaStream_t stream = StreamAccessor::getStream(_stream);
978         NppStreamHandler h(stream);
979 
980         NppiSize oSizeROI;
981         oSizeROI.width = src.cols;
982         oSizeROI.height = src.rows;
983 
984         nppSafeCall( nppiSumWindowColumn_8u32f_C1R(srcRoi.ptr<Npp8u>(), static_cast<int>(srcRoi.step),
985                                                    dst.ptr<Npp32f>(), static_cast<int>(dst.step),
986                                                    oSizeROI, ksize_, anchor_) );
987 
988         if (stream == 0)
989             cudaSafeCall( cudaDeviceSynchronize() );
990     }
991 }
992 
createColumnSumFilter(int srcType,int dstType,int ksize,int anchor,int borderMode,Scalar borderVal)993 Ptr<Filter> cv::cuda::createColumnSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal)
994 {
995     return makePtr<NppColumnSumFilter>(srcType, dstType, ksize, anchor, borderMode, borderVal);
996 }
997 
998 #endif
999