• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #if !defined CUDA_DISABLER
44 
45 #include "opencv2/core/cuda/common.hpp"
46 #include "opencv2/core/cuda/saturate_cast.hpp"
47 #include "opencv2/core/cuda/border_interpolate.hpp"
48 
49 namespace cv { namespace cuda { namespace device
50 {
51     template <class SrcPtr, typename D>
filter2D(const SrcPtr src,PtrStepSz<D> dst,const float * __restrict__ kernel,const int kWidth,const int kHeight,const int anchorX,const int anchorY)52     __global__ void filter2D(const SrcPtr src, PtrStepSz<D> dst,
53                              const float* __restrict__ kernel,
54                              const int kWidth, const int kHeight,
55                              const int anchorX, const int anchorY)
56     {
57         typedef typename TypeVec<float, VecTraits<D>::cn>::vec_type sum_t;
58 
59         const int x = blockIdx.x * blockDim.x + threadIdx.x;
60         const int y = blockIdx.y * blockDim.y + threadIdx.y;
61 
62         if (x >= dst.cols || y >= dst.rows)
63             return;
64 
65         sum_t res = VecTraits<sum_t>::all(0);
66         int kInd = 0;
67 
68         for (int i = 0; i < kHeight; ++i)
69         {
70             for (int j = 0; j < kWidth; ++j)
71                 res = res + src(y - anchorY + i, x - anchorX + j) * kernel[kInd++];
72         }
73 
74         dst(y, x) = saturate_cast<D>(res);
75     }
76 
77     template <typename T, typename D, template <typename> class Brd> struct Filter2DCaller;
78 
79     #define IMPLEMENT_FILTER2D_TEX_READER(type) \
80         texture< type , cudaTextureType2D, cudaReadModeElementType> tex_filter2D_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
81         struct tex_filter2D_ ## type ## _reader \
82         { \
83             typedef type elem_type; \
84             typedef int index_type; \
85             const int xoff; \
86             const int yoff; \
87             tex_filter2D_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
88             __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
89             { \
90                 return tex2D(tex_filter2D_ ## type , x + xoff, y + yoff); \
91             } \
92         }; \
93         template <typename D, template <typename> class Brd> struct Filter2DCaller< type , D, Brd> \
94         { \
95             static void call(const PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz<D> dst, const float* kernel, \
96                 int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream) \
97             { \
98                 typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
99                 dim3 block(16, 16); \
100                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
101                 bindTexture(&tex_filter2D_ ## type , srcWhole); \
102                 tex_filter2D_ ## type ##_reader texSrc(xoff, yoff); \
103                 Brd<work_type> brd(dst.rows, dst.cols, VecTraits<work_type>::make(borderValue)); \
104                 BorderReader< tex_filter2D_ ## type ##_reader, Brd<work_type> > brdSrc(texSrc, brd); \
105                 filter2D<<<grid, block, 0, stream>>>(brdSrc, dst, kernel, kWidth, kHeight, anchorX, anchorY); \
106                 cudaSafeCall( cudaGetLastError() ); \
107                 if (stream == 0) \
108                     cudaSafeCall( cudaDeviceSynchronize() ); \
109             } \
110         };
111 
112     IMPLEMENT_FILTER2D_TEX_READER(uchar);
113     IMPLEMENT_FILTER2D_TEX_READER(uchar4);
114 
115     IMPLEMENT_FILTER2D_TEX_READER(ushort);
116     IMPLEMENT_FILTER2D_TEX_READER(ushort4);
117 
118     IMPLEMENT_FILTER2D_TEX_READER(float);
119     IMPLEMENT_FILTER2D_TEX_READER(float4);
120 
121     #undef IMPLEMENT_FILTER2D_TEX_READER
122 
123     template <typename T, typename D>
filter2D(PtrStepSzb srcWhole,int ofsX,int ofsY,PtrStepSzb dst,const float * kernel,int kWidth,int kHeight,int anchorX,int anchorY,int borderMode,const float * borderValue,cudaStream_t stream)124     void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel,
125                   int kWidth, int kHeight, int anchorX, int anchorY,
126                   int borderMode, const float* borderValue, cudaStream_t stream)
127     {
128         typedef void (*func_t)(const PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<D> dst, const float* kernel,
129                                int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
130         static const func_t funcs[] =
131         {
132             Filter2DCaller<T, D, BrdConstant>::call,
133             Filter2DCaller<T, D, BrdReplicate>::call,
134             Filter2DCaller<T, D, BrdReflect>::call,
135             Filter2DCaller<T, D, BrdWrap>::call,
136             Filter2DCaller<T, D, BrdReflect101>::call
137         };
138 
139         funcs[borderMode]((PtrStepSz<T>) srcWhole, ofsX, ofsY, (PtrStepSz<D>) dst, kernel,
140                           kWidth, kHeight, anchorX, anchorY, borderValue, stream);
141     }
142 
143     template void filter2D<uchar  , uchar  >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
144     template void filter2D<uchar4 , uchar4 >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
145     template void filter2D<ushort , ushort >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
146     template void filter2D<ushort4, ushort4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
147     template void filter2D<float  , float  >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
148     template void filter2D<float4 , float4 >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
149 }}}
150 
151 #endif // CUDA_DISABLER
152