• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #if !defined CUDA_DISABLER
44 
45 #include "opencv2/core/cuda/common.hpp"
46 #include "opencv2/core/cuda/vec_traits.hpp"
47 #include "opencv2/core/cuda/vec_math.hpp"
48 #include "opencv2/core/cuda/saturate_cast.hpp"
49 #include "opencv2/core/cuda/border_interpolate.hpp"
50 
51 namespace cv { namespace cuda { namespace device
52 {
53     namespace imgproc
54     {
55         texture<uchar4, 2> tex_meanshift;
56 
do_mean_shift(int x0,int y0,unsigned char * out,size_t out_step,int cols,int rows,int sp,int sr,int maxIter,float eps)57         __device__ short2 do_mean_shift(int x0, int y0, unsigned char* out,
58                                         size_t out_step, int cols, int rows,
59                                         int sp, int sr, int maxIter, float eps)
60         {
61             int isr2 = sr*sr;
62             uchar4 c = tex2D(tex_meanshift, x0, y0 );
63 
64             // iterate meanshift procedure
65             for( int iter = 0; iter < maxIter; iter++ )
66             {
67                 int count = 0;
68                 int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;
69                 float icount;
70 
71                 //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
72                 int minx = x0-sp;
73                 int miny = y0-sp;
74                 int maxx = x0+sp;
75                 int maxy = y0+sp;
76 
77                 for( int y = miny; y <= maxy; y++)
78                 {
79                     int rowCount = 0;
80                     for( int x = minx; x <= maxx; x++ )
81                     {
82                         uchar4 t = tex2D( tex_meanshift, x, y );
83 
84                         int norm2 = (t.x - c.x) * (t.x - c.x) + (t.y - c.y) * (t.y - c.y) + (t.z - c.z) * (t.z - c.z);
85                         if( norm2 <= isr2 )
86                         {
87                             s0 += t.x; s1 += t.y; s2 += t.z;
88                             sx += x; rowCount++;
89                         }
90                     }
91                     count += rowCount;
92                     sy += y*rowCount;
93                 }
94 
95                 if( count == 0 )
96                     break;
97 
98                 icount = 1.f/count;
99                 int x1 = __float2int_rz(sx*icount);
100                 int y1 = __float2int_rz(sy*icount);
101                 s0 = __float2int_rz(s0*icount);
102                 s1 = __float2int_rz(s1*icount);
103                 s2 = __float2int_rz(s2*icount);
104 
105                 int norm2 = (s0 - c.x) * (s0 - c.x) + (s1 - c.y) * (s1 - c.y) + (s2 - c.z) * (s2 - c.z);
106 
107                 bool stopFlag = (x0 == x1 && y0 == y1) || (::abs(x1-x0) + ::abs(y1-y0) + norm2 <= eps);
108 
109                 x0 = x1; y0 = y1;
110                 c.x = s0; c.y = s1; c.z = s2;
111 
112                 if( stopFlag )
113                     break;
114             }
115 
116             int base = (blockIdx.y * blockDim.y + threadIdx.y) * out_step + (blockIdx.x * blockDim.x + threadIdx.x) * 4 * sizeof(uchar);
117             *(uchar4*)(out + base) = c;
118 
119             return make_short2((short)x0, (short)y0);
120         }
121 
meanshift_kernel(unsigned char * out,size_t out_step,int cols,int rows,int sp,int sr,int maxIter,float eps)122         __global__ void meanshift_kernel(unsigned char* out, size_t out_step, int cols, int rows, int sp, int sr, int maxIter, float eps )
123         {
124             int x0 = blockIdx.x * blockDim.x + threadIdx.x;
125             int y0 = blockIdx.y * blockDim.y + threadIdx.y;
126 
127             if( x0 < cols && y0 < rows )
128                 do_mean_shift(x0, y0, out, out_step, cols, rows, sp, sr, maxIter, eps);
129         }
130 
meanShiftFiltering_gpu(const PtrStepSzb & src,PtrStepSzb dst,int sp,int sr,int maxIter,float eps,cudaStream_t stream)131         void meanShiftFiltering_gpu(const PtrStepSzb& src, PtrStepSzb dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
132         {
133             dim3 grid(1, 1, 1);
134             dim3 threads(32, 8, 1);
135             grid.x = divUp(src.cols, threads.x);
136             grid.y = divUp(src.rows, threads.y);
137 
138             cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
139             cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) );
140 
141             meanshift_kernel<<< grid, threads, 0, stream >>>( dst.data, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
142             cudaSafeCall( cudaGetLastError() );
143 
144             if (stream == 0)
145                 cudaSafeCall( cudaDeviceSynchronize() );
146         }
147 
meanshiftproc_kernel(unsigned char * outr,size_t outrstep,unsigned char * outsp,size_t outspstep,int cols,int rows,int sp,int sr,int maxIter,float eps)148         __global__ void meanshiftproc_kernel(unsigned char* outr, size_t outrstep,
149                                              unsigned char* outsp, size_t outspstep,
150                                              int cols, int rows,
151                                              int sp, int sr, int maxIter, float eps)
152         {
153             int x0 = blockIdx.x * blockDim.x + threadIdx.x;
154             int y0 = blockIdx.y * blockDim.y + threadIdx.y;
155 
156             if( x0 < cols && y0 < rows )
157             {
158                 int basesp = (blockIdx.y * blockDim.y + threadIdx.y) * outspstep + (blockIdx.x * blockDim.x + threadIdx.x) * 2 * sizeof(short);
159                 *(short2*)(outsp + basesp) = do_mean_shift(x0, y0, outr, outrstep, cols, rows, sp, sr, maxIter, eps);
160             }
161         }
162 
meanShiftProc_gpu(const PtrStepSzb & src,PtrStepSzb dstr,PtrStepSzb dstsp,int sp,int sr,int maxIter,float eps,cudaStream_t stream)163         void meanShiftProc_gpu(const PtrStepSzb& src, PtrStepSzb dstr, PtrStepSzb dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
164         {
165             dim3 grid(1, 1, 1);
166             dim3 threads(32, 8, 1);
167             grid.x = divUp(src.cols, threads.x);
168             grid.y = divUp(src.rows, threads.y);
169 
170             cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
171             cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) );
172 
173             meanshiftproc_kernel<<< grid, threads, 0, stream >>>( dstr.data, dstr.step, dstsp.data, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );
174             cudaSafeCall( cudaGetLastError() );
175 
176             if (stream == 0)
177                 cudaSafeCall( cudaDeviceSynchronize() );
178         }
179     }
180 }}}
181 
182 #endif
183