1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #if !defined CUDA_DISABLER 44 45 #include "opencv2/core/cuda/common.hpp" 46 #include "opencv2/core/cuda/vec_traits.hpp" 47 #include "opencv2/core/cuda/vec_math.hpp" 48 #include "opencv2/core/cuda/saturate_cast.hpp" 49 #include "opencv2/core/cuda/border_interpolate.hpp" 50 51 namespace cv { namespace cuda { namespace device 52 { 53 namespace imgproc 54 { 55 texture<uchar4, 2> tex_meanshift; 56 do_mean_shift(int x0,int y0,unsigned char * out,size_t out_step,int cols,int rows,int sp,int sr,int maxIter,float eps)57 __device__ short2 do_mean_shift(int x0, int y0, unsigned char* out, 58 size_t out_step, int cols, int rows, 59 int sp, int sr, int maxIter, float eps) 60 { 61 int isr2 = sr*sr; 62 uchar4 c = tex2D(tex_meanshift, x0, y0 ); 63 64 // iterate meanshift procedure 65 for( int iter = 0; iter < maxIter; iter++ ) 66 { 67 int count = 0; 68 int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0; 69 float icount; 70 71 //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp) 72 int minx = x0-sp; 73 int miny = y0-sp; 74 int maxx = x0+sp; 75 int maxy = y0+sp; 76 77 for( int y = miny; y <= maxy; y++) 78 { 79 int rowCount = 0; 80 for( int x = minx; x <= maxx; x++ ) 81 { 82 uchar4 t = tex2D( tex_meanshift, x, y ); 83 84 int norm2 = (t.x - c.x) * (t.x - c.x) + (t.y - c.y) * (t.y - c.y) + (t.z - c.z) * (t.z - c.z); 85 if( norm2 <= isr2 ) 86 { 87 s0 += t.x; s1 += t.y; s2 += t.z; 88 sx += x; rowCount++; 89 } 90 } 91 count += rowCount; 92 sy += y*rowCount; 93 } 94 95 if( count == 0 ) 96 break; 97 98 icount = 1.f/count; 99 int x1 = __float2int_rz(sx*icount); 100 int y1 = __float2int_rz(sy*icount); 101 s0 = __float2int_rz(s0*icount); 102 s1 = __float2int_rz(s1*icount); 103 s2 = __float2int_rz(s2*icount); 104 105 int norm2 = (s0 - c.x) * (s0 - c.x) + (s1 - c.y) * (s1 - c.y) + (s2 - c.z) * (s2 - c.z); 106 107 bool stopFlag = (x0 == x1 && y0 == y1) || (::abs(x1-x0) + ::abs(y1-y0) + norm2 <= eps); 108 109 x0 = x1; y0 = y1; 110 c.x = s0; c.y = s1; c.z = s2; 111 112 if( stopFlag ) 113 break; 114 } 115 116 int base = (blockIdx.y * blockDim.y + threadIdx.y) * out_step + (blockIdx.x * blockDim.x + threadIdx.x) * 4 * sizeof(uchar); 117 *(uchar4*)(out + base) = c; 118 119 return make_short2((short)x0, (short)y0); 120 } 121 meanshift_kernel(unsigned char * out,size_t out_step,int cols,int rows,int sp,int sr,int maxIter,float eps)122 __global__ void meanshift_kernel(unsigned char* out, size_t out_step, int cols, int rows, int sp, int sr, int maxIter, float eps ) 123 { 124 int x0 = blockIdx.x * blockDim.x + threadIdx.x; 125 int y0 = blockIdx.y * blockDim.y + threadIdx.y; 126 127 if( x0 < cols && y0 < rows ) 128 do_mean_shift(x0, y0, out, out_step, cols, rows, sp, sr, maxIter, eps); 129 } 130 meanShiftFiltering_gpu(const PtrStepSzb & src,PtrStepSzb dst,int sp,int sr,int maxIter,float eps,cudaStream_t stream)131 void meanShiftFiltering_gpu(const PtrStepSzb& src, PtrStepSzb dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream) 132 { 133 dim3 grid(1, 1, 1); 134 dim3 threads(32, 8, 1); 135 grid.x = divUp(src.cols, threads.x); 136 grid.y = divUp(src.rows, threads.y); 137 138 cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>(); 139 cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) ); 140 141 meanshift_kernel<<< grid, threads, 0, stream >>>( dst.data, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps ); 142 cudaSafeCall( cudaGetLastError() ); 143 144 if (stream == 0) 145 cudaSafeCall( cudaDeviceSynchronize() ); 146 } 147 meanshiftproc_kernel(unsigned char * outr,size_t outrstep,unsigned char * outsp,size_t outspstep,int cols,int rows,int sp,int sr,int maxIter,float eps)148 __global__ void meanshiftproc_kernel(unsigned char* outr, size_t outrstep, 149 unsigned char* outsp, size_t outspstep, 150 int cols, int rows, 151 int sp, int sr, int maxIter, float eps) 152 { 153 int x0 = blockIdx.x * blockDim.x + threadIdx.x; 154 int y0 = blockIdx.y * blockDim.y + threadIdx.y; 155 156 if( x0 < cols && y0 < rows ) 157 { 158 int basesp = (blockIdx.y * blockDim.y + threadIdx.y) * outspstep + (blockIdx.x * blockDim.x + threadIdx.x) * 2 * sizeof(short); 159 *(short2*)(outsp + basesp) = do_mean_shift(x0, y0, outr, outrstep, cols, rows, sp, sr, maxIter, eps); 160 } 161 } 162 meanShiftProc_gpu(const PtrStepSzb & src,PtrStepSzb dstr,PtrStepSzb dstsp,int sp,int sr,int maxIter,float eps,cudaStream_t stream)163 void meanShiftProc_gpu(const PtrStepSzb& src, PtrStepSzb dstr, PtrStepSzb dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream) 164 { 165 dim3 grid(1, 1, 1); 166 dim3 threads(32, 8, 1); 167 grid.x = divUp(src.cols, threads.x); 168 grid.y = divUp(src.rows, threads.y); 169 170 cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>(); 171 cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) ); 172 173 meanshiftproc_kernel<<< grid, threads, 0, stream >>>( dstr.data, dstr.step, dstsp.data, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps ); 174 cudaSafeCall( cudaGetLastError() ); 175 176 if (stream == 0) 177 cudaSafeCall( cudaDeviceSynchronize() ); 178 } 179 } 180 }}} 181 182 #endif 183