// OpenCV CUDA histogram kernels.
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/transform.hpp"

using namespace cv::cuda;
using namespace cv::cuda::device;

namespace hist
{
    // 256-bin histogram of an 8-bit single-channel image.
    //
    // Launch contract (see histogram256 below): block = (32, 8) = 256 threads,
    // grid.x = ceil(rows / 8). Each thread row (threadIdx.y) scans one image
    // row; the whole block shares one 256-bin sub-histogram in shared memory,
    // which is merged into the global `hist` array at the end.
    //
    // NOTE(review): pixels are read 4 at a time through an unsigned int*,
    // which assumes `src` and `step` are 4-byte aligned — presumably
    // guaranteed by the GpuMat allocator; confirm before reusing elsewhere.
    __global__ void histogram256Kernel(const uchar* src, int cols, int rows, size_t step, int* hist)
    {
        __shared__ int shist[256];

        const int y = blockIdx.x * blockDim.y + threadIdx.y;    // image row of this thread
        const int tid = threadIdx.y * blockDim.x + threadIdx.x; // flat thread id in [0, 256)

        // Each thread zeroes exactly one shared bin (256 threads, 256 bins).
        shist[tid] = 0;
        __syncthreads();

        if (y < rows)
        {
            const unsigned int* rowPtr = (const unsigned int*) (src + y * step);

            // Main loop: the 32 threads of a row stride over it in 4-pixel
            // (one uint) chunks and histogram all four bytes of each load.
            const int cols_4 = cols / 4;
            for (int x = threadIdx.x; x < cols_4; x += blockDim.x)
            {
                unsigned int data = rowPtr[x];

                Emulation::smem::atomicAdd(&shist[(data >>  0) & 0xFFU], 1);
                Emulation::smem::atomicAdd(&shist[(data >>  8) & 0xFFU], 1);
                Emulation::smem::atomicAdd(&shist[(data >> 16) & 0xFFU], 1);
                Emulation::smem::atomicAdd(&shist[(data >> 24) & 0xFFU], 1);
            }

            // Tail: when cols is not a multiple of 4, thread 0 of the row
            // handles the last 1-3 pixels one byte at a time.
            if (cols % 4 != 0 && threadIdx.x == 0)
            {
                for (int x = cols_4 * 4; x < cols; ++x)
                {
                    unsigned int data = ((const uchar*)rowPtr)[x];
                    Emulation::smem::atomicAdd(&shist[data], 1);
                }
            }
        }

        // Barrier sits outside the `if` so every thread of the block reaches it.
        __syncthreads();

        // Merge this block's sub-histogram into the global one, skipping
        // empty bins to save global atomics.
        const int histVal = shist[tid];
        if (histVal > 0)
            ::atomicAdd(hist + tid, histVal);
    }

    // Host launcher for histogram256Kernel.
    //
    // `hist` must point to 256 ints in device memory; the kernel only
    // accumulates into it, so it is presumably zeroed by the caller — confirm.
    // On the default stream the function synchronizes before returning.
    void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream)
    {
        const dim3 block(32, 8);
        const dim3 grid(divUp(src.rows, block.y));

        histogram256Kernel<<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, hist);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
}

/////////////////////////////////////////////////////////////////////////

namespace hist
{
    // Adds one sample to the shared even-bin histogram if it falls inside
    // [lowerLevel, upperLevel].
    //
    // binSize is ceil((upperLevel - lowerLevel) / binCount) (see the host
    // wrapper below), so a sample equal to upperLevel can compute a bin index
    // of binCount when the range divides evenly by binSize (e.g. lowerLevel=0,
    // upperLevel=255, binCount=5 -> binSize=51 and 255/51 == 5). That index
    // used to write one slot past the end of the shared array; it is now
    // clamped into the last bin.
    //
    // NOTE(review): `data >= lowerLevel` compares uint against int, so this
    // assumes lowerLevel (and upperLevel) are non-negative — true for the
    // 8-bit path, confirm for any other caller.
    __device__ __forceinline__ void histEvenInc(int* shist, uint data, int binSize, int lowerLevel, int upperLevel, int binCount)
    {
        if (data >= lowerLevel && data <= upperLevel)
        {
            uint ind = (data - lowerLevel) / binSize;

            // Clamp: upperLevel itself lies just past the last half-open bin.
            if (ind >= (uint)binCount)
                ind = binCount - 1;

            Emulation::smem::atomicAdd(shist + ind, 1);
        }
    }

    // Even-histogram kernel for 8-bit single-channel data.
    //
    // Launch contract (see the host wrapper below): block = (32, 8) = 256
    // threads, grid.x = ceil(rows / 8), dynamic shared memory of
    // binCount * sizeof(int). Requires binCount <= 256 so each bin is zeroed
    // and flushed by exactly one thread.
    //
    // NOTE(review): rows are read 4 pixels at a time through a uint*, which
    // assumes `src` and `step` are 4-byte aligned — presumably guaranteed by
    // the GpuMat allocator.
    __global__ void histEven8u(const uchar* src, const size_t step, const int rows, const int cols,
                               int* hist, const int binCount, const int binSize, const int lowerLevel, const int upperLevel)
    {
        extern __shared__ int shist[];

        const int y = blockIdx.x * blockDim.y + threadIdx.y;    // image row of this thread
        const int tid = threadIdx.y * blockDim.x + threadIdx.x; // flat thread id in [0, 256)

        if (tid < binCount)
            shist[tid] = 0;

        __syncthreads();

        if (y < rows)
        {
            const uchar* rowPtr = src + y * step;
            const uint* rowPtr4 = (uint*) rowPtr;

            // Main loop: each row's 32 threads stride over it in 4-pixel
            // (one uint) chunks and bin all four bytes of each load.
            const int cols_4 = cols / 4;
            for (int x = threadIdx.x; x < cols_4; x += blockDim.x)
            {
                const uint data = rowPtr4[x];

                histEvenInc(shist, (data >>  0) & 0xFFU, binSize, lowerLevel, upperLevel, binCount);
                histEvenInc(shist, (data >>  8) & 0xFFU, binSize, lowerLevel, upperLevel, binCount);
                histEvenInc(shist, (data >> 16) & 0xFFU, binSize, lowerLevel, upperLevel, binCount);
                histEvenInc(shist, (data >> 24) & 0xFFU, binSize, lowerLevel, upperLevel, binCount);
            }

            // Tail: thread 0 of the row handles the remaining 1-3 pixels.
            if (cols % 4 != 0 && threadIdx.x == 0)
            {
                for (int x = cols_4 * 4; x < cols; ++x)
                {
                    const uchar data = rowPtr[x];
                    histEvenInc(shist, data, binSize, lowerLevel, upperLevel, binCount);
                }
            }
        }

        __syncthreads();

        // Merge this block's sub-histogram into the global result, skipping
        // empty bins to save global atomics.
        if (tid < binCount)
        {
            const int histVal = shist[tid];

            if (histVal > 0)
                ::atomicAdd(hist + tid, histVal);
        }
    }

    // Host launcher for the even-histogram kernel.
    //
    // `hist` must point to binCount ints in device memory; the kernel only
    // accumulates into it, so it is presumably zeroed by the caller — confirm.
    // On the default stream the function synchronizes before returning.
    void histEven8u(PtrStepSzb src, int* hist, int binCount, int lowerLevel, int upperLevel, cudaStream_t stream)
    {
        const dim3 block(32, 8);
        const dim3 grid(divUp(src.rows, block.y));

        // Bin width, rounded up so binCount bins cover the whole level range.
        const int binSize = divUp(upperLevel - lowerLevel, binCount);

        const size_t smem_size = binCount * sizeof(int);

        histEven8u<<<grid, block, smem_size, stream>>>(src.data, src.step, src.rows, src.cols, hist, binCount, binSize, lowerLevel, upperLevel);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
}

/////////////////////////////////////////////////////////////////////////

namespace hist
{
    // 256-entry equalization LUT in constant memory; uploaded by
    // equalizeHist() before the transform is launched.
    __constant__ int c_lut[256];

    // Per-pixel functor for cv::cuda::device::transform: looks the source
    // intensity up in the constant-memory LUT and rescales the result,
    // rounding to the nearest integer.
    struct EqualizeHist : unary_function<uchar, uchar>
    {
        float scale;

        __host__ EqualizeHist(float scale_) : scale(scale_) {}

        __device__ __forceinline__ uchar operator ()(uchar val) const
        {
            return __float2int_rn(scale * c_lut[val]);
        }
    };
}
namespace cv { namespace cuda { namespace device
{
    // Transform-framework tuning for the EqualizeHist functor.
    // NOTE(review): smart_shift presumably controls how many pixels each
    // thread handles in device::transform's vectorized path (see
    // opencv2/core/cuda/transform.hpp) — confirm against that header.
    template <> struct TransformFunctorTraits<hist::EqualizeHist> : DefaultTransformFunctorTraits<hist::EqualizeHist>
    {
        enum { smart_shift = 4 };
    };
}}}
namespace hist
{
    // Applies the equalization LUT to every pixel of `src`:
    //   dst(x, y) = round(scale * lut[src(x, y)])
    // where scale = 255 / (number of pixels).
    //
    // `lut` must point to 256 ints in device memory; it is staged into the
    // __constant__ c_lut array — synchronously on the default stream,
    // asynchronously (ordered on `stream`) otherwise.
    void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream)
    {
        if (stream == 0)
            cudaSafeCall( cudaMemcpyToSymbol(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice) );
        else
            cudaSafeCall( cudaMemcpyToSymbolAsync(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice, stream) );

        // Compute the pixel count in size_t: the previous `src.cols * src.rows`
        // was a signed int product, which overflows (UB) for images with
        // 2^31 or more pixels.
        const float scale = 255.0f / (static_cast<size_t>(src.cols) * src.rows);

        device::transform(src, dst, EqualizeHist(scale), WithOutMask(), stream);
    }
}

#endif /* CUDA_DISABLER */