1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2013, NVIDIA Corporation, all rights reserved.
14 // Copyright (C) 2014, Itseez Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
22 //
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
26 //
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the copyright holders or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 #include "precomp.hpp"
44 #include "opencl_kernels_imgproc.hpp"
45
46 // ----------------------------------------------------------------------
47 // CLAHE
48
49 #ifdef HAVE_OPENCL
50
51 namespace clahe
52 {
calcLut(cv::InputArray _src,cv::OutputArray _dst,const int tilesX,const int tilesY,const cv::Size tileSize,const int clipLimit,const float lutScale)53 static bool calcLut(cv::InputArray _src, cv::OutputArray _dst,
54 const int tilesX, const int tilesY, const cv::Size tileSize,
55 const int clipLimit, const float lutScale)
56 {
57 cv::ocl::Kernel _k("calcLut", cv::ocl::imgproc::clahe_oclsrc);
58
59 bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
60 cv::String opts;
61 if(is_cpu)
62 opts = "-D CPU ";
63 else
64 opts = cv::format("-D WAVE_SIZE=%d", _k.preferedWorkGroupSizeMultiple());
65
66 cv::ocl::Kernel k("calcLut", cv::ocl::imgproc::clahe_oclsrc, opts);
67 if(k.empty())
68 return false;
69
70 cv::UMat src = _src.getUMat();
71 _dst.create(tilesX * tilesY, 256, CV_8UC1);
72 cv::UMat dst = _dst.getUMat();
73
74 int tile_size[2];
75 tile_size[0] = tileSize.width;
76 tile_size[1] = tileSize.height;
77
78 size_t localThreads[3] = { 32, 8, 1 };
79 size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 };
80
81 int idx = 0;
82 idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(src));
83 idx = k.set(idx, cv::ocl::KernelArg::WriteOnlyNoSize(dst));
84 idx = k.set(idx, tile_size);
85 idx = k.set(idx, tilesX);
86 idx = k.set(idx, clipLimit);
87 k.set(idx, lutScale);
88
89 return k.run(2, globalThreads, localThreads, false);
90 }
91
transform(cv::InputArray _src,cv::OutputArray _dst,cv::InputArray _lut,const int tilesX,const int tilesY,const cv::Size & tileSize)92 static bool transform(cv::InputArray _src, cv::OutputArray _dst, cv::InputArray _lut,
93 const int tilesX, const int tilesY, const cv::Size & tileSize)
94 {
95
96 cv::ocl::Kernel k("transform", cv::ocl::imgproc::clahe_oclsrc);
97 if(k.empty())
98 return false;
99
100 int tile_size[2];
101 tile_size[0] = tileSize.width;
102 tile_size[1] = tileSize.height;
103
104 cv::UMat src = _src.getUMat();
105 _dst.create(src.size(), src.type());
106 cv::UMat dst = _dst.getUMat();
107 cv::UMat lut = _lut.getUMat();
108
109 size_t localThreads[3] = { 32, 8, 1 };
110 size_t globalThreads[3] = { src.cols, src.rows, 1 };
111
112 int idx = 0;
113 idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(src));
114 idx = k.set(idx, cv::ocl::KernelArg::WriteOnlyNoSize(dst));
115 idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(lut));
116 idx = k.set(idx, src.cols);
117 idx = k.set(idx, src.rows);
118 idx = k.set(idx, tile_size);
119 idx = k.set(idx, tilesX);
120 k.set(idx, tilesY);
121
122 return k.run(2, globalThreads, localThreads, false);
123 }
124 }
125
126 #endif
127
128 namespace
129 {
130 template <class T, int histSize, int shift>
131 class CLAHE_CalcLut_Body : public cv::ParallelLoopBody
132 {
133 public:
CLAHE_CalcLut_Body(const cv::Mat & src,const cv::Mat & lut,const cv::Size & tileSize,const int & tilesX,const int & clipLimit,const float & lutScale)134 CLAHE_CalcLut_Body(const cv::Mat& src, const cv::Mat& lut, const cv::Size& tileSize, const int& tilesX, const int& clipLimit, const float& lutScale) :
135 src_(src), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), clipLimit_(clipLimit), lutScale_(lutScale)
136 {
137 }
138
139 void operator ()(const cv::Range& range) const;
140
141 private:
142 cv::Mat src_;
143 mutable cv::Mat lut_;
144
145 cv::Size tileSize_;
146 int tilesX_;
147 int clipLimit_;
148 float lutScale_;
149 };
150
151 template <class T, int histSize, int shift>
operator ()(const cv::Range & range) const152 void CLAHE_CalcLut_Body<T,histSize,shift>::operator ()(const cv::Range& range) const
153 {
154 T* tileLut = lut_.ptr<T>(range.start);
155 const size_t lut_step = lut_.step / sizeof(T);
156
157 for (int k = range.start; k < range.end; ++k, tileLut += lut_step)
158 {
159 const int ty = k / tilesX_;
160 const int tx = k % tilesX_;
161
162 // retrieve tile submatrix
163
164 cv::Rect tileROI;
165 tileROI.x = tx * tileSize_.width;
166 tileROI.y = ty * tileSize_.height;
167 tileROI.width = tileSize_.width;
168 tileROI.height = tileSize_.height;
169
170 const cv::Mat tile = src_(tileROI);
171
172 // calc histogram
173
174 int tileHist[histSize] = {0, };
175
176 int height = tileROI.height;
177 const size_t sstep = src_.step / sizeof(T);
178 for (const T* ptr = tile.ptr<T>(0); height--; ptr += sstep)
179 {
180 int x = 0;
181 for (; x <= tileROI.width - 4; x += 4)
182 {
183 int t0 = ptr[x], t1 = ptr[x+1];
184 tileHist[t0 >> shift]++; tileHist[t1 >> shift]++;
185 t0 = ptr[x+2]; t1 = ptr[x+3];
186 tileHist[t0 >> shift]++; tileHist[t1 >> shift]++;
187 }
188
189 for (; x < tileROI.width; ++x)
190 tileHist[ptr[x] >> shift]++;
191 }
192
193 // clip histogram
194
195 if (clipLimit_ > 0)
196 {
197 // how many pixels were clipped
198 int clipped = 0;
199 for (int i = 0; i < histSize; ++i)
200 {
201 if (tileHist[i] > clipLimit_)
202 {
203 clipped += tileHist[i] - clipLimit_;
204 tileHist[i] = clipLimit_;
205 }
206 }
207
208 // redistribute clipped pixels
209 int redistBatch = clipped / histSize;
210 int residual = clipped - redistBatch * histSize;
211
212 for (int i = 0; i < histSize; ++i)
213 tileHist[i] += redistBatch;
214
215 for (int i = 0; i < residual; ++i)
216 tileHist[i]++;
217 }
218
219 // calc Lut
220
221 int sum = 0;
222 for (int i = 0; i < histSize; ++i)
223 {
224 sum += tileHist[i];
225 tileLut[i] = cv::saturate_cast<T>(sum * lutScale_);
226 }
227 }
228 }
229
230 template <class T>
231 class CLAHE_Interpolation_Body : public cv::ParallelLoopBody
232 {
233 public:
CLAHE_Interpolation_Body(const cv::Mat & src,const cv::Mat & dst,const cv::Mat & lut,const cv::Size & tileSize,const int & tilesX,const int & tilesY)234 CLAHE_Interpolation_Body(const cv::Mat& src, const cv::Mat& dst, const cv::Mat& lut, const cv::Size& tileSize, const int& tilesX, const int& tilesY) :
235 src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY)
236 {
237 buf.allocate(src.cols << 2);
238 ind1_p = (int *)buf;
239 ind2_p = ind1_p + src.cols;
240 xa_p = (float *)(ind2_p + src.cols);
241 xa1_p = xa_p + src.cols;
242
243 int lut_step = static_cast<int>(lut_.step / sizeof(T));
244 float inv_tw = 1.0f / tileSize_.width;
245
246 for (int x = 0; x < src.cols; ++x)
247 {
248 float txf = x * inv_tw - 0.5f;
249
250 int tx1 = cvFloor(txf);
251 int tx2 = tx1 + 1;
252
253 xa_p[x] = txf - tx1;
254 xa1_p[x] = 1.0f - xa_p[x];
255
256 tx1 = std::max(tx1, 0);
257 tx2 = std::min(tx2, tilesX_ - 1);
258
259 ind1_p[x] = tx1 * lut_step;
260 ind2_p[x] = tx2 * lut_step;
261 }
262 }
263
264 void operator ()(const cv::Range& range) const;
265
266 private:
267 cv::Mat src_;
268 mutable cv::Mat dst_;
269 cv::Mat lut_;
270
271 cv::Size tileSize_;
272 int tilesX_;
273 int tilesY_;
274
275 cv::AutoBuffer<int> buf;
276 int * ind1_p, * ind2_p;
277 float * xa_p, * xa1_p;
278 };
279
280 template <class T>
operator ()(const cv::Range & range) const281 void CLAHE_Interpolation_Body<T>::operator ()(const cv::Range& range) const
282 {
283 float inv_th = 1.0f / tileSize_.height;
284
285 for (int y = range.start; y < range.end; ++y)
286 {
287 const T* srcRow = src_.ptr<T>(y);
288 T* dstRow = dst_.ptr<T>(y);
289
290 float tyf = y * inv_th - 0.5f;
291
292 int ty1 = cvFloor(tyf);
293 int ty2 = ty1 + 1;
294
295 float ya = tyf - ty1, ya1 = 1.0f - ya;
296
297 ty1 = std::max(ty1, 0);
298 ty2 = std::min(ty2, tilesY_ - 1);
299
300 const T* lutPlane1 = lut_.ptr<T>(ty1 * tilesX_);
301 const T* lutPlane2 = lut_.ptr<T>(ty2 * tilesX_);
302
303 for (int x = 0; x < src_.cols; ++x)
304 {
305 int srcVal = srcRow[x];
306
307 int ind1 = ind1_p[x] + srcVal;
308 int ind2 = ind2_p[x] + srcVal;
309
310 float res = (lutPlane1[ind1] * xa1_p[x] + lutPlane1[ind2] * xa_p[x]) * ya1 +
311 (lutPlane2[ind1] * xa1_p[x] + lutPlane2[ind2] * xa_p[x]) * ya;
312
313 dstRow[x] = cv::saturate_cast<T>(res);
314 }
315 }
316 }
317
318 class CLAHE_Impl : public cv::CLAHE
319 {
320 public:
321 CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8);
322
323 void apply(cv::InputArray src, cv::OutputArray dst);
324
325 void setClipLimit(double clipLimit);
326 double getClipLimit() const;
327
328 void setTilesGridSize(cv::Size tileGridSize);
329 cv::Size getTilesGridSize() const;
330
331 void collectGarbage();
332
333 private:
334 double clipLimit_;
335 int tilesX_;
336 int tilesY_;
337
338 cv::Mat srcExt_;
339 cv::Mat lut_;
340
341 #ifdef HAVE_OPENCL
342 cv::UMat usrcExt_;
343 cv::UMat ulut_;
344 #endif
345 };
346
CLAHE_Impl(double clipLimit,int tilesX,int tilesY)347 CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) :
348 clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY)
349 {
350 }
351
apply(cv::InputArray _src,cv::OutputArray _dst)352 void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst)
353 {
354 CV_Assert( _src.type() == CV_8UC1 || _src.type() == CV_16UC1 );
355
356 #ifdef HAVE_OPENCL
357 bool useOpenCL = cv::ocl::useOpenCL() && _src.isUMat() && _src.dims()<=2 && _src.type() == CV_8UC1;
358 #endif
359
360 int histSize = _src.type() == CV_8UC1 ? 256 : 4096;
361
362 cv::Size tileSize;
363 cv::_InputArray _srcForLut;
364
365 if (_src.size().width % tilesX_ == 0 && _src.size().height % tilesY_ == 0)
366 {
367 tileSize = cv::Size(_src.size().width / tilesX_, _src.size().height / tilesY_);
368 _srcForLut = _src;
369 }
370 else
371 {
372 #ifdef HAVE_OPENCL
373 if(useOpenCL)
374 {
375 cv::copyMakeBorder(_src, usrcExt_, 0, tilesY_ - (_src.size().height % tilesY_), 0, tilesX_ - (_src.size().width % tilesX_), cv::BORDER_REFLECT_101);
376 tileSize = cv::Size(usrcExt_.size().width / tilesX_, usrcExt_.size().height / tilesY_);
377 _srcForLut = usrcExt_;
378 }
379 else
380 #endif
381 {
382 cv::copyMakeBorder(_src, srcExt_, 0, tilesY_ - (_src.size().height % tilesY_), 0, tilesX_ - (_src.size().width % tilesX_), cv::BORDER_REFLECT_101);
383 tileSize = cv::Size(srcExt_.size().width / tilesX_, srcExt_.size().height / tilesY_);
384 _srcForLut = srcExt_;
385 }
386 }
387
388 const int tileSizeTotal = tileSize.area();
389 const float lutScale = static_cast<float>(histSize - 1) / tileSizeTotal;
390
391 int clipLimit = 0;
392 if (clipLimit_ > 0.0)
393 {
394 clipLimit = static_cast<int>(clipLimit_ * tileSizeTotal / histSize);
395 clipLimit = std::max(clipLimit, 1);
396 }
397
398 #ifdef HAVE_OPENCL
399 if (useOpenCL && clahe::calcLut(_srcForLut, ulut_, tilesX_, tilesY_, tileSize, clipLimit, lutScale) )
400 if( clahe::transform(_src, _dst, ulut_, tilesX_, tilesY_, tileSize) )
401 {
402 CV_IMPL_ADD(CV_IMPL_OCL);
403 return;
404 }
405 #endif
406
407 cv::Mat src = _src.getMat();
408 _dst.create( src.size(), src.type() );
409 cv::Mat dst = _dst.getMat();
410 cv::Mat srcForLut = _srcForLut.getMat();
411 lut_.create(tilesX_ * tilesY_, histSize, _src.type());
412
413 cv::Ptr<cv::ParallelLoopBody> calcLutBody;
414 if (_src.type() == CV_8UC1)
415 calcLutBody = cv::makePtr<CLAHE_CalcLut_Body<uchar, 256, 0> >(srcForLut, lut_, tileSize, tilesX_, clipLimit, lutScale);
416 else if (_src.type() == CV_16UC1)
417 calcLutBody = cv::makePtr<CLAHE_CalcLut_Body<ushort, 4096, 4> >(srcForLut, lut_, tileSize, tilesX_, clipLimit, lutScale);
418 else
419 CV_Error( CV_StsBadArg, "Unsupported type" );
420
421 cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), *calcLutBody);
422
423 cv::Ptr<cv::ParallelLoopBody> interpolationBody;
424 if (_src.type() == CV_8UC1)
425 interpolationBody = cv::makePtr<CLAHE_Interpolation_Body<uchar> >(src, dst, lut_, tileSize, tilesX_, tilesY_);
426 else if (_src.type() == CV_16UC1)
427 interpolationBody = cv::makePtr<CLAHE_Interpolation_Body<ushort> >(src, dst, lut_, tileSize, tilesX_, tilesY_);
428
429 cv::parallel_for_(cv::Range(0, src.rows), *interpolationBody);
430 }
431
setClipLimit(double clipLimit)432 void CLAHE_Impl::setClipLimit(double clipLimit)
433 {
434 clipLimit_ = clipLimit;
435 }
436
getClipLimit() const437 double CLAHE_Impl::getClipLimit() const
438 {
439 return clipLimit_;
440 }
441
setTilesGridSize(cv::Size tileGridSize)442 void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize)
443 {
444 tilesX_ = tileGridSize.width;
445 tilesY_ = tileGridSize.height;
446 }
447
getTilesGridSize() const448 cv::Size CLAHE_Impl::getTilesGridSize() const
449 {
450 return cv::Size(tilesX_, tilesY_);
451 }
452
collectGarbage()453 void CLAHE_Impl::collectGarbage()
454 {
455 srcExt_.release();
456 lut_.release();
457 #ifdef HAVE_OPENCL
458 usrcExt_.release();
459 ulut_.release();
460 #endif
461 }
462 }
463
createCLAHE(double clipLimit,cv::Size tileGridSize)464 cv::Ptr<cv::CLAHE> cv::createCLAHE(double clipLimit, cv::Size tileGridSize)
465 {
466 return makePtr<CLAHE_Impl>(clipLimit, tileGridSize.width, tileGridSize.height);
467 }
468