• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2013, NVIDIA Corporation, all rights reserved.
14 // Copyright (C) 2014, Itseez Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the copyright holders or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #include "precomp.hpp"
44 #include "opencl_kernels_imgproc.hpp"
45 
46 // ----------------------------------------------------------------------
47 // CLAHE
48 
49 #ifdef HAVE_OPENCL
50 
51 namespace clahe
52 {
calcLut(cv::InputArray _src,cv::OutputArray _dst,const int tilesX,const int tilesY,const cv::Size tileSize,const int clipLimit,const float lutScale)53     static bool calcLut(cv::InputArray _src, cv::OutputArray _dst,
54         const int tilesX, const int tilesY, const cv::Size tileSize,
55         const int clipLimit, const float lutScale)
56     {
57         cv::ocl::Kernel _k("calcLut", cv::ocl::imgproc::clahe_oclsrc);
58 
59         bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
60         cv::String opts;
61         if(is_cpu)
62             opts = "-D CPU ";
63         else
64             opts = cv::format("-D WAVE_SIZE=%d", _k.preferedWorkGroupSizeMultiple());
65 
66         cv::ocl::Kernel k("calcLut", cv::ocl::imgproc::clahe_oclsrc, opts);
67         if(k.empty())
68             return false;
69 
70         cv::UMat src = _src.getUMat();
71         _dst.create(tilesX * tilesY, 256, CV_8UC1);
72         cv::UMat dst = _dst.getUMat();
73 
74         int tile_size[2];
75         tile_size[0] = tileSize.width;
76         tile_size[1] = tileSize.height;
77 
78         size_t localThreads[3]  = { 32, 8, 1 };
79         size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 };
80 
81         int idx = 0;
82         idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(src));
83         idx = k.set(idx, cv::ocl::KernelArg::WriteOnlyNoSize(dst));
84         idx = k.set(idx, tile_size);
85         idx = k.set(idx, tilesX);
86         idx = k.set(idx, clipLimit);
87         k.set(idx, lutScale);
88 
89         return k.run(2, globalThreads, localThreads, false);
90     }
91 
transform(cv::InputArray _src,cv::OutputArray _dst,cv::InputArray _lut,const int tilesX,const int tilesY,const cv::Size & tileSize)92     static bool transform(cv::InputArray _src, cv::OutputArray _dst, cv::InputArray _lut,
93         const int tilesX, const int tilesY, const cv::Size & tileSize)
94     {
95 
96         cv::ocl::Kernel k("transform", cv::ocl::imgproc::clahe_oclsrc);
97         if(k.empty())
98             return false;
99 
100         int tile_size[2];
101         tile_size[0] = tileSize.width;
102         tile_size[1] = tileSize.height;
103 
104         cv::UMat src = _src.getUMat();
105         _dst.create(src.size(), src.type());
106         cv::UMat dst = _dst.getUMat();
107         cv::UMat lut = _lut.getUMat();
108 
109         size_t localThreads[3]  = { 32, 8, 1 };
110         size_t globalThreads[3] = { src.cols, src.rows, 1 };
111 
112         int idx = 0;
113         idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(src));
114         idx = k.set(idx, cv::ocl::KernelArg::WriteOnlyNoSize(dst));
115         idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(lut));
116         idx = k.set(idx, src.cols);
117         idx = k.set(idx, src.rows);
118         idx = k.set(idx, tile_size);
119         idx = k.set(idx, tilesX);
120         k.set(idx, tilesY);
121 
122         return k.run(2, globalThreads, localThreads, false);
123     }
124 }
125 
126 #endif
127 
128 namespace
129 {
130     template <class T, int histSize, int shift>
131     class CLAHE_CalcLut_Body : public cv::ParallelLoopBody
132     {
133     public:
CLAHE_CalcLut_Body(const cv::Mat & src,const cv::Mat & lut,const cv::Size & tileSize,const int & tilesX,const int & clipLimit,const float & lutScale)134         CLAHE_CalcLut_Body(const cv::Mat& src, const cv::Mat& lut, const cv::Size& tileSize, const int& tilesX, const int& clipLimit, const float& lutScale) :
135             src_(src), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), clipLimit_(clipLimit), lutScale_(lutScale)
136         {
137         }
138 
139         void operator ()(const cv::Range& range) const;
140 
141     private:
142         cv::Mat src_;
143         mutable cv::Mat lut_;
144 
145         cv::Size tileSize_;
146         int tilesX_;
147         int clipLimit_;
148         float lutScale_;
149     };
150 
151     template <class T, int histSize, int shift>
operator ()(const cv::Range & range) const152     void CLAHE_CalcLut_Body<T,histSize,shift>::operator ()(const cv::Range& range) const
153     {
154         T* tileLut = lut_.ptr<T>(range.start);
155         const size_t lut_step = lut_.step / sizeof(T);
156 
157         for (int k = range.start; k < range.end; ++k, tileLut += lut_step)
158         {
159             const int ty = k / tilesX_;
160             const int tx = k % tilesX_;
161 
162             // retrieve tile submatrix
163 
164             cv::Rect tileROI;
165             tileROI.x = tx * tileSize_.width;
166             tileROI.y = ty * tileSize_.height;
167             tileROI.width = tileSize_.width;
168             tileROI.height = tileSize_.height;
169 
170             const cv::Mat tile = src_(tileROI);
171 
172             // calc histogram
173 
174             int tileHist[histSize] = {0, };
175 
176             int height = tileROI.height;
177             const size_t sstep = src_.step / sizeof(T);
178             for (const T* ptr = tile.ptr<T>(0); height--; ptr += sstep)
179             {
180                 int x = 0;
181                 for (; x <= tileROI.width - 4; x += 4)
182                 {
183                     int t0 = ptr[x], t1 = ptr[x+1];
184                     tileHist[t0 >> shift]++; tileHist[t1 >> shift]++;
185                     t0 = ptr[x+2]; t1 = ptr[x+3];
186                     tileHist[t0 >> shift]++; tileHist[t1 >> shift]++;
187                 }
188 
189                 for (; x < tileROI.width; ++x)
190                     tileHist[ptr[x] >> shift]++;
191             }
192 
193             // clip histogram
194 
195             if (clipLimit_ > 0)
196             {
197                 // how many pixels were clipped
198                 int clipped = 0;
199                 for (int i = 0; i < histSize; ++i)
200                 {
201                     if (tileHist[i] > clipLimit_)
202                     {
203                         clipped += tileHist[i] - clipLimit_;
204                         tileHist[i] = clipLimit_;
205                     }
206                 }
207 
208                 // redistribute clipped pixels
209                 int redistBatch = clipped / histSize;
210                 int residual = clipped - redistBatch * histSize;
211 
212                 for (int i = 0; i < histSize; ++i)
213                     tileHist[i] += redistBatch;
214 
215                 for (int i = 0; i < residual; ++i)
216                     tileHist[i]++;
217             }
218 
219             // calc Lut
220 
221             int sum = 0;
222             for (int i = 0; i < histSize; ++i)
223             {
224                 sum += tileHist[i];
225                 tileLut[i] = cv::saturate_cast<T>(sum * lutScale_);
226             }
227         }
228     }
229 
230     template <class T>
231     class CLAHE_Interpolation_Body : public cv::ParallelLoopBody
232     {
233     public:
CLAHE_Interpolation_Body(const cv::Mat & src,const cv::Mat & dst,const cv::Mat & lut,const cv::Size & tileSize,const int & tilesX,const int & tilesY)234         CLAHE_Interpolation_Body(const cv::Mat& src, const cv::Mat& dst, const cv::Mat& lut, const cv::Size& tileSize, const int& tilesX, const int& tilesY) :
235             src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY)
236         {
237             buf.allocate(src.cols << 2);
238             ind1_p = (int *)buf;
239             ind2_p = ind1_p + src.cols;
240             xa_p = (float *)(ind2_p + src.cols);
241             xa1_p = xa_p + src.cols;
242 
243             int lut_step = static_cast<int>(lut_.step / sizeof(T));
244             float inv_tw = 1.0f / tileSize_.width;
245 
246             for (int x = 0; x < src.cols; ++x)
247             {
248                 float txf = x * inv_tw - 0.5f;
249 
250                 int tx1 = cvFloor(txf);
251                 int tx2 = tx1 + 1;
252 
253                 xa_p[x] = txf - tx1;
254                 xa1_p[x] = 1.0f - xa_p[x];
255 
256                 tx1 = std::max(tx1, 0);
257                 tx2 = std::min(tx2, tilesX_ - 1);
258 
259                 ind1_p[x] = tx1 * lut_step;
260                 ind2_p[x] = tx2 * lut_step;
261             }
262         }
263 
264         void operator ()(const cv::Range& range) const;
265 
266     private:
267         cv::Mat src_;
268         mutable cv::Mat dst_;
269         cv::Mat lut_;
270 
271         cv::Size tileSize_;
272         int tilesX_;
273         int tilesY_;
274 
275         cv::AutoBuffer<int> buf;
276         int * ind1_p, * ind2_p;
277         float * xa_p, * xa1_p;
278     };
279 
280     template <class T>
operator ()(const cv::Range & range) const281     void CLAHE_Interpolation_Body<T>::operator ()(const cv::Range& range) const
282     {
283         float inv_th = 1.0f / tileSize_.height;
284 
285         for (int y = range.start; y < range.end; ++y)
286         {
287             const T* srcRow = src_.ptr<T>(y);
288             T* dstRow = dst_.ptr<T>(y);
289 
290             float tyf = y * inv_th - 0.5f;
291 
292             int ty1 = cvFloor(tyf);
293             int ty2 = ty1 + 1;
294 
295             float ya = tyf - ty1, ya1 = 1.0f - ya;
296 
297             ty1 = std::max(ty1, 0);
298             ty2 = std::min(ty2, tilesY_ - 1);
299 
300             const T* lutPlane1 = lut_.ptr<T>(ty1 * tilesX_);
301             const T* lutPlane2 = lut_.ptr<T>(ty2 * tilesX_);
302 
303             for (int x = 0; x < src_.cols; ++x)
304             {
305                 int srcVal = srcRow[x];
306 
307                 int ind1 = ind1_p[x] + srcVal;
308                 int ind2 = ind2_p[x] + srcVal;
309 
310                 float res = (lutPlane1[ind1] * xa1_p[x] + lutPlane1[ind2] * xa_p[x]) * ya1 +
311                             (lutPlane2[ind1] * xa1_p[x] + lutPlane2[ind2] * xa_p[x]) * ya;
312 
313                 dstRow[x] = cv::saturate_cast<T>(res);
314             }
315         }
316     }
317 
318     class CLAHE_Impl : public cv::CLAHE
319     {
320     public:
321         CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8);
322 
323         void apply(cv::InputArray src, cv::OutputArray dst);
324 
325         void setClipLimit(double clipLimit);
326         double getClipLimit() const;
327 
328         void setTilesGridSize(cv::Size tileGridSize);
329         cv::Size getTilesGridSize() const;
330 
331         void collectGarbage();
332 
333     private:
334         double clipLimit_;
335         int tilesX_;
336         int tilesY_;
337 
338         cv::Mat srcExt_;
339         cv::Mat lut_;
340 
341 #ifdef HAVE_OPENCL
342         cv::UMat usrcExt_;
343         cv::UMat ulut_;
344 #endif
345     };
346 
CLAHE_Impl(double clipLimit,int tilesX,int tilesY)347     CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) :
348         clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY)
349     {
350     }
351 
apply(cv::InputArray _src,cv::OutputArray _dst)352     void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst)
353     {
354         CV_Assert( _src.type() == CV_8UC1 || _src.type() == CV_16UC1 );
355 
356 #ifdef HAVE_OPENCL
357         bool useOpenCL = cv::ocl::useOpenCL() && _src.isUMat() && _src.dims()<=2 && _src.type() == CV_8UC1;
358 #endif
359 
360         int histSize = _src.type() == CV_8UC1 ? 256 : 4096;
361 
362         cv::Size tileSize;
363         cv::_InputArray _srcForLut;
364 
365         if (_src.size().width % tilesX_ == 0 && _src.size().height % tilesY_ == 0)
366         {
367             tileSize = cv::Size(_src.size().width / tilesX_, _src.size().height / tilesY_);
368             _srcForLut = _src;
369         }
370         else
371         {
372 #ifdef HAVE_OPENCL
373             if(useOpenCL)
374             {
375                 cv::copyMakeBorder(_src, usrcExt_, 0, tilesY_ - (_src.size().height % tilesY_), 0, tilesX_ - (_src.size().width % tilesX_), cv::BORDER_REFLECT_101);
376                 tileSize = cv::Size(usrcExt_.size().width / tilesX_, usrcExt_.size().height / tilesY_);
377                 _srcForLut = usrcExt_;
378             }
379             else
380 #endif
381             {
382                 cv::copyMakeBorder(_src, srcExt_, 0, tilesY_ - (_src.size().height % tilesY_), 0, tilesX_ - (_src.size().width % tilesX_), cv::BORDER_REFLECT_101);
383                 tileSize = cv::Size(srcExt_.size().width / tilesX_, srcExt_.size().height / tilesY_);
384                 _srcForLut = srcExt_;
385             }
386         }
387 
388         const int tileSizeTotal = tileSize.area();
389         const float lutScale = static_cast<float>(histSize - 1) / tileSizeTotal;
390 
391         int clipLimit = 0;
392         if (clipLimit_ > 0.0)
393         {
394             clipLimit = static_cast<int>(clipLimit_ * tileSizeTotal / histSize);
395             clipLimit = std::max(clipLimit, 1);
396         }
397 
398 #ifdef HAVE_OPENCL
399         if (useOpenCL && clahe::calcLut(_srcForLut, ulut_, tilesX_, tilesY_, tileSize, clipLimit, lutScale) )
400             if( clahe::transform(_src, _dst, ulut_, tilesX_, tilesY_, tileSize) )
401             {
402                 CV_IMPL_ADD(CV_IMPL_OCL);
403                 return;
404             }
405 #endif
406 
407         cv::Mat src = _src.getMat();
408         _dst.create( src.size(), src.type() );
409         cv::Mat dst = _dst.getMat();
410         cv::Mat srcForLut = _srcForLut.getMat();
411         lut_.create(tilesX_ * tilesY_, histSize, _src.type());
412 
413         cv::Ptr<cv::ParallelLoopBody> calcLutBody;
414         if (_src.type() == CV_8UC1)
415             calcLutBody = cv::makePtr<CLAHE_CalcLut_Body<uchar, 256, 0> >(srcForLut, lut_, tileSize, tilesX_, clipLimit, lutScale);
416         else if (_src.type() == CV_16UC1)
417             calcLutBody = cv::makePtr<CLAHE_CalcLut_Body<ushort, 4096, 4> >(srcForLut, lut_, tileSize, tilesX_, clipLimit, lutScale);
418         else
419             CV_Error( CV_StsBadArg, "Unsupported type" );
420 
421         cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), *calcLutBody);
422 
423         cv::Ptr<cv::ParallelLoopBody> interpolationBody;
424         if (_src.type() == CV_8UC1)
425             interpolationBody = cv::makePtr<CLAHE_Interpolation_Body<uchar> >(src, dst, lut_, tileSize, tilesX_, tilesY_);
426         else if (_src.type() == CV_16UC1)
427             interpolationBody = cv::makePtr<CLAHE_Interpolation_Body<ushort> >(src, dst, lut_, tileSize, tilesX_, tilesY_);
428 
429         cv::parallel_for_(cv::Range(0, src.rows), *interpolationBody);
430     }
431 
setClipLimit(double clipLimit)432     void CLAHE_Impl::setClipLimit(double clipLimit)
433     {
434         clipLimit_ = clipLimit;
435     }
436 
getClipLimit() const437     double CLAHE_Impl::getClipLimit() const
438     {
439         return clipLimit_;
440     }
441 
setTilesGridSize(cv::Size tileGridSize)442     void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize)
443     {
444         tilesX_ = tileGridSize.width;
445         tilesY_ = tileGridSize.height;
446     }
447 
getTilesGridSize() const448     cv::Size CLAHE_Impl::getTilesGridSize() const
449     {
450         return cv::Size(tilesX_, tilesY_);
451     }
452 
collectGarbage()453     void CLAHE_Impl::collectGarbage()
454     {
455         srcExt_.release();
456         lut_.release();
457 #ifdef HAVE_OPENCL
458         usrcExt_.release();
459         ulut_.release();
460 #endif
461     }
462 }
463 
createCLAHE(double clipLimit,cv::Size tileGridSize)464 cv::Ptr<cv::CLAHE> cv::createCLAHE(double clipLimit, cv::Size tileGridSize)
465 {
466     return makePtr<CLAHE_Impl>(clipLimit, tileGridSize.width, tileGridSize.height);
467 }
468