• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * cl_newwavelet_denoise_handler.cpp - CL wavelet denoise handler
3  *
4  *  Copyright (c) 2015 Intel Corporation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *      http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * Author: Wei Zong <wei.zong@intel.com>
19  */
20 
21 #include "cl_utils.h"
22 #include "cl_context.h"
23 #include "cl_device.h"
24 #include "cl_newwavelet_denoise_handler.h"
25 
26 #define WAVELET_DECOMPOSITION_LEVELS 4
27 
28 namespace XCam {
29 
/* Positions of the individual kernels inside kernel_new_wavelet_info[]. */
enum {
    KernelWaveletDecompose     = 0,
    KernelWaveletReconstruct   = 1,
    KernelWaveletNoiseEstimate = 2,
    KernelWaveletThreshold     = 3,
};
36 
37 static const XCamKernelInfo kernel_new_wavelet_info[] = {
38     {
39         "kernel_wavelet_haar_decomposition",
40 #include "kernel_wavelet_haar.clx"
41         , 0,
42     },
43     {
44         "kernel_wavelet_haar_reconstruction",
45 #include "kernel_wavelet_haar.clx"
46         , 0,
47     },
48     {
49         "kernel_wavelet_coeff_variance",
50 #include "kernel_wavelet_coeff.clx"
51         , 0,
52     },
53     {
54         "kernel_wavelet_coeff_thresholding",
55 #include "kernel_wavelet_coeff.clx"
56         , 0,
57     },
58 };
59 
60 
CLWaveletNoiseEstimateKernel(const SmartPtr<CLContext> & context,const char * name,SmartPtr<CLNewWaveletDenoiseImageHandler> & handler,uint32_t channel,uint32_t subband,uint32_t layer)61 CLWaveletNoiseEstimateKernel::CLWaveletNoiseEstimateKernel (
62     const SmartPtr<CLContext> &context,
63     const char *name,
64     SmartPtr<CLNewWaveletDenoiseImageHandler> &handler,
65     uint32_t channel,
66     uint32_t subband,
67     uint32_t layer)
68     : CLImageKernel (context, name)
69     , _decomposition_levels (WAVELET_DECOMPOSITION_LEVELS)
70     , _channel (channel)
71     , _subband (subband)
72     , _current_layer (layer)
73     , _analog_gain (-1.0)
74     , _handler (handler)
75 {
76 }
77 
78 SmartPtr<CLImage>
get_input_buffer()79 CLWaveletNoiseEstimateKernel::get_input_buffer ()
80 {
81     SmartPtr<VideoBuffer> input = _handler->get_input_buf ();
82     const VideoBufferInfo & video_info = input->get_video_info ();
83 
84     SmartPtr<CLImage> image;
85     SmartPtr<CLWaveletDecompBuffer> buffer = _handler->get_decomp_buffer (_channel, _current_layer);
86     XCAM_ASSERT (buffer.ptr ());
87 
88     if (_subband == CL_WAVELET_SUBBAND_HL) {
89         image = buffer->hl[0];
90     } else if (_subband == CL_WAVELET_SUBBAND_LH) {
91         image = buffer->lh[0];
92     } else if (_subband == CL_WAVELET_SUBBAND_HH) {
93         image = buffer->hh[0];
94     } else {
95         image = buffer->ll;
96     }
97 
98     float current_ag = _handler->get_denoise_config ().analog_gain;
99     if ((_analog_gain == -1.0f) ||
100             (fabs(_analog_gain - current_ag) > 0.2)) {
101 
102         if ((_current_layer == 1) && (_subband == CL_WAVELET_SUBBAND_HH)) {
103             _analog_gain = current_ag;
104             estimate_noise_variance (video_info, buffer->hh[0], buffer->noise_variance);
105             _handler->set_estimated_noise_variation (buffer->noise_variance);
106         } else {
107             _handler->get_estimated_noise_variation (buffer->noise_variance);
108         }
109     } else {
110         _handler->get_estimated_noise_variation (buffer->noise_variance);
111     }
112     return image;
113 }
114 
115 SmartPtr<CLImage>
get_output_buffer()116 CLWaveletNoiseEstimateKernel::get_output_buffer ()
117 {
118     SmartPtr<CLImage> image;
119     SmartPtr<CLWaveletDecompBuffer> buffer = _handler->get_decomp_buffer (_channel, _current_layer);
120     XCAM_ASSERT (buffer.ptr ());
121 
122     if (_subband == CL_WAVELET_SUBBAND_HL) {
123         image = buffer->hl[1];
124     } else if (_subband == CL_WAVELET_SUBBAND_LH) {
125         image = buffer->lh[1];
126     } else if (_subband == CL_WAVELET_SUBBAND_HH) {
127         image = buffer->hh[1];
128     } else {
129         image = buffer->ll;
130     }
131     return image;
132 }
133 
134 XCamReturn
prepare_arguments(CLArgList & args,CLWorkSize & work_size)135 CLWaveletNoiseEstimateKernel::prepare_arguments (
136     CLArgList &args, CLWorkSize &work_size)
137 {
138     SmartPtr<CLContext> context = get_context ();
139 
140     SmartPtr<CLImage> image_in = get_input_buffer ();
141     SmartPtr<CLImage> image_out = get_output_buffer ();
142 
143     CLImageDesc cl_desc = image_in->get_image_desc ();
144     uint32_t cl_width = XCAM_ALIGN_UP (cl_desc.width, 2);
145     uint32_t cl_height = XCAM_ALIGN_UP (cl_desc.height, 2);
146 
147     XCAM_FAIL_RETURN (
148         WARNING,
149         image_in->is_valid () && image_out->is_valid (),
150         XCAM_RETURN_ERROR_MEM,
151         "cl image kernel(%s) in/out memory not available", get_kernel_name ());
152 
153     //set args;
154     args.push_back (new CLMemArgument (image_in));
155     args.push_back (new CLMemArgument (image_out));
156     args.push_back (new CLArgumentT<uint32_t> (_current_layer));
157 
158     work_size.dim = XCAM_DEFAULT_IMAGE_DIM;
159     work_size.local[0] = 8;
160     work_size.local[1] = 8;
161     work_size.global[0] = XCAM_ALIGN_UP (cl_width, work_size.local[0]);
162     work_size.global[1] = XCAM_ALIGN_UP (cl_height, work_size.local[1]);
163 
164     return XCAM_RETURN_NO_ERROR;
165 }
166 
167 XCamReturn
estimate_noise_variance(const VideoBufferInfo & video_info,SmartPtr<CLImage> image,float * noise_var)168 CLWaveletNoiseEstimateKernel::estimate_noise_variance (const VideoBufferInfo & video_info, SmartPtr<CLImage> image, float* noise_var)
169 {
170     XCamReturn ret = XCAM_RETURN_NO_ERROR;
171 
172     SmartPtr<CLEvent> map_event = new CLEvent;
173     void *buf_ptr = NULL;
174 
175     CLImageDesc cl_desc = image->get_image_desc ();
176     uint32_t cl_width = XCAM_ALIGN_UP (cl_desc.width, 2);
177     uint32_t cl_height = XCAM_ALIGN_UP (cl_desc.height, 2);
178 
179     uint32_t image_width = cl_width << 2;
180     uint32_t image_height = cl_height;
181 
182     size_t origin[3] = {0, 0, 0};
183     size_t row_pitch = cl_desc.row_pitch;
184     size_t slice_pitch = 0;
185     size_t region[3] = {cl_width, cl_height, 1};
186 
187     ret = image->enqueue_map (buf_ptr,
188                               origin, region,
189                               &row_pitch, &slice_pitch,
190                               CL_MAP_READ,
191                               CLEvent::EmptyList,
192                               map_event);
193     if (ret != XCAM_RETURN_NO_ERROR) {
194         XCAM_LOG_ERROR ("wavelet noise variance buffer enqueue map failed");
195     }
196     XCAM_ASSERT (map_event->get_event_id ());
197 
198     ret = map_event->wait ();
199     if (ret != XCAM_RETURN_NO_ERROR) {
200         XCAM_LOG_ERROR ("wavelet noise variance buffer enqueue map event wait failed");
201     }
202 
203     uint8_t* pixel = (uint8_t*)buf_ptr;
204     uint32_t pixel_count = image_width * image_height;
205     uint32_t pixel_sum = 0;
206 
207     uint32_t median_thresh = pixel_count >> 1;
208     float median = 0;
209     float noise_std_deviation = 0;
210 
211     uint32_t hist_bin_count = 1 << video_info.color_bits;
212     uint32_t hist_y[128] = {0};
213     uint32_t hist_u[128] = {0};
214     uint32_t hist_v[128] = {0};
215 
216     if (_channel == CL_IMAGE_CHANNEL_Y) {
217         for (uint32_t i = 0; i < image_width; i++) {
218             for (uint32_t j = 0; j < image_height; j++) {
219                 uint8_t base = (pixel[i + j * row_pitch] <= 127) ? 127 : 128;
220                 hist_y[abs(pixel[i + j * row_pitch] - base)]++;
221             }
222         }
223         pixel_sum = 0;
224         median = 0;
225         for (uint32_t i = 0; i < (hist_bin_count - 1); i++) {
226             pixel_sum += hist_y[i];
227             if (pixel_sum >= median_thresh) {
228                 median = i;
229                 break;
230             }
231         }
232         noise_std_deviation = median / 0.6745;
233         noise_var[0] = noise_std_deviation * noise_std_deviation;
234     }
235     if (_channel == CL_IMAGE_CHANNEL_UV) {
236         for (uint32_t i = 0; i < (image_width / 2); i++) {
237             for (uint32_t j = 0; j < image_height; j++) {
238                 uint8_t base = (pixel[2 * i + j * row_pitch] <= 127) ? 127 : 128;
239                 hist_u[abs(pixel[2 * i + j * row_pitch] - base)]++;
240                 base = (pixel[2 * i + 1 + j * row_pitch] <= 127) ? 127 : 128;
241                 hist_v[abs(pixel[2 * i + 1 + j * row_pitch] - base)]++;
242             }
243         }
244         pixel_sum = 0;
245         median = 0;
246         for (uint32_t i = 0; i < (hist_bin_count - 1); i++) {
247             pixel_sum += hist_u[i];
248             if (pixel_sum >= median_thresh >> 1) {
249                 median = i;
250                 break;
251             }
252         }
253         noise_std_deviation = median / 0.6745;
254         noise_var[1] = noise_std_deviation * noise_std_deviation;
255 
256         pixel_sum = 0;
257         median = 0;
258         for (uint32_t i = 0; i < (hist_bin_count - 1); i++) {
259             pixel_sum += hist_v[i];
260             if (pixel_sum >= median_thresh >> 1) {
261                 median = i;
262                 break;
263             }
264         }
265         noise_std_deviation = median / 0.6745;
266         noise_var[2] = noise_std_deviation * noise_std_deviation;
267     }
268 
269     map_event.release ();
270 
271     SmartPtr<CLEvent> unmap_event = new CLEvent;
272     ret = image->enqueue_unmap (buf_ptr, CLEvent::EmptyList, unmap_event);
273     if (ret != XCAM_RETURN_NO_ERROR) {
274         XCAM_LOG_ERROR ("wavelet noise variance buffer enqueue unmap failed");
275     }
276     XCAM_ASSERT (unmap_event->get_event_id ());
277 
278     ret = unmap_event->wait ();
279     if (ret != XCAM_RETURN_NO_ERROR) {
280         XCAM_LOG_ERROR ("wavelet noise variance buffer enqueue unmap event wait failed");
281     }
282     unmap_event.release ();
283 
284     return ret;
285 }
286 
CLWaveletThresholdingKernel(const SmartPtr<CLContext> & context,const char * name,SmartPtr<CLNewWaveletDenoiseImageHandler> & handler,uint32_t channel,uint32_t layer)287 CLWaveletThresholdingKernel::CLWaveletThresholdingKernel (
288     const SmartPtr<CLContext> &context,
289     const char *name,
290     SmartPtr<CLNewWaveletDenoiseImageHandler> &handler,
291     uint32_t channel,
292     uint32_t layer)
293     : CLImageKernel (context, name, true)
294     , _decomposition_levels (WAVELET_DECOMPOSITION_LEVELS)
295     , _channel (channel)
296     , _current_layer (layer)
297     , _handler (handler)
298 {
299 }
300 
301 XCamReturn
prepare_arguments(CLArgList & args,CLWorkSize & work_size)302 CLWaveletThresholdingKernel::prepare_arguments (
303     CLArgList &args, CLWorkSize &work_size)
304 {
305     SmartPtr<CLContext> context = get_context ();
306     float noise_variance[2];
307 
308     xcam_mem_clear (noise_variance);
309     _decomposition_levels = WAVELET_DECOMPOSITION_LEVELS;
310     float soft_threshold = _handler->get_denoise_config ().threshold[0];
311     float hard_threshold = _handler->get_denoise_config ().threshold[1];
312     float anolog_gain_weight = 1.0 + 100 *  _handler->get_denoise_config ().analog_gain;
313 
314     SmartPtr<CLWaveletDecompBuffer> buffer;
315     buffer = _handler->get_decomp_buffer (_channel, _current_layer);
316 
317     CLImageDesc cl_desc = buffer->ll->get_image_desc ();
318 
319     float weight = 4;
320     if (_channel == CL_IMAGE_CHANNEL_Y) {
321         noise_variance[0] = buffer->noise_variance[0] * weight;
322         noise_variance[1] = buffer->noise_variance[0] * weight;
323     } else {
324         noise_variance[0] = buffer->noise_variance[1] * weight;
325         noise_variance[1] = buffer->noise_variance[2] * weight;
326     }
327 #if 0
328     {
329         SmartPtr<CLImage> save_image = buffer->hh[0];
330         _handler->dump_coeff (save_image, _channel, _current_layer, CL_WAVELET_SUBBAND_HH);
331     }
332 #endif
333     if (_channel == CL_IMAGE_CHANNEL_Y) {
334         args.push_back (new CLArgumentT<float> (noise_variance[0]));
335         args.push_back (new CLArgumentT<float> (noise_variance[0]));
336     } else {
337         args.push_back (new CLArgumentT<float> (noise_variance[0]));
338         args.push_back (new CLArgumentT<float> (noise_variance[1]));
339     }
340 
341     args.push_back (new CLMemArgument (buffer->hl[0]));
342     args.push_back (new CLMemArgument (buffer->hl[1]));
343     args.push_back (new CLMemArgument (buffer->hl[2]));
344 
345     args.push_back (new CLMemArgument (buffer->lh[0]));
346     args.push_back (new CLMemArgument (buffer->lh[1]));
347     args.push_back (new CLMemArgument (buffer->lh[2]));
348 
349     args.push_back (new CLMemArgument (buffer->hh[0]));
350     args.push_back (new CLMemArgument (buffer->hh[1]));
351     args.push_back (new CLMemArgument (buffer->hh[2]));
352 
353     args.push_back (new CLArgumentT<uint32_t> (_current_layer));
354     args.push_back (new CLArgumentT<uint32_t> (_decomposition_levels));
355     args.push_back (new CLArgumentT<float> (hard_threshold));
356     args.push_back (new CLArgumentT<float> (soft_threshold));
357     args.push_back (new CLArgumentT<float> (anolog_gain_weight));
358 
359     uint32_t cl_width = XCAM_ALIGN_UP (cl_desc.width, 2);
360     uint32_t cl_height = XCAM_ALIGN_UP (cl_desc.height, 2);
361 
362     //set args;
363     work_size.dim = XCAM_DEFAULT_IMAGE_DIM;
364     work_size.local[0] = 8;
365     work_size.local[1] = 4;
366     work_size.global[0] = XCAM_ALIGN_UP (cl_width , work_size.local[0]);
367     work_size.global[1] = XCAM_ALIGN_UP (cl_height, work_size.local[1]);
368 
369     return XCAM_RETURN_NO_ERROR;
370 }
371 
CLWaveletTransformKernel(const SmartPtr<CLContext> & context,const char * name,SmartPtr<CLNewWaveletDenoiseImageHandler> & handler,CLWaveletFilterBank fb,uint32_t channel,uint32_t layer,bool bayes_shrink)372 CLWaveletTransformKernel::CLWaveletTransformKernel (
373     const SmartPtr<CLContext> &context,
374     const char *name,
375     SmartPtr<CLNewWaveletDenoiseImageHandler> &handler,
376     CLWaveletFilterBank fb,
377     uint32_t channel,
378     uint32_t layer,
379     bool bayes_shrink)
380     : CLImageKernel (context, name, true)
381     , _filter_bank (fb)
382     , _decomposition_levels (WAVELET_DECOMPOSITION_LEVELS)
383     , _channel (channel)
384     , _current_layer (layer)
385     , _bayes_shrink (bayes_shrink)
386     , _handler (handler)
387 {
388 }
389 
390 XCamReturn
prepare_arguments(CLArgList & args,CLWorkSize & work_size)391 CLWaveletTransformKernel::prepare_arguments (
392     CLArgList &args, CLWorkSize &work_size)
393 {
394     SmartPtr<VideoBuffer> input = _handler->get_input_buf ();
395     SmartPtr<VideoBuffer> output = _handler->get_output_buf ();
396     SmartPtr<CLContext> context = get_context ();
397 
398     const VideoBufferInfo & video_info_in = input->get_video_info ();
399     const VideoBufferInfo & video_info_out = output->get_video_info ();
400 
401     _decomposition_levels = WAVELET_DECOMPOSITION_LEVELS;
402     float soft_threshold = _handler->get_denoise_config ().threshold[0];
403     float hard_threshold = _handler->get_denoise_config ().threshold[1];
404 
405     CLImageDesc cl_desc_in, cl_desc_out;
406     cl_desc_in.format.image_channel_data_type = CL_UNORM_INT8;
407     cl_desc_in.format.image_channel_order = CL_RGBA;
408     cl_desc_in.width = XCAM_ALIGN_UP (video_info_in.width, 4) / 4;
409     cl_desc_in.height = video_info_in.height;
410     cl_desc_in.row_pitch = video_info_in.strides[0];
411 
412     cl_desc_out.format.image_channel_data_type = CL_UNORM_INT8;
413     cl_desc_out.format.image_channel_order = CL_RGBA;
414     cl_desc_out.width = XCAM_ALIGN_UP (video_info_out.width, 4) / 4;
415     cl_desc_out.height = video_info_out.height;
416     cl_desc_out.row_pitch = video_info_out.strides[0];
417 
418     SmartPtr<CLImage> image_in = convert_to_climage (context, input, cl_desc_in, video_info_in.offsets[0]);
419     SmartPtr<CLImage> image_out = convert_to_climage (context, output, cl_desc_out, video_info_out.offsets[0]);
420 
421     cl_desc_in.height = XCAM_ALIGN_UP (video_info_in.height, 2) / 2;
422     cl_desc_in.row_pitch = video_info_in.strides[1];
423 
424     cl_desc_out.height = XCAM_ALIGN_UP (video_info_out.height, 2) / 2;
425     cl_desc_out.row_pitch = video_info_out.strides[1];
426 
427     SmartPtr<CLImage> image_in_uv = convert_to_climage (context, input, cl_desc_in, video_info_in.offsets[1]);
428     SmartPtr<CLImage> image_out_uv = convert_to_climage (context, output, cl_desc_out, video_info_out.offsets[1]);
429 
430     XCAM_FAIL_RETURN (
431         WARNING,
432         image_in->is_valid () && image_in_uv->is_valid () &&
433         image_out->is_valid () && image_out_uv->is_valid(),
434         XCAM_RETURN_ERROR_MEM,
435         "cl image kernel(%s) in/out memory not available", get_kernel_name ());
436 
437     //set args;
438     work_size.dim = XCAM_DEFAULT_IMAGE_DIM;
439     work_size.local[0] = 8;
440     work_size.local[1] = 4;
441     if (_channel == CL_IMAGE_CHANNEL_Y) {
442         work_size.global[0] = XCAM_ALIGN_UP ((video_info_in.width >> _current_layer) / 4 , work_size.local[0]);
443         work_size.global[1] = XCAM_ALIGN_UP (video_info_in.height >> _current_layer, work_size.local[1]);
444     } else if (_channel == CL_IMAGE_CHANNEL_UV) {
445         work_size.global[0] = XCAM_ALIGN_UP ((video_info_in.width >> _current_layer) / 4 , work_size.local[0]);
446         work_size.global[1] = XCAM_ALIGN_UP (video_info_in.height >> (_current_layer + 1), work_size.local[1]);
447     }
448 
449     SmartPtr<CLWaveletDecompBuffer> buffer;
450     if (_current_layer == 1) {
451         if (_filter_bank == CL_WAVELET_HAAR_ANALYSIS) {
452             if (_channel == CL_IMAGE_CHANNEL_Y) {
453                 args.push_back (new CLMemArgument (image_in));
454             } else if (_channel == CL_IMAGE_CHANNEL_UV) {
455                 args.push_back (new CLMemArgument (image_in_uv));
456             }
457         } else if (_filter_bank == CL_WAVELET_HAAR_SYNTHESIS) {
458             if (_channel == CL_IMAGE_CHANNEL_Y) {
459                 args.push_back (new CLMemArgument (image_out));
460             } else if (_channel == CL_IMAGE_CHANNEL_UV) {
461                 args.push_back (new CLMemArgument (image_out_uv));
462             }
463         }
464     } else {
465         buffer = get_decomp_buffer (_channel, _current_layer - 1);
466         args.push_back (new CLMemArgument (buffer->ll));
467     }
468 
469     buffer = get_decomp_buffer (_channel, _current_layer);
470     args.push_back (new CLMemArgument (buffer->ll));
471 
472     if (_bayes_shrink == true) {
473         if (_filter_bank == CL_WAVELET_HAAR_ANALYSIS) {
474             args.push_back (new CLMemArgument (buffer->hl[0]));
475             args.push_back (new CLMemArgument (buffer->lh[0]));
476             args.push_back (new CLMemArgument (buffer->hh[0]));
477         } else if (_filter_bank == CL_WAVELET_HAAR_SYNTHESIS) {
478             args.push_back (new CLMemArgument (buffer->hl[2]));
479             args.push_back (new CLMemArgument (buffer->lh[2]));
480             args.push_back (new CLMemArgument (buffer->hh[2]));
481         }
482     } else {
483         args.push_back (new CLMemArgument (buffer->hl[0]));
484         args.push_back (new CLMemArgument (buffer->lh[0]));
485         args.push_back (new CLMemArgument (buffer->hh[0]));
486     }
487 
488     args.push_back (new CLArgumentT<uint32_t> (_current_layer));
489     args.push_back (new CLArgumentT<uint32_t> (_decomposition_levels));
490     args.push_back (new CLArgumentT<float> (hard_threshold));
491     args.push_back (new CLArgumentT<float> (soft_threshold));
492 
493     return XCAM_RETURN_NO_ERROR;
494 }
495 
496 SmartPtr<CLWaveletDecompBuffer>
get_decomp_buffer(uint32_t channel,int layer)497 CLWaveletTransformKernel::get_decomp_buffer (uint32_t channel, int layer)
498 {
499     SmartPtr<CLWaveletDecompBuffer> buffer;
500     if (_handler.ptr ()) {
501         buffer = _handler->get_decomp_buffer (channel, layer);
502     }
503 
504     if (!buffer.ptr ()) {
505         XCAM_LOG_ERROR ("get channel(%d) layer(%d) decomposition buffer failed!", channel, layer);
506     }
507     XCAM_ASSERT (buffer.ptr ());
508     return buffer;
509 }
510 
CLNewWaveletDenoiseImageHandler(const SmartPtr<CLContext> & context,const char * name,uint32_t channel)511 CLNewWaveletDenoiseImageHandler::CLNewWaveletDenoiseImageHandler (
512     const SmartPtr<CLContext> &context, const char *name, uint32_t channel)
513     : CLImageHandler (context, name)
514     , _channel (channel)
515 {
516     _config.decomposition_levels = 5;
517     _config.threshold[0] = 0.5;
518     _config.threshold[1] = 5.0;
519     xcam_mem_clear (_noise_variance);
520 }
521 
522 XCamReturn
prepare_output_buf(SmartPtr<VideoBuffer> & input,SmartPtr<VideoBuffer> & output)523 CLNewWaveletDenoiseImageHandler::prepare_output_buf (SmartPtr<VideoBuffer> &input, SmartPtr<VideoBuffer> &output)
524 {
525     XCamReturn ret = XCAM_RETURN_NO_ERROR;
526     CLImageHandler::prepare_output_buf(input, output);
527 
528     SmartPtr<CLContext> context = get_context ();
529     const VideoBufferInfo & video_info = input->get_video_info ();
530     CLImageDesc cl_desc;
531     SmartPtr<CLWaveletDecompBuffer> decompBuffer;
532 
533     CLImage::video_info_2_cl_image_desc (video_info, cl_desc);
534 
535     _decompBufferList.clear ();
536 
537     if (_channel & CL_IMAGE_CHANNEL_Y) {
538         for (int layer = 1; layer <= WAVELET_DECOMPOSITION_LEVELS; layer++) {
539             decompBuffer = new CLWaveletDecompBuffer ();
540             if (decompBuffer.ptr ()) {
541                 decompBuffer->width = XCAM_ALIGN_UP (video_info.width, 1 << layer) >> layer;
542                 decompBuffer->height = XCAM_ALIGN_UP (video_info.height, 1 << layer) >> layer;
543                 decompBuffer->width = XCAM_ALIGN_UP (decompBuffer->width, 4);
544                 decompBuffer->height = XCAM_ALIGN_UP (decompBuffer->height, 2);
545 
546                 decompBuffer->channel = CL_IMAGE_CHANNEL_Y;
547                 decompBuffer->layer = layer;
548                 decompBuffer->noise_variance[0] = 0;
549 
550                 cl_desc.width = decompBuffer->width / 4;
551                 cl_desc.height = decompBuffer->height;
552                 cl_desc.slice_pitch = 0;
553                 cl_desc.format.image_channel_order = CL_RGBA;
554                 cl_desc.format.image_channel_data_type = CL_UNORM_INT8;
555 
556                 decompBuffer->ll = new CLImage2D (context, cl_desc);
557 
558                 decompBuffer->hl[0] = new CLImage2D (context, cl_desc);
559                 decompBuffer->lh[0] = new CLImage2D (context, cl_desc);
560                 decompBuffer->hh[0] = new CLImage2D (context, cl_desc);
561                 /*
562                                 uint32_t width = decompBuffer->width / 4;
563                                 uint32_t height = decompBuffer->height;
564                                 SmartPtr<CLBuffer> hh_buffer = new CLBuffer (
565                                     context, sizeof(uint8_t) * width * height,
566                                     CL_MEM_READ_WRITE, NULL);
567                                 CLImageDesc hh_desc;
568                                 hh_desc.format = {CL_RGBA, CL_UNORM_INT8};
569                                 hh_desc.width = width;
570                                 hh_desc.height = height;
571                                 hh_desc.row_pitch = sizeof(uint8_t) * width;
572                                 hh_desc.slice_pitch = 0;
573                                 hh_desc.size = 0;
574                                 hh_desc.array_size = 0;
575 
576                                 decompBuffer->hh[0] = new CLImage2D (
577                                     context, hh_desc, 0, hh_buffer);
578                 */
579 
580                 cl_desc.format.image_channel_data_type = CL_UNORM_INT16;
581                 decompBuffer->hl[1] = new CLImage2D (context, cl_desc);
582                 decompBuffer->lh[1] = new CLImage2D (context, cl_desc);
583                 decompBuffer->hh[1] = new CLImage2D (context, cl_desc);
584 
585                 cl_desc.format.image_channel_data_type = CL_UNORM_INT8;
586                 decompBuffer->hl[2] = new CLImage2D (context, cl_desc);
587                 decompBuffer->lh[2] = new CLImage2D (context, cl_desc);
588                 decompBuffer->hh[2] = new CLImage2D (context, cl_desc);
589 
590                 _decompBufferList.push_back (decompBuffer);
591             } else {
592                 XCAM_LOG_ERROR ("create Y decomposition buffer failed!");
593                 ret = XCAM_RETURN_ERROR_MEM;
594             }
595         }
596     }
597 
598     if (_channel & CL_IMAGE_CHANNEL_UV) {
599         for (int layer = 1; layer <= WAVELET_DECOMPOSITION_LEVELS; layer++) {
600             decompBuffer = new CLWaveletDecompBuffer ();
601             if (decompBuffer.ptr ()) {
602                 decompBuffer->width = XCAM_ALIGN_UP (video_info.width, 1 << layer) >> layer;
603                 decompBuffer->height = XCAM_ALIGN_UP (video_info.height, 1 << (layer + 1)) >> (layer + 1);
604                 decompBuffer->width = XCAM_ALIGN_UP (decompBuffer->width, 4);
605                 decompBuffer->height = XCAM_ALIGN_UP (decompBuffer->height, 2);
606 
607                 decompBuffer->channel = CL_IMAGE_CHANNEL_UV;
608                 decompBuffer->layer = layer;
609                 decompBuffer->noise_variance[1] = 0;
610                 decompBuffer->noise_variance[2] = 0;
611 
612                 cl_desc.width = decompBuffer->width / 4;
613                 cl_desc.height = decompBuffer->height;
614                 cl_desc.slice_pitch = 0;
615                 cl_desc.format.image_channel_order = CL_RGBA;
616                 cl_desc.format.image_channel_data_type = CL_UNORM_INT8;
617 
618                 decompBuffer->ll = new CLImage2D (context, cl_desc);
619 
620                 decompBuffer->hl[0] = new CLImage2D (context, cl_desc);
621                 decompBuffer->lh[0] = new CLImage2D (context, cl_desc);
622                 decompBuffer->hh[0] = new CLImage2D (context, cl_desc);
623                 /*
624                                 uint32_t width = decompBuffer->width / 4;
625                                 uint32_t height = decompBuffer->height;
626                                 SmartPtr<CLBuffer> hh_buffer = new CLBuffer (
627                                     context, sizeof(uint8_t) * width * height,
628                                     CL_MEM_READ_WRITE, NULL);
629                                 CLImageDesc hh_desc;
630                                 hh_desc.format = {CL_RGBA, CL_UNORM_INT8};
631                                 hh_desc.width = width;
632                                 hh_desc.height = height;
633                                 hh_desc.row_pitch = sizeof(uint8_t) * width;
634                                 hh_desc.slice_pitch = 0;
635                                 hh_desc.size = 0;
636                                 hh_desc.array_size = 0;
637                                 decompBuffer->hh[0] = new CLImage2D (
638                                     context, hh_desc, 0, hh_buffer);
639                 */
640                 cl_desc.format.image_channel_data_type = CL_UNORM_INT16;
641                 decompBuffer->hl[1] = new CLImage2D (context, cl_desc);
642                 decompBuffer->lh[1] = new CLImage2D (context, cl_desc);
643                 decompBuffer->hh[1] = new CLImage2D (context, cl_desc);
644 
645                 cl_desc.format.image_channel_data_type = CL_UNORM_INT8;
646                 decompBuffer->hl[2] = new CLImage2D (context, cl_desc);
647                 decompBuffer->lh[2] = new CLImage2D (context, cl_desc);
648                 decompBuffer->hh[2] = new CLImage2D (context, cl_desc);
649 
650                 _decompBufferList.push_back (decompBuffer);
651             } else {
652                 XCAM_LOG_ERROR ("create UV decomposition buffer failed!");
653                 ret = XCAM_RETURN_ERROR_MEM;
654             }
655         }
656     }
657     return ret;
658 }
659 
660 bool
set_denoise_config(const XCam3aResultWaveletNoiseReduction & config)661 CLNewWaveletDenoiseImageHandler::set_denoise_config (const XCam3aResultWaveletNoiseReduction& config)
662 {
663     _config = config;
664 
665     return true;
666 }
667 
668 SmartPtr<CLWaveletDecompBuffer>
get_decomp_buffer(uint32_t channel,int layer)669 CLNewWaveletDenoiseImageHandler::get_decomp_buffer (uint32_t channel, int layer)
670 {
671     SmartPtr<CLWaveletDecompBuffer> buffer;
672 
673     for (CLWaveletDecompBufferList::iterator it = _decompBufferList.begin ();
674             it != _decompBufferList.end (); ++it) {
675         if ((channel == (*it)->channel) && (layer == (*it)->layer))
676             buffer = (*it);
677     }
678     return buffer;
679 }
680 
681 void
set_estimated_noise_variation(float * noise_var)682 CLNewWaveletDenoiseImageHandler::set_estimated_noise_variation (float* noise_var)
683 {
684     if (noise_var == NULL) {
685         XCAM_LOG_ERROR ("invalid input noise variation!");
686         return;
687     }
688     _noise_variance[0] = noise_var[0];
689     _noise_variance[1] = noise_var[1];
690     _noise_variance[2] = noise_var[2];
691 }
692 
693 void
get_estimated_noise_variation(float * noise_var)694 CLNewWaveletDenoiseImageHandler::get_estimated_noise_variation (float* noise_var)
695 {
696     if (noise_var == NULL) {
697         XCAM_LOG_ERROR ("invalid output parameters!");
698         return;
699     }
700     noise_var[0] = _noise_variance[0];
701     noise_var[1] = _noise_variance[1];
702     noise_var[2] = _noise_variance[2];
703 }
704 
705 void
dump_coeff(SmartPtr<CLImage> image,uint32_t channel,uint32_t layer,uint32_t subband)706 CLNewWaveletDenoiseImageHandler::dump_coeff (SmartPtr<CLImage> image, uint32_t channel, uint32_t layer, uint32_t subband)
707 {
708     FILE *file;
709 
710     void *buf_ptr = NULL;
711     SmartPtr<CLEvent> map_event = new CLEvent;
712 
713     CLImageDesc cl_desc = image->get_image_desc ();
714 
715     uint32_t cl_width = XCAM_ALIGN_UP (cl_desc.width, 2);
716     uint32_t cl_height = XCAM_ALIGN_UP (cl_desc.height, 2);
717 
718     size_t origin[3] = {0, 0, 0};
719     size_t row_pitch = cl_desc.row_pitch;
720     size_t slice_pitch = 0;
721     size_t region[3] = {cl_width, cl_height, 1};
722 
723     image->enqueue_map (buf_ptr,
724                         origin, region,
725                         &row_pitch, &slice_pitch,
726                         CL_MAP_READ,
727                         CLEvent::EmptyList,
728                         map_event);
729     XCAM_ASSERT (map_event->get_event_id ());
730 
731     map_event->wait ();
732 
733     uint8_t* pixel = (uint8_t*)buf_ptr;
734     uint32_t pixel_count = row_pitch * cl_height;
735 
736     char file_name[512];
737     snprintf (file_name, sizeof(file_name),
738               "wavelet_cl_coeff_"
739               "channel%d_"
740               "layer%d_"
741               "subband%d_"
742               "rowpitch%d_"
743               "width%dxheight%d"
744               ".raw",
745               channel, layer, subband, (uint32_t)row_pitch, cl_width, cl_height);
746     file = fopen(file_name, "wb");
747 
748     if (file != NULL) {
749         if (fwrite (pixel, pixel_count, 1, file) <= 0) {
750             XCAM_LOG_WARNING ("write frame failed.");
751         }
752         fclose (file);
753     }
754     map_event.release ();
755 
756     SmartPtr<CLEvent> unmap_event = new CLEvent;
757     image->enqueue_unmap (buf_ptr, CLEvent::EmptyList, unmap_event);
758     XCAM_ASSERT (unmap_event->get_event_id ());
759 
760     unmap_event->wait ();
761     unmap_event.release ();
762 }
763 
764 static SmartPtr<CLWaveletTransformKernel>
create_kernel_haar_decomposition(const SmartPtr<CLContext> & context,SmartPtr<CLNewWaveletDenoiseImageHandler> handler,uint32_t channel,uint32_t layer,bool bayes_shrink)765 create_kernel_haar_decomposition (
766     const SmartPtr<CLContext> &context,
767     SmartPtr<CLNewWaveletDenoiseImageHandler> handler,
768     uint32_t channel,
769     uint32_t layer,
770     bool bayes_shrink)
771 {
772     SmartPtr<CLWaveletTransformKernel> haar_decomp_kernel;
773 
774     char build_options[1024];
775     xcam_mem_clear (build_options);
776 
777     snprintf (build_options, sizeof (build_options),
778               " -DWAVELET_DENOISE_Y=%d "
779               " -DWAVELET_DENOISE_UV=%d ",
780               (channel == CL_IMAGE_CHANNEL_Y ? 1 : 0),
781               (channel == CL_IMAGE_CHANNEL_UV ? 1 : 0));
782 
783     haar_decomp_kernel = new CLWaveletTransformKernel (context, "kernel_wavelet_haar_decomposition",
784             handler, CL_WAVELET_HAAR_ANALYSIS, channel, layer, bayes_shrink);
785 
786     XCAM_ASSERT (haar_decomp_kernel.ptr ());
787     XCAM_FAIL_RETURN (
788         WARNING,
789         haar_decomp_kernel->build_kernel (kernel_new_wavelet_info[KernelWaveletDecompose], build_options) == XCAM_RETURN_NO_ERROR,
790         NULL,
791         "wavelet denoise build kernel(%s) failed", kernel_new_wavelet_info[KernelWaveletDecompose].kernel_name);
792     XCAM_ASSERT (haar_decomp_kernel->is_valid ());
793 
794     return haar_decomp_kernel;
795 }
796 
797 static SmartPtr<CLWaveletTransformKernel>
create_kernel_haar_reconstruction(const SmartPtr<CLContext> & context,SmartPtr<CLNewWaveletDenoiseImageHandler> handler,uint32_t channel,uint32_t layer,bool bayes_shrink)798 create_kernel_haar_reconstruction (
799     const SmartPtr<CLContext> &context,
800     SmartPtr<CLNewWaveletDenoiseImageHandler> handler,
801     uint32_t channel,
802     uint32_t layer,
803     bool bayes_shrink)
804 {
805     SmartPtr<CLWaveletTransformKernel> haar_reconstruction_kernel;
806 
807     char build_options[1024];
808     xcam_mem_clear (build_options);
809     snprintf (build_options, sizeof (build_options),
810               " -DWAVELET_DENOISE_Y=%d "
811               " -DWAVELET_DENOISE_UV=%d "
812               " -DWAVELET_BAYES_SHRINK=%d",
813               (channel == CL_IMAGE_CHANNEL_Y ? 1 : 0),
814               (channel == CL_IMAGE_CHANNEL_UV ? 1 : 0),
815               (bayes_shrink == true ? 1 : 0));
816 
817     haar_reconstruction_kernel = new CLWaveletTransformKernel (context, "kernel_wavelet_haar_reconstruction",
818             handler, CL_WAVELET_HAAR_SYNTHESIS, channel, layer, bayes_shrink);
819 
820     XCAM_ASSERT (haar_reconstruction_kernel.ptr ());
821     XCAM_FAIL_RETURN (
822         WARNING,
823         haar_reconstruction_kernel->build_kernel (kernel_new_wavelet_info[KernelWaveletReconstruct], build_options) == XCAM_RETURN_NO_ERROR,
824         NULL,
825         "wavelet denoise build kernel(%s) failed", kernel_new_wavelet_info[KernelWaveletReconstruct].kernel_name);
826     XCAM_ASSERT (haar_reconstruction_kernel->is_valid ());
827 
828     return haar_reconstruction_kernel;
829 }
830 
831 static SmartPtr<CLWaveletNoiseEstimateKernel>
create_kernel_noise_estimation(const SmartPtr<CLContext> & context,SmartPtr<CLNewWaveletDenoiseImageHandler> handler,uint32_t channel,uint32_t subband,uint32_t layer)832 create_kernel_noise_estimation (
833     const SmartPtr<CLContext> &context,
834     SmartPtr<CLNewWaveletDenoiseImageHandler> handler,
835     uint32_t channel, uint32_t subband, uint32_t layer)
836 {
837     SmartPtr<CLWaveletNoiseEstimateKernel> estimation_kernel;
838 
839     char build_options[1024];
840     xcam_mem_clear (build_options);
841 
842     snprintf (build_options, sizeof (build_options),
843               " -DWAVELET_DENOISE_Y=%d "
844               " -DWAVELET_DENOISE_UV=%d ",
845               (channel == CL_IMAGE_CHANNEL_Y ? 1 : 0),
846               (channel == CL_IMAGE_CHANNEL_UV ? 1 : 0));
847 
848     estimation_kernel = new CLWaveletNoiseEstimateKernel (
849         context, "kernel_wavelet_coeff_variance", handler, channel, subband, layer);
850     XCAM_ASSERT (estimation_kernel.ptr ());
851     XCAM_FAIL_RETURN (
852         WARNING,
853         estimation_kernel->build_kernel (kernel_new_wavelet_info[KernelWaveletNoiseEstimate], build_options) == XCAM_RETURN_NO_ERROR,
854         NULL,
855         "wavelet denoise build kernel(%s) failed", kernel_new_wavelet_info[KernelWaveletNoiseEstimate].kernel_name);
856     XCAM_ASSERT (estimation_kernel->is_valid ());
857 
858     return estimation_kernel;
859 }
860 
861 static SmartPtr<CLWaveletThresholdingKernel>
create_kernel_thresholding(const SmartPtr<CLContext> & context,SmartPtr<CLNewWaveletDenoiseImageHandler> handler,uint32_t channel,uint32_t layer)862 create_kernel_thresholding (
863     const SmartPtr<CLContext> &context,
864     SmartPtr<CLNewWaveletDenoiseImageHandler> handler,
865     uint32_t channel, uint32_t layer)
866 {
867     SmartPtr<CLWaveletThresholdingKernel> threshold_kernel;
868 
869     char build_options[1024];
870     xcam_mem_clear (build_options);
871 
872     snprintf (build_options, sizeof (build_options),
873               " -DWAVELET_DENOISE_Y=%d "
874               " -DWAVELET_DENOISE_UV=%d ",
875               (channel == CL_IMAGE_CHANNEL_Y ? 1 : 0),
876               (channel == CL_IMAGE_CHANNEL_UV ? 1 : 0));
877 
878     threshold_kernel = new CLWaveletThresholdingKernel (context,
879             "kernel_wavelet_coeff_thresholding",
880             handler, channel, layer);
881     XCAM_ASSERT (threshold_kernel.ptr ());
882     XCAM_FAIL_RETURN (
883         WARNING,
884         threshold_kernel->build_kernel (kernel_new_wavelet_info[KernelWaveletThreshold], build_options) == XCAM_RETURN_NO_ERROR,
885         NULL,
886         "wavelet denoise build kernel(%s) failed", kernel_new_wavelet_info[KernelWaveletThreshold].kernel_name);
887     XCAM_ASSERT (threshold_kernel->is_valid ());
888 
889     return threshold_kernel;
890 }
891 
892 SmartPtr<CLImageHandler>
create_cl_newwavelet_denoise_image_handler(const SmartPtr<CLContext> & context,uint32_t channel,bool bayes_shrink)893 create_cl_newwavelet_denoise_image_handler (
894     const SmartPtr<CLContext> &context, uint32_t channel, bool bayes_shrink)
895 {
896     SmartPtr<CLNewWaveletDenoiseImageHandler> wavelet_handler;
897     SmartPtr<CLWaveletTransformKernel> haar_decomposition_kernel;
898     SmartPtr<CLWaveletTransformKernel> haar_reconstruction_kernel;
899 
900     wavelet_handler = new CLNewWaveletDenoiseImageHandler (context, "cl_newwavelet_denoise_handler", channel);
901     XCAM_ASSERT (wavelet_handler.ptr ());
902 
903     if (channel & CL_IMAGE_CHANNEL_Y) {
904         for (int layer = 1; layer <= WAVELET_DECOMPOSITION_LEVELS; layer++) {
905             SmartPtr<CLImageKernel> image_kernel =
906                 create_kernel_haar_decomposition (context, wavelet_handler, CL_IMAGE_CHANNEL_Y, layer, bayes_shrink);
907             wavelet_handler->add_kernel (image_kernel);
908         }
909 
910         if (bayes_shrink) {
911             for (int layer = 1; layer <= WAVELET_DECOMPOSITION_LEVELS; layer++) {
912                 SmartPtr<CLImageKernel> image_kernel;
913 
914                 image_kernel = create_kernel_noise_estimation (context, wavelet_handler,
915                                CL_IMAGE_CHANNEL_Y, CL_WAVELET_SUBBAND_HH, layer);
916                 wavelet_handler->add_kernel (image_kernel);
917 
918                 image_kernel = create_kernel_noise_estimation (context, wavelet_handler,
919                                CL_IMAGE_CHANNEL_Y, CL_WAVELET_SUBBAND_LH, layer);
920                 wavelet_handler->add_kernel (image_kernel);
921 
922                 image_kernel = create_kernel_noise_estimation (context, wavelet_handler,
923                                CL_IMAGE_CHANNEL_Y, CL_WAVELET_SUBBAND_HL, layer);
924                 wavelet_handler->add_kernel (image_kernel);
925             }
926             for (int layer = 1; layer <= WAVELET_DECOMPOSITION_LEVELS; layer++) {
927                 SmartPtr<CLImageKernel> image_kernel;
928                 image_kernel = create_kernel_thresholding (context, wavelet_handler, CL_IMAGE_CHANNEL_Y, layer);
929                 wavelet_handler->add_kernel (image_kernel);
930             }
931         }
932 
933         for (int layer = WAVELET_DECOMPOSITION_LEVELS; layer >= 1; layer--) {
934             SmartPtr<CLImageKernel> image_kernel =
935                 create_kernel_haar_reconstruction (context, wavelet_handler, CL_IMAGE_CHANNEL_Y, layer, bayes_shrink);
936             wavelet_handler->add_kernel (image_kernel);
937         }
938     }
939 
940     if (channel & CL_IMAGE_CHANNEL_UV) {
941         for (int layer = 1; layer <= WAVELET_DECOMPOSITION_LEVELS; layer++) {
942             SmartPtr<CLImageKernel> image_kernel =
943                 create_kernel_haar_decomposition (context, wavelet_handler, CL_IMAGE_CHANNEL_UV, layer, bayes_shrink);
944             wavelet_handler->add_kernel (image_kernel);
945         }
946 
947         if (bayes_shrink) {
948             for (int layer = 1; layer <= WAVELET_DECOMPOSITION_LEVELS; layer++) {
949                 SmartPtr<CLImageKernel> image_kernel;
950 
951                 image_kernel = create_kernel_noise_estimation (context, wavelet_handler,
952                                CL_IMAGE_CHANNEL_UV, CL_WAVELET_SUBBAND_HH, layer);
953                 wavelet_handler->add_kernel (image_kernel);
954 
955                 image_kernel = create_kernel_noise_estimation (context, wavelet_handler,
956                                CL_IMAGE_CHANNEL_UV, CL_WAVELET_SUBBAND_LH, layer);
957                 wavelet_handler->add_kernel (image_kernel);
958 
959                 image_kernel = create_kernel_noise_estimation (context, wavelet_handler,
960                                CL_IMAGE_CHANNEL_UV, CL_WAVELET_SUBBAND_HL, layer);
961                 wavelet_handler->add_kernel (image_kernel);
962             }
963             for (int layer = 1; layer <= WAVELET_DECOMPOSITION_LEVELS; layer++) {
964                 SmartPtr<CLImageKernel> image_kernel;
965                 image_kernel = create_kernel_thresholding (context, wavelet_handler, CL_IMAGE_CHANNEL_UV, layer);
966                 wavelet_handler->add_kernel (image_kernel);
967             }
968         }
969 
970         for (int layer = WAVELET_DECOMPOSITION_LEVELS; layer >= 1; layer--) {
971             SmartPtr<CLImageKernel> image_kernel =
972                 create_kernel_haar_reconstruction (context, wavelet_handler, CL_IMAGE_CHANNEL_UV, layer, bayes_shrink);
973             wavelet_handler->add_kernel (image_kernel);
974         }
975     }
976 
977     return wavelet_handler;
978 }
979 
980 };
981