• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * function: kernel_bayer_pipe
3 * params:
4 *   input:    image2d_t as read only
5 *   output:   image2d_t as write only
6 *   blc_config: black level correction configuration
7 *   wb_config: whitebalance configuration
8 *   gamma_table: RGGB table
9 *   stats_output: 3a stats output
10 */
11
12
13#define WORKGROUP_CELL_WIDTH 64
14#define WORKGROUP_CELL_HEIGHT 4
15
16#define DEMOSAIC_X_CELL_PER_WORKITEM 2
17
18#define PIXEL_PER_CELL 2
19
20#define SLM_CELL_X_OFFSET 4
21#define SLM_CELL_Y_OFFSET 1
22
23// 8x8
24#define SLM_CELL_X_VALID_SIZE WORKGROUP_CELL_WIDTH
25#define SLM_CELL_Y_VALID_SIZE WORKGROUP_CELL_HEIGHT
26
27// 10x10
28#define SLM_CELL_X_SIZE (SLM_CELL_X_VALID_SIZE + SLM_CELL_X_OFFSET * 2)
29#define SLM_CELL_Y_SIZE (SLM_CELL_Y_VALID_SIZE + SLM_CELL_Y_OFFSET * 2)
30
31#define GUASS_DELTA_S_1      1.031739f
32#define GUASS_DELTA_S_1_5    1.072799f
33#define GUASS_DELTA_S_2      1.133173f
34#define GUASS_DELTA_S_2_5    1.215717f
35
36typedef struct
37{
38    float           ee_gain;
39    float           ee_threshold;
40    float           nr_gain;
41} CLEeConfig;
42
43inline int get_shared_pos_x (int i)
44{
45    return i % SLM_CELL_X_SIZE;
46}
47
48inline int get_shared_pos_y (int i)
49{
50    return i / SLM_CELL_X_SIZE;
51}
52
53inline int shared_pos (int x, int y)
54{
55    return mad24(y, SLM_CELL_X_SIZE, x);
56}
57
58/* BA10=> GRBG  */
59inline void grbg_slm_load (
60    __local float *px, __local float *py, __local float *pz, __local float *pw,
61    int index, __read_only image2d_t input, uint input_height, int x_start, int y_start
62)
63{
64    sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
65    float4 data1, data2, line1, line2;
66    int x0 = (get_shared_pos_x (index) + x_start) / 4;
67    int y0 = get_shared_pos_y (index) + y_start;
68    int2 pos = (int2)(x0, y0);
69    float4 gr, r, b, gb;
70
71    y0 = y0 > 0 ? y0 : 0;
72
73    gr = read_imagef (input, sampler, (int2)(x0, y0));
74    r = read_imagef (input, sampler, (int2)(x0, y0 + input_height));
75    b = read_imagef (input, sampler, (int2)(x0, y0 + input_height * 2));
76    gb = read_imagef (input, sampler, (int2)(x0, y0 + input_height * 3));
77
78    (*(__local float4 *)(px + index)) = gr;
79    (*(__local float4 *)(py + index)) = r;
80    (*(__local float4 *)(pz + index)) = b;
81    (*(__local float4 *)(pw + index)) = gb;
82}
83
84#define MAX_DELTA_COFF 5.0f
85#define MIN_DELTA_COFF 1.0f
86#define DEFAULT_DELTA_COFF 4.0f
87
88inline float2 delta_coff (float2 in, __local float *table)
89{
90    float2 out;
91    out.x = table[(int)(fabs(in.x * 64.0f))];
92    out.y = table[(int)(fabs(in.y * 64.0f))];
93
94    return out;
95}
96
97inline float2 dot_denoise (float2 value, float2 in1, float2 in2, float2 in3, float2 in4, __local float *table, float coff0)
98{
99    float2 coff1, coff2, coff3, coff4, coff5;
100    coff1 = delta_coff (in1 - value, table);
101    coff2 = delta_coff (in2 - value, table);
102    coff3 = delta_coff (in3 - value, table);
103    coff4 = delta_coff (in4 - value, table);
104    //(in1 * coff1 + in2 * coff2 + in3 * coff3 + in4 * coff4 + value * coff0)
105    float2 sum1 = (mad (in1, coff1,
106                        mad (in2, coff2,
107                             mad (in3, coff3,
108                                  mad (in4, coff4, value * coff0)))));
109    return  sum1 / (coff0 + coff1 + coff2 + coff3 + coff4);
110}
111
112inline float2 dot_ee (float2 value, float2 in1, float2 in2, float2 in3, float2 in4, float2 out, CLEeConfig ee_config, float2 *egain)
113{
114
115    float2 ee = mad(in1 + in2 + in3 + in4, -0.25f, value);
116    ee =  fabs(ee) > ee_config.ee_threshold ? ee : 0.0f;
117
118    egain[0] = mad(ee, ee_config.ee_gain, out + 0.03f) / (out + 0.03f);
119
120    return out * egain[0];
121}
122
123inline float2 dot_denoise_ee (float2 value, float2 in1, float2 in2, float2 in3, float2 in4, __local float *table, float coff0, float2 *egain, CLEeConfig ee_config)
124{
125    float2 out = dot_denoise(value, in1, in2, in3, in4, table, coff0);
126    return dot_ee(value, in1, in2, in3, in4, out, ee_config, egain);
127}
128
129void demosaic_2_cell (
130    __local float *x_data_in, __local float *y_data_in, __local float *z_data_in, __local float *w_data_in,
131    int in_x, int in_y,
132    __write_only image2d_t out, uint out_height, int out_x, int out_y)
133{
134    float4 out_data;
135    float2 value;
136    int index;
137    {
138        float3 R_y[2];
139        index = shared_pos (in_x - 1, in_y);
140        R_y[0] = *(__local float3*)(y_data_in + index);
141        index = shared_pos (in_x - 1, in_y + 1);
142        R_y[1] = *(__local float3*)(y_data_in + index);
143
144        out_data.s02 = (R_y[0].s01 + R_y[0].s12) * 0.5f;
145        out_data.s13 = R_y[0].s12;
146        write_imagef (out, (int2)(out_x, out_y), out_data);
147
148        out_data.s02 = (R_y[0].s01 + R_y[0].s12 + R_y[1].s01 + R_y[1].s12) * 0.25f;
149        out_data.s13 = (R_y[0].s12 + R_y[1].s12) * 0.5f;
150        write_imagef (out, (int2)(out_x, out_y + 1), out_data);
151    }
152
153    {
154        float3 B_z[2];
155        index = shared_pos (in_x, in_y - 1);
156        B_z[0] = *(__local float3*)(z_data_in + index);
157        index = shared_pos (in_x, in_y);
158        B_z[1] = *(__local float3*)(z_data_in + index);
159
160        out_data.s02 = (B_z[0].s01 + B_z[1].s01) * 0.5f;
161        out_data.s13 = (B_z[0].s01 + B_z[0].s12 + B_z[1].s01 + B_z[1].s12) * 0.25f;
162        write_imagef (out, (int2)(out_x, out_y + out_height * 2), out_data);
163
164        out_data.s02 = B_z[1].s01;
165        out_data.s13 = (B_z[1].s01 + B_z[1].s12) * 0.5f;
166        write_imagef (out, (int2)(out_x, out_y + 1 + out_height * 2), out_data);
167    }
168
169    {
170        float3 Gr_x[2], Gb_w[2];
171        index = shared_pos (in_x, in_y);
172        Gr_x[0] = *(__local float3*)(x_data_in + index);
173        index = shared_pos (in_x, in_y + 1);
174        Gr_x[1] = *(__local float3*)(x_data_in + index);
175
176        index = shared_pos (in_x - 1, in_y - 1);
177        Gb_w[0] = *(__local float3*)(w_data_in + index);
178        index = shared_pos (in_x - 1, in_y);
179        Gb_w[1] = *(__local float3*)(w_data_in + index);
180
181        out_data.s02 = (Gr_x[0].s01 * 4.0f + Gb_w[0].s01 +
182                        Gb_w[0].s12 + Gb_w[1].s01 + Gb_w[1].s12) * 0.125f;
183        out_data.s13 = (Gr_x[0].s01 + Gr_x[0].s12 + Gb_w[0].s12 + Gb_w[1].s12) * 0.25f;
184        write_imagef (out, (int2)(out_x, out_y + out_height), out_data);
185
186        out_data.s02 = (Gr_x[0].s01 + Gr_x[1].s01 + Gb_w[1].s01 + Gb_w[1].s12) * 0.25f;
187
188        out_data.s13 = (Gb_w[1].s12 * 4.0f + Gr_x[0].s01 +
189                        Gr_x[0].s12 + Gr_x[1].s01 + Gr_x[1].s12) * 0.125f;
190        write_imagef (out, (int2)(out_x, out_y + 1 + out_height), out_data);
191    }
192}
193
194void demosaic_denoise_2_cell (
195    __local float *x_data_in, __local float *y_data_in, __local float *z_data_in, __local float *w_data_in,
196    int in_x, int in_y,
197    __write_only image2d_t out, uint out_height, int out_x, int out_y, __local float *table, CLEeConfig ee_config)
198{
199    float4 out_data_r[2];
200    float4 out_data_g[2];
201    float4 out_data_b[2];
202    float2 value;
203    int index;
204    float2 egain[4];
205    float2 de;
206    float gain_coff0 = table[0];
207
208    float4 R_y[3], B_z[3];;
209    float2 Gr_x0, Gb_w2;
210    float4 Gr_x1, Gb_w1;
211    float3 Gr_x2, Gb_w0;
212
213    // R egain
214    {
215        index = shared_pos (in_x - 1, in_y - 1);
216        R_y[0] = *(__local float4*)(y_data_in + index);
217        index = shared_pos (in_x - 1, in_y);
218        R_y[1] = *(__local float4*)(y_data_in + index);
219        index = shared_pos (in_x - 1, in_y + 1);
220        R_y[2] = *(__local float4*)(y_data_in + index);
221
222        out_data_r[0].s13 = dot_denoise_ee (R_y[1].s12, R_y[0].s12, R_y[1].s01, R_y[1].s23, R_y[2].s12,
223                                            table, gain_coff0 * GUASS_DELTA_S_2, &egain[1], ee_config);
224    }
225
226    // Gr, Gb egain
227    {
228        index = shared_pos (in_x, in_y - 1);
229        Gr_x0 = *(__local float2*)(x_data_in + index);
230        index = shared_pos (in_x - 1, in_y);
231        Gr_x1 = *(__local float4*)(x_data_in + index);
232        index = shared_pos (in_x, in_y + 1);
233        Gr_x2 = *(__local float3*)(x_data_in + index);
234
235        index = shared_pos (in_x - 1, in_y - 1);
236        Gb_w0 = *(__local float3*)(w_data_in + index);
237        index = shared_pos (in_x - 1, in_y);
238        Gb_w1 = *(__local float4*)(w_data_in + index);
239        index = shared_pos (in_x, in_y + 1);
240        Gb_w2 = *(__local float2*)(w_data_in + index);
241
242        value = mad (Gr_x1.s12, 4.0f, (Gb_w0.s01 + Gb_w0.s12 + Gb_w1.s01 + Gb_w1.s12)) * 0.125f;
243        de = dot_denoise (value, Gb_w0.s01, Gb_w0.s12, Gb_w1.s01, Gb_w1.s12, table, gain_coff0 * GUASS_DELTA_S_1_5);
244        out_data_g[0].s02 = dot_ee(Gr_x1.s12, Gr_x0, Gr_x1.s01, Gr_x1.s23, Gr_x2.s01, de, ee_config, &egain[0]);
245
246        value = mad (Gb_w1.s12, 4.0f, (Gr_x1.s12 + Gr_x1.s23 + Gr_x2.s01 + Gr_x2.s12)) * 0.125f;
247        de = dot_denoise (value, Gr_x1.s12, Gr_x1.s23, Gr_x2.s01, Gr_x2.s12, table, gain_coff0 * GUASS_DELTA_S_1_5);
248        out_data_g[1].s13 = dot_ee(Gb_w1.s12, Gb_w0.s12, Gb_w1.s01, Gb_w1.s23, Gb_w2, de, ee_config, &egain[3]);
249    }
250
251    // B egain
252    {
253        index = shared_pos (in_x - 1, in_y - 1);
254        B_z[0] = *(__local float4*)(z_data_in + index);
255        index = shared_pos (in_x - 1, in_y);
256        B_z[1] = *(__local float4*)(z_data_in + index);
257        index = shared_pos (in_x - 1, in_y + 1);
258        B_z[2] = *(__local float4*)(z_data_in + index);
259
260        out_data_b[1].s02 = dot_denoise_ee (B_z[1].s12, B_z[0].s12, B_z[1].s01, B_z[1].s23, B_z[2].s12,
261                                            table, gain_coff0 * GUASS_DELTA_S_2, &egain[2], ee_config);
262    }
263
264    ////////////////////////////////R//////////////////////////////////////////
265    {
266        value = (R_y[1].s01 + R_y[1].s12) * 0.5f;
267        de =  dot_denoise (value, R_y[0].s01, R_y[0].s12, R_y[2].s01, R_y[2].s12, table, gain_coff0 * GUASS_DELTA_S_2_5);
268        out_data_r[0].s02 = de * egain[0];
269
270        value = (R_y[1].s01 + R_y[1].s12 + R_y[2].s01 + R_y[2].s12) * 0.25f;
271        de = dot_denoise (value, R_y[1].s01, R_y[1].s12, R_y[2].s01, R_y[2].s12, table, gain_coff0 * GUASS_DELTA_S_1_5);
272        out_data_r[1].s02 = de * egain[2];
273
274        value = (R_y[1].s12 + R_y[2].s12) * 0.5f;
275        de = dot_denoise (value, R_y[1].s01, R_y[1].s23, R_y[2].s01, R_y[2].s23, table, gain_coff0 * GUASS_DELTA_S_2_5);
276        out_data_r[1].s13 = de * egain[3];
277
278        write_imagef (out, (int2)(out_x, out_y), out_data_r[0]);
279        write_imagef (out, (int2)(out_x, out_y + 1), out_data_r[1]);
280    }
281
282    ////////////////////////////////G//////////////////////////////////////////
283    {
284        value = (Gr_x1.s12 + Gr_x1.s23 + Gb_w0.s12 + Gb_w1.s12) * 0.25f;
285        de = dot_denoise(value, Gr_x1.s12, Gr_x1.s23, Gb_w0.s12, Gb_w1.s12, table, gain_coff0 * GUASS_DELTA_S_1);
286        out_data_g[0].s13 = de * egain[1];
287
288        value = (Gr_x1.s12 + Gr_x2.s01 + Gb_w1.s01 + Gb_w1.s12) * 0.25f;
289        de = dot_denoise (value, Gr_x1.s12, Gr_x2.s01, Gb_w1.s01, Gb_w1.s12, table, gain_coff0 * GUASS_DELTA_S_1);
290        out_data_g[1].s02 = de * egain[2];
291
292        write_imagef (out, (int2)(out_x, out_y + out_height), out_data_g[0]);
293        write_imagef (out, (int2)(out_x, out_y + 1 + out_height), out_data_g[1]);
294    }
295
296    ////////////////////////////////B//////////////////////////////////////////
297    {
298        value = (B_z[0].s12 + B_z[1].s12) * 0.5f;
299        de = dot_denoise (value, B_z[0].s01, B_z[0].s23, B_z[1].s01, B_z[1].s23, table, gain_coff0 * GUASS_DELTA_S_2_5);
300        out_data_b[0].s02 = de * egain[0];
301
302        value = (B_z[0].s12 + B_z[0].s23 +
303                 B_z[1].s12 + B_z[1].s23) * 0.25f;
304        de = dot_denoise (value, B_z[0].s12, B_z[0].s23, B_z[1].s12, B_z[1].s23, table, gain_coff0 * GUASS_DELTA_S_1_5);
305        out_data_b[0].s13 = de * egain[1];
306
307        value = (B_z[1].s12 + B_z[1].s23) * 0.5f;
308        de = dot_denoise (value, B_z[0].s12, B_z[0].s23, B_z[2].s12, B_z[2].s23, table, gain_coff0 * GUASS_DELTA_S_2_5);
309        out_data_b[1].s13 = de * egain[3];
310
311        write_imagef (out, (int2)(out_x, out_y + out_height * 2), out_data_b[0]);
312        write_imagef (out, (int2)(out_x, out_y + 1 + out_height * 2), out_data_b[1]);
313    }
314}
315
316void shared_demosaic (
317    __local float *x_data_in, __local float *y_data_in, __local float *z_data_in, __local float *w_data_in,
318    int in_x, int in_y,
319    __write_only image2d_t out, uint output_height, int out_x, int out_y,
320    uint has_denoise, __local float *table, CLEeConfig ee_config)
321{
322    if (has_denoise) {
323        demosaic_denoise_2_cell (
324            x_data_in, y_data_in, z_data_in, w_data_in, in_x, in_y,
325            out, output_height, out_x, out_y, table, ee_config);
326    } else {
327        demosaic_2_cell (
328            x_data_in, y_data_in, z_data_in, w_data_in, in_x, in_y,
329            out, output_height, out_x, out_y);
330    }
331}
332
333__kernel void kernel_bayer_pipe (__read_only image2d_t input,
334                                 uint input_height,
335                                 __write_only image2d_t output,
336                                 uint output_height,
337                                 __global float * bnr_table,
338                                 uint has_denoise,
339                                 CLEeConfig ee_config
340                                )
341{
342    int g_id_x = get_global_id (0);
343    int g_id_y = get_global_id (1);
344    int g_size_x = get_global_size (0);
345    int g_size_y = get_global_size (1);
346
347    int l_id_x = get_local_id(0);
348    int l_id_y = get_local_id(1);
349    int l_size_x = get_local_size (0);
350    int l_size_y = get_local_size (1);
351
352    __local float p1_x[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE], p1_y[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE], p1_z[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE], p1_w[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE];
353    __local float SLM_delta_coef_table[64];
354
355    int out_x_start, out_y_start;
356    int x_start = get_group_id (0) * WORKGROUP_CELL_WIDTH;
357    int y_start = get_group_id (1) * WORKGROUP_CELL_HEIGHT;
358    int i = mad24 (l_id_y, l_size_x, l_id_x);
359    int j = i;
360
361    i *= 4;
362    if(i < SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE)
363    {
364        grbg_slm_load (p1_x, p1_y, p1_z, p1_w, i,
365                       input, input_height,
366                       x_start - SLM_CELL_X_OFFSET, y_start - SLM_CELL_Y_OFFSET);
367    }
368    if(j < 64)
369        SLM_delta_coef_table[j] = bnr_table[j];
370
371    barrier(CLK_LOCAL_MEM_FENCE);
372
373    i = mad24 (l_id_y, l_size_x, l_id_x);
374    int workitem_x_size = (SLM_CELL_X_VALID_SIZE / DEMOSAIC_X_CELL_PER_WORKITEM);
375    int input_x = (i % workitem_x_size) * DEMOSAIC_X_CELL_PER_WORKITEM;
376    int input_y = i / workitem_x_size;
377
378    shared_demosaic (
379        p1_x, p1_y, p1_z, p1_w,
380        input_x + SLM_CELL_X_OFFSET, input_y + SLM_CELL_Y_OFFSET,
381        output, output_height,
382        (input_x + x_start) * PIXEL_PER_CELL / 4, (input_y + y_start) * PIXEL_PER_CELL, has_denoise, SLM_delta_coef_table, ee_config);
383}
384
385