1/* 2 * function: kernel_bayer_pipe 3 * params: 4 * input: image2d_t as read only 5 * output: image2d_t as write only 6 * blc_config: black level correction configuration 7 * wb_config: whitebalance configuration 8 * gamma_table: RGGB table 9 * stats_output: 3a stats output 10 */ 11 12 13#define WORKGROUP_CELL_WIDTH 64 14#define WORKGROUP_CELL_HEIGHT 4 15 16#define DEMOSAIC_X_CELL_PER_WORKITEM 2 17 18#define PIXEL_PER_CELL 2 19 20#define SLM_CELL_X_OFFSET 4 21#define SLM_CELL_Y_OFFSET 1 22 23// 8x8 24#define SLM_CELL_X_VALID_SIZE WORKGROUP_CELL_WIDTH 25#define SLM_CELL_Y_VALID_SIZE WORKGROUP_CELL_HEIGHT 26 27// 10x10 28#define SLM_CELL_X_SIZE (SLM_CELL_X_VALID_SIZE + SLM_CELL_X_OFFSET * 2) 29#define SLM_CELL_Y_SIZE (SLM_CELL_Y_VALID_SIZE + SLM_CELL_Y_OFFSET * 2) 30 31#define GUASS_DELTA_S_1 1.031739f 32#define GUASS_DELTA_S_1_5 1.072799f 33#define GUASS_DELTA_S_2 1.133173f 34#define GUASS_DELTA_S_2_5 1.215717f 35 36typedef struct 37{ 38 float ee_gain; 39 float ee_threshold; 40 float nr_gain; 41} CLEeConfig; 42 43inline int get_shared_pos_x (int i) 44{ 45 return i % SLM_CELL_X_SIZE; 46} 47 48inline int get_shared_pos_y (int i) 49{ 50 return i / SLM_CELL_X_SIZE; 51} 52 53inline int shared_pos (int x, int y) 54{ 55 return mad24(y, SLM_CELL_X_SIZE, x); 56} 57 58/* BA10=> GRBG */ 59inline void grbg_slm_load ( 60 __local float *px, __local float *py, __local float *pz, __local float *pw, 61 int index, __read_only image2d_t input, uint input_height, int x_start, int y_start 62) 63{ 64 sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; 65 float4 data1, data2, line1, line2; 66 int x0 = (get_shared_pos_x (index) + x_start) / 4; 67 int y0 = get_shared_pos_y (index) + y_start; 68 int2 pos = (int2)(x0, y0); 69 float4 gr, r, b, gb; 70 71 y0 = y0 > 0 ? y0 : 0; 72 73 gr = read_imagef (input, sampler, (int2)(x0, y0)); 74 r = read_imagef (input, sampler, (int2)(x0, y0 + input_height)); 75 b = read_imagef (input, sampler, (int2)(x0, y0 + input_height * 2)); 76 gb = read_imagef (input, sampler, (int2)(x0, y0 + input_height * 3)); 77 78 (*(__local float4 *)(px + index)) = gr; 79 (*(__local float4 *)(py + index)) = r; 80 (*(__local float4 *)(pz + index)) = b; 81 (*(__local float4 *)(pw + index)) = gb; 82} 83 84#define MAX_DELTA_COFF 5.0f 85#define MIN_DELTA_COFF 1.0f 86#define DEFAULT_DELTA_COFF 4.0f 87 88inline float2 delta_coff (float2 in, __local float *table) 89{ 90 float2 out; 91 out.x = table[(int)(fabs(in.x * 64.0f))]; 92 out.y = table[(int)(fabs(in.y * 64.0f))]; 93 94 return out; 95} 96 97inline float2 dot_denoise (float2 value, float2 in1, float2 in2, float2 in3, float2 in4, __local float *table, float coff0) 98{ 99 float2 coff1, coff2, coff3, coff4, coff5; 100 coff1 = delta_coff (in1 - value, table); 101 coff2 = delta_coff (in2 - value, table); 102 coff3 = delta_coff (in3 - value, table); 103 coff4 = delta_coff (in4 - value, table); 104 //(in1 * coff1 + in2 * coff2 + in3 * coff3 + in4 * coff4 + value * coff0) 105 float2 sum1 = (mad (in1, coff1, 106 mad (in2, coff2, 107 mad (in3, coff3, 108 mad (in4, coff4, value * coff0))))); 109 return sum1 / (coff0 + coff1 + coff2 + coff3 + coff4); 110} 111 112inline float2 dot_ee (float2 value, float2 in1, float2 in2, float2 in3, float2 in4, float2 out, CLEeConfig ee_config, float2 *egain) 113{ 114 115 float2 ee = mad(in1 + in2 + in3 + in4, -0.25f, value); 116 ee = fabs(ee) > ee_config.ee_threshold ? ee : 0.0f; 117 118 egain[0] = mad(ee, ee_config.ee_gain, out + 0.03f) / (out + 0.03f); 119 120 return out * egain[0]; 121} 122 123inline float2 dot_denoise_ee (float2 value, float2 in1, float2 in2, float2 in3, float2 in4, __local float *table, float coff0, float2 *egain, CLEeConfig ee_config) 124{ 125 float2 out = dot_denoise(value, in1, in2, in3, in4, table, coff0); 126 return dot_ee(value, in1, in2, in3, in4, out, ee_config, egain); 127} 128 129void demosaic_2_cell ( 130 __local float *x_data_in, __local float *y_data_in, __local float *z_data_in, __local float *w_data_in, 131 int in_x, int in_y, 132 __write_only image2d_t out, uint out_height, int out_x, int out_y) 133{ 134 float4 out_data; 135 float2 value; 136 int index; 137 { 138 float3 R_y[2]; 139 index = shared_pos (in_x - 1, in_y); 140 R_y[0] = *(__local float3*)(y_data_in + index); 141 index = shared_pos (in_x - 1, in_y + 1); 142 R_y[1] = *(__local float3*)(y_data_in + index); 143 144 out_data.s02 = (R_y[0].s01 + R_y[0].s12) * 0.5f; 145 out_data.s13 = R_y[0].s12; 146 write_imagef (out, (int2)(out_x, out_y), out_data); 147 148 out_data.s02 = (R_y[0].s01 + R_y[0].s12 + R_y[1].s01 + R_y[1].s12) * 0.25f; 149 out_data.s13 = (R_y[0].s12 + R_y[1].s12) * 0.5f; 150 write_imagef (out, (int2)(out_x, out_y + 1), out_data); 151 } 152 153 { 154 float3 B_z[2]; 155 index = shared_pos (in_x, in_y - 1); 156 B_z[0] = *(__local float3*)(z_data_in + index); 157 index = shared_pos (in_x, in_y); 158 B_z[1] = *(__local float3*)(z_data_in + index); 159 160 out_data.s02 = (B_z[0].s01 + B_z[1].s01) * 0.5f; 161 out_data.s13 = (B_z[0].s01 + B_z[0].s12 + B_z[1].s01 + B_z[1].s12) * 0.25f; 162 write_imagef (out, (int2)(out_x, out_y + out_height * 2), out_data); 163 164 out_data.s02 = B_z[1].s01; 165 out_data.s13 = (B_z[1].s01 + B_z[1].s12) * 0.5f; 166 write_imagef (out, (int2)(out_x, out_y + 1 + out_height * 2), out_data); 167 } 168 169 { 170 float3 Gr_x[2], Gb_w[2]; 171 index = shared_pos (in_x, in_y); 172 Gr_x[0] = *(__local float3*)(x_data_in + index); 173 index = shared_pos (in_x, in_y + 1); 174 Gr_x[1] = *(__local float3*)(x_data_in + index); 175 176 index = shared_pos (in_x - 1, in_y - 1); 177 Gb_w[0] = *(__local float3*)(w_data_in + index); 178 index = shared_pos (in_x - 1, in_y); 179 Gb_w[1] = *(__local float3*)(w_data_in + index); 180 181 out_data.s02 = (Gr_x[0].s01 * 4.0f + Gb_w[0].s01 + 182 Gb_w[0].s12 + Gb_w[1].s01 + Gb_w[1].s12) * 0.125f; 183 out_data.s13 = (Gr_x[0].s01 + Gr_x[0].s12 + Gb_w[0].s12 + Gb_w[1].s12) * 0.25f; 184 write_imagef (out, (int2)(out_x, out_y + out_height), out_data); 185 186 out_data.s02 = (Gr_x[0].s01 + Gr_x[1].s01 + Gb_w[1].s01 + Gb_w[1].s12) * 0.25f; 187 188 out_data.s13 = (Gb_w[1].s12 * 4.0f + Gr_x[0].s01 + 189 Gr_x[0].s12 + Gr_x[1].s01 + Gr_x[1].s12) * 0.125f; 190 write_imagef (out, (int2)(out_x, out_y + 1 + out_height), out_data); 191 } 192} 193 194void demosaic_denoise_2_cell ( 195 __local float *x_data_in, __local float *y_data_in, __local float *z_data_in, __local float *w_data_in, 196 int in_x, int in_y, 197 __write_only image2d_t out, uint out_height, int out_x, int out_y, __local float *table, CLEeConfig ee_config) 198{ 199 float4 out_data_r[2]; 200 float4 out_data_g[2]; 201 float4 out_data_b[2]; 202 float2 value; 203 int index; 204 float2 egain[4]; 205 float2 de; 206 float gain_coff0 = table[0]; 207 208 float4 R_y[3], B_z[3];; 209 float2 Gr_x0, Gb_w2; 210 float4 Gr_x1, Gb_w1; 211 float3 Gr_x2, Gb_w0; 212 213 // R egain 214 { 215 index = shared_pos (in_x - 1, in_y - 1); 216 R_y[0] = *(__local float4*)(y_data_in + index); 217 index = shared_pos (in_x - 1, in_y); 218 R_y[1] = *(__local float4*)(y_data_in + index); 219 index = shared_pos (in_x - 1, in_y + 1); 220 R_y[2] = *(__local float4*)(y_data_in + index); 221 222 out_data_r[0].s13 = dot_denoise_ee (R_y[1].s12, R_y[0].s12, R_y[1].s01, R_y[1].s23, R_y[2].s12, 223 table, gain_coff0 * GUASS_DELTA_S_2, &egain[1], ee_config); 224 } 225 226 // Gr, Gb egain 227 { 228 index = shared_pos (in_x, in_y - 1); 229 Gr_x0 = *(__local float2*)(x_data_in + index); 230 index = shared_pos (in_x - 1, in_y); 231 Gr_x1 = *(__local float4*)(x_data_in + index); 232 index = shared_pos (in_x, in_y + 1); 233 Gr_x2 = *(__local float3*)(x_data_in + index); 234 235 index = shared_pos (in_x - 1, in_y - 1); 236 Gb_w0 = *(__local float3*)(w_data_in + index); 237 index = shared_pos (in_x - 1, in_y); 238 Gb_w1 = *(__local float4*)(w_data_in + index); 239 index = shared_pos (in_x, in_y + 1); 240 Gb_w2 = *(__local float2*)(w_data_in + index); 241 242 value = mad (Gr_x1.s12, 4.0f, (Gb_w0.s01 + Gb_w0.s12 + Gb_w1.s01 + Gb_w1.s12)) * 0.125f; 243 de = dot_denoise (value, Gb_w0.s01, Gb_w0.s12, Gb_w1.s01, Gb_w1.s12, table, gain_coff0 * GUASS_DELTA_S_1_5); 244 out_data_g[0].s02 = dot_ee(Gr_x1.s12, Gr_x0, Gr_x1.s01, Gr_x1.s23, Gr_x2.s01, de, ee_config, &egain[0]); 245 246 value = mad (Gb_w1.s12, 4.0f, (Gr_x1.s12 + Gr_x1.s23 + Gr_x2.s01 + Gr_x2.s12)) * 0.125f; 247 de = dot_denoise (value, Gr_x1.s12, Gr_x1.s23, Gr_x2.s01, Gr_x2.s12, table, gain_coff0 * GUASS_DELTA_S_1_5); 248 out_data_g[1].s13 = dot_ee(Gb_w1.s12, Gb_w0.s12, Gb_w1.s01, Gb_w1.s23, Gb_w2, de, ee_config, &egain[3]); 249 } 250 251 // B egain 252 { 253 index = shared_pos (in_x - 1, in_y - 1); 254 B_z[0] = *(__local float4*)(z_data_in + index); 255 index = shared_pos (in_x - 1, in_y); 256 B_z[1] = *(__local float4*)(z_data_in + index); 257 index = shared_pos (in_x - 1, in_y + 1); 258 B_z[2] = *(__local float4*)(z_data_in + index); 259 260 out_data_b[1].s02 = dot_denoise_ee (B_z[1].s12, B_z[0].s12, B_z[1].s01, B_z[1].s23, B_z[2].s12, 261 table, gain_coff0 * GUASS_DELTA_S_2, &egain[2], ee_config); 262 } 263 264 ////////////////////////////////R////////////////////////////////////////// 265 { 266 value = (R_y[1].s01 + R_y[1].s12) * 0.5f; 267 de = dot_denoise (value, R_y[0].s01, R_y[0].s12, R_y[2].s01, R_y[2].s12, table, gain_coff0 * GUASS_DELTA_S_2_5); 268 out_data_r[0].s02 = de * egain[0]; 269 270 value = (R_y[1].s01 + R_y[1].s12 + R_y[2].s01 + R_y[2].s12) * 0.25f; 271 de = dot_denoise (value, R_y[1].s01, R_y[1].s12, R_y[2].s01, R_y[2].s12, table, gain_coff0 * GUASS_DELTA_S_1_5); 272 out_data_r[1].s02 = de * egain[2]; 273 274 value = (R_y[1].s12 + R_y[2].s12) * 0.5f; 275 de = dot_denoise (value, R_y[1].s01, R_y[1].s23, R_y[2].s01, R_y[2].s23, table, gain_coff0 * GUASS_DELTA_S_2_5); 276 out_data_r[1].s13 = de * egain[3]; 277 278 write_imagef (out, (int2)(out_x, out_y), out_data_r[0]); 279 write_imagef (out, (int2)(out_x, out_y + 1), out_data_r[1]); 280 } 281 282 ////////////////////////////////G////////////////////////////////////////// 283 { 284 value = (Gr_x1.s12 + Gr_x1.s23 + Gb_w0.s12 + Gb_w1.s12) * 0.25f; 285 de = dot_denoise(value, Gr_x1.s12, Gr_x1.s23, Gb_w0.s12, Gb_w1.s12, table, gain_coff0 * GUASS_DELTA_S_1); 286 out_data_g[0].s13 = de * egain[1]; 287 288 value = (Gr_x1.s12 + Gr_x2.s01 + Gb_w1.s01 + Gb_w1.s12) * 0.25f; 289 de = dot_denoise (value, Gr_x1.s12, Gr_x2.s01, Gb_w1.s01, Gb_w1.s12, table, gain_coff0 * GUASS_DELTA_S_1); 290 out_data_g[1].s02 = de * egain[2]; 291 292 write_imagef (out, (int2)(out_x, out_y + out_height), out_data_g[0]); 293 write_imagef (out, (int2)(out_x, out_y + 1 + out_height), out_data_g[1]); 294 } 295 296 ////////////////////////////////B////////////////////////////////////////// 297 { 298 value = (B_z[0].s12 + B_z[1].s12) * 0.5f; 299 de = dot_denoise (value, B_z[0].s01, B_z[0].s23, B_z[1].s01, B_z[1].s23, table, gain_coff0 * GUASS_DELTA_S_2_5); 300 out_data_b[0].s02 = de * egain[0]; 301 302 value = (B_z[0].s12 + B_z[0].s23 + 303 B_z[1].s12 + B_z[1].s23) * 0.25f; 304 de = dot_denoise (value, B_z[0].s12, B_z[0].s23, B_z[1].s12, B_z[1].s23, table, gain_coff0 * GUASS_DELTA_S_1_5); 305 out_data_b[0].s13 = de * egain[1]; 306 307 value = (B_z[1].s12 + B_z[1].s23) * 0.5f; 308 de = dot_denoise (value, B_z[0].s12, B_z[0].s23, B_z[2].s12, B_z[2].s23, table, gain_coff0 * GUASS_DELTA_S_2_5); 309 out_data_b[1].s13 = de * egain[3]; 310 311 write_imagef (out, (int2)(out_x, out_y + out_height * 2), out_data_b[0]); 312 write_imagef (out, (int2)(out_x, out_y + 1 + out_height * 2), out_data_b[1]); 313 } 314} 315 316void shared_demosaic ( 317 __local float *x_data_in, __local float *y_data_in, __local float *z_data_in, __local float *w_data_in, 318 int in_x, int in_y, 319 __write_only image2d_t out, uint output_height, int out_x, int out_y, 320 uint has_denoise, __local float *table, CLEeConfig ee_config) 321{ 322 if (has_denoise) { 323 demosaic_denoise_2_cell ( 324 x_data_in, y_data_in, z_data_in, w_data_in, in_x, in_y, 325 out, output_height, out_x, out_y, table, ee_config); 326 } else { 327 demosaic_2_cell ( 328 x_data_in, y_data_in, z_data_in, w_data_in, in_x, in_y, 329 out, output_height, out_x, out_y); 330 } 331} 332 333__kernel void kernel_bayer_pipe (__read_only image2d_t input, 334 uint input_height, 335 __write_only image2d_t output, 336 uint output_height, 337 __global float * bnr_table, 338 uint has_denoise, 339 CLEeConfig ee_config 340 ) 341{ 342 int g_id_x = get_global_id (0); 343 int g_id_y = get_global_id (1); 344 int g_size_x = get_global_size (0); 345 int g_size_y = get_global_size (1); 346 347 int l_id_x = get_local_id(0); 348 int l_id_y = get_local_id(1); 349 int l_size_x = get_local_size (0); 350 int l_size_y = get_local_size (1); 351 352 __local float p1_x[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE], p1_y[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE], p1_z[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE], p1_w[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE]; 353 __local float SLM_delta_coef_table[64]; 354 355 int out_x_start, out_y_start; 356 int x_start = get_group_id (0) * WORKGROUP_CELL_WIDTH; 357 int y_start = get_group_id (1) * WORKGROUP_CELL_HEIGHT; 358 int i = mad24 (l_id_y, l_size_x, l_id_x); 359 int j = i; 360 361 i *= 4; 362 if(i < SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE) 363 { 364 grbg_slm_load (p1_x, p1_y, p1_z, p1_w, i, 365 input, input_height, 366 x_start - SLM_CELL_X_OFFSET, y_start - SLM_CELL_Y_OFFSET); 367 } 368 if(j < 64) 369 SLM_delta_coef_table[j] = bnr_table[j]; 370 371 barrier(CLK_LOCAL_MEM_FENCE); 372 373 i = mad24 (l_id_y, l_size_x, l_id_x); 374 int workitem_x_size = (SLM_CELL_X_VALID_SIZE / DEMOSAIC_X_CELL_PER_WORKITEM); 375 int input_x = (i % workitem_x_size) * DEMOSAIC_X_CELL_PER_WORKITEM; 376 int input_y = i / workitem_x_size; 377 378 shared_demosaic ( 379 p1_x, p1_y, p1_z, p1_w, 380 input_x + SLM_CELL_X_OFFSET, input_y + SLM_CELL_Y_OFFSET, 381 output, output_height, 382 (input_x + x_start) * PIXEL_PER_CELL / 4, (input_y + y_start) * PIXEL_PER_CELL, has_denoise, SLM_delta_coef_table, ee_config); 383} 384 385