1/* 2 * function: kernel_bayer_copy 3 * sample code of default kernel arguments 4 * input: image2d_t as read only 5 * output: image2d_t as write only 6 */ 7 8//#define ENABLE_IMAGE_2D_INPUT 0 9 10#ifndef STATS_BITS 11#define STATS_BITS 8 12#endif 13 14/* 15 * GROUP_PIXEL_X_SIZE = 2 * GROUP_CELL_X_SIZE 16 * GROUP_PIXEL_Y_SIZE = 2 * GROUP_CELL_Y_SIZE 17*/ 18 19#define GROUP_CELL_X_SIZE 64 20#define GROUP_CELL_Y_SIZE 4 21 22//float4; 16 23#define SLM_X_SIZE (GROUP_CELL_X_SIZE / 4) 24#define SLM_Y_SIZE GROUP_CELL_Y_SIZE 25 26#define STATS_3A_CELL_X_SIZE 8 27#define STATS_3A_CELL_Y_SIZE GROUP_CELL_Y_SIZE 28 29typedef struct { 30 float level_gr; /* Black level for GR pixels */ 31 float level_r; /* Black level for R pixels */ 32 float level_b; /* Black level for B pixels */ 33 float level_gb; /* Black level for GB pixels */ 34 uint color_bits; 35} CLBLCConfig; 36 37 38typedef struct 39{ 40 float r_gain; 41 float gr_gain; 42 float gb_gain; 43 float b_gain; 44} CLWBConfig; 45 46inline int slm_pos (const int x, const int y) 47{ 48 return mad24 (y, SLM_X_SIZE, x); 49} 50 51inline void gamma_correct(float8 *in_out, __global float *table) 52{ 53 in_out->s0 = table[clamp(convert_int(in_out->s0 * 255.0f), 0, 255)]; 54 in_out->s1 = table[clamp(convert_int(in_out->s1 * 255.0f), 0, 255)]; 55 in_out->s2 = table[clamp(convert_int(in_out->s2 * 255.0f), 0, 255)]; 56 in_out->s3 = table[clamp(convert_int(in_out->s3 * 255.0f), 0, 255)]; 57 in_out->s4 = table[clamp(convert_int(in_out->s4 * 255.0f), 0, 255)]; 58 in_out->s5 = table[clamp(convert_int(in_out->s5 * 255.0f), 0, 255)]; 59 in_out->s6 = table[clamp(convert_int(in_out->s6 * 255.0f), 0, 255)]; 60 in_out->s7 = table[clamp(convert_int(in_out->s7 * 255.0f), 0, 255)]; 61} 62 63inline float avg_float8 (float8 data) 64{ 65 return (data.s0 + data.s1 + data.s2 + data.s3 + data.s4 + data.s5 + data.s6 + data.s7) * 0.125f; 66} 67 68inline void stats_3a_calculate ( 69 __local float4 * slm_gr, 70 __local float4 * slm_r, 71 __local float4 * slm_b, 72 __local float4 * slm_gb, 73 __global ushort8 * stats_output, 74 CLWBConfig *wb_config) 75{ 76 const int group_x_size = get_num_groups (0); 77 const int group_id_x = get_group_id (0); 78 const int group_id_y = get_group_id (1); 79 80 const int l_id_x = get_local_id (0); 81 const int l_id_y = get_local_id (1); 82 const int l_size_x = get_local_size (0); 83 const int stats_float4_x_count = STATS_3A_CELL_X_SIZE / 4; 84 int count = stats_float4_x_count * STATS_3A_CELL_Y_SIZE / 4; 85 86 int index = mad24 (l_id_y, l_size_x, l_id_x); 87 int index_x = index % SLM_X_SIZE; 88 int index_y = index / SLM_X_SIZE; 89 90 if (mad24 (index_y, stats_float4_x_count, index_x % stats_float4_x_count) < count) { 91 int pitch_count = count / stats_float4_x_count * SLM_X_SIZE; 92 int index1 = index + pitch_count; 93 int index2 = index1 + pitch_count; 94 int index3 = index2 + pitch_count; 95 slm_gr[index] = (slm_gr[index] + slm_gr[index1] + slm_gr[index2] + slm_gr[index3]) * 0.25f; 96 slm_r[index] = (slm_r[index] + slm_r[index1] + slm_r[index2] + slm_r[index3]) * 0.25f; 97 slm_b[index] = (slm_b[index] + slm_b[index1] + slm_b[index2] + slm_b[index3]) * 0.25f; 98 slm_gb[index] = (slm_gb[index] + slm_gb[index1] + slm_gb[index2] + slm_gb[index3]) * 0.25f; 99 } 100 barrier (CLK_LOCAL_MEM_FENCE); 101 102 if (index < SLM_X_SIZE / 2) { 103 float result_gr, result_r, result_b, result_gb, avg_y; 104 float8 tmp; 105 tmp = ((__local float8*)slm_gr)[index]; 106 result_gr = avg_float8 (tmp); 107 108 tmp = ((__local float8*)slm_r)[index]; 109 result_r = avg_float8 (tmp); 110 111 tmp = ((__local float8*)slm_b)[index]; 112 result_b = avg_float8 (tmp); 113 114 tmp = ((__local float8*)slm_gb)[index]; 115 result_gb = avg_float8 (tmp); 116 117 int out_index = mad24 (mad24 (group_id_y, group_x_size, group_id_x), 118 (GROUP_CELL_X_SIZE / STATS_3A_CELL_X_SIZE) * (GROUP_CELL_Y_SIZE / STATS_3A_CELL_Y_SIZE), 119 index); 120 121#if STATS_BITS==8 122 avg_y = mad ((result_gr * wb_config->gr_gain + result_gb * wb_config->gb_gain), 74.843f, 123 mad (result_r * wb_config->r_gain, 76.245f, result_b * 29.070f)); 124 125 //ushort avg_y; avg_r; avg_gr; avg_gb; avg_b; valid_wb_count; f_value1; f_value2; 126 stats_output[out_index] = (ushort8) ( 127 convert_ushort (convert_uchar_sat (avg_y)), 128 convert_ushort (convert_uchar_sat (result_r * 255.0f)), 129 convert_ushort (convert_uchar_sat (result_gr * 255.0f)), 130 convert_ushort (convert_uchar_sat (result_gb * 255.0f)), 131 convert_ushort (convert_uchar_sat (result_b * 255.0f)), 132 STATS_3A_CELL_X_SIZE * STATS_3A_CELL_Y_SIZE, 133 0, 134 0); 135#elif STATS_BITS==12 136 avg_y = mad ((result_gr * wb_config->gr_gain + result_gb * wb_config->gb_gain), 1201.883f, 137 mad (result_r * wb_config->r_gain, 1224.405f, result_b * 466.830f)); 138 139 stats_output[out_index] = (ushort8) ( 140 convert_ushort (clamp (avg_y, 0.0f, 4095.0f)), 141 convert_ushort (clamp (result_r * 4096.0f, 0.0f, 4095.0f)), 142 convert_ushort (clamp (result_gr * 4096.0f, 0.0f, 4095.0f)), 143 convert_ushort (clamp (result_gb * 4096.0f, 0.0f, 4095.0f)), 144 convert_ushort (clamp (result_b * 4096.0f, 0.0f, 4095.0f)), 145 STATS_3A_CELL_X_SIZE * STATS_3A_CELL_Y_SIZE, 146 0, 147 0); 148#else 149 printf ("kernel 3a-stats error, wrong bit depth:%d\n", STATS_BITS); 150#endif 151 } 152} 153 154 155__kernel void kernel_bayer_basic ( 156#if ENABLE_IMAGE_2D_INPUT 157 __read_only image2d_t input, 158#else 159 __global const ushort8 *input, 160#endif 161 uint input_aligned_width, 162 __write_only image2d_t output, 163 uint out_height, 164 CLBLCConfig blc_config, 165 CLWBConfig wb_config, 166 __global float *gamma_table, 167 __global ushort8 *stats_output 168) 169{ 170 int g_x = get_global_id (0); 171 int g_y = get_global_id (1); 172 173 const int l_x = get_local_id (0); 174 const int l_y = get_local_id (1); 175 const int l_x_size = get_local_size (0); 176 const int l_y_size = get_local_size (1); 177 const int group_id_x = get_group_id (0); 178 const int group_id_y = get_group_id (1); 179 180 sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; 181 182 int index = mad24 (l_y, l_x_size, l_x); 183 int x_cell_start = (GROUP_CELL_X_SIZE / 4) * group_id_x; 184 int y_cell_start = GROUP_CELL_Y_SIZE * group_id_y; 185 int x, y; 186 187 float blc_multiplier = (float)(1 << (16 - blc_config.color_bits)); 188 189 __local float4 slm_gr[SLM_X_SIZE * SLM_Y_SIZE], slm_r[SLM_X_SIZE * SLM_Y_SIZE], slm_b[SLM_X_SIZE * SLM_Y_SIZE], slm_gb[SLM_X_SIZE * SLM_Y_SIZE]; 190 191 for (; index < SLM_X_SIZE * SLM_Y_SIZE; index += l_x_size * l_y_size) { 192 float8 line1; 193 float8 line2; 194 195 x = index % SLM_X_SIZE + x_cell_start; 196 y = index / SLM_X_SIZE + y_cell_start; 197 198#if ENABLE_IMAGE_2D_INPUT 199 line1 = convert_float8 (as_ushort8 (read_imageui(input, sampler, (int2)(x, y * 2)))) / 65536.0f; 200 line2 = convert_float8 (as_ushort8 (read_imageui(input, sampler, (int2)(x, y * 2 + 1)))) / 65536.0f; 201#else 202 line1 = convert_float8 (input [y * 2 * input_aligned_width + x]) / 65536.0f; 203 line2 = convert_float8 (input [(y * 2 + 1) * input_aligned_width + x]) / 65536.0f; 204#endif 205 206 float4 gr = mad (line1.even, blc_multiplier, - blc_config.level_gr); 207 float4 r = mad (line1.odd, blc_multiplier, - blc_config.level_r); 208 float4 b = mad (line2.even, blc_multiplier, - blc_config.level_b); 209 float4 gb = mad (line2.odd, blc_multiplier, - blc_config.level_gb); 210 211 slm_gr[index] = gr; 212 slm_r[index] = r; 213 slm_b[index] = b; 214 slm_gb[index] = gb; 215 } 216 barrier(CLK_LOCAL_MEM_FENCE); 217 218 float8 data_gr, data_r, data_b, data_gb; 219 index = mad24 (l_y, l_x_size, l_x); 220 x = mad24 (GROUP_CELL_X_SIZE / 8, group_id_x, index % (SLM_X_SIZE / 2)); 221 y = mad24 (GROUP_CELL_Y_SIZE, group_id_y, index / (SLM_X_SIZE / 2)); 222 223 data_gr = ((__local float8*)slm_gr)[index]; 224 data_gr = data_gr * wb_config.gr_gain; 225 226 data_r = ((__local float8*)slm_r)[index]; 227 data_r = data_r * wb_config.r_gain; 228 229 data_b = ((__local float8*)slm_b)[index]; 230 data_b = data_b * wb_config.b_gain; 231 232 data_gb = ((__local float8*)slm_gb)[index]; 233 data_gb = data_gb * wb_config.gb_gain; 234 235#if ENABLE_GAMMA 236 gamma_correct (&data_gr, gamma_table); 237 gamma_correct (&data_r, gamma_table); 238 gamma_correct (&data_b, gamma_table); 239 gamma_correct (&data_gb, gamma_table); 240#endif 241 242#if 0 243 if (x % 16 == 0 && y % 16 == 0) { 244 uint8 value = convert_uint8(convert_uchar8_sat(data_gr * 255.0f)); 245 printf ("(x:%d, y:%d) (blc.bit:%d, level:%d) (wb.gr:%f)=> (%d, %d, %d, %d, %d, %d, %d, %d)\n", 246 x * 8, y, 247 blc_config.color_bits, convert_uint(blc_config.level_gr * 255.0f), 248 wb_config.gr_gain, 249 value.s0, value.s1, value.s2, value.s3, value.s4, value.s5, value.s6, value.s7); 250 } 251#endif 252 253 write_imageui (output, (int2)(x, y), as_uint4 (convert_ushort8 (data_gr * 65536.0f))); 254 write_imageui (output, (int2)(x, y + out_height), as_uint4 (convert_ushort8 (data_r * 65536.0f))); 255 write_imageui (output, (int2)(x, y + out_height * 2), as_uint4 (convert_ushort8 (data_b * 65536.0f))); 256 write_imageui (output, (int2)(x, y + out_height * 3), as_uint4 (convert_ushort8 (data_gb * 65536.0f))); 257 258 stats_3a_calculate (slm_gr, slm_r, slm_b, slm_gb, stats_output, &wb_config); 259} 260 261