/*
 * function: kernel_wavelet_denoise
 *     Runs one decomposition layer of a wavelet-style denoise on an 8-bit plane.
 *     Each work-item processes 16 horizontally adjacent bytes; the NDRange is
 *     therefore (plane width / 16, plane height).
 *
 *     For its 16 bytes the work-item:
 *       1. computes a 3x3 weighted average ("approx") whose taps are spaced
 *          xScaler / yScaler pixels apart (spacing grows as a power of two
 *          with the layer),
 *       2. computes detail = centre pixel - approx and shrinks it with a
 *          hard/soft threshold,
 *       3. stores (layer 1) or accumulates (later layers) the detail in
 *          `details`,
 *       4. writes approx to `approxOut` while layer < decomLevels, otherwise
 *          writes approx + accumulated details to `dest` (reconstruction).
 *     On layer 1 the plane that is NOT being filtered is copied from src to
 *     dest unchanged.
 *
 * build option:
 *     WAVELET_DENOISE_UV  non-zero: filter the plane located at the UV offset
 *                         and copy the other one; zero/undefined: filter the
 *                         plane at offset 0 and copy the one at the UV offset.
 *
 * src:             input buffer; advanced by inputYOffset (uint elements)
 * approxOut:       receives the smoothed plane while layer < decomLevels
 * details:         one float per pixel, thresholded detail accumulated over layers
 * dest:            output buffer; advanced by outputYOffset (uint elements)
 * inputUVOffset:   multiplied by imageWidth / 4 to form the uint offset of the
 *                  second plane in src (presumably the row at which that plane
 *                  starts -- confirm against the host code)
 * outputUVOffset:  same as inputUVOffset, for dest
 * layer:           current layer, clamped to [1, decomLevels]
 * decomLevels:     number of layers; the last one performs the reconstruction
 * hardThresh:      scales the per-layer constant in threshConst to give the
 *                  threshold magnitude
 * softThresh:      gain applied to details inside the threshold band
 */

/* Per-layer threshold constants, indexed by layer - 1. */
__constant float threshConst[5] = { 50.430166f, 20.376415f, 10.184031f, 6.640919f, 3.367972f };

/* Loads the 16 bytes plane[index .. index + 15] and widens them to ushort. */
ushort16 wavelet_denoise_load16(__global const uchar *plane, int index)
{
    /* vload16 only needs byte alignment for uchar, so any index is fine. */
    return convert_ushort16(vload16(0, plane + index));
}

__kernel void kernel_wavelet_denoise(__global uint *src, __global uint *approxOut, __global float *details, __global uint *dest,
                                     int inputYOffset, int outputYOffset, uint inputUVOffset, uint outputUVOffset,
                                     int layer, int decomLevels, float hardThresh, float softThresh)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    /* One work-item covers 16 pixels of a row. */
    int imageWidth = (int)get_global_size(0) * 16;
    int imageHeight = (int)get_global_size(1);

    layer = (layer > 1) ? layer : 1;
    layer = (layer < decomLevels) ? layer : decomLevels;

    src += inputYOffset;
    dest += outputYOffset;

    /* Tap spacing in pixels for this layer. */
#if WAVELET_DENOISE_UV
    int xScaler = pown(2.0f, layer);
    int yScaler = pown(2.0f, (layer - 1));
#else
    int xScaler = pown(2.0f, (layer - 1));
    int yScaler = xScaler;
#endif

    /*
     * Border handling: when a neighbouring tap would fall outside the plane
     * the spacing on that axis collapses to 0, i.e. the centre row/column is
     * used in place of the missing neighbours.
     * The horizontal test is done in pixels (x is a 16-pixel group index while
     * xScaler is a pixel distance); the vertical test rejects
     * y + yScaler == imageHeight, which is the first row past the plane.
     */
    int column = x * 16;
    if ((column < xScaler) || (column + 16 + xScaler > imageWidth)) {
        xScaler = 0;
    }
    if ((y < yScaler) || (y + yScaler >= imageHeight)) {
        yScaler = 0;
    }

#if WAVELET_DENOISE_UV
    /* Odd layers read from the UV offset, even layers from offset 0. */
    int srcOffset = (layer % 2) ? (inputUVOffset * imageWidth / 4) : 0;
    __global uchar *src_p = (__global uchar *)(src + srcOffset);
#else
    __global uchar *src_p = (__global uchar *)(src);
#endif

    int pixel_index = x * 16 + y * imageWidth;          /* byte / float index */
    int group_index = x * 4 + y * (imageWidth / 4);     /* uint index         */

    int rowStep = yScaler * imageWidth;

    /*
        { a, b, c }     { 1, 2, 1 }
        { d, e, f }  *  { 2, 4, 2 }
        { g, h, i }     { 1, 2, 1 }
    */
    ushort16 a = wavelet_denoise_load16(src_p, pixel_index - rowStep - xScaler);
    ushort16 b = wavelet_denoise_load16(src_p, pixel_index - rowStep);
    ushort16 c = wavelet_denoise_load16(src_p, pixel_index - rowStep + xScaler);
    ushort16 d = wavelet_denoise_load16(src_p, pixel_index - xScaler);
    ushort16 e = wavelet_denoise_load16(src_p, pixel_index);
    ushort16 f = wavelet_denoise_load16(src_p, pixel_index + xScaler);
    ushort16 g = wavelet_denoise_load16(src_p, pixel_index + rowStep - xScaler);
    ushort16 h = wavelet_denoise_load16(src_p, pixel_index + rowStep);
    ushort16 i = wavelet_denoise_load16(src_p, pixel_index + rowStep + xScaler);

    /* Weighted sum; the maximum is 16 * 255 = 4080, so ushort cannot overflow. */
    ushort16 sum = (a + c + g + i)
                   + (ushort16)2 * (b + d + f + h)
                   + (ushort16)4 * e;

    /* Divide by 16 with round-to-nearest: (sum + 8) >> 4, result is 0..255. */
    uchar16 approxPix = convert_uchar16((sum + (ushort16)8) >> (ushort16)4);
    uint4 approx = as_uint4(approxPix);

    /*
     * Detail coefficient in float. The difference can reach +/-191, so it must
     * not be formed in 8-bit signed arithmetic.
     */
    float16 detail = convert_float16(e) - convert_float16(approxPix);

    /* Guard the table lookup in case decomLevels is outside 1..5. */
    float thold = hardThresh * threshConst[clamp(layer, 1, 5) - 1];
    float shrink = thold - thold * softThresh;

    /*
     * Threshold. Every condition is evaluated on the untouched coefficient
     * (raw) so that a value moved by the first two rules is not scaled a
     * second time by the third.
     */
    float16 raw = detail;
    detail = (raw < -thold) ? raw + shrink : detail;
    detail = (raw > thold) ? raw - shrink : detail;
    detail = (raw > -thold && raw < thold) ? raw * softThresh : detail;

    /* Layer 1 initialises the detail buffer, later layers accumulate into it. */
    float16 accumulated = detail;
    if (layer != 1) {
        accumulated += vload16(0, details + pixel_index);
    }
    vstore16(accumulated, 0, details + pixel_index);

    if (layer == 1) {
#if WAVELET_DENOISE_UV
        // copy Y: each row of this plane corresponds to two rows at offset 0
        int luma_index0 = x * 4 + (2 * y) * (imageWidth / 4);
        int luma_index1 = x * 4 + (2 * y + 1) * (imageWidth / 4);
        uint4 luma;
        luma = vload4(0, src + luma_index0);
        vstore4(luma, 0, dest + luma_index0);
        luma = vload4(0, src + luma_index1);
        vstore4(luma, 0, dest + luma_index1);
#else
        // copy UV: one row at the UV offset for every two rows of this plane
        if (y % 2 == 0) {
            int chroma_index = x * 4 + (y / 2) * (imageWidth / 4);
            uint4 chroma = vload4(0, src + chroma_index + inputUVOffset * (imageWidth / 4));
            vstore4(chroma, 0, dest + chroma_index + outputUVOffset * (imageWidth / 4));
        }
#endif
    }

    if (layer < decomLevels) {
        /* Intermediate layer: hand the smoothed plane to the next pass. */
#if WAVELET_DENOISE_UV
        /* Written to the opposite offset from the one this layer read from. */
        int approxOffset = (layer % 2) ? 0 : (inputUVOffset * imageWidth / 4);
        vstore4(approx, 0, approxOut + group_index + approxOffset);
#else
        vstore4(approx, 0, approxOut + group_index);
#endif
    } else {
        // Reconstruction: coarsest approximation plus accumulated details,
        // saturated to the 8-bit range.
        uint4 result = as_uint4(convert_uchar16_sat(accumulated + convert_float16(approxPix)));
#if WAVELET_DENOISE_UV
        vstore4(result, 0, dest + (group_index + outputUVOffset * imageWidth / 4));
#else
        vstore4(result, 0, dest + group_index);
#endif
    }
}
