• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1
/*
 * function: kernel_wavelet_denoise
 *     wavelet filter used for denoising
 * src:         input image data, read only
 * hardThresh:  noise threshold (scaled per layer by threshConst)
 * softThresh:  factor applied to detail values that fall below the threshold
 */
9
/*
 * Per-layer threshold scale factors. kernel_wavelet_denoise multiplies
 * hardThresh by threshConst[layer - 1] to obtain the detail threshold.
 */
__constant float threshConst[5] = {
    50.430166f,     /* layer 1 */
    20.376415f,     /* layer 2 */
    10.184031f,     /* layer 3 */
    6.640919f,      /* layer 4 */
    3.367972f       /* layer 5 */
};
11
/*
 * kernel_wavelet_denoise
 *
 * Runs one decomposition layer of a wavelet-style denoise pass over 8-bit
 * image data. Every work-item handles 16 consecutive bytes of one row, so the
 * NDRange is interpreted as (row bytes / 16) x (rows).
 *
 * Per work-item:
 *   1. Smooth the input with a 3x3 {1,2,1; 2,4,2; 1,2,1} / 16 kernel whose
 *      taps are spaced by a layer-dependent step                 -> approx
 *   2. detail = input - approx, then shrink it with the thresholds.
 *   3. Store (layer 1) or accumulate (later layers) detail into `details`.
 *   4. layer <  decomLevels: write approx to approxOut (presumably the input
 *                            of the next layer -- confirm against the host).
 *      layer >= decomLevels: write approx + accumulated details to dest.
 *
 * Compile with WAVELET_DENOISE_UV non-zero for the chroma variant; otherwise
 * the luma variant is built. On layer 1 each variant also copies the plane it
 * does not filter from src to dest.
 *
 * src            input image, read as bytes; advanced by inputYOffset uints
 * approxOut      receives approx while layer < decomLevels
 * details        one float per processed byte; holds the running detail sum
 * dest           output image; advanced by outputYOffset uints
 * inputYOffset   offset added to src, in uint elements
 * outputYOffset  offset added to dest, in uint elements
 * inputUVOffset  scaled by (imageWidth / 4) uints, i.e. apparently the row at
 * outputUVOffset which the chroma plane starts -- TODO confirm with the host
 * layer          1-based layer number, clamped to [1, decomLevels]
 * decomLevels    total number of layers
 * hardThresh     multiplied by threshConst[layer - 1] to form the threshold
 * softThresh     factor applied to detail values inside the threshold band
 */
__kernel void kernel_wavelet_denoise(__global uint *src, __global uint *approxOut, __global float *details, __global uint *dest,
                                     int inputYOffset, int outputYOffset, uint inputUVOffset, uint outputUVOffset,
                                     int layer, int decomLevels, float hardThresh, float softThresh)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    const int imageWidth = (int)(get_global_size(0) * 16);
    const int imageHeight = (int)get_global_size(1);
    const int rowUints = imageWidth / 4;

    /* Same result as the two-step clamp for decomLevels >= 1, but never
     * yields layer < 1, which would index threshConst[-1]. */
    layer = max(1, min(layer, decomLevels));

    src += inputYOffset;
    dest += outputYOffset;

    /* Tap spacing in bytes / rows. A shift is exact, whereas truncating the
     * float result of pown() relies on it being computed without error. */
#if WAVELET_DENOISE_UV
    int xStep = 1 << layer;
    int yStep = 1 << (layer - 1);
#else
    int xStep = 1 << (layer - 1);
    int yStep = xStep;
#endif

    /* Collapse the taps onto the centre wherever a neighbour block would
     * fall outside the row / plane. All quantities are in bytes (x) or rows
     * (y): the left block starts at rowByte - xStep, the right block ends at
     * rowByte + xStep + 15, and the lower tap sits on row y + yStep. */
    const int rowByte = x * 16;
    if ((rowByte < xStep) || (rowByte + 16 + xStep > imageWidth)) {
        xStep = 0;
    }
    if ((y < yStep) || (y + yStep >= imageHeight)) {
        yStep = 0;
    }

#if WAVELET_DENOISE_UV
    /* Odd layers read at the UV offset, even layers at offset 0; the
     * approxOut offset below alternates the opposite way. */
    const int srcOffset = (layer % 2) ? (inputUVOffset * imageWidth / 4) : 0;
    __global uchar *src_p = (__global uchar *)(src + srcOffset);
#else
    __global uchar *src_p = (__global uchar *)(src);
#endif

    const int pixel_index = rowByte + y * imageWidth;   /* byte / float index */
    const int group_index = x * 4 + y * rowUints;       /* uint index */

    const int rowSpan = yStep * imageWidth;
    const int above = pixel_index - rowSpan;
    const int below = pixel_index + rowSpan;

    /*
     { a, b, c } { 1, 2, 1 }
     { d, e, f } { 2, 4, 2 }
     { g, h, i } { 1, 2, 1 }
    */
    const ushort16 a = convert_ushort16(vload16(0, src_p + (above - xStep)));
    const ushort16 b = convert_ushort16(vload16(0, src_p + above));
    const ushort16 c = convert_ushort16(vload16(0, src_p + (above + xStep)));
    const ushort16 d = convert_ushort16(vload16(0, src_p + (pixel_index - xStep)));
    const ushort16 e = convert_ushort16(vload16(0, src_p + pixel_index));
    const ushort16 f = convert_ushort16(vload16(0, src_p + (pixel_index + xStep)));
    const ushort16 g = convert_ushort16(vload16(0, src_p + (below - xStep)));
    const ushort16 h = convert_ushort16(vload16(0, src_p + below));
    const ushort16 i = convert_ushort16(vload16(0, src_p + (below + xStep)));

    /* Weighted sum is at most 16 * 255 = 4080, so ushort cannot overflow. */
    const ushort16 sum = (a + c + g + i) + (ushort16)2 * (b + d + f + h) + (ushort16)4 * e;

    /* (sum + 8) / 16: divide by the kernel weight with round-half-up. */
    const uint4 approx = as_uint4(convert_uchar16((convert_float16(sum) + 8.0f) * (1.0f / 16.0f)));

    /* The difference is formed in 8-bit signed arithmetic, so it equals
     * (input - approx) only while that difference lies in [-128, 127]. */
    const float16 rawDetail = convert_float16(convert_char16(e) - as_char16(approx));

    /* Clamp the table index so decomLevels larger than the table cannot read
     * past the end of threshConst. */
    const int threshLevels = (int)(sizeof(threshConst) / sizeof(threshConst[0]));
    const float thold = hardThresh * threshConst[min(layer, threshLevels) - 1];
    const float shrink = thold - thold * softThresh;

    /* Every branch tests the untouched rawDetail. Testing the running value
     * instead would scale a sample by softThresh a second time after the
     * first two steps had pulled it inside the (-thold, thold) band. */
    float16 detail = rawDetail;
    detail = (rawDetail < -thold) ? rawDetail + shrink : detail;
    detail = (rawDetail > thold) ? rawDetail - shrink : detail;
    detail = (rawDetail > -thold && rawDetail < thold) ? rawDetail * softThresh : detail;

    __global float16 *details_p = (__global float16 *)(&details[pixel_index]);
    if (layer == 1) {
        (*details_p) = detail;

#if WAVELET_DENOISE_UV
        // copy Y: two source rows per work-item row
        const int luma_index0 = x * 4 + (2 * y) * rowUints;
        const int luma_index1 = luma_index0 + rowUints;
        vstore4(vload4(0, src + luma_index0), 0, dest + luma_index0);
        vstore4(vload4(0, src + luma_index1), 0, dest + luma_index1);
#else
        // copy UV: one row for every second work-item row
        if (y % 2 == 0) {
            const int chroma_index = x * 4 + (y / 2) * rowUints;
            vstore4(vload4(0, src + chroma_index + inputUVOffset * (imageWidth / 4)),
                    0, dest + chroma_index + outputUVOffset * (imageWidth / 4));
        }
#endif
    } else {
        (*details_p) += detail;
    }

    if (layer < decomLevels) {
#if WAVELET_DENOISE_UV
        const int approxOffset = (layer % 2) ? 0 : (inputUVOffset * imageWidth / 4);
        (*(__global uint4 *)(approxOut + group_index + approxOffset)) = approx;
#else
        (*(__global uint4 *)(approxOut + group_index)) = approx;
#endif
    } else {
        // Reconstruction
#if WAVELET_DENOISE_UV
        __global uint4 *dest_p = (__global uint4 *)(&dest[group_index + outputUVOffset * imageWidth / 4]);
#else
        __global uint4 *dest_p = (__global uint4 *)(&dest[group_index]);
#endif
        /* Saturate: approx plus the shrunk details can leave [0, 255], and a
         * non-saturating float-to-uchar conversion is not defined there. */
        (*dest_p) = as_uint4(convert_uchar16_sat(*details_p + convert_float16(as_uchar16(approx))));
    }
}
229
230