/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/* Unnormalized integer coordinates, clamp-to-edge addressing, nearest filter. */
const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
                           CLK_ADDRESS_CLAMP_TO_EDGE   |
                           CLK_FILTER_NEAREST);

/**
 * Row-wise running sum of squared pixel differences.
 *
 * One work-item handles one row (y = get_global_id(0)). For every column i
 * it samples src at (i, y) and at the four displaced positions
 * (i + dx.k, y + dy.k), squares the differences, scales them by 255 * 255,
 * converts them to unsigned integers and accumulates them along the row.
 * The running sum is stored in integral_img[y * width + i]; each uint4 lane
 * corresponds to one of the four (dx, dy) offsets.
 *
 * @param integral_img output buffer, indexed as y * width + x
 * @param src          source image; only the .x channel is read
 * @param width        number of columns processed per row
 * @param height       number of valid rows; work-items beyond it do nothing
 * @param dx           four horizontal offsets, one per lane
 * @param dy           four vertical offsets, one per lane
 */
kernel void horiz_sum(__global uint4 *integral_img,
                      __read_only image2d_t src,
                      int width,
                      int height,
                      int4 dx,
                      int4 dy)
{
    int y = get_global_id(0);

    // Do not write outside the buffer if the global work size exceeds height.
    if (y >= height)
        return;

    __global uint4 *row = integral_img + y * width;
    uint4 sum = (uint4)(0);

    for (int i = 0; i < width; i++) {
        float  s1 = read_imagef(src, sampler, (int2)(i, y)).x;
        float4 s2;

        s2.x = read_imagef(src, sampler, (int2)(i + dx.x, y + dy.x)).x;
        s2.y = read_imagef(src, sampler, (int2)(i + dx.y, y + dy.y)).x;
        s2.z = read_imagef(src, sampler, (int2)(i + dx.z, y + dy.z)).x;
        s2.w = read_imagef(src, sampler, (int2)(i + dx.w, y + dy.w)).x;

        sum   += convert_uint4((s1 - s2) * (s1 - s2) * 255 * 255);
        row[i] = sum;
    }
}

/**
 * Column-wise in-place running sum over integral_img.
 *
 * One work-item handles one column (x = get_global_id(0)) and walks it from
 * row 0 to row height - 1, replacing every element with the sum of itself
 * and all elements above it in the same column. Whenever adding the running
 * sum to an element would exceed UINT_MAX in any lane, *overflow is
 * atomically incremented; the (wrapped) addition is still performed.
 *
 * @param integral_img buffer updated in place, indexed as y * width + x
 * @param overflow     counter incremented once per overflowing element
 * @param width        row stride and number of valid columns
 * @param height       number of rows
 */
kernel void vert_sum(__global uint4 *integral_img,
                     __global int *overflow,
                     int width,
                     int height)
{
    int x = get_global_id(0);

    // Do not touch neighbouring rows if the global work size exceeds width.
    if (x >= width)
        return;

    uint4 sum = 0;
    for (int i = 0; i < height; i++) {
        int   idx = i * width + x;
        uint4 cur = integral_img[idx];

        // cur + sum would wrap in at least one lane
        if (any((uint4)UINT_MAX - cur < sum))
            atomic_inc(overflow);

        sum += cur;
        integral_img[idx] = sum;
    }
}

/**
 * Accumulate per-pixel weights and weighted pixel values for four offsets.
 *
 * One work-item handles one pixel (x, y). The patch difference for each of
 * the four (dx, dy) offsets is taken from the integral image:
 *
 *   diff = integral_img(x-p, y-p) + integral_img(x+p, y+p)
 *        - integral_img(x+p, y-p) - integral_img(x-p, y+p)
 *
 * and turned into a weight w = exp(-diff / (h * h)). Then
 *
 *   weight[y * width + x] += w.x + w.y + w.z + w.w
 *   sum[y * width + x]    += dot(w, 255 * src(x + dx, y + dy))
 *
 * If the patch corners fall outside [0, width) x [0, height), the integral
 * image is not read and the difference is taken as 0 (weight 1 per lane).
 *
 * @param sum          accumulated weighted pixel values, indexed y * width + x
 * @param weight       accumulated weights, indexed y * width + x
 * @param integral_img integral image of squared differences
 * @param src          source image; only the .x channel is read
 * @param width        row stride and number of valid columns
 * @param height       number of valid rows
 * @param p            half-size of the patch window used for the lookup
 * @param h            weight falloff parameter (divides by h * h)
 * @param dx           four horizontal offsets, one per lane
 * @param dy           four vertical offsets, one per lane
 */
kernel void weight_accum(global float *sum, global float *weight,
                         global uint4 *integral_img, __read_only image2d_t src,
                         int width, int height, int p, float h,
                         int4 dx, int4 dy)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    // Do not update sum/weight outside the buffers if the global work size
    // exceeds the image dimensions.
    if (x >= width || y >= height)
        return;

    int4 xoff = x + dx;
    int4 yoff = y + dy;
    uint4 a = 0, b = 0, c = 0, d = 0;
    uint4 src_pix = 0;

    // out-of-bounding-box?
    int oobb = (x - p) < 0 || (y - p) < 0 || (y + p) >= height || (x + p) >= width;

    src_pix.x = (int)(255 * read_imagef(src, sampler, (int2)(xoff.x, yoff.x)).x);
    src_pix.y = (int)(255 * read_imagef(src, sampler, (int2)(xoff.y, yoff.y)).x);
    src_pix.z = (int)(255 * read_imagef(src, sampler, (int2)(xoff.z, yoff.z)).x);
    src_pix.w = (int)(255 * read_imagef(src, sampler, (int2)(xoff.w, yoff.w)).x);

    if (!oobb) {
        a = integral_img[(y - p) * width + x - p];
        b = integral_img[(y + p) * width + x - p];
        c = integral_img[(y - p) * width + x + p];
        d = integral_img[(y + p) * width + x + p];
    }

    float4 patch_diff = convert_float4(d + a - c - b);
    float4 w = native_exp(-patch_diff / (h * h));
    float w_sum = w.x + w.y + w.z + w.w;

    int idx = y * width + x;
    weight[idx] += w_sum;
    sum[idx]    += dot(w, convert_float4(src_pix));
}

/**
 * Produce the output pixel from the accumulated sums and weights.
 *
 * One work-item handles one pixel (x, y). The source pixel itself is added
 * with weight 1, and the result is
 *
 *   r = (sum + 255 * src(x, y)) / (1 + weight) / 255
 *
 * written to the first channel of dst (remaining channels 0, 0, 1).
 * Work-items outside the dimensions of dst do nothing. The sum and weight
 * buffers are indexed with the width of dst as the row stride.
 *
 * @param dst    destination image
 * @param src    source image; only the .x channel is read
 * @param sum    accumulated weighted pixel values
 * @param weight accumulated weights
 */
kernel void average(__write_only image2d_t dst,
                    __read_only image2d_t src,
                    global float *sum, global float *weight) {
    int x = get_global_id(0);
    int y = get_global_id(1);
    int2 dim = get_image_dim(dst);

    // Bail out before touching sum/weight so that out-of-range work-items
    // never read outside the buffers.
    if (x >= dim.x || y >= dim.y)
        return;

    int idx = y * dim.x + x;
    float w = weight[idx];
    float s = sum[idx];
    float src_pix = read_imagef(src, sampler, (int2)(x, y)).x;
    float r = (s + src_pix * 255) / (1.0f + w) / 255.0f;

    write_imagef(dst, (int2)(x, y), (float4)(r, 0.0f, 0.0f, 1.0f));
}
116