• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5//  By downloading, copying, installing or using the software you agree to this license.
6//  If you do not agree to this license, do not download, install,
7//  copy or use the software.
8//
9//
10//                           License Agreement
11//                For Open Source Computer Vision Library
12//
13// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
14// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15// Third party copyrights are property of their respective owners.
16//
17// @Authors
18//    Zhang Ying, zhangying913@gmail.com
19//	  Niko Li, newlife20080214@gmail.com
20// Redistribution and use in source and binary forms, with or without modification,
21// are permitted provided that the following conditions are met:
22//
23//   * Redistribution's of source code must retain the above copyright notice,
24//     this list of conditions and the following disclaimer.
25//
26//   * Redistribution's in binary form must reproduce the above copyright notice,
27//     this list of conditions and the following disclaimer in the documentation
28//     and/or other materials provided with the distribution.
29//
30//   * The name of the copyright holders may not be used to endorse or promote products
31//     derived from this software without specific prior written permission.
32//
33// This software is provided by the copyright holders and contributors as is and
34// any express or implied warranties, including, but not limited to, the implied
35// warranties of merchantability and fitness for a particular purpose are disclaimed.
36// In no event shall the Intel Corporation or contributors be liable for any direct,
37// indirect, incidental, special, exemplary, or consequential damages
38// (including, but not limited to, procurement of substitute goods or services;
39// loss of use, data, or profits; or business interruption) however caused
40// and on any theory of liability, whether in contract, strict liability,
41// or tort (including negligence or otherwise) arising in any way out of
42// the use of this software, even if advised of the possibility of such damage.
43//
44//M*/
45
// When the build asks for double support, enable whichever fp64 extension the
// compiler advertises: the AMD one is tried first, then the Khronos one.
#if defined(DOUBLE_SUPPORT)
#if defined(cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined(cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
53
// Fixed-point scale applied to interpolation weights. INTER_RESIZE_COEF_BITS is
// not defined in this file; it is expected to be supplied by the build options.
#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)
// Number of fractional bits carried by a product of two scaled weights.
#define CAST_BITS (INTER_RESIZE_COEF_BITS << 1)
// Index following x, clamped to the last valid index of an axis of length l.
// Arguments are parenthesized so that expression arguments expand correctly.
#define INC(x,l) min((x) + 1, (l) - 1)

#define noconvert

// Pixel access helpers. T, T1 and cn are not defined in this file and are
// expected to come from the build options. For cn == 3 pixels are accessed as
// three packed T1 elements through vload3/vstore3; otherwise a pixel is a
// single T value. TSIZE is the pixel size in bytes. Every expansion is fully
// parenthesized so the macros can be used inside larger expressions.
#if cn != 3
#define loadpix(addr)  (*(__global const T *)(addr))
#define storepix(val, addr)  (*(__global T *)(addr) = (val))
#define TSIZE ((int)sizeof(T))
#else
#define loadpix(addr)  vload3(0, (__global const T1 *)(addr))
#define storepix(val, addr) vstore3((val), 0, (__global T1 *)(addr))
#define TSIZE ((int)sizeof(T1) * cn)
#endif
69
70#if defined USE_SAMPLER
71
// Factor applied to the sampled float value before conversion to the
// destination type, selected by the build-time `depth` value.
// NOTE(review): the non-unit factors look like the maxima of 8/16-bit unsigned
// and signed ranges, which suggests the source image uses normalized channel
// types - confirm against the host code.
#if depth == 0
#define RESULT_SCALE    255.0f
#elif depth == 1
#define RESULT_SCALE    127.0f
#elif depth == 2
#define RESULT_SCALE    65535.0f
#elif depth == 3
#define RESULT_SCALE    32767.0f
#else
#define RESULT_SCALE    1.0f
#endif

// READ_IMAGE samples the image and keeps only the first `cn` channels;
// INTERMEDIATE_TYPE is the float type that holds that result.
// Nothing is defined here when cn is outside 1..4.
#if cn == 1
#define READ_IMAGE(img, smp, pos)  read_imagef(img,smp,pos).x
#define INTERMEDIATE_TYPE  float
#elif cn == 2
#define READ_IMAGE(img, smp, pos)  read_imagef(img,smp,pos).xy
#define INTERMEDIATE_TYPE  float2
#elif cn == 3
#define READ_IMAGE(img, smp, pos)  read_imagef(img,smp,pos).xyz
#define INTERMEDIATE_TYPE  float3
#elif cn == 4
#define READ_IMAGE(img, smp, pos)  read_imagef(img,smp,pos)
#define INTERMEDIATE_TYPE  float4
#endif

// Token-pasting helpers. CAT expands its arguments before pasting them.
// An alternative spelling of the type above would be
//     #define INTERMEDIATE_TYPE CAT(float, cn)
// with float1 providing the name for the single-channel case.
#define __CAT(x, y) x##y
#define CAT(x, y) __CAT(x, y)
#define float1 float
102
// Resize kernel that lets the image sampler do the interpolation.
//
// Each work-item (dx, dy) samples srcImage at the centre of its destination
// pixel mapped through (ifx, ify), scales the float result by RESULT_SCALE,
// converts it to the destination type and stores it.
//
//   srcImage            source image read through a linear, clamp-to-edge,
//                       unnormalized-coordinate sampler
//   dstptr              destination buffer (byte pointer)
//   dststep, dstoffset  destination row stride and start offset, in bytes
//   dstrows, dstcols    destination size in pixels
//   ifx, ify            factors that map destination coordinates to source
//                       coordinates
__kernel void resizeSampler(__read_only image2d_t srcImage,
                            __global uchar* dstptr, int dststep, int dstoffset,
                            int dstrows, int dstcols,
                            float ifx, float ify)
{
    const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
                              CLK_ADDRESS_CLAMP_TO_EDGE |
                              CLK_FILTER_LINEAR;

    const int dx = get_global_id(0);
    const int dy = get_global_id(1);

    // Work-items that fall outside the destination have nothing to write.
    if (dx >= dstcols || dy >= dstrows)
    {
        return;
    }

    // Source position of the destination pixel centre.
    const float2 srcPos = (float2)((dx+0.5f) * ifx, (dy+0.5f) * ify);

    INTERMEDIATE_TYPE scaled = READ_IMAGE(srcImage, sampler, srcPos) * RESULT_SCALE;

    // Depths up to 4 are rounded before conversion; other depths are
    // converted without rounding.
#if depth <= 4
    T uval = convertToDT(round(scaled));
#else
    T uval = convertToDT(scaled);
#endif

    storepix(uval, dstptr + mad24(dy, dststep, dstoffset + dx*TSIZE));
}
130
131#elif defined INTER_LINEAR_INTEGER
132
// Bilinear resize using precomputed integer offsets and fixed-point weights.
//
//   srcptr, src_step, src_offset   source buffer, row stride and start offset
//                                  (bytes)
//   src_rows, src_cols             source size in pixels
//   dstptr, dst_step, dst_offset   destination buffer, row stride and start
//                                  offset (bytes)
//   dst_rows, dst_cols             destination size in pixels
//   buffer                         packed lookup tables, laid out as read below:
//                                    int   xofs[dst_cols]
//                                    int   yofs[dst_rows]
//                                    short ialpha[2 * dst_cols]
//                                    short ibeta [2 * dst_rows]
//
// Each work-item (dx, dy) blends the 2x2 source neighbourhood that starts at
// (xofs[dx], yofs[dy]) with the weight pairs ialpha[2*dx..] and ibeta[2*dy..].
//
// The right-hand neighbour column is clamped to the last source column, in the
// same way the rows are clamped, so the kernel never reads past the end of a
// source row when xofs[dx] is the last column.
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                       __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                       __global const uchar * buffer)
{
    int dx = get_global_id(0);
    int dy = get_global_id(1);

    if (dx < dst_cols && dy < dst_rows)
    {
        // Locate the four tables inside the packed buffer.
        __global const int * xofs = (__global const int *)(buffer), * yofs = xofs + dst_cols;
        __global const short * ialpha = (__global const short *)(yofs + dst_rows);
        __global const short * ibeta = ialpha + ((dst_cols + dy) << 1);
        ialpha += dx << 1;

        // Left/right source columns; the right one is clamped to stay in the row.
        int sx0 = xofs[dx];
        int sx1 = min(sx0 + 1, src_cols - 1);
        // Top/bottom source rows, both clamped to the image.
        int sy0 = clamp(yofs[dy], 0, src_rows - 1),
            sy1 = clamp(yofs[dy] + 1, 0, src_rows - 1);
        short a0 = ialpha[0], a1 = ialpha[1];
        short b0 = ibeta[0], b1 = ibeta[1];

        // Byte offsets of the start of the two source rows.
        int src_index0 = mad24(sy0, src_step, src_offset),
            src_index1 = mad24(sy1, src_step, src_offset);
        WT data0 = convertToWT(loadpix(srcptr + mad24(sx0, TSIZE, src_index0)));
        WT data1 = convertToWT(loadpix(srcptr + mad24(sx1, TSIZE, src_index0)));
        WT data2 = convertToWT(loadpix(srcptr + mad24(sx0, TSIZE, src_index1)));
        WT data3 = convertToWT(loadpix(srcptr + mad24(sx1, TSIZE, src_index1)));

        // Horizontal blend, then vertical blend, dropping fractional bits in
        // three steps (4 + 16 here, 2 more with rounding at the store).
        // NOTE(review): the 22-bit total presumably corresponds to two 11-bit
        // weights - confirm against the host-side table generation.
        WT val = ( (((data0 * a0 + data1 * a1) >> 4) * b0) >> 16) +
                 ( (((data2 * a0 + data3 * a1) >> 4) * b1) >> 16);

        storepix(convertToDT((val + 2) >> 2),
                 dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
    }
}
166
167#elif defined INTER_LINEAR
168
// Bilinear resize that computes the source position and weights per pixel.
//
//   srcptr, src_step, src_offset   source buffer, row stride and start offset
//                                  (bytes)
//   src_rows, src_cols             source size in pixels
//   dstptr, dst_step, dst_offset   destination buffer, row stride and start
//                                  offset (bytes)
//   dst_rows, dst_cols             destination size in pixels
//   ifx, ify                       factors that map destination coordinates to
//                                  source coordinates
//
// For depth <= 4 the blend is done in fixed point with weights scaled by
// INTER_RESIZE_COEF_SCALE; otherwise it is done directly in floating point.
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                       __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                       float ifx, float ify)
{
    const int dx = get_global_id(0);
    const int dy = get_global_id(1);

    // Work-items outside the destination do nothing.
    if (dx >= dst_cols || dy >= dst_rows)
    {
        return;
    }

    // Source position of the destination pixel centre, split into the integer
    // cell (x, y) and the fractional weights (u, v).
    float sx = ((dx+0.5f) * ifx - 0.5f);
    float sy = ((dy+0.5f) * ify - 0.5f);
    int x = floor(sx);
    int y = floor(sy);

    float u = sx - x;
    float v = sy - y;

    // Clamp the cell to the image; a clamped axis gets a zero fractional weight.
    if (x < 0)
    {
        x = 0;
        u = 0;
    }
    if (x >= src_cols)
    {
        x = src_cols - 1;
        u = 0;
    }
    if (y < 0)
    {
        y = 0;
        v = 0;
    }
    if (y >= src_rows)
    {
        y = src_rows - 1;
        v = 0;
    }

    // Neighbouring row/column, clamped to the last valid index.
    const int y_ = min(y + 1, src_rows - 1);
    const int x_ = min(x + 1, src_cols - 1);

    // Byte offsets of the two columns within a row (start offset included).
    const int col0 = mad24(x, TSIZE, src_offset);
    const int col1 = mad24(x_, TSIZE, src_offset);

    // The 2x2 neighbourhood: data0/data1 on row y, data2/data3 on row y_.
    WT data0 = convertToWT(loadpix(srcptr + mad24(y, src_step, col0)));
    WT data1 = convertToWT(loadpix(srcptr + mad24(y, src_step, col1)));
    WT data2 = convertToWT(loadpix(srcptr + mad24(y_, src_step, col0)));
    WT data3 = convertToWT(loadpix(srcptr + mad24(y_, src_step, col1)));

#if depth <= 4
    // Fixed-point weights: U/V for the far neighbour, U1/V1 for the near one.
    u = u * INTER_RESIZE_COEF_SCALE;
    v = v * INTER_RESIZE_COEF_SCALE;

    int U = rint(u);
    int V = rint(v);
    int U1 = rint(INTER_RESIZE_COEF_SCALE - u);
    int V1 = rint(INTER_RESIZE_COEF_SCALE - v);

    WT val = mul24((WT)mul24(U1, V1), data0) + mul24((WT)mul24(U, V1), data1) +
             mul24((WT)mul24(U1, V), data2) + mul24((WT)mul24(U, V), data3);

    // Round to nearest while removing the CAST_BITS fractional bits.
    T uval = convertToDT((val + (1<<(CAST_BITS-1)))>>CAST_BITS);
#else
    float u1 = 1.f - u;
    float v1 = 1.f - v;

    T uval = u1 * v1 * data0 + u * v1 * data1 + u1 * v *data2 + u * v *data3;
#endif

    storepix(uval, dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
}
222
223#elif defined INTER_NEAREST
224
// Nearest-neighbour resize.
//
// Each work-item (dx, dy) copies the source pixel at
// (trunc(dx * ifx), trunc(dy * ify)), limited to the last source column/row.
//
//   srcptr, src_step, src_offset   source buffer, row stride and start offset
//                                  (bytes)
//   src_rows, src_cols             source size in pixels
//   dstptr, dst_step, dst_offset   destination buffer, row stride and start
//                                  offset (bytes)
//   dst_rows, dst_cols             destination size in pixels
//   ifx, ify                       factors that map destination coordinates to
//                                  source coordinates
__kernel void resizeNN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                       __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                       float ifx, float ify)
{
    const int dx = get_global_id(0);
    const int dy = get_global_id(1);

    // Work-items outside the destination do nothing.
    if (dx >= dst_cols || dy >= dst_rows)
    {
        return;
    }

    // Truncate the scaled coordinate toward zero and cap it at the last index.
    const int sx = min(convert_int_rtz(dx * ifx), src_cols - 1);
    const int sy = min(convert_int_rtz(dy * ify), src_rows - 1);

    const int src_index = mad24(sy, src_step, mad24(sx, TSIZE, src_offset));
    const int dst_index = mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset));

    storepix(loadpix(srcptr + src_index), dstptr + dst_index);
}
243
244#elif defined INTER_AREA
245
246#ifdef INTER_AREA_FAST
247
// Area resize for compile-time block sizes.
//
// Each work-item (dx, dy) sums the XSCALE x YSCALE block of source pixels that
// starts at (XSCALE * dx, YSCALE * dy), multiplies the sum by SCALE and stores
// the converted result. Block coordinates beyond the image are limited to the
// last source row/column. XSCALE, YSCALE and SCALE are not defined in this
// file and are expected to come from the build options.
//
//   src, src_step, src_offset   source buffer, row stride and start offset
//                               (bytes)
//   src_rows, src_cols          source size in pixels
//   dst, dst_step, dst_offset   destination buffer, row stride and start offset
//                               (bytes)
//   dst_rows, dst_cols          destination size in pixels
__kernel void resizeAREA_FAST(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
                              __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
    const int dx = get_global_id(0);
    const int dy = get_global_id(1);

    // Work-items outside the destination do nothing.
    if (dx >= dst_cols || dy >= dst_rows)
    {
        return;
    }

    // Top-left corner of the source block.
    const int block_x = XSCALE * dx;
    const int block_y = YSCALE * dy;

    WTV sum = (WTV)(0);

    #pragma unroll
    for (int py = 0; py < YSCALE; ++py)
    {
        const int row = min(block_y + py, src_rows - 1);
        const int row_base = mad24(row, src_step, src_offset);

        #pragma unroll
        for (int px = 0; px < XSCALE; ++px)
        {
            const int col = min(block_x + px, src_cols - 1);
            sum += convertToWTV(loadpix(src + row_base + col*TSIZE));
        }
    }

    const int dst_row_base = mad24(dy, dst_step, dst_offset);
    storepix(convertToT(convertToWT2V(sum) * (WT2V)(SCALE)), dst + mad24(dx, TSIZE, dst_row_base));
}
278
279#else
280
// General area resize driven by precomputed tables.
//
//   src, src_step, src_offset   source buffer, row stride and start offset
//                               (bytes)
//   src_rows, src_cols          source size in pixels
//   dst, dst_step, dst_offset   destination buffer, row stride and start offset
//                               (bytes)
//   dst_rows, dst_cols          destination size in pixels
//   ifx, ify                    not used by this kernel
//   ofs_tab                     x ranges at [0 .. dst_cols], y ranges starting
//                               at index dst_cols + 1
//   map_tab                     x source indices first, y source indices
//                               starting at index 2 * src_cols
//   alpha_tab                   x weights first, y weights starting at index
//                               2 * src_cols
//
// For destination pixel (dx, dy), ofs_tab gives a half-open range of table
// entries per axis. The first and last entries of map_tab in that range give
// the first and last source column/row; the weights are read sequentially from
// alpha_tab starting at the beginning of the range. The result is the weighted
// sum over that source rectangle.
__kernel void resizeAREA(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
                         __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                         float ifx, float ify, __global const int * ofs_tab,
                         __global const int * map_tab, __global const float * alpha_tab)
{
    const int dx = get_global_id(0);
    const int dy = get_global_id(1);

    // Work-items outside the destination do nothing.
    if (dx >= dst_cols || dy >= dst_rows)
    {
        return;
    }

    // Split each packed table into its x part and y part.
    __global const int * xofs_tab = ofs_tab;
    __global const int * yofs_tab = (__global const int *)(ofs_tab + dst_cols + 1);
    __global const int * xmap_tab = map_tab;
    __global const int * ymap_tab = (__global const int *)(map_tab + (src_cols << 1));
    __global const float * xalpha_tab = alpha_tab;
    __global const float * yalpha_tab = (__global const float *)(alpha_tab + (src_cols << 1));

    // Table-entry ranges [k0, k1) for this destination column and row.
    const int xk0 = xofs_tab[dx], xk1 = xofs_tab[dx + 1];
    const int yk0 = yofs_tab[dy], yk1 = yofs_tab[dy + 1];

    // First and last source column/row covered by those ranges.
    const int sx0 = xmap_tab[xk0], sx1 = xmap_tab[xk1 - 1];
    const int sy0 = ymap_tab[yk0], sy1 = ymap_tab[yk1 - 1];

    WTV sum = (WTV)(0);
    int row_base = mad24(sy0, src_step, src_offset);
    int yk = yk0;

    for (int sy = sy0; sy <= sy1; ++sy)
    {
        // Horizontal weighted sum of the current source row.
        WTV row_sum = (WTV)(0);
        int xk = xk0;

        for (int sx = sx0; sx <= sx1; ++sx)
        {
            WTV alpha = (WTV)(xalpha_tab[xk]);
            row_sum += convertToWTV(loadpix(src + mad24(sx, TSIZE, row_base))) * alpha;
            ++xk;
        }

        // Weight the row and accumulate, then advance to the next source row.
        WTV beta = (WTV)(yalpha_tab[yk]);
        sum += row_sum * beta;

        row_base += src_step;
        ++yk;
    }

    const int dst_row_base = mad24(dy, dst_step, dst_offset);
    storepix(convertToT(sum), dst + mad24(dx, TSIZE, dst_row_base));
}
325
326#endif
327
328#endif
329