/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
//    Zhang Ying, zhangying913@gmail.com
//    Niko Li, newlife20080214@gmail.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

// Image resize kernels. Exactly one kernel is compiled, selected by the build
// defines USE_SAMPLER / INTER_LINEAR_INTEGER / INTER_LINEAR / INTER_NEAREST /
// INTER_AREA (+ INTER_AREA_FAST). The pixel types (T, T1, WT, WTV, WT2V), the
// conversion macros (convertToDT, convertToWT, convertToWTV, convertToWT2V,
// convertToT) and the constants cn, depth, INTER_RESIZE_COEF_BITS, XSCALE,
// YSCALE and SCALE are not defined in this file and must come from the build
// options.

#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif

#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)
#define CAST_BITS (INTER_RESIZE_COEF_BITS << 1)
// Index of the next element, saturated at the last valid index l-1.
#define INC(x,l) min((x)+1,(l)-1)

#define noconvert

// Pixel load/store helpers. 3-channel pixels go through vload3/vstore3 on the
// scalar type T1; every other channel count dereferences a T directly.
// TSIZE is the size of one whole pixel in bytes.
#if cn != 3
#define loadpix(addr)  *(__global const T *)(addr)
#define storepix(val, addr)  *(__global T *)(addr) = val
#define TSIZE ((int)sizeof(T))
#else
#define loadpix(addr)  vload3(0, (__global const T1 *)(addr))
#define storepix(val, addr) vstore3(val, 0, (__global T1 *)(addr))
#define TSIZE ((int)sizeof(T1)*cn)
#endif

#if defined USE_SAMPLER

#if cn == 1
#define READ_IMAGE(X,Y,Z)  read_imagef(X,Y,Z).x
#define INTERMEDIATE_TYPE  float
#elif cn == 2
#define READ_IMAGE(X,Y,Z)  read_imagef(X,Y,Z).xy
#define INTERMEDIATE_TYPE  float2
#elif cn == 3
#define READ_IMAGE(X,Y,Z)  read_imagef(X,Y,Z).xyz
#define INTERMEDIATE_TYPE  float3
#elif cn == 4
#define READ_IMAGE(X,Y,Z)  read_imagef(X,Y,Z)
#define INTERMEDIATE_TYPE  float4
#endif

#define __CAT(x, y) x##y
#define CAT(x, y) __CAT(x, y)
//#define INTERMEDIATE_TYPE CAT(float, cn)
#define float1 float

// Factor applied to the value returned by read_imagef before it is converted
// to the destination type; chosen per `depth`.
#if depth == 0
#define RESULT_SCALE    255.0f
#elif depth == 1
#define RESULT_SCALE    127.0f
#elif depth == 2
#define RESULT_SCALE    65535.0f
#elif depth == 3
#define RESULT_SCALE    32767.0f
#else
#define RESULT_SCALE    1.0f
#endif

// Resize using the image sampler: one work-item per destination pixel
// (global id 0 -> column, global id 1 -> row).
//
//   srcImage              source image, read with unnormalized coordinates,
//                         clamp-to-edge addressing and linear filtering
//   dstptr/dststep/dstoffset
//                         destination buffer, row stride and start offset,
//                         all in bytes (they are added to a uchar pointer)
//   dstrows, dstcols      destination size; work-items outside it store nothing
//   ifx, ify              factors mapping a destination pixel centre to source
//                         coordinates: s = (d + 0.5) * factor
__kernel void resizeSampler(__read_only image2d_t srcImage,
                            __global uchar* dstptr, int dststep, int dstoffset,
                            int dstrows, int dstcols,
                            float ifx, float ify)
{
    const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
                              CLK_ADDRESS_CLAMP_TO_EDGE |
                              CLK_FILTER_LINEAR;

    int dx = get_global_id(0);
    int dy = get_global_id(1);

    float sx = ((dx+0.5f) * ifx), sy = ((dy+0.5f) * ify);

    INTERMEDIATE_TYPE intermediate = READ_IMAGE(srcImage, sampler, (float2)(sx, sy));

    // For depth <= 4 the scaled value is rounded before conversion; other
    // depths are converted without rounding.
#if depth <= 4
    T uval = convertToDT(round(intermediate * RESULT_SCALE));
#else
    T uval = convertToDT(intermediate * RESULT_SCALE);
#endif

    if(dx < dstcols && dy < dstrows)
    {
        storepix(uval, dstptr + mad24(dy, dststep, dstoffset + dx*TSIZE));
    }
}

#elif defined INTER_LINEAR_INTEGER

// Bilinear resize using precomputed integer offsets and fixed-point weights:
// one work-item per destination pixel.
//
//   srcptr/src_step/src_offset, dstptr/dst_step/dst_offset
//                         buffers with row stride and start offset in bytes
//   src_rows, src_cols, dst_rows, dst_cols
//                         image sizes in pixels
//   buffer                packed lookup tables, in this order:
//                           int   xofs[dst_cols]      source column per dst column
//                           int   yofs[dst_rows]      source row per dst row
//                           short ialpha[2*dst_cols]  (left, right) column weights
//                           short ibeta[2*dst_rows]   (top, bottom) row weights
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                       __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                       __global const uchar * buffer)
{
    int dx = get_global_id(0);
    int dy = get_global_id(1);

    if (dx < dst_cols && dy < dst_rows)
    {
        __global const int * xofs = (__global const int *)(buffer), * yofs = xofs + dst_cols;
        __global const short * ialpha = (__global const short *)(yofs + dst_rows);
        __global const short * ibeta = ialpha + ((dst_cols + dy) << 1);
        ialpha += dx << 1;

        // Clamp both source columns the same way the rows are clamped. The
        // right-hand neighbour used to be read at "sx0 + 1" unconditionally,
        // which is one pixel past the end of the row when sx0 is the last
        // column (and past the end of the buffer on the last row).
        // NOTE(review): the result is unchanged provided the table gives the
        // right-hand sample a zero weight at the last column - confirm against
        // the host code that fills `buffer`.
        int sx0 = clamp(xofs[dx], 0, src_cols - 1),
            sx1 = min(sx0 + 1, src_cols - 1);
        int sy0 = clamp(yofs[dy], 0, src_rows - 1),
            sy1 = clamp(yofs[dy] + 1, 0, src_rows - 1);
        short a0 = ialpha[0], a1 = ialpha[1];
        short b0 = ibeta[0], b1 = ibeta[1];

        // Byte offsets of the start of the two source rows.
        int src_row0 = mad24(sy0, src_step, src_offset),
            src_row1 = mad24(sy1, src_step, src_offset);
        WT data0 = convertToWT(loadpix(srcptr + mad24(sx0, TSIZE, src_row0)));
        WT data1 = convertToWT(loadpix(srcptr + mad24(sx1, TSIZE, src_row0)));
        WT data2 = convertToWT(loadpix(srcptr + mad24(sx0, TSIZE, src_row1)));
        WT data3 = convertToWT(loadpix(srcptr + mad24(sx1, TSIZE, src_row1)));

        // Horizontal blend, then vertical blend, dropping 4 + 16 bits on the
        // way; the final (val + 2) >> 2 removes the last 2 bits with rounding.
        WT val = ( (((data0 * a0 + data1 * a1) >> 4) * b0) >> 16) +
                 ( (((data2 * a0 + data3 * a1) >> 4) * b1) >> 16);

        storepix(convertToDT((val + 2) >> 2),
                 dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
    }
}

#elif defined INTER_LINEAR

// Bilinear resize computing the weights on the fly: one work-item per
// destination pixel.
//
//   srcptr/src_step/src_offset, dstptr/dst_step/dst_offset
//                         buffers with row stride and start offset in bytes
//   src_rows, src_cols, dst_rows, dst_cols
//                         image sizes in pixels
//   ifx, ify              factors mapping destination to source coordinates:
//                         s = (d + 0.5) * factor - 0.5
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                       __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                       float ifx, float ify)
{
    int dx = get_global_id(0);
    int dy = get_global_id(1);

    if (dx < dst_cols && dy < dst_rows)
    {
        float sx = ((dx+0.5f) * ifx - 0.5f), sy = ((dy+0.5f) * ify - 0.5f);
        int x = floor(sx), y = floor(sy);

        // Fractional position inside the source cell.
        float u = sx - x, v = sy - y;

        // Outside the source image: snap to the border pixel and drop the
        // fractional part so only that pixel contributes along this axis.
        if ( x<0 ) x=0,u=0;
        if ( x>=src_cols ) x=src_cols-1,u=0;
        if ( y<0 ) y=0,v=0;
        if ( y>=src_rows ) y=src_rows-1,v=0;

        // Right / bottom neighbours, saturated at the last column / row.
        int y_ = INC(y, src_rows);
        int x_ = INC(x, src_cols);

#if depth <= 4
        // Fixed-point path: weights are scaled by INTER_RESIZE_COEF_SCALE and
        // the weighted sum is shifted back by CAST_BITS with rounding.
        u = u * INTER_RESIZE_COEF_SCALE;
        v = v * INTER_RESIZE_COEF_SCALE;

        int U = rint(u);
        int V = rint(v);
        int U1 = rint(INTER_RESIZE_COEF_SCALE - u);
        int V1 = rint(INTER_RESIZE_COEF_SCALE - v);

        WT data0 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))));
        WT data1 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x_, TSIZE, src_offset))));
        WT data2 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x, TSIZE, src_offset))));
        WT data3 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x_, TSIZE, src_offset))));

        WT val = mul24((WT)mul24(U1, V1), data0) + mul24((WT)mul24(U, V1), data1) +
                 mul24((WT)mul24(U1, V), data2) + mul24((WT)mul24(U, V), data3);

        T uval = convertToDT((val + (1<<(CAST_BITS-1)))>>CAST_BITS);
#else
        // Floating-point path: plain weighted sum of the four neighbours.
        float u1 = 1.f - u;
        float v1 = 1.f - v;
        WT data0 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))));
        WT data1 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x_, TSIZE, src_offset))));
        WT data2 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x, TSIZE, src_offset))));
        WT data3 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x_, TSIZE, src_offset))));

        T uval = u1 * v1 * data0 + u * v1 * data1 + u1 * v *data2 + u * v *data3;
#endif
        storepix(uval, dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
    }
}

#elif defined INTER_NEAREST

// Nearest-neighbour resize: one work-item per destination pixel. The source
// index is d * factor truncated toward zero and capped at the last source
// column / row.
//
//   srcptr/src_step/src_offset, dstptr/dst_step/dst_offset
//                         buffers with row stride and start offset in bytes
//   src_rows, src_cols, dst_rows, dst_cols
//                         image sizes in pixels
//   ifx, ify              destination-to-source scale factors
__kernel void resizeNN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                       __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                       float ifx, float ify)
{
    int dx = get_global_id(0);
    int dy = get_global_id(1);

    if (dx < dst_cols && dy < dst_rows)
    {
        float s1 = dx * ifx;
        float s2 = dy * ify;
        int sx = min(convert_int_rtz(s1), src_cols - 1);
        int sy = min(convert_int_rtz(s2), src_rows - 1);

        storepix(loadpix(srcptr + mad24(sy, src_step, mad24(sx, TSIZE, src_offset))),
                 dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
    }
}

#elif defined INTER_AREA

#ifdef INTER_AREA_FAST

// Area resize for compile-time block sizes: each destination pixel is the sum
// of an XSCALE x YSCALE block of source pixels multiplied by SCALE. Source
// coordinates are capped at the last column / row, so blocks that overhang the
// image re-read the border pixels.
//
//   src/src_step/src_offset, dst/dst_step/dst_offset
//                         buffers with row stride and start offset in bytes
//   src_rows, src_cols, dst_rows, dst_cols
//                         image sizes in pixels
__kernel void resizeAREA_FAST(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
                              __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
    int dx = get_global_id(0);
    int dy = get_global_id(1);

    if (dx < dst_cols && dy < dst_rows)
    {
        int dst_index = mad24(dy, dst_step, dst_offset);

        // Top-left corner of the source block for this destination pixel.
        int sx = XSCALE * dx;
        int sy = YSCALE * dy;
        WTV sum = (WTV)(0);

        #pragma unroll
        for (int py = 0; py < YSCALE; ++py)
        {
            int y = min(sy + py, src_rows - 1);
            int src_index = mad24(y, src_step, src_offset);
            #pragma unroll
            for (int px = 0; px < XSCALE; ++px)
            {
                int x = min(sx + px, src_cols - 1);
                sum += convertToWTV(loadpix(src + src_index + x*TSIZE));
            }
        }

        storepix(convertToT(convertToWT2V(sum) * (WT2V)(SCALE)), dst + mad24(dx, TSIZE, dst_index));
    }
}

#else

// General area resize driven by precomputed tables: each destination pixel is
// a weighted sum over a rectangle of source pixels.
//
//   src/src_step/src_offset, dst/dst_step/dst_offset
//                         buffers with row stride and start offset in bytes
//   src_rows, src_cols, dst_rows, dst_cols
//                         image sizes in pixels
//   ifx, ify              not read by this kernel
//   ofs_tab               x entries first, y entries starting at index
//                         dst_cols + 1; entries [d] and [d + 1] delimit the
//                         range of map/alpha entries belonging to destination
//                         index d
//   map_tab               source indices; x entries first, y entries starting
//                         at index 2 * src_cols
//   alpha_tab             weights, laid out like map_tab
__kernel void resizeAREA(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
                         __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                         float ifx, float ify, __global const int * ofs_tab,
                         __global const int * map_tab, __global const float * alpha_tab)
{
    int dx = get_global_id(0);
    int dy = get_global_id(1);

    if (dx < dst_cols && dy < dst_rows)
    {
        int dst_index = mad24(dy, dst_step, dst_offset);

        __global const int * xmap_tab = map_tab;
        __global const int * ymap_tab = (__global const int *)(map_tab + (src_cols << 1));
        __global const float * xalpha_tab = alpha_tab;
        __global const float * yalpha_tab = (__global const float *)(alpha_tab + (src_cols << 1));
        __global const int * xofs_tab = ofs_tab;
        __global const int * yofs_tab = (__global const int *)(ofs_tab + dst_cols + 1);

        // Half-open ranges [k0, k1) of table entries for this pixel.
        int xk0 = xofs_tab[dx], xk1 = xofs_tab[dx + 1];
        int yk0 = yofs_tab[dy], yk1 = yofs_tab[dy + 1];

        // First and last source row / column covered (inclusive).
        int sy0 = ymap_tab[yk0], sy1 = ymap_tab[yk1 - 1];
        int sx0 = xmap_tab[xk0], sx1 = xmap_tab[xk1 - 1];

        WTV sum = (WTV)(0), buf;
        int src_index = mad24(sy0, src_step, src_offset);

        for (int sy = sy0, yk = yk0; sy <= sy1; ++sy, src_index += src_step, ++yk)
        {
            WTV beta = (WTV)(yalpha_tab[yk]);
            buf = (WTV)(0);

            // Weighted sum along the row, then weight the row itself.
            for (int sx = sx0, xk = xk0; sx <= sx1; ++sx, ++xk)
            {
                WTV alpha = (WTV)(xalpha_tab[xk]);
                buf += convertToWTV(loadpix(src + mad24(sx, TSIZE, src_index))) * alpha;
            }
            sum += buf * beta;
        }

        storepix(convertToT(sum), dst + mad24(dx, TSIZE, dst_index));
    }
}

#endif

#endif