1// This file is part of OpenCV project. 2// It is subject to the license terms in the LICENSE file found in the top-level directory 3// of this distribution and at http://opencv.org/license.html. 4 5// Copyright (C) 2014, Itseez, Inc., all rights reserved. 6// Third party copyrights are property of their respective owners. 7 8#ifdef DOUBLE_SUPPORT 9#ifdef cl_amd_fp64 10#pragma OPENCL EXTENSION cl_amd_fp64:enable 11#elif defined (cl_khr_fp64) 12#pragma OPENCL EXTENSION cl_khr_fp64:enable 13#endif 14#endif 15 16#ifdef DEPTH_0 17#define MIN_VAL 0 18#define MAX_VAL UCHAR_MAX 19#elif defined DEPTH_1 20#define MIN_VAL SCHAR_MIN 21#define MAX_VAL SCHAR_MAX 22#elif defined DEPTH_2 23#define MIN_VAL 0 24#define MAX_VAL USHRT_MAX 25#elif defined DEPTH_3 26#define MIN_VAL SHRT_MIN 27#define MAX_VAL SHRT_MAX 28#elif defined DEPTH_4 29#define MIN_VAL INT_MIN 30#define MAX_VAL INT_MAX 31#elif defined DEPTH_5 32#define MIN_VAL (-FLT_MAX) 33#define MAX_VAL FLT_MAX 34#elif defined DEPTH_6 35#define MIN_VAL (-DBL_MAX) 36#define MAX_VAL DBL_MAX 37#endif 38 39#define noconvert 40#define INDEX_MAX UINT_MAX 41 42#if wdepth <= 4 43#define MIN_ABS(a) convertFromU(abs(a)) 44#define MIN_ABS2(a, b) convertFromU(abs_diff(a, b)) 45#define MIN(a, b) min(a, b) 46#define MAX(a, b) max(a, b) 47#else 48#define MIN_ABS(a) fabs(a) 49#define MIN_ABS2(a, b) fabs(a - b) 50#define MIN(a, b) fmin(a, b) 51#define MAX(a, b) fmax(a, b) 52#endif 53 54#if kercn != 3 55#define loadpix(addr) *(__global const srcT *)(addr) 56#define srcTSIZE (int)sizeof(srcT) 57#else 58#define loadpix(addr) vload3(0, (__global const srcT1 *)(addr)) 59#define srcTSIZE ((int)sizeof(srcT1) * 3) 60#endif 61 62#ifndef HAVE_MASK 63#undef srcTSIZE 64#define srcTSIZE (int)sizeof(srcT1) 65#endif 66 67#ifdef NEED_MINVAL 68#ifdef NEED_MINLOC 69#define CALC_MIN(p, inc) \ 70 if (minval > temp.p) \ 71 { \ 72 minval = temp.p; \ 73 minloc = id + inc; \ 74 } 75#else 76#define CALC_MIN(p, inc) \ 77 minval = MIN(minval, temp.p); 78#endif 79#else 80#define CALC_MIN(p, inc) 81#endif 82 83#ifdef NEED_MAXVAL 84#ifdef NEED_MAXLOC 85#define CALC_MAX(p, inc) \ 86 if (maxval < temp.p) \ 87 { \ 88 maxval = temp.p; \ 89 maxloc = id + inc; \ 90 } 91#else 92#define CALC_MAX(p, inc) \ 93 maxval = MAX(maxval, temp.p); 94#endif 95#else 96#define CALC_MAX(p, inc) 97#endif 98 99#ifdef OP_CALC2 100#define CALC_MAX2(p) \ 101 maxval2 = MAX(maxval2, temp2.p); 102#else 103#define CALC_MAX2(p) 104#endif 105 106#define CALC_P(p, inc) \ 107 CALC_MIN(p, inc) \ 108 CALC_MAX(p, inc) \ 109 CALC_MAX2(p) 110 111__kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_offset, int cols, 112 int total, int groupnum, __global uchar * dstptr 113#ifdef HAVE_MASK 114 , __global const uchar * mask, int mask_step, int mask_offset 115#endif 116#ifdef HAVE_SRC2 117 , __global const uchar * src2ptr, int src2_step, int src2_offset 118#endif 119 ) 120{ 121 int lid = get_local_id(0); 122 int gid = get_group_id(0); 123 int id = get_global_id(0) 124#ifndef HAVE_MASK 125 * kercn; 126#else 127 ; 128#endif 129 130 srcptr += src_offset; 131#ifdef HAVE_MASK 132 mask += mask_offset; 133#endif 134#ifdef HAVE_SRC2 135 src2ptr += src2_offset; 136#endif 137 138#ifdef NEED_MINVAL 139 __local dstT1 localmem_min[WGS2_ALIGNED]; 140 dstT1 minval = MAX_VAL; 141#ifdef NEED_MINLOC 142 __local uint localmem_minloc[WGS2_ALIGNED]; 143 uint minloc = INDEX_MAX; 144#endif 145#endif 146#ifdef NEED_MAXVAL 147 dstT1 maxval = MIN_VAL; 148 __local dstT1 localmem_max[WGS2_ALIGNED]; 149#ifdef NEED_MAXLOC 150 __local uint localmem_maxloc[WGS2_ALIGNED]; 151 uint maxloc = INDEX_MAX; 152#endif 153#endif 154#ifdef OP_CALC2 155 __local dstT1 localmem_max2[WGS2_ALIGNED]; 156 dstT1 maxval2 = MIN_VAL; 157#endif 158 159 int src_index; 160#ifdef HAVE_MASK 161 int mask_index; 162#endif 163#ifdef HAVE_SRC2 164 int src2_index; 165#endif 166 167 dstT temp; 168#ifdef HAVE_SRC2 169 dstT temp2; 170#endif 171 172 for (int grain = groupnum * WGS 173#ifndef HAVE_MASK 174 * kercn 175#endif 176 ; id < total; id += grain) 177 { 178#ifdef HAVE_MASK 179#ifdef HAVE_MASK_CONT 180 mask_index = id; 181#else 182 mask_index = mad24(id / cols, mask_step, id % cols); 183#endif 184 if (mask[mask_index]) 185#endif 186 { 187#ifdef HAVE_SRC_CONT 188 src_index = id * srcTSIZE;//mul24(id, srcTSIZE); 189#else 190 src_index = mad24(id / cols, src_step, mul24(id % cols, srcTSIZE)); 191#endif 192 temp = convertToDT(loadpix(srcptr + src_index)); 193#ifdef OP_ABS 194 temp = MIN_ABS(temp); 195#endif 196 197#ifdef HAVE_SRC2 198#ifdef HAVE_SRC2_CONT 199 src2_index = id * srcTSIZE; //mul24(id, srcTSIZE); 200#else 201 src2_index = mad24(id / cols, src2_step, mul24(id % cols, srcTSIZE)); 202#endif 203 temp2 = convertToDT(loadpix(src2ptr + src2_index)); 204 temp = MIN_ABS2(temp, temp2); 205#ifdef OP_CALC2 206 temp2 = MIN_ABS(temp2); 207#endif 208#endif 209 210#if kercn == 1 211#ifdef NEED_MINVAL 212#ifdef NEED_MINLOC 213 if (minval > temp) 214 { 215 minval = temp; 216 minloc = id; 217 } 218#else 219 minval = MIN(minval, temp); 220#endif 221#endif 222#ifdef NEED_MAXVAL 223#ifdef NEED_MAXLOC 224 if (maxval < temp) 225 { 226 maxval = temp; 227 maxloc = id; 228 } 229#else 230 maxval = MAX(maxval, temp); 231#endif 232#ifdef OP_CALC2 233 maxval2 = MAX(maxval2, temp2); 234#endif 235#endif 236#elif kercn >= 2 237 CALC_P(s0, 0) 238 CALC_P(s1, 1) 239#if kercn >= 3 240 CALC_P(s2, 2) 241#if kercn >= 4 242 CALC_P(s3, 3) 243#if kercn >= 8 244 CALC_P(s4, 4) 245 CALC_P(s5, 5) 246 CALC_P(s6, 6) 247 CALC_P(s7, 7) 248#if kercn == 16 249 CALC_P(s8, 8) 250 CALC_P(s9, 9) 251 CALC_P(sA, 10) 252 CALC_P(sB, 11) 253 CALC_P(sC, 12) 254 CALC_P(sD, 13) 255 CALC_P(sE, 14) 256 CALC_P(sF, 15) 257#endif 258#endif 259#endif 260#endif 261#endif 262 } 263 } 264 265 if (lid < WGS2_ALIGNED) 266 { 267#ifdef NEED_MINVAL 268 localmem_min[lid] = minval; 269#endif 270#ifdef NEED_MAXVAL 271 localmem_max[lid] = maxval; 272#endif 273#ifdef NEED_MINLOC 274 localmem_minloc[lid] = minloc; 275#endif 276#ifdef NEED_MAXLOC 277 localmem_maxloc[lid] = maxloc; 278#endif 279#ifdef OP_CALC2 280 localmem_max2[lid] = maxval2; 281#endif 282 } 283 barrier(CLK_LOCAL_MEM_FENCE); 284 285 if (lid >= WGS2_ALIGNED && total >= WGS2_ALIGNED) 286 { 287 int lid3 = lid - WGS2_ALIGNED; 288#ifdef NEED_MINVAL 289#ifdef NEED_MINLOC 290 if (localmem_min[lid3] >= minval) 291 { 292 if (localmem_min[lid3] == minval) 293 localmem_minloc[lid3] = min(localmem_minloc[lid3], minloc); 294 else 295 localmem_minloc[lid3] = minloc, 296 localmem_min[lid3] = minval; 297 } 298#else 299 localmem_min[lid3] = MIN(localmem_min[lid3], minval); 300#endif 301#endif 302#ifdef NEED_MAXVAL 303#ifdef NEED_MAXLOC 304 if (localmem_max[lid3] <= maxval) 305 { 306 if (localmem_max[lid3] == maxval) 307 localmem_maxloc[lid3] = min(localmem_maxloc[lid3], maxloc); 308 else 309 localmem_maxloc[lid3] = maxloc, 310 localmem_max[lid3] = maxval; 311 } 312#else 313 localmem_max[lid3] = MAX(localmem_max[lid3], maxval); 314#endif 315#endif 316#ifdef OP_CALC2 317 localmem_max2[lid3] = MAX(localmem_max2[lid3], maxval2); 318#endif 319 } 320 barrier(CLK_LOCAL_MEM_FENCE); 321 322 for (int lsize = WGS2_ALIGNED >> 1; lsize > 0; lsize >>= 1) 323 { 324 if (lid < lsize) 325 { 326 int lid2 = lsize + lid; 327 328#ifdef NEED_MINVAL 329#ifdef NEED_MINLOC 330 if (localmem_min[lid] >= localmem_min[lid2]) 331 { 332 if (localmem_min[lid] == localmem_min[lid2]) 333 localmem_minloc[lid] = min(localmem_minloc[lid2], localmem_minloc[lid]); 334 else 335 localmem_minloc[lid] = localmem_minloc[lid2], 336 localmem_min[lid] = localmem_min[lid2]; 337 } 338#else 339 localmem_min[lid] = MIN(localmem_min[lid], localmem_min[lid2]); 340#endif 341#endif 342#ifdef NEED_MAXVAL 343#ifdef NEED_MAXLOC 344 if (localmem_max[lid] <= localmem_max[lid2]) 345 { 346 if (localmem_max[lid] == localmem_max[lid2]) 347 localmem_maxloc[lid] = min(localmem_maxloc[lid2], localmem_maxloc[lid]); 348 else 349 localmem_maxloc[lid] = localmem_maxloc[lid2], 350 localmem_max[lid] = localmem_max[lid2]; 351 } 352#else 353 localmem_max[lid] = MAX(localmem_max[lid], localmem_max[lid2]); 354#endif 355#endif 356#ifdef OP_CALC2 357 localmem_max2[lid] = MAX(localmem_max2[lid], localmem_max2[lid2]); 358#endif 359 } 360 barrier(CLK_LOCAL_MEM_FENCE); 361 } 362 363 if (lid == 0) 364 { 365 int pos = 0; 366#ifdef NEED_MINVAL 367 *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_min[0]; 368 pos = mad24(groupnum, (int)sizeof(dstT1), pos); 369#endif 370#ifdef NEED_MAXVAL 371 *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max[0]; 372 pos = mad24(groupnum, (int)sizeof(dstT1), pos); 373#endif 374#ifdef NEED_MINLOC 375 *(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_minloc[0]; 376 pos = mad24(groupnum, (int)sizeof(uint), pos); 377#endif 378#ifdef NEED_MAXLOC 379 *(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_maxloc[0]; 380#ifdef OP_CALC2 381 pos = mad24(groupnum, (int)sizeof(uint), pos); 382#endif 383#endif 384#ifdef OP_CALC2 385 *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max2[0]; 386#endif 387 } 388} 389