• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5//  By downloading, copying, installing or using the software you agree to this license.
6//  If you do not agree to this license, do not download, install,
7//  copy or use the software.
8//
9//
10//                           License Agreement
11//                For Open Source Computer Vision Library
12//
13// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
14// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16// Third party copyrights are property of their respective owners.
17//
18// @Authors
19//    Jia Haipeng, jiahaipeng95@gmail.com
20//
21//
22// Redistribution and use in source and binary forms, with or without modification,
23// are permitted provided that the following conditions are met:
24//
25//   * Redistribution's of source code must retain the above copyright notice,
26//     this list of conditions and the following disclaimer.
27//
28//   * Redistribution's in binary form must reproduce the above copyright notice,
29//     this list of conditions and the following disclaimer in the documentation
30//     and/or other materials provided with the distribution.
31//
32//   * The name of the copyright holders may not be used to endorse or promote products
33//     derived from this software without specific prior written permission.
34//
35// This software is provided by the copyright holders and contributors as is and
36// any express or implied warranties, including, but not limited to, the implied
37// warranties of merchantability and fitness for a particular purpose are disclaimed.
38// In no event shall the copyright holders or contributors be liable for any direct,
39// indirect, incidental, special, exemplary, or consequential damages
40// (including, but not limited to, procurement of substitute goods or services;
41// loss of use, data, or profits; or business interruption) however caused
42// and on any theory of liability, whether in contract, strict liability,
43// or tort (including negligence or otherwise) arising in any way out of
44// the use of this software, even if advised of the possibility of such damage.
45//
46//M*/
47
48/*
49  Usage:
50     after compiling this program user gets a single kernel called KF.
51     the following flags should be passed:
52     1) one of "-D BINARY_OP", "-D UNARY_OP", "-D MASK_BINARY_OP" or "-D MASK_UNARY_OP"
53     2) the actual operation performed, one of "-D OP_...", see below the list of operations.
54     2a) "-D dstDepth=<destination depth> [-D cn=<num channels]"
55         for some operations, like min/max/and/or/xor it's enough
56     2b) "-D srcDepth1=<source1 depth> -D srcDepth2=<source2 depth> -D dstDepth=<destination depth>
57          -D workDepth=<work depth> [-D cn=<num channels>]" - for mixed-type operations
58*/
59
60#ifdef DOUBLE_SUPPORT
61#ifdef cl_amd_fp64
62#pragma OPENCL EXTENSION cl_amd_fp64:enable
63#elif defined cl_khr_fp64
64#pragma OPENCL EXTENSION cl_khr_fp64:enable
65#endif
66#endif
67
68#ifdef INTEL_DEVICE
69#pragma OPENCL FP_CONTRACT ON
70#pragma OPENCL FP_FAST_FMAF ON
71#pragma OPENCL FP_FAST_FMA ON
72#endif
73
74#if depth <= 5
75#define CV_PI M_PI_F
76#else
77#define CV_PI M_PI
78#endif
79
80#ifndef cn
81#define cn 1
82#endif
83
84#if cn == 1
85#undef srcT1_C1
86#undef srcT2_C1
87#undef dstT_C1
88#define srcT1_C1 srcT1
89#define srcT2_C1 srcT2
90#define dstT_C1 dstT
91#endif
92
93#if cn != 3
94    #define storedst(val) *(__global dstT *)(dstptr + dst_index) = val
95    #define storedst2(val) *(__global dstT *)(dstptr2 + dst_index2) = val
96#else
97    #define storedst(val) vstore3(val, 0, (__global dstT_C1 *)(dstptr + dst_index))
98    #define storedst2(val) vstore3(val, 0, (__global dstT_C1 *)(dstptr2 + dst_index2))
99#endif
100
101#define noconvert
102
103#ifndef workT
104
105    #ifndef srcT1
106    #define srcT1 dstT
107    #endif
108
109    #ifndef srcT1_C1
110    #define srcT1_C1 dstT_C1
111    #endif
112
113    #ifndef srcT2
114    #define srcT2 dstT
115    #endif
116
117    #ifndef srcT2_C1
118    #define srcT2_C1 dstT_C1
119    #endif
120
121    #define workT dstT
122    #if cn != 3
123        #define srcelem1 *(__global srcT1 *)(srcptr1 + src1_index)
124        #define srcelem2 *(__global srcT2 *)(srcptr2 + src2_index)
125    #else
126        #define srcelem1 vload3(0, (__global srcT1_C1 *)(srcptr1 + src1_index))
127        #define srcelem2 vload3(0, (__global srcT2_C1 *)(srcptr2 + src2_index))
128    #endif
129    #ifndef convertToDT
130    #define convertToDT noconvert
131    #endif
132
133#else
134
135    #ifndef convertToWT2
136    #define convertToWT2 convertToWT1
137    #endif
138    #if cn != 3
139        #define srcelem1 convertToWT1(*(__global srcT1 *)(srcptr1 + src1_index))
140        #define srcelem2 convertToWT2(*(__global srcT2 *)(srcptr2 + src2_index))
141    #else
142        #define srcelem1 convertToWT1(vload3(0, (__global srcT1_C1 *)(srcptr1 + src1_index)))
143        #define srcelem2 convertToWT2(vload3(0, (__global srcT2_C1 *)(srcptr2 + src2_index)))
144    #endif
145
146#endif
147
148#ifndef workST
149#define workST workT
150#endif
151
152#define EXTRA_PARAMS
153#define EXTRA_INDEX
154#define EXTRA_INDEX_ADD
155
156#if defined OP_ADD
157#define PROCESS_ELEM storedst(convertToDT(srcelem1 + srcelem2))
158
159#elif defined OP_SUB
160#define PROCESS_ELEM storedst(convertToDT(srcelem1 - srcelem2))
161
162#elif defined OP_RSUB
163#define PROCESS_ELEM storedst(convertToDT(srcelem2 - srcelem1))
164
165#elif defined OP_ABSDIFF
166#if wdepth <= 4
167#define PROCESS_ELEM \
168    storedst(convertToDT(convertFromU(abs_diff(srcelem1, srcelem2))))
169#else
170#define PROCESS_ELEM \
171    storedst(convertToDT(fabs(srcelem1 - srcelem2)))
172#endif
173
174#elif defined OP_AND
175#define PROCESS_ELEM storedst(srcelem1 & srcelem2)
176
177#elif defined OP_OR
178#define PROCESS_ELEM storedst(srcelem1 | srcelem2)
179
180#elif defined OP_XOR
181#define PROCESS_ELEM storedst(srcelem1 ^ srcelem2)
182
183#elif defined OP_NOT
184#define PROCESS_ELEM storedst(~srcelem1)
185
186#elif defined OP_MIN
187#define PROCESS_ELEM storedst(min(srcelem1, srcelem2))
188
189#elif defined OP_MAX
190#define PROCESS_ELEM storedst(max(srcelem1, srcelem2))
191
192#elif defined OP_MUL
193#define PROCESS_ELEM storedst(convertToDT(srcelem1 * srcelem2))
194
195#elif defined OP_MUL_SCALE
196#undef EXTRA_PARAMS
197#ifdef UNARY_OP
198#define EXTRA_PARAMS , workST srcelem2_, scaleT scale
199#undef srcelem2
200#define srcelem2 srcelem2_
201#else
202#define EXTRA_PARAMS , scaleT scale
203#endif
204#define PROCESS_ELEM storedst(convertToDT(srcelem1 * scale * srcelem2))
205
206#elif defined OP_DIV
207#define PROCESS_ELEM \
208        workT e2 = srcelem2, zero = (workT)(0); \
209        storedst(convertToDT(e2 != zero ? srcelem1 / e2 : zero))
210
211#elif defined OP_DIV_SCALE
212#undef EXTRA_PARAMS
213#ifdef UNARY_OP
214#define EXTRA_PARAMS , workST srcelem2_, scaleT scale
215#undef srcelem2
216#define srcelem2 srcelem2_
217#else
218#define EXTRA_PARAMS , scaleT scale
219#endif
220#define PROCESS_ELEM \
221        workT e2 = srcelem2, zero = (workT)(0); \
222        storedst(convertToDT(e2 == zero ? zero : (srcelem1 * (workT)(scale) / e2)))
223
224#elif defined OP_RDIV_SCALE
225#undef EXTRA_PARAMS
226#ifdef UNARY_OP
227#define EXTRA_PARAMS , workST srcelem2_, scaleT scale
228#undef srcelem2
229#define srcelem2 srcelem2_
230#else
231#define EXTRA_PARAMS , scaleT scale
232#endif
233#define PROCESS_ELEM \
234        workT e1 = srcelem1, zero = (workT)(0); \
235        storedst(convertToDT(e1 == zero ? zero : (srcelem2 * (workT)(scale) / e1)))
236
237#elif defined OP_RECIP_SCALE
238#undef EXTRA_PARAMS
239#define EXTRA_PARAMS , scaleT scale
240#define PROCESS_ELEM \
241        workT e1 = srcelem1, zero = (workT)(0); \
242        storedst(convertToDT(e1 != zero ? scale / e1 : zero))
243
244#elif defined OP_ADDW
245#undef EXTRA_PARAMS
246#define EXTRA_PARAMS , scaleT alpha, scaleT beta, scaleT gamma
247#if wdepth <= 4
248#define PROCESS_ELEM storedst(convertToDT(mad24(srcelem1, alpha, mad24(srcelem2, beta, gamma))))
249#else
250#define PROCESS_ELEM storedst(convertToDT(fma(srcelem1, alpha, fma(srcelem2, beta, gamma))))
251#endif
252
253#elif defined OP_MAG
254#define PROCESS_ELEM storedst(hypot(srcelem1, srcelem2))
255
256#elif defined OP_PHASE_RADIANS
257#define PROCESS_ELEM \
258    workT tmp = atan2(srcelem2, srcelem1); \
259    if (tmp < 0) \
260        tmp += 2 * CV_PI; \
261    storedst(tmp)
262
263#elif defined OP_PHASE_DEGREES
264    #define PROCESS_ELEM \
265    workT tmp = degrees(atan2(srcelem2, srcelem1)); \
266    if (tmp < 0) \
267        tmp += 360; \
268    storedst(tmp)
269
270#elif defined OP_EXP
271#if wdepth == 5
272#define PROCESS_ELEM storedst(native_exp(srcelem1))
273#else
274#define PROCESS_ELEM storedst(exp(srcelem1))
275#endif
276
277#elif defined OP_POW
278#define PROCESS_ELEM storedst(pow(srcelem1, srcelem2))
279
280#elif defined OP_POWN
281#undef workT
282#define workT int
283#define PROCESS_ELEM storedst(pown(srcelem1, srcelem2))
284
285#elif defined OP_SQRT
286#if depth <= 5
287#define PROCESS_ELEM storedst(native_sqrt(srcelem1))
288#else
289#define PROCESS_ELEM storedst(sqrt(srcelem1))
290#endif
291
292#elif defined OP_LOG
293#define PROCESS_ELEM \
294    storedst(log(fabs(srcelem1)))
295
296#elif defined OP_CMP
297#define srcT2 srcT1
298#ifndef convertToWT1
299#define convertToWT1
300#endif
301#define PROCESS_ELEM \
302    storedst(srcelem1 CMP_OPERATOR srcelem2 ? (dstT)(255) : (dstT)(0))
303
304#elif defined OP_CONVERT_SCALE_ABS
305#undef EXTRA_PARAMS
306#define EXTRA_PARAMS , workT1 alpha, workT1 beta
307#if wdepth <= 4
308#define PROCESS_ELEM \
309    workT value = mad24(srcelem1, (workT)(alpha), (workT)(beta)); \
310    storedst(convertToDT(abs(value)))
311#else
312#define PROCESS_ELEM \
313    workT value = fma(srcelem1, (workT)(alpha), (workT)(beta)); \
314    storedst(convertToDT(fabs(value)))
315#endif
316
317#elif defined OP_SCALE_ADD
318#undef EXTRA_PARAMS
319#define EXTRA_PARAMS , workT1 alpha
320#if wdepth <= 4
321#define PROCESS_ELEM storedst(convertToDT(mad24(srcelem1, (workT)(alpha), srcelem2)))
322#else
323#define PROCESS_ELEM storedst(convertToDT(fma(srcelem1, (workT)(alpha), srcelem2)))
324#endif
325
326#elif defined OP_CTP_AD || defined OP_CTP_AR
327#if depth <= 5
328#define CV_EPSILON FLT_EPSILON
329#else
330#define CV_EPSILON DBL_EPSILON
331#endif
332#ifdef OP_CTP_AD
333#define TO_DEGREE cartToPolar = degrees(cartToPolar);
334#elif defined OP_CTP_AR
335#define TO_DEGREE
336#endif
337#define PROCESS_ELEM \
338    dstT x = srcelem1, y = srcelem2; \
339    dstT x2 = x * x, y2 = y * y; \
340    dstT magnitude = sqrt(x2 + y2); \
341    dstT tmp = y >= 0 ? 0 : CV_PI * 2; \
342    tmp = x < 0 ? CV_PI : tmp; \
343    dstT tmp1 = y >= 0 ? CV_PI * 0.5f : CV_PI * 1.5f; \
344    dstT cartToPolar = y2 <= x2 ? x * y / mad((dstT)(0.28f), y2, x2 + CV_EPSILON) + tmp : (tmp1 - x * y / mad((dstT)(0.28f), x2, y2 + CV_EPSILON)); \
345    TO_DEGREE \
346    storedst(magnitude); \
347    storedst2(cartToPolar)
348
349#elif defined OP_PTC_AD || defined OP_PTC_AR
350#ifdef OP_PTC_AD
351#define FROM_DEGREE y = radians(y)
352#else
353#define FROM_DEGREE
354#endif
355#define PROCESS_ELEM \
356    dstT x = srcelem1, y = srcelem2, cosval; \
357    FROM_DEGREE; \
358    storedst2(sincos(y, &cosval) * x); \
359    storedst(cosval * x);
360
361#elif defined OP_PATCH_NANS
362#undef EXTRA_PARAMS
363#define EXTRA_PARAMS , dstT val
364#define PROCESS_ELEM \
365    if (isnan(srcelem1)) \
366        storedst(val)
367
368#else
369#error "unknown op type"
370#endif
371
372#if defined OP_CTP_AD || defined OP_CTP_AR || defined OP_PTC_AD || defined OP_PTC_AR
373    #undef EXTRA_PARAMS
374    #define EXTRA_PARAMS , __global uchar* dstptr2, int dststep2, int dstoffset2
375    #undef EXTRA_INDEX
376    #define EXTRA_INDEX int dst_index2 = mad24(y0, dststep2, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset2))
377    #undef EXTRA_INDEX_ADD
378    #define EXTRA_INDEX_ADD dst_index2 += dststep2
379#endif
380
381#if defined UNARY_OP || defined MASK_UNARY_OP
382
383#if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \
384    defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \
385    defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX || defined OP_POW || \
386    defined OP_MUL || defined OP_DIV || defined OP_POWN || defined OP_POWR || defined OP_ROOTN
387    #undef EXTRA_PARAMS
388    #define EXTRA_PARAMS , workST srcelem2_
389    #undef srcelem2
390    #define srcelem2 srcelem2_
391#endif
392
393#if cn == 3
394#undef srcelem2
395#define srcelem2 (workT)(srcelem2_.x, srcelem2_.y, srcelem2_.z)
396#endif
397
398#endif
399
400#if defined BINARY_OP
401
402__kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
403                 __global const uchar * srcptr2, int srcstep2, int srcoffset2,
404                 __global uchar * dstptr, int dststep, int dstoffset,
405                 int rows, int cols EXTRA_PARAMS )
406{
407    int x = get_global_id(0);
408    int y0 = get_global_id(1) * rowsPerWI;
409
410    if (x < cols)
411    {
412        int src1_index = mad24(y0, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
413#if !(defined(OP_RECIP_SCALE) || defined(OP_NOT))
414        int src2_index = mad24(y0, srcstep2, mad24(x, (int)sizeof(srcT2_C1) * cn, srcoffset2));
415#endif
416        int dst_index  = mad24(y0, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
417        EXTRA_INDEX;
418
419        for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, src1_index += srcstep1, dst_index += dststep)
420        {
421            PROCESS_ELEM;
422#if !(defined(OP_RECIP_SCALE) || defined(OP_NOT))
423            src2_index += srcstep2;
424#endif
425            EXTRA_INDEX_ADD;
426        }
427    }
428}
429
430#elif defined MASK_BINARY_OP
431
432__kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
433                 __global const uchar * srcptr2, int srcstep2, int srcoffset2,
434                 __global const uchar * mask, int maskstep, int maskoffset,
435                 __global uchar * dstptr, int dststep, int dstoffset,
436                 int rows, int cols EXTRA_PARAMS )
437{
438    int x = get_global_id(0);
439    int y0 = get_global_id(1) * rowsPerWI;
440
441    if (x < cols)
442    {
443        int mask_index = mad24(y0, maskstep, x + maskoffset);
444        int src1_index = mad24(y0, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
445        int src2_index = mad24(y0, srcstep2, mad24(x, (int)sizeof(srcT2_C1) * cn, srcoffset2));
446        int dst_index  = mad24(y0, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
447
448        for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, src1_index += srcstep1, src2_index += srcstep2,
449                                                                mask_index += maskstep, dst_index += dststep)
450            if (mask[mask_index])
451            {
452                PROCESS_ELEM;
453            }
454    }
455}
456
457#elif defined UNARY_OP
458
459__kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
460                 __global uchar * dstptr, int dststep, int dstoffset,
461                 int rows, int cols EXTRA_PARAMS )
462{
463    int x = get_global_id(0);
464    int y0 = get_global_id(1) * rowsPerWI;
465
466    if (x < cols)
467    {
468        int src1_index = mad24(y0, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
469        int dst_index  = mad24(y0, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
470
471        for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, src1_index += srcstep1, dst_index += dststep)
472        {
473            PROCESS_ELEM;
474        }
475    }
476}
477
478#elif defined MASK_UNARY_OP
479
480__kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
481                 __global const uchar * mask, int maskstep, int maskoffset,
482                 __global uchar * dstptr, int dststep, int dstoffset,
483                 int rows, int cols EXTRA_PARAMS )
484{
485    int x = get_global_id(0);
486    int y0 = get_global_id(1) * rowsPerWI;
487
488    if (x < cols)
489    {
490        int mask_index = mad24(y0, maskstep, x + maskoffset);
491        int src1_index = mad24(y0, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
492        int dst_index  = mad24(y0, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
493
494        for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, src1_index += srcstep1, mask_index += maskstep, dst_index += dststep)
495            if (mask[mask_index])
496            {
497                PROCESS_ELEM;
498            }
499    }
500}
501
502#else
503
504#error "Unknown operation type"
505
506#endif
507