• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// This file is part of OpenCV project.
2// It is subject to the license terms in the LICENSE file found in the top-level directory
3// of this distribution and at http://opencv.org/license.html.
4
5// Copyright (C) 2014, Itseez, Inc., all rights reserved.
6// Third party copyrights are property of their respective owners.
7
8#ifdef DOUBLE_SUPPORT
9#ifdef cl_amd_fp64
10#pragma OPENCL EXTENSION cl_amd_fp64:enable
11#elif defined (cl_khr_fp64)
12#pragma OPENCL EXTENSION cl_khr_fp64:enable
13#endif
14#endif
15
16#ifdef DEPTH_0
17#define MIN_VAL 0
18#define MAX_VAL UCHAR_MAX
19#elif defined DEPTH_1
20#define MIN_VAL SCHAR_MIN
21#define MAX_VAL SCHAR_MAX
22#elif defined DEPTH_2
23#define MIN_VAL 0
24#define MAX_VAL USHRT_MAX
25#elif defined DEPTH_3
26#define MIN_VAL SHRT_MIN
27#define MAX_VAL SHRT_MAX
28#elif defined DEPTH_4
29#define MIN_VAL INT_MIN
30#define MAX_VAL INT_MAX
31#elif defined DEPTH_5
32#define MIN_VAL (-FLT_MAX)
33#define MAX_VAL FLT_MAX
34#elif defined DEPTH_6
35#define MIN_VAL (-DBL_MAX)
36#define MAX_VAL DBL_MAX
37#endif
38
39#define noconvert
40#define INDEX_MAX UINT_MAX
41
42#if wdepth <= 4
43#define MIN_ABS(a) convertFromU(abs(a))
44#define MIN_ABS2(a, b) convertFromU(abs_diff(a, b))
45#define MIN(a, b) min(a, b)
46#define MAX(a, b) max(a, b)
47#else
48#define MIN_ABS(a) fabs(a)
49#define MIN_ABS2(a, b) fabs(a - b)
50#define MIN(a, b) fmin(a, b)
51#define MAX(a, b) fmax(a, b)
52#endif
53
54#if kercn != 3
55#define loadpix(addr) *(__global const srcT *)(addr)
56#define srcTSIZE (int)sizeof(srcT)
57#else
58#define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
59#define srcTSIZE ((int)sizeof(srcT1) * 3)
60#endif
61
62#ifndef HAVE_MASK
63#undef srcTSIZE
64#define srcTSIZE (int)sizeof(srcT1)
65#endif
66
67#ifdef NEED_MINVAL
68#ifdef NEED_MINLOC
69#define CALC_MIN(p, inc) \
70    if (minval > temp.p) \
71    { \
72        minval = temp.p; \
73        minloc = id + inc; \
74    }
75#else
76#define CALC_MIN(p, inc) \
77    minval = MIN(minval, temp.p);
78#endif
79#else
80#define CALC_MIN(p, inc)
81#endif
82
83#ifdef NEED_MAXVAL
84#ifdef NEED_MAXLOC
85#define CALC_MAX(p, inc) \
86    if (maxval < temp.p) \
87    { \
88        maxval = temp.p; \
89        maxloc = id + inc; \
90    }
91#else
92#define CALC_MAX(p, inc) \
93    maxval = MAX(maxval, temp.p);
94#endif
95#else
96#define CALC_MAX(p, inc)
97#endif
98
99#ifdef OP_CALC2
100#define CALC_MAX2(p) \
101    maxval2 = MAX(maxval2, temp2.p);
102#else
103#define CALC_MAX2(p)
104#endif
105
106#define CALC_P(p, inc) \
107    CALC_MIN(p, inc) \
108    CALC_MAX(p, inc) \
109    CALC_MAX2(p)
110
111__kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_offset, int cols,
112                        int total, int groupnum, __global uchar * dstptr
113#ifdef HAVE_MASK
114                        , __global const uchar * mask, int mask_step, int mask_offset
115#endif
116#ifdef HAVE_SRC2
117                        , __global const uchar * src2ptr, int src2_step, int src2_offset
118#endif
119                        )
120{
121    int lid = get_local_id(0);
122    int gid = get_group_id(0);
123    int  id = get_global_id(0)
124#ifndef HAVE_MASK
125    * kercn;
126#else
127    ;
128#endif
129
130    srcptr += src_offset;
131#ifdef HAVE_MASK
132    mask += mask_offset;
133#endif
134#ifdef HAVE_SRC2
135    src2ptr += src2_offset;
136#endif
137
138#ifdef NEED_MINVAL
139    __local dstT1 localmem_min[WGS2_ALIGNED];
140    dstT1 minval = MAX_VAL;
141#ifdef NEED_MINLOC
142    __local uint localmem_minloc[WGS2_ALIGNED];
143    uint minloc = INDEX_MAX;
144#endif
145#endif
146#ifdef NEED_MAXVAL
147    dstT1 maxval = MIN_VAL;
148    __local dstT1 localmem_max[WGS2_ALIGNED];
149#ifdef NEED_MAXLOC
150    __local uint localmem_maxloc[WGS2_ALIGNED];
151    uint maxloc = INDEX_MAX;
152#endif
153#endif
154#ifdef OP_CALC2
155    __local dstT1 localmem_max2[WGS2_ALIGNED];
156    dstT1 maxval2 = MIN_VAL;
157#endif
158
159    int src_index;
160#ifdef HAVE_MASK
161    int mask_index;
162#endif
163#ifdef HAVE_SRC2
164    int src2_index;
165#endif
166
167    dstT temp;
168#ifdef HAVE_SRC2
169    dstT temp2;
170#endif
171
172    for (int grain = groupnum * WGS
173#ifndef HAVE_MASK
174        * kercn
175#endif
176        ; id < total; id += grain)
177    {
178#ifdef HAVE_MASK
179#ifdef HAVE_MASK_CONT
180        mask_index = id;
181#else
182        mask_index = mad24(id / cols, mask_step, id % cols);
183#endif
184        if (mask[mask_index])
185#endif
186        {
187#ifdef HAVE_SRC_CONT
188            src_index = id * srcTSIZE;//mul24(id, srcTSIZE);
189#else
190            src_index = mad24(id / cols, src_step, mul24(id % cols, srcTSIZE));
191#endif
192            temp = convertToDT(loadpix(srcptr + src_index));
193#ifdef OP_ABS
194            temp = MIN_ABS(temp);
195#endif
196
197#ifdef HAVE_SRC2
198#ifdef HAVE_SRC2_CONT
199            src2_index = id * srcTSIZE; //mul24(id, srcTSIZE);
200#else
201            src2_index = mad24(id / cols, src2_step, mul24(id % cols, srcTSIZE));
202#endif
203            temp2 = convertToDT(loadpix(src2ptr + src2_index));
204            temp = MIN_ABS2(temp, temp2);
205#ifdef OP_CALC2
206            temp2 = MIN_ABS(temp2);
207#endif
208#endif
209
210#if kercn == 1
211#ifdef NEED_MINVAL
212#ifdef NEED_MINLOC
213            if (minval > temp)
214            {
215                minval = temp;
216                minloc = id;
217            }
218#else
219            minval = MIN(minval, temp);
220#endif
221#endif
222#ifdef NEED_MAXVAL
223#ifdef NEED_MAXLOC
224            if (maxval < temp)
225            {
226                maxval = temp;
227                maxloc = id;
228            }
229#else
230            maxval = MAX(maxval, temp);
231#endif
232#ifdef OP_CALC2
233            maxval2 = MAX(maxval2, temp2);
234#endif
235#endif
236#elif kercn >= 2
237            CALC_P(s0, 0)
238            CALC_P(s1, 1)
239#if kercn >= 3
240            CALC_P(s2, 2)
241#if kercn >= 4
242            CALC_P(s3, 3)
243#if kercn >= 8
244            CALC_P(s4, 4)
245            CALC_P(s5, 5)
246            CALC_P(s6, 6)
247            CALC_P(s7, 7)
248#if kercn == 16
249            CALC_P(s8, 8)
250            CALC_P(s9, 9)
251            CALC_P(sA, 10)
252            CALC_P(sB, 11)
253            CALC_P(sC, 12)
254            CALC_P(sD, 13)
255            CALC_P(sE, 14)
256            CALC_P(sF, 15)
257#endif
258#endif
259#endif
260#endif
261#endif
262        }
263    }
264
265    if (lid < WGS2_ALIGNED)
266    {
267#ifdef NEED_MINVAL
268        localmem_min[lid] = minval;
269#endif
270#ifdef NEED_MAXVAL
271        localmem_max[lid] = maxval;
272#endif
273#ifdef NEED_MINLOC
274        localmem_minloc[lid] = minloc;
275#endif
276#ifdef NEED_MAXLOC
277        localmem_maxloc[lid] = maxloc;
278#endif
279#ifdef OP_CALC2
280        localmem_max2[lid] = maxval2;
281#endif
282    }
283    barrier(CLK_LOCAL_MEM_FENCE);
284
285    if (lid >= WGS2_ALIGNED && total >= WGS2_ALIGNED)
286    {
287        int lid3 = lid - WGS2_ALIGNED;
288#ifdef NEED_MINVAL
289#ifdef NEED_MINLOC
290        if (localmem_min[lid3] >= minval)
291        {
292            if (localmem_min[lid3] == minval)
293                localmem_minloc[lid3] = min(localmem_minloc[lid3], minloc);
294            else
295                localmem_minloc[lid3] = minloc,
296            localmem_min[lid3] = minval;
297        }
298#else
299        localmem_min[lid3] = MIN(localmem_min[lid3], minval);
300#endif
301#endif
302#ifdef NEED_MAXVAL
303#ifdef NEED_MAXLOC
304        if (localmem_max[lid3] <= maxval)
305        {
306            if (localmem_max[lid3] == maxval)
307                localmem_maxloc[lid3] = min(localmem_maxloc[lid3], maxloc);
308            else
309                localmem_maxloc[lid3] = maxloc,
310            localmem_max[lid3] = maxval;
311        }
312#else
313        localmem_max[lid3] = MAX(localmem_max[lid3], maxval);
314#endif
315#endif
316#ifdef OP_CALC2
317        localmem_max2[lid3] = MAX(localmem_max2[lid3], maxval2);
318#endif
319    }
320    barrier(CLK_LOCAL_MEM_FENCE);
321
322    for (int lsize = WGS2_ALIGNED >> 1; lsize > 0; lsize >>= 1)
323    {
324        if (lid < lsize)
325        {
326            int lid2 = lsize + lid;
327
328#ifdef NEED_MINVAL
329#ifdef NEED_MINLOC
330            if (localmem_min[lid] >= localmem_min[lid2])
331            {
332                if (localmem_min[lid] == localmem_min[lid2])
333                    localmem_minloc[lid] = min(localmem_minloc[lid2], localmem_minloc[lid]);
334                else
335                    localmem_minloc[lid] = localmem_minloc[lid2],
336                localmem_min[lid] = localmem_min[lid2];
337            }
338#else
339            localmem_min[lid] = MIN(localmem_min[lid], localmem_min[lid2]);
340#endif
341#endif
342#ifdef NEED_MAXVAL
343#ifdef NEED_MAXLOC
344            if (localmem_max[lid] <= localmem_max[lid2])
345            {
346                if (localmem_max[lid] == localmem_max[lid2])
347                    localmem_maxloc[lid] = min(localmem_maxloc[lid2], localmem_maxloc[lid]);
348                else
349                    localmem_maxloc[lid] = localmem_maxloc[lid2],
350                localmem_max[lid] = localmem_max[lid2];
351            }
352#else
353            localmem_max[lid] = MAX(localmem_max[lid], localmem_max[lid2]);
354#endif
355#endif
356#ifdef OP_CALC2
357            localmem_max2[lid] = MAX(localmem_max2[lid], localmem_max2[lid2]);
358#endif
359        }
360        barrier(CLK_LOCAL_MEM_FENCE);
361    }
362
363    if (lid == 0)
364    {
365        int pos = 0;
366#ifdef NEED_MINVAL
367        *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_min[0];
368        pos = mad24(groupnum, (int)sizeof(dstT1), pos);
369#endif
370#ifdef NEED_MAXVAL
371        *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max[0];
372        pos = mad24(groupnum, (int)sizeof(dstT1), pos);
373#endif
374#ifdef NEED_MINLOC
375        *(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_minloc[0];
376        pos = mad24(groupnum, (int)sizeof(uint), pos);
377#endif
378#ifdef NEED_MAXLOC
379        *(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_maxloc[0];
380#ifdef OP_CALC2
381        pos = mad24(groupnum, (int)sizeof(uint), pos);
382#endif
383#endif
384#ifdef OP_CALC2
385        *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max2[0];
386#endif
387    }
388}
389