• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
7
// When the build defines DOUBLE_SUPPORT, enable a double-precision extension:
// the AMD one if the compiler advertises it, otherwise the Khronos one.
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif

// Size in bytes of one source / destination pixel: cn channels of srcT1 / dstT1.
// cn, srcT1 and dstT1 are not defined in this file; they are expected to be
// supplied at build time. The expansions are parenthesized so the macros
// behave as a single value inside larger expressions (e.g. `a / DST_TSIZE`).
#define SRC_TSIZE (cn * (int)sizeof(srcT1))
#define DST_TSIZE (cn * (int)sizeof(dstT1))

// Expands to nothing, so `noconvert(x)` becomes `(x)`.
// NOTE(review): presumably the host sets convertToDT=noconvert when the source
// and destination element types already match - confirm against the host code.
#define noconvert
20
// Accumulates a source image into a destination accumulator.
//
// Exactly which operation is performed is chosen at build time; the first of
// these macros that is defined wins, and defining none is a compile error:
//   ACCUMULATE          dst += convertToDT(src)
//   ACCUMULATE_SQUARE   dst  = convertToDT(src)^2 + dst
//   ACCUMULATE_PRODUCT  dst  = convertToDT(src) * convertToDT(src2) + dst
//   ACCUMULATE_WEIGHTED dst  = (1 - alpha) * dst + src * alpha
//
// srcT1, dstT1, cn, rowsPerWI and convertToDT are not defined in this file and
// are expected to be supplied at build time.
//
// Work distribution: the work-item handles column x = get_global_id(0) and up
// to rowsPerWI consecutive rows starting at y = get_global_id(1) * rowsPerWI.
// Pixels with x >= dst_cols or y >= dst_rows are skipped.
//
// Parameters (all *_step and *_offset values are applied to uchar pointers,
// i.e. they are byte counts):
//   srcptr, src_step, src_offset     source image: base pointer, row stride,
//                                    offset of the first element
//   src2ptr, src2_step, src2_offset  second source (ACCUMULATE_PRODUCT only)
//   dstptr, dst_step, dst_offset     accumulator, read and written in place
//   dst_rows, dst_cols               bounds used for the y / x range checks
//   alpha                            weight of the new sample
//                                    (ACCUMULATE_WEIGHTED only)
//   mask, mask_step, mask_offset     one byte per pixel (HAVE_MASK only); a
//                                    pixel is updated only if its mask byte
//                                    is non-zero
__kernel void accumulate(__global const uchar * srcptr, int src_step, int src_offset,
#ifdef ACCUMULATE_PRODUCT
                         __global const uchar * src2ptr, int src2_step, int src2_offset,
#endif
                         __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols
#ifdef ACCUMULATE_WEIGHTED
                         , dstT1 alpha
#endif
#ifdef HAVE_MASK
                         , __global const uchar * mask, int mask_step, int mask_offset
#endif
                         )
{
    int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x < dst_cols)
    {
        // Byte offsets of pixel (x, y) in each buffer; they are advanced by one
        // row stride at the end of every processed row below.
        int src_index = mad24(y, src_step, mad24(x, SRC_TSIZE, src_offset));
#ifdef HAVE_MASK
        // The mask has a single byte per pixel, so x is not scaled.
        int mask_index = mad24(y, mask_step, mask_offset + x);
        mask += mask_index;
#endif
#ifdef ACCUMULATE_PRODUCT
        int src2_index = mad24(y, src2_step, mad24(x, SRC_TSIZE, src2_offset));
#endif
        int dst_index = mad24(y, dst_step, mad24(x, DST_TSIZE, dst_offset));

        #pragma unroll
        for (int i = 0; i < rowsPerWI; ++i)
        {
            // Rows past the bottom of the image are skipped; y only advances
            // while it is in range, so once this fails it fails for the rest
            // of the loop.
            if (y < dst_rows)
            {
                __global const srcT1 * src = (__global const srcT1 *)(srcptr + src_index);
#ifdef ACCUMULATE_PRODUCT
                __global const srcT1 * src2 = (__global const srcT1 *)(src2ptr + src2_index);
#endif
                __global dstT1 * dst = (__global dstT1 *)(dstptr + dst_index);

                // The channel loop is wrapped in an explicit block so that the
                // mask test always guards the whole loop, regardless of how the
                // compiler treats the #pragma line that precedes the `for`.
#ifdef HAVE_MASK
                if (mask[0])
#endif
                {
                    #pragma unroll
                    for (int c = 0; c < cn; ++c)
                    {
#ifdef ACCUMULATE
                        dst[c] += convertToDT(src[c]);
#elif defined ACCUMULATE_SQUARE
                        dstT1 val = convertToDT(src[c]);
                        dst[c] = fma(val, val, dst[c]);
#elif defined ACCUMULATE_PRODUCT
                        dst[c] = fma(convertToDT(src[c]), convertToDT(src2[c]), dst[c]);
#elif defined ACCUMULATE_WEIGHTED
                        // src[c] is not passed through convertToDT here; the
                        // product with alpha relies on implicit conversion.
                        dst[c] = fma(1 - alpha, dst[c], src[c] * alpha);
#else
#error "Unknown accumulation type"
#endif
                    }
                }

                // Step every buffer to the same column in the next row.
                src_index += src_step;
#ifdef ACCUMULATE_PRODUCT
                src2_index += src2_step;
#endif
#ifdef HAVE_MASK
                mask += mask_step;
#endif
                dst_index += dst_step;
                ++y;
            }
        }
    }
}
91