• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * Copyright (c) 2016, 2017 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "helpers.h"
25
26#define VATOMIC_INC16(histogram, win_pos)   \
27    {                                       \
28        atomic_inc(histogram + win_pos.s0); \
29        atomic_inc(histogram + win_pos.s1); \
30        atomic_inc(histogram + win_pos.s2); \
31        atomic_inc(histogram + win_pos.s3); \
32        atomic_inc(histogram + win_pos.s4); \
33        atomic_inc(histogram + win_pos.s5); \
34        atomic_inc(histogram + win_pos.s6); \
35        atomic_inc(histogram + win_pos.s7); \
36        atomic_inc(histogram + win_pos.s8); \
37        atomic_inc(histogram + win_pos.s9); \
38        atomic_inc(histogram + win_pos.sa); \
39        atomic_inc(histogram + win_pos.sb); \
40        atomic_inc(histogram + win_pos.sc); \
41        atomic_inc(histogram + win_pos.sd); \
42        atomic_inc(histogram + win_pos.se); \
43        atomic_inc(histogram + win_pos.sf); \
44    }
45
46/** Calculate the histogram of an 8 bit grayscale image.
47 *
48 * Each thread will process 16 pixels and use one local atomic operation per pixel.
49 * When all work items in a work group are done the resulting local histograms are
50 * added to the global histogram using global atomics.
51 *
52 * @note The input image is represented as a two-dimensional array of type uchar.
53 * The output is represented as a one-dimensional uint array of length of num_bins
54 *
55 * @param[in]  input_ptr                           Pointer to the first source image. Supported data types: U8
56 * @param[in]  input_stride_x                      Stride of the first source image in X dimension (in bytes)
57 * @param[in]  input_step_x                        input_stride_x * number of elements along X processed per workitem(in bytes)
58 * @param[in]  input_stride_y                      Stride of the first source image in Y dimension (in bytes)
59 * @param[in]  input_step_y                        input_stride_y * number of elements along Y processed per workitem(in bytes)
60 * @param[in]  input_offset_first_element_in_bytes The offset of the first element in the first source image
61 * @param[in]  histogram_local                     The local buffer to hold histogram result in per workgroup. Supported data types: U32
62 * @param[out] histogram                           The output buffer to hold histogram final result. Supported data types: U32
63 * @param[out] num_bins                            The number of bins
64 * @param[out] offset                              The start of values to use (inclusive)
65 * @param[out] range                               The range of a bin
66 * @param[out] offrange                            The maximum value (exclusive)
67 */
68__kernel void hist_local_kernel(IMAGE_DECLARATION(input),
69                                __local uint *histogram_local,
70                                __global uint *restrict histogram,
71                                uint                    num_bins,
72                                uint                    offset,
73                                uint                    range,
74                                uint                    offrange)
75{
76    Image input_buffer = CONVERT_TO_IMAGE_STRUCT(input);
77    uint  local_id_x   = get_local_id(0);
78
79    uint local_x_size = get_local_size(0);
80
81    if(num_bins > local_x_size)
82    {
83        for(int i = local_id_x; i < num_bins; i += local_x_size)
84        {
85            histogram_local[i] = 0;
86        }
87    }
88    else
89    {
90        if(local_id_x <= num_bins)
91        {
92            histogram_local[local_id_x] = 0;
93        }
94    }
95
96    uint16 vals = convert_uint16(vload16(0, input_buffer.ptr));
97
98    uint16 win_pos = select(num_bins, ((vals - offset) * num_bins) / range, (vals >= offset && vals < offrange));
99
100    barrier(CLK_LOCAL_MEM_FENCE);
101    VATOMIC_INC16(histogram_local, win_pos);
102    barrier(CLK_LOCAL_MEM_FENCE);
103
104    if(num_bins > local_x_size)
105    {
106        for(int i = local_id_x; i < num_bins; i += local_x_size)
107        {
108            atomic_add(histogram + i, histogram_local[i]);
109        }
110    }
111    else
112    {
113        if(local_id_x <= num_bins)
114        {
115            atomic_add(histogram + local_id_x, histogram_local[local_id_x]);
116        }
117    }
118}
119
120/** Calculate the histogram of an 8 bit grayscale image's border.
121 *
122 * Each thread will process one pixel using global atomic.
123 * When all work items in a work group are done the resulting local histograms are
124 * added to the global histogram using global atomics.
125 *
126 * @note The input image is represented as a two-dimensional array of type uchar.
127 * The output is represented as a one-dimensional uint array of length of num_bins
128 *
129 * @param[in]  input_ptr                           Pointer to the first source image. Supported data types: U8
130 * @param[in]  input_stride_x                      Stride of the first source image in X dimension (in bytes)
131 * @param[in]  input_step_x                        input_stride_x * number of elements along X processed per workitem(in bytes)
132 * @param[in]  input_stride_y                      Stride of the first source image in Y dimension (in bytes)
133 * @param[in]  input_step_y                        input_stride_y * number of elements along Y processed per workitem(in bytes)
134 * @param[in]  input_offset_first_element_in_bytes The offset of the first element in the first source image
135 * @param[out] histogram                           The output buffer to hold histogram final result. Supported data types: U32
136 * @param[out] num_bins                            The number of bins
137 * @param[out] offset                              The start of values to use (inclusive)
138 * @param[out] range                               The range of a bin
139 * @param[out] offrange                            The maximum value (exclusive)
140 */
141__kernel void hist_border_kernel(IMAGE_DECLARATION(input),
142                                 __global uint *restrict histogram,
143                                 uint                    num_bins,
144                                 uint                    offset,
145                                 uint                    range,
146                                 uint                    offrange)
147{
148    Image input_buffer = CONVERT_TO_IMAGE_STRUCT(input);
149
150    uint val = (uint)(*input_buffer.ptr);
151
152    uint win_pos = (val >= offset) ? (((val - offset) * num_bins) / range) : 0;
153
154    if(val >= offset && (val < offrange))
155    {
156        atomic_inc(histogram + win_pos);
157    }
158}
159
160/** Calculate the histogram of an 8 bit grayscale image with bin size of 256 and window size of 1.
161 *
162 * Each thread will process 16 pixels and use one local atomic operation per pixel.
163 * When all work items in a work group are done the resulting local histograms are
164 * added to the global histogram using global atomics.
165 *
166 * @note The input image is represented as a two-dimensional array of type uchar.
167 * The output is represented as a one-dimensional uint array of 256 elements
168 *
169 * @param[in]  input_ptr                           Pointer to the first source image. Supported data types: U8
170 * @param[in]  input_stride_x                      Stride of the first source image in X dimension (in bytes)
171 * @param[in]  input_step_x                        input_stride_x * number of elements along X processed per workitem(in bytes)
172 * @param[in]  input_stride_y                      Stride of the first source image in Y dimension (in bytes)
173 * @param[in]  input_step_y                        input_stride_y * number of elements along Y processed per workitem(in bytes)
174 * @param[in]  input_offset_first_element_in_bytes The offset of the first element in the first source image
175 * @param[in]  histogram_local                     The local buffer to hold histogram result in per workgroup. Supported data types: U32
176 * @param[out] histogram                           The output buffer to hold histogram final result. Supported data types: U32
177 */
178__kernel void hist_local_kernel_fixed(IMAGE_DECLARATION(input),
179                                      __local uint *histogram_local,
180                                      __global uint *restrict histogram)
181{
182    Image input_buffer = CONVERT_TO_IMAGE_STRUCT(input);
183
184    uint local_index  = get_local_id(0);
185    uint local_x_size = get_local_size(0);
186
187    for(int i = local_index; i < 256; i += local_x_size)
188    {
189        histogram_local[i] = 0;
190    }
191
192    uint16 vals = convert_uint16(vload16(0, input_buffer.ptr));
193
194    barrier(CLK_LOCAL_MEM_FENCE);
195
196    atomic_inc(histogram_local + vals.s0);
197    atomic_inc(histogram_local + vals.s1);
198    atomic_inc(histogram_local + vals.s2);
199    atomic_inc(histogram_local + vals.s3);
200    atomic_inc(histogram_local + vals.s4);
201    atomic_inc(histogram_local + vals.s5);
202    atomic_inc(histogram_local + vals.s6);
203    atomic_inc(histogram_local + vals.s7);
204    atomic_inc(histogram_local + vals.s8);
205    atomic_inc(histogram_local + vals.s9);
206    atomic_inc(histogram_local + vals.sa);
207    atomic_inc(histogram_local + vals.sb);
208    atomic_inc(histogram_local + vals.sc);
209    atomic_inc(histogram_local + vals.sd);
210    atomic_inc(histogram_local + vals.se);
211    atomic_inc(histogram_local + vals.sf);
212
213    barrier(CLK_LOCAL_MEM_FENCE);
214
215    for(int i = local_index; i < 256; i += local_x_size)
216    {
217        atomic_add(histogram + i, histogram_local[i]);
218    }
219}
220
221/** Calculate the histogram of an 8 bit grayscale image with bin size as 256 and window size as 1.
222 *
223 * Each thread will process one pixel using global atomic.
224 * When all work items in a work group are done the resulting local histograms are
225 * added to the global histogram using global atomics.
226 *
227 * @note The input image is represented as a two-dimensional array of type uchar.
228 * The output is represented as a one-dimensional uint array of 256
229 *
230 * @param[in]  input_ptr                           Pointer to the first source image. Supported data types: U8
231 * @param[in]  input_stride_x                      Stride of the first source image in X dimension (in bytes)
232 * @param[in]  input_step_x                        input_stride_x * number of elements along X processed per workitem(in bytes)
233 * @param[in]  input_stride_y                      Stride of the first source image in Y dimension (in bytes)
234 * @param[in]  input_step_y                        input_stride_y * number of elements along Y processed per workitem(in bytes)
235 * @param[in]  input_offset_first_element_in_bytes The offset of the first element in the first source image
236 * @param[out] histogram                           The output buffer to hold histogram final result. Supported data types: U32
237 */
238__kernel void hist_border_kernel_fixed(IMAGE_DECLARATION(input),
239                                       __global uint *restrict histogram)
240{
241    Image input_buffer = CONVERT_TO_IMAGE_STRUCT(input);
242    atomic_inc(histogram + *input_buffer.ptr);
243}
244