1/* 2 * Copyright (c) 2016, 2017 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24#include "helpers.h" 25 26#define VATOMIC_INC16(histogram, win_pos) \ 27 { \ 28 atomic_inc(histogram + win_pos.s0); \ 29 atomic_inc(histogram + win_pos.s1); \ 30 atomic_inc(histogram + win_pos.s2); \ 31 atomic_inc(histogram + win_pos.s3); \ 32 atomic_inc(histogram + win_pos.s4); \ 33 atomic_inc(histogram + win_pos.s5); \ 34 atomic_inc(histogram + win_pos.s6); \ 35 atomic_inc(histogram + win_pos.s7); \ 36 atomic_inc(histogram + win_pos.s8); \ 37 atomic_inc(histogram + win_pos.s9); \ 38 atomic_inc(histogram + win_pos.sa); \ 39 atomic_inc(histogram + win_pos.sb); \ 40 atomic_inc(histogram + win_pos.sc); \ 41 atomic_inc(histogram + win_pos.sd); \ 42 atomic_inc(histogram + win_pos.se); \ 43 atomic_inc(histogram + win_pos.sf); \ 44 } 45 46/** Calculate the histogram of an 8 bit grayscale image. 47 * 48 * Each thread will process 16 pixels and use one local atomic operation per pixel. 49 * When all work items in a work group are done the resulting local histograms are 50 * added to the global histogram using global atomics. 51 * 52 * @note The input image is represented as a two-dimensional array of type uchar. 53 * The output is represented as a one-dimensional uint array of length of num_bins 54 * 55 * @param[in] input_ptr Pointer to the first source image. Supported data types: U8 56 * @param[in] input_stride_x Stride of the first source image in X dimension (in bytes) 57 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) 58 * @param[in] input_stride_y Stride of the first source image in Y dimension (in bytes) 59 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) 60 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source image 61 * @param[in] histogram_local The local buffer to hold histogram result in per workgroup. Supported data types: U32 62 * @param[out] histogram The output buffer to hold histogram final result. Supported data types: U32 63 * @param[out] num_bins The number of bins 64 * @param[out] offset The start of values to use (inclusive) 65 * @param[out] range The range of a bin 66 * @param[out] offrange The maximum value (exclusive) 67 */ 68__kernel void hist_local_kernel(IMAGE_DECLARATION(input), 69 __local uint *histogram_local, 70 __global uint *restrict histogram, 71 uint num_bins, 72 uint offset, 73 uint range, 74 uint offrange) 75{ 76 Image input_buffer = CONVERT_TO_IMAGE_STRUCT(input); 77 uint local_id_x = get_local_id(0); 78 79 uint local_x_size = get_local_size(0); 80 81 if(num_bins > local_x_size) 82 { 83 for(int i = local_id_x; i < num_bins; i += local_x_size) 84 { 85 histogram_local[i] = 0; 86 } 87 } 88 else 89 { 90 if(local_id_x <= num_bins) 91 { 92 histogram_local[local_id_x] = 0; 93 } 94 } 95 96 uint16 vals = convert_uint16(vload16(0, input_buffer.ptr)); 97 98 uint16 win_pos = select(num_bins, ((vals - offset) * num_bins) / range, (vals >= offset && vals < offrange)); 99 100 barrier(CLK_LOCAL_MEM_FENCE); 101 VATOMIC_INC16(histogram_local, win_pos); 102 barrier(CLK_LOCAL_MEM_FENCE); 103 104 if(num_bins > local_x_size) 105 { 106 for(int i = local_id_x; i < num_bins; i += local_x_size) 107 { 108 atomic_add(histogram + i, histogram_local[i]); 109 } 110 } 111 else 112 { 113 if(local_id_x <= num_bins) 114 { 115 atomic_add(histogram + local_id_x, histogram_local[local_id_x]); 116 } 117 } 118} 119 120/** Calculate the histogram of an 8 bit grayscale image's border. 121 * 122 * Each thread will process one pixel using global atomic. 123 * When all work items in a work group are done the resulting local histograms are 124 * added to the global histogram using global atomics. 125 * 126 * @note The input image is represented as a two-dimensional array of type uchar. 127 * The output is represented as a one-dimensional uint array of length of num_bins 128 * 129 * @param[in] input_ptr Pointer to the first source image. Supported data types: U8 130 * @param[in] input_stride_x Stride of the first source image in X dimension (in bytes) 131 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) 132 * @param[in] input_stride_y Stride of the first source image in Y dimension (in bytes) 133 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) 134 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source image 135 * @param[out] histogram The output buffer to hold histogram final result. Supported data types: U32 136 * @param[out] num_bins The number of bins 137 * @param[out] offset The start of values to use (inclusive) 138 * @param[out] range The range of a bin 139 * @param[out] offrange The maximum value (exclusive) 140 */ 141__kernel void hist_border_kernel(IMAGE_DECLARATION(input), 142 __global uint *restrict histogram, 143 uint num_bins, 144 uint offset, 145 uint range, 146 uint offrange) 147{ 148 Image input_buffer = CONVERT_TO_IMAGE_STRUCT(input); 149 150 uint val = (uint)(*input_buffer.ptr); 151 152 uint win_pos = (val >= offset) ? (((val - offset) * num_bins) / range) : 0; 153 154 if(val >= offset && (val < offrange)) 155 { 156 atomic_inc(histogram + win_pos); 157 } 158} 159 160/** Calculate the histogram of an 8 bit grayscale image with bin size of 256 and window size of 1. 161 * 162 * Each thread will process 16 pixels and use one local atomic operation per pixel. 163 * When all work items in a work group are done the resulting local histograms are 164 * added to the global histogram using global atomics. 165 * 166 * @note The input image is represented as a two-dimensional array of type uchar. 167 * The output is represented as a one-dimensional uint array of 256 elements 168 * 169 * @param[in] input_ptr Pointer to the first source image. Supported data types: U8 170 * @param[in] input_stride_x Stride of the first source image in X dimension (in bytes) 171 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) 172 * @param[in] input_stride_y Stride of the first source image in Y dimension (in bytes) 173 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) 174 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source image 175 * @param[in] histogram_local The local buffer to hold histogram result in per workgroup. Supported data types: U32 176 * @param[out] histogram The output buffer to hold histogram final result. Supported data types: U32 177 */ 178__kernel void hist_local_kernel_fixed(IMAGE_DECLARATION(input), 179 __local uint *histogram_local, 180 __global uint *restrict histogram) 181{ 182 Image input_buffer = CONVERT_TO_IMAGE_STRUCT(input); 183 184 uint local_index = get_local_id(0); 185 uint local_x_size = get_local_size(0); 186 187 for(int i = local_index; i < 256; i += local_x_size) 188 { 189 histogram_local[i] = 0; 190 } 191 192 uint16 vals = convert_uint16(vload16(0, input_buffer.ptr)); 193 194 barrier(CLK_LOCAL_MEM_FENCE); 195 196 atomic_inc(histogram_local + vals.s0); 197 atomic_inc(histogram_local + vals.s1); 198 atomic_inc(histogram_local + vals.s2); 199 atomic_inc(histogram_local + vals.s3); 200 atomic_inc(histogram_local + vals.s4); 201 atomic_inc(histogram_local + vals.s5); 202 atomic_inc(histogram_local + vals.s6); 203 atomic_inc(histogram_local + vals.s7); 204 atomic_inc(histogram_local + vals.s8); 205 atomic_inc(histogram_local + vals.s9); 206 atomic_inc(histogram_local + vals.sa); 207 atomic_inc(histogram_local + vals.sb); 208 atomic_inc(histogram_local + vals.sc); 209 atomic_inc(histogram_local + vals.sd); 210 atomic_inc(histogram_local + vals.se); 211 atomic_inc(histogram_local + vals.sf); 212 213 barrier(CLK_LOCAL_MEM_FENCE); 214 215 for(int i = local_index; i < 256; i += local_x_size) 216 { 217 atomic_add(histogram + i, histogram_local[i]); 218 } 219} 220 221/** Calculate the histogram of an 8 bit grayscale image with bin size as 256 and window size as 1. 222 * 223 * Each thread will process one pixel using global atomic. 224 * When all work items in a work group are done the resulting local histograms are 225 * added to the global histogram using global atomics. 226 * 227 * @note The input image is represented as a two-dimensional array of type uchar. 228 * The output is represented as a one-dimensional uint array of 256 229 * 230 * @param[in] input_ptr Pointer to the first source image. Supported data types: U8 231 * @param[in] input_stride_x Stride of the first source image in X dimension (in bytes) 232 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) 233 * @param[in] input_stride_y Stride of the first source image in Y dimension (in bytes) 234 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) 235 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source image 236 * @param[out] histogram The output buffer to hold histogram final result. Supported data types: U32 237 */ 238__kernel void hist_border_kernel_fixed(IMAGE_DECLARATION(input), 239 __global uint *restrict histogram) 240{ 241 Image input_buffer = CONVERT_TO_IMAGE_STRUCT(input); 242 atomic_inc(histogram + *input_buffer.ptr); 243} 244