• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/core/NEON/kernels/NEHistogramKernel.h"
25 
26 #include "arm_compute/core/Error.h"
27 #include "arm_compute/core/Helpers.h"
28 #include "arm_compute/core/IDistribution1D.h"
29 #include "arm_compute/core/ITensor.h"
30 #include "arm_compute/core/TensorInfo.h"
31 #include "arm_compute/core/Types.h"
32 #include "arm_compute/core/Window.h"
33 #include "src/core/helpers/AutoConfiguration.h"
34 #include "src/core/helpers/WindowHelpers.h"
35 
36 #include <algorithm>
37 #include <arm_neon.h>
38 #include <array>
39 
40 namespace arm_compute
41 {
42 class Coordinates;
43 
merge_histogram(uint32_t * global_hist,const uint32_t * local_hist,size_t bins)44 inline void NEHistogramKernel::merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins)
45 {
46     arm_compute::lock_guard<arm_compute::Mutex> lock(_hist_mtx);
47 
48     const unsigned int v_end = (bins / 4) * 4;
49 
50     for(unsigned int b = 0; b < v_end; b += 4)
51     {
52         const uint32x4_t tmp_global = vld1q_u32(global_hist + b);
53         const uint32x4_t tmp_local  = vld1q_u32(local_hist + b);
54         vst1q_u32(global_hist + b, vaddq_u32(tmp_global, tmp_local));
55     }
56 
57     for(unsigned int b = v_end; b < bins; ++b)
58     {
59         global_hist[b] += local_hist[b];
60     }
61 }
62 
NEHistogramKernel()63 NEHistogramKernel::NEHistogramKernel()
64     : _func(nullptr), _input(nullptr), _output(nullptr), _local_hist(nullptr), _window_lut(nullptr), _hist_mtx()
65 {
66 }
67 
histogram_U8(Window win,const ThreadInfo & info)68 void NEHistogramKernel::histogram_U8(Window win, const ThreadInfo &info)
69 {
70     ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr);
71 
72     const size_t          bins       = _output->num_bins();
73     const int32_t         offset     = _output->offset();
74     const uint32_t        offrange   = offset + _output->range();
75     const uint32_t *const w_lut      = _window_lut;
76     uint32_t *const       local_hist = _local_hist + info.thread_id * bins;
77 
78     // Clear local_histogram
79     std::fill_n(local_hist, bins, 0);
80 
81     auto update_local_hist = [&](uint8_t p)
82     {
83         if(offset <= p && p < offrange)
84         {
85             ++local_hist[w_lut[p]];
86         }
87     };
88 
89     const int x_start = win.x().start();
90     const int x_end   = win.x().end();
91 
92     // Handle X dimension manually to split into two loops
93     // First one will use vector operations, second one processes the left over
94     // pixels
95     win.set(Window::DimX, Window::Dimension(0, 1, 1));
96 
97     Iterator input(_input, win);
98 
99     // Calculate local histogram
100     execute_window_loop(win, [&](const Coordinates &)
101     {
102         int x = x_start;
103 
104         // Vector loop
105         for(; x <= x_end - 8; x += 8)
106         {
107             const uint8x8_t pixels = vld1_u8(input.ptr() + x);
108 
109             update_local_hist(vget_lane_u8(pixels, 0));
110             update_local_hist(vget_lane_u8(pixels, 1));
111             update_local_hist(vget_lane_u8(pixels, 2));
112             update_local_hist(vget_lane_u8(pixels, 3));
113             update_local_hist(vget_lane_u8(pixels, 4));
114             update_local_hist(vget_lane_u8(pixels, 5));
115             update_local_hist(vget_lane_u8(pixels, 6));
116             update_local_hist(vget_lane_u8(pixels, 7));
117         }
118 
119         // Process leftover pixels
120         for(; x < x_end; ++x)
121         {
122             update_local_hist(input.ptr()[x]);
123         }
124     },
125     input);
126 
127     // Merge histograms
128     merge_histogram(_output->buffer(), local_hist, bins);
129 }
130 
histogram_fixed_U8(Window win,const ThreadInfo & info)131 void NEHistogramKernel::histogram_fixed_U8(Window win, const ThreadInfo &info)
132 {
133     ARM_COMPUTE_UNUSED(info);
134     ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr);
135 
136     std::array<uint32_t, _max_range_size> local_hist{ { 0 } };
137 
138     const int x_start = win.x().start();
139     const int x_end   = win.x().end();
140 
141     // Handle X dimension manually to split into two loops
142     // First one will use vector operations, second one processes the left over
143     // pixels
144     win.set(Window::DimX, Window::Dimension(0, 1, 1));
145 
146     Iterator input(_input, win);
147 
148     // Calculate local histogram
149     execute_window_loop(win, [&](const Coordinates &)
150     {
151         int x = x_start;
152 
153         // Vector loop
154         for(; x <= x_end - 8; x += 8)
155         {
156             const uint8x8_t pixels = vld1_u8(input.ptr() + x);
157 
158             ++local_hist[vget_lane_u8(pixels, 0)];
159             ++local_hist[vget_lane_u8(pixels, 1)];
160             ++local_hist[vget_lane_u8(pixels, 2)];
161             ++local_hist[vget_lane_u8(pixels, 3)];
162             ++local_hist[vget_lane_u8(pixels, 4)];
163             ++local_hist[vget_lane_u8(pixels, 5)];
164             ++local_hist[vget_lane_u8(pixels, 6)];
165             ++local_hist[vget_lane_u8(pixels, 7)];
166         }
167 
168         // Process leftover pixels
169         for(; x < x_end; ++x)
170         {
171             ++local_hist[input.ptr()[x]];
172         }
173     },
174     input);
175 
176     // Merge histograms
177     merge_histogram(_output->buffer(), local_hist.data(), _max_range_size);
178 }
179 
calculate_window_lut() const180 void NEHistogramKernel::calculate_window_lut() const
181 {
182     const int32_t  offset = _output->offset();
183     const size_t   bins   = _output->num_bins();
184     const uint32_t range  = _output->range();
185 
186     std::fill_n(_window_lut, offset, 0);
187 
188     for(unsigned int p = offset; p < _max_range_size; ++p)
189     {
190         _window_lut[p] = ((p - offset) * bins) / range;
191     }
192 }
193 
configure(const IImage * input,IDistribution1D * output,uint32_t * local_hist,uint32_t * window_lut)194 void NEHistogramKernel::configure(const IImage *input, IDistribution1D *output, uint32_t *local_hist, uint32_t *window_lut)
195 {
196     ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
197     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
198     ARM_COMPUTE_ERROR_ON(nullptr == output);
199     ARM_COMPUTE_ERROR_ON(nullptr == local_hist);
200     ARM_COMPUTE_ERROR_ON(nullptr == window_lut);
201 
202     _input      = input;
203     _output     = output;
204     _local_hist = local_hist;
205     _window_lut = window_lut;
206 
207     //Check offset
208     ARM_COMPUTE_ERROR_ON_MSG(0 > _output->offset() || _output->offset() > static_cast<int32_t>(_max_range_size), "Offset is larger than the image value range.");
209 
210     //Check range
211     ARM_COMPUTE_ERROR_ON_MSG(static_cast<int32_t>(_output->range()) > static_cast<int32_t>(_max_range_size) /* max range */, "Range larger than the image value range.");
212 
213     // Calculate LUT
214     calculate_window_lut();
215 
216     // Set appropriate function
217     _func = &NEHistogramKernel::histogram_U8;
218 
219     Window win = calculate_max_window(*input->info(), Steps());
220 
221     INEKernel::configure(win);
222 }
223 
configure(const IImage * input,IDistribution1D * output)224 void NEHistogramKernel::configure(const IImage *input, IDistribution1D *output)
225 {
226     ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
227     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
228     ARM_COMPUTE_ERROR_ON(nullptr == output);
229 
230     _input  = input;
231     _output = output;
232 
233     // Set appropriate function
234     _func = &NEHistogramKernel::histogram_fixed_U8;
235 
236     Window win = calculate_max_window(*input->info(), Steps());
237 
238     INEKernel::configure(win);
239 }
240 
run(const Window & window,const ThreadInfo & info)241 void NEHistogramKernel::run(const Window &window, const ThreadInfo &info)
242 {
243     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
244     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
245     ARM_COMPUTE_ERROR_ON(_func == nullptr);
246 
247     (this->*_func)(window, info);
248 }
249 } // namespace arm_compute
250