• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2017-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/core/NEON/kernels/NENormalizationLayerKernel.h"
25 
26 #include "arm_compute/core/Error.h"
27 #include "arm_compute/core/Helpers.h"
28 #include "arm_compute/core/TensorInfo.h"
29 #include "arm_compute/core/Utils.h"
30 #include "arm_compute/core/Validate.h"
31 #include "arm_compute/core/Window.h"
32 #include "src/core/CPP/Validate.h"
33 #include "src/core/NEON/NEFixedPoint.h"
34 #include "src/core/NEON/NEMath.h"
35 #include "src/core/NEON/wrapper/wrapper.h"
36 #include "src/core/helpers/AutoConfiguration.h"
37 #include "src/core/helpers/NormalizationHelpers.h"
38 #include "src/core/helpers/WindowHelpers.h"
39 
40 namespace arm_compute
41 {
42 namespace
43 {
validate_arguments(const ITensorInfo * input,const ITensorInfo * input_squared,const ITensorInfo * output,const NormalizationLayerInfo & norm_info)44 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *input_squared, const ITensorInfo *output, const NormalizationLayerInfo &norm_info)
45 {
46     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, input_squared, output);
47     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
48     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
49 
50     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, input_squared);
51     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, input_squared);
52     ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd");
53 
54     // Checks performed when output is configured
55     if(output->total_size() != 0)
56     {
57         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
58         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
59         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
60     }
61 
62     return Status{};
63 }
64 
65 } // namespace
66 
NENormalizationLayerKernel()67 NENormalizationLayerKernel::NENormalizationLayerKernel()
68     : _func(nullptr), _input(nullptr), _input_squared(nullptr), _output(nullptr), _norm_info(NormType::IN_MAP_1D)
69 {
70 }
71 
configure(const ITensor * input,const ITensor * input_squared,ITensor * output,NormalizationLayerInfo norm_info)72 void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor *input_squared, ITensor *output, NormalizationLayerInfo norm_info)
73 {
74     ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_squared, output);
75     // Output tensor auto initialization if not yet initialized
76     auto_init_if_empty(*output->info(), *input->info());
77 
78     // Perform validation step
79     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), input_squared->info(), output->info(), norm_info));
80 
81     const unsigned int norm_idx = get_normalization_dimension_index(input->info()->data_layout(), norm_info);
82 
83     _input         = input;
84     _input_squared = input_squared;
85     _output        = output;
86     _norm_info     = norm_info;
87 
88     switch(_input->info()->data_type())
89     {
90         case DataType::F32:
91         {
92             switch(norm_idx)
93             {
94                 case 0:
95                 {
96                     if(norm_info.type() == NormType::IN_MAP_2D)
97                     {
98                         _func = &NENormalizationLayerKernel::normalize_float<float, 4, 0, true>;
99                     }
100                     else
101                     {
102                         _func = &NENormalizationLayerKernel::normalize_float<float, 4, 0, false>;
103                     }
104                     break;
105                 }
106                 case 1:
107                     if(norm_info.type() == NormType::IN_MAP_2D)
108                     {
109                         _func = &NENormalizationLayerKernel::normalize_float<float, 4, 1, true>;
110                     }
111                     else
112                     {
113                         _func = &NENormalizationLayerKernel::normalize_float<float, 4, 1, false>;
114                     }
115                     break;
116                 case 2:
117                     _func = &NENormalizationLayerKernel::normalize_float<float, 4, 2, false>;
118                     break;
119                 default:
120                     break;
121             }
122             break;
123         }
124 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
125         case DataType::F16:
126         {
127             switch(norm_idx)
128             {
129                 case 0:
130                 {
131                     if(norm_info.type() == NormType::IN_MAP_2D)
132                     {
133                         _func = &NENormalizationLayerKernel::normalize_float<float16_t, 8, 0, true>;
134                     }
135                     else
136                     {
137                         _func = &NENormalizationLayerKernel::normalize_float<float16_t, 8, 0, false>;
138                     }
139                     break;
140                 }
141                 case 1:
142                     if(norm_info.type() == NormType::IN_MAP_2D)
143                     {
144                         _func = &NENormalizationLayerKernel::normalize_float<float16_t, 8, 1, true>;
145                     }
146                     else
147                     {
148                         _func = &NENormalizationLayerKernel::normalize_float<float16_t, 8, 1, false>;
149                     }
150                     break;
151                 case 2:
152                     _func = &NENormalizationLayerKernel::normalize_float<float16_t, 8, 2, false>;
153                     break;
154                 default:
155                     break;
156             }
157             break;
158         }
159 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
160         default:
161             ARM_COMPUTE_ERROR("NOT SUPPORTED!");
162     }
163 
164     // Configure kernel window
165     Window win = calculate_max_window(*input->info(), Steps());
166     INEKernel::configure(win);
167 }
168 
169 template <typename T, unsigned int S, unsigned int dim, bool do_2D_norm>
normalize_float(const Window & window)170 void NENormalizationLayerKernel::normalize_float(const Window &window)
171 {
172     /** SIMD vector tag type. */
173     using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
174 
175     Window win(window);
176     win.set(Window::DimX, Window::Dimension(0, 1, 1));
177 
178     const auto window_start_x = static_cast<int>(window.x().start());
179     const auto window_end_x   = static_cast<int>(window.x().end());
180     const int  window_step_x  = S;
181 
182     Iterator input(_input, win);
183     Iterator input_squared(_input_squared, win);
184     Iterator output(_output, win);
185 
186     const int dim_y                      = _input->info()->data_layout() == DataLayout::NCHW ? 1 : 2;
187     const int radius                     = _norm_info.norm_size() / 2;
188     const int input_squared_stride_x     = _input_squared->info()->strides_in_bytes()[0];
189     const int input_squared_stride_slice = _input_squared->info()->strides_in_bytes()[dim];
190     const int input_squared_stride_row   = _input_squared->info()->strides_in_bytes()[dim_y];
191 
192     const int max_right  = _input->info()->dimension(dim) - 1;
193     const int max_bottom = _input->info()->dimension(dim_y) - 1;
194 
195     const auto coeff_vec = wrapper::vdup_n(static_cast<T>(_norm_info.scale_coeff()), ExactTagType{});
196     const auto beta_vec  = wrapper::vdup_n(static_cast<T>(_norm_info.beta()), ExactTagType{});
197     const auto kappa_vec = wrapper::vdup_n(static_cast<T>(_norm_info.kappa()), ExactTagType{});
198 
199     auto sequential_normalization = [&](const int x, const Coordinates & id, const int current_row, const int first_row, const int last_row, const T * input_ptr, const uint8_t *input_squared_start_ptr,
200                                         T * output_ptr)
201     {
202         const int current_slice = dim == 0 ? x : id[dim];
203         const int first_slice   = std::max(current_slice - radius, 0);
204         const int last_slice    = std::min(current_slice + radius, max_right);
205 
206         const uint8_t *const input_squared_x_ptr = input_squared_start_ptr + x * input_squared_stride_x;
207         // Accumulate 2D In-Map values
208         auto accu = static_cast<T>(0.f);
209         for(int j = first_row; j <= last_row; ++j)
210         {
211             // Compute row displacement
212             const uint8_t *const input_squared_ptr = input_squared_x_ptr + (j - current_row) * input_squared_stride_row;
213             for(int i = first_slice; i <= last_slice; ++i)
214             {
215                 accu += *reinterpret_cast<const T *>(input_squared_ptr + (i - current_slice) * input_squared_stride_slice);
216             }
217         }
218 
219         // Normalize
220         const auto normalized       = std::pow(accu * static_cast<T>(_norm_info.scale_coeff()) + static_cast<T>(_norm_info.kappa()), _norm_info.beta());
221         const auto normalized_pixel = (*(input_ptr + x)) / normalized;
222         *(output_ptr + x)           = normalized_pixel;
223     };
224 
225     execute_window_loop(win, [&](const Coordinates & id)
226     {
227         const auto input_ptr  = reinterpret_cast<const T *>(input.ptr());
228         auto       output_ptr = reinterpret_cast<T *>(output.ptr());
229 
230         // Get range to normalize
231         const int current_row = do_2D_norm ? id[dim_y] : 0;
232         const int first_row   = do_2D_norm ? std::max(current_row - radius, 0) : 0;
233         const int last_row    = do_2D_norm ? std::min(current_row + radius, max_bottom) : 0;
234 
235         int x = window_start_x;
236         // Compute serially starting elements for the case x dimension is width
237         for(; x < radius && x < window_end_x && dim == 0; ++x)
238         {
239             sequential_normalization(x, id, current_row, first_row, last_row, input_ptr, input_squared.ptr(), output_ptr);
240         }
241 
242         // Compute vectorized
243         for(; x <= window_end_x - window_step_x - radius; x += window_step_x)
244         {
245             const int current_slice = dim == 0 ? x : id[dim];
246             const int first_slice   = std::max(current_slice - radius, 0);
247             const int last_slice    = std::min(current_slice + radius, max_right);
248 
249             const uint8_t *const input_squared_x_ptr = input_squared.ptr() + x * input_squared_stride_x;
250             // Accumulate 2D In-Map values
251             auto accu = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
252             for(int j = first_row; j <= last_row; ++j)
253             {
254                 // Compute row displacement
255                 const uint8_t *const input_squared_ptr = input_squared_x_ptr + (j - current_row) * input_squared_stride_row;
256                 for(int i = first_slice; i <= last_slice; ++i)
257                 {
258                     accu = wrapper::vadd(accu, wrapper::vloadq(reinterpret_cast<const T *>(input_squared_ptr + (i - current_slice) * input_squared_stride_slice)));
259                 }
260             }
261 
262             // Normalize
263             const auto normalized       = wrapper::vpow(wrapper::vmla(kappa_vec, coeff_vec, accu), beta_vec);
264             const auto normalized_pixel = wrapper::vmul(wrapper::vloadq(input_ptr + x), wrapper::vinv(normalized));
265             wrapper::vstore(reinterpret_cast<T *>(output_ptr + x), normalized_pixel);
266         }
267 
268         // Compute left-over elements
269         for(; x < window_end_x; ++x)
270         {
271             sequential_normalization(x, id, current_row, first_row, last_row, input_ptr, input_squared.ptr(), output_ptr);
272         }
273     },
274     input, input_squared, output);
275 }
276 
validate(const ITensorInfo * input,const ITensorInfo * input_squared,const ITensorInfo * output,const NormalizationLayerInfo norm_info)277 Status NENormalizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *input_squared, const ITensorInfo *output, const NormalizationLayerInfo norm_info)
278 {
279     ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, input_squared, output, norm_info));
280 
281     return Status{};
282 }
283 
run(const Window & window,const ThreadInfo & info)284 void NENormalizationLayerKernel::run(const Window &window, const ThreadInfo &info)
285 {
286     ARM_COMPUTE_UNUSED(info);
287     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
288     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
289     ARM_COMPUTE_ERROR_ON(_func == nullptr);
290 
291     // Run function
292     (this->*_func)(window);
293 }
294 } // namespace arm_compute
295