• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2019-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/core/NEON/kernels/NEBoundingBoxTransformKernel.h"
25 
26 #include "arm_compute/core/Helpers.h"
27 #include "arm_compute/core/TensorInfo.h"
28 #include "arm_compute/core/Utils.h"
29 #include "arm_compute/core/Window.h"
30 #include "src/core/AccessWindowStatic.h"
31 #include "src/core/CPP/Validate.h"
32 #include "src/core/helpers/AutoConfiguration.h"
33 #include "src/core/helpers/WindowHelpers.h"
34 
35 #include <arm_neon.h>
36 
37 namespace arm_compute
38 {
39 namespace
40 {
validate_arguments(const ITensorInfo * boxes,const ITensorInfo * pred_boxes,const ITensorInfo * deltas,const BoundingBoxTransformInfo & info)41 Status validate_arguments(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info)
42 {
43     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(boxes, pred_boxes, deltas);
44     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(boxes);
45     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(boxes, DataType::QASYMM16, DataType::F32, DataType::F16);
46     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(deltas, DataType::QASYMM8, DataType::F32, DataType::F16);
47     ARM_COMPUTE_RETURN_ERROR_ON(deltas->tensor_shape()[1] != boxes->tensor_shape()[1]);
48     ARM_COMPUTE_RETURN_ERROR_ON(deltas->tensor_shape()[0] % 4 != 0);
49     ARM_COMPUTE_RETURN_ERROR_ON(boxes->tensor_shape()[0] != 4);
50     ARM_COMPUTE_RETURN_ERROR_ON(deltas->num_dimensions() > 2);
51     ARM_COMPUTE_RETURN_ERROR_ON(boxes->num_dimensions() > 2);
52     ARM_COMPUTE_RETURN_ERROR_ON(info.scale() <= 0);
53 
54     if(boxes->data_type() == DataType::QASYMM16)
55     {
56         ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(deltas, 1, DataType::QASYMM8);
57         const UniformQuantizationInfo deltas_qinfo = deltas->quantization_info().uniform();
58         ARM_COMPUTE_RETURN_ERROR_ON(deltas_qinfo.scale != 0.125f);
59         ARM_COMPUTE_RETURN_ERROR_ON(deltas_qinfo.offset != 0);
60     }
61     else
62     {
63         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(boxes, deltas);
64     }
65 
66     if(pred_boxes->total_size() > 0)
67     {
68         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(pred_boxes->tensor_shape(), deltas->tensor_shape());
69         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(pred_boxes, deltas);
70         ARM_COMPUTE_RETURN_ERROR_ON(pred_boxes->num_dimensions() > 2);
71         if(pred_boxes->data_type() == DataType::QASYMM16)
72         {
73             const UniformQuantizationInfo pred_qinfo = pred_boxes->quantization_info().uniform();
74             ARM_COMPUTE_RETURN_ERROR_ON(pred_qinfo.scale != 0.125f);
75             ARM_COMPUTE_RETURN_ERROR_ON(pred_qinfo.offset != 0);
76         }
77     }
78 
79     return Status{};
80 }
81 } // namespace
82 
NEBoundingBoxTransformKernel()83 NEBoundingBoxTransformKernel::NEBoundingBoxTransformKernel()
84     : _boxes(nullptr), _pred_boxes(nullptr), _deltas(nullptr), _bbinfo(0, 0, 0)
85 {
86 }
87 
configure(const ITensor * boxes,ITensor * pred_boxes,const ITensor * deltas,const BoundingBoxTransformInfo & info)88 void NEBoundingBoxTransformKernel::configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info)
89 {
90     ARM_COMPUTE_ERROR_ON_NULLPTR(boxes, pred_boxes, deltas);
91     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(boxes->info(), pred_boxes->info(), deltas->info(), info));
92 
93     // Configure kernel window
94     auto_init_if_empty(*pred_boxes->info(), deltas->info()->clone()->set_data_type(boxes->info()->data_type()).set_quantization_info(boxes->info()->quantization_info()));
95 
96     // Set instance variables
97     _boxes      = boxes;
98     _pred_boxes = pred_boxes;
99     _deltas     = deltas;
100     _bbinfo     = info;
101 
102     const unsigned int num_boxes = boxes->info()->dimension(1);
103     Window             win       = calculate_max_window(*pred_boxes->info(), Steps());
104     Coordinates        coord;
105     coord.set_num_dimensions(pred_boxes->info()->num_dimensions());
106     pred_boxes->info()->set_valid_region(ValidRegion(coord, pred_boxes->info()->tensor_shape()));
107     win.set(Window::DimX, Window::Dimension(0, 1u));
108     win.set(Window::DimY, Window::Dimension(0, num_boxes));
109 
110     INEKernel::configure(win);
111 }
112 
validate(const ITensorInfo * boxes,const ITensorInfo * pred_boxes,const ITensorInfo * deltas,const BoundingBoxTransformInfo & info)113 Status NEBoundingBoxTransformKernel::validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info)
114 {
115     ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(boxes, pred_boxes, deltas, info));
116     return Status{};
117 }
118 
119 template <>
internal_run(const Window & window)120 void NEBoundingBoxTransformKernel::internal_run<uint16_t>(const Window &window)
121 {
122     const size_t num_classes  = _deltas->info()->tensor_shape()[0] >> 2;
123     const size_t deltas_width = _deltas->info()->tensor_shape()[0];
124     const int    img_h        = std::floor(_bbinfo.img_height() / _bbinfo.scale() + 0.5f);
125     const int    img_w        = std::floor(_bbinfo.img_width() / _bbinfo.scale() + 0.5f);
126 
127     const auto scale_after  = (_bbinfo.apply_scale() ? _bbinfo.scale() : 1.f);
128     const auto scale_before = _bbinfo.scale();
129     const auto offset       = (_bbinfo.correct_transform_coords() ? 1.f : 0.f);
130 
131     auto pred_ptr  = reinterpret_cast<uint16_t *>(_pred_boxes->buffer() + _pred_boxes->info()->offset_first_element_in_bytes());
132     auto delta_ptr = reinterpret_cast<uint8_t *>(_deltas->buffer() + _deltas->info()->offset_first_element_in_bytes());
133 
134     const auto boxes_qinfo  = _boxes->info()->quantization_info().uniform();
135     const auto deltas_qinfo = _deltas->info()->quantization_info().uniform();
136     const auto pred_qinfo   = _pred_boxes->info()->quantization_info().uniform();
137 
138     Iterator box_it(_boxes, window);
139     execute_window_loop(window, [&](const Coordinates & id)
140     {
141         const auto  ptr    = reinterpret_cast<uint16_t *>(box_it.ptr());
142         const auto  b0     = dequantize_qasymm16(*ptr, boxes_qinfo);
143         const auto  b1     = dequantize_qasymm16(*(ptr + 1), boxes_qinfo);
144         const auto  b2     = dequantize_qasymm16(*(ptr + 2), boxes_qinfo);
145         const auto  b3     = dequantize_qasymm16(*(ptr + 3), boxes_qinfo);
146         const float width  = (b2 / scale_before) - (b0 / scale_before) + 1.f;
147         const float height = (b3 / scale_before) - (b1 / scale_before) + 1.f;
148         const float ctr_x  = (b0 / scale_before) + 0.5f * width;
149         const float ctr_y  = (b1 / scale_before) + 0.5f * height;
150         for(size_t j = 0; j < num_classes; ++j)
151         {
152             // Extract deltas
153             const size_t delta_id = id.y() * deltas_width + 4u * j;
154             const float  dx       = dequantize_qasymm8(delta_ptr[delta_id], deltas_qinfo) / _bbinfo.weights()[0];
155             const float  dy       = dequantize_qasymm8(delta_ptr[delta_id + 1], deltas_qinfo) / _bbinfo.weights()[1];
156             float        dw       = dequantize_qasymm8(delta_ptr[delta_id + 2], deltas_qinfo) / _bbinfo.weights()[2];
157             float        dh       = dequantize_qasymm8(delta_ptr[delta_id + 3], deltas_qinfo) / _bbinfo.weights()[3];
158             // Clip dw and dh
159             dw = std::min(dw, _bbinfo.bbox_xform_clip());
160             dh = std::min(dh, _bbinfo.bbox_xform_clip());
161             // Determine the predictions
162             const float pred_ctr_x = dx * width + ctr_x;
163             const float pred_ctr_y = dy * height + ctr_y;
164             const float pred_w     = std::exp(dw) * width;
165             const float pred_h     = std::exp(dh) * height;
166             // Store the prediction into the output tensor
167             pred_ptr[delta_id]     = quantize_qasymm16(scale_after * utility::clamp<float>(pred_ctr_x - 0.5f * pred_w, 0.f, img_w - 1.f), pred_qinfo);
168             pred_ptr[delta_id + 1] = quantize_qasymm16(scale_after * utility::clamp<float>(pred_ctr_y - 0.5f * pred_h, 0.f, img_h - 1.f), pred_qinfo);
169             pred_ptr[delta_id + 2] = quantize_qasymm16(scale_after * utility::clamp<float>(pred_ctr_x + 0.5f * pred_w - offset, 0.f, img_w - 1.f), pred_qinfo);
170             pred_ptr[delta_id + 3] = quantize_qasymm16(scale_after * utility::clamp<float>(pred_ctr_y + 0.5f * pred_h - offset, 0.f, img_h - 1.f), pred_qinfo);
171         }
172     },
173     box_it);
174 }
175 
176 template <typename T>
internal_run(const Window & window)177 void NEBoundingBoxTransformKernel::internal_run(const Window &window)
178 {
179     const size_t num_classes  = _deltas->info()->tensor_shape()[0] >> 2;
180     const size_t deltas_width = _deltas->info()->tensor_shape()[0];
181     const int    img_h        = std::floor(_bbinfo.img_height() / _bbinfo.scale() + 0.5f);
182     const int    img_w        = std::floor(_bbinfo.img_width() / _bbinfo.scale() + 0.5f);
183 
184     const auto scale_after  = (_bbinfo.apply_scale() ? T(_bbinfo.scale()) : T(1));
185     const auto scale_before = T(_bbinfo.scale());
186     ARM_COMPUTE_ERROR_ON(scale_before <= 0);
187     const auto offset = (_bbinfo.correct_transform_coords() ? T(1.f) : T(0.f));
188 
189     auto pred_ptr  = reinterpret_cast<T *>(_pred_boxes->buffer() + _pred_boxes->info()->offset_first_element_in_bytes());
190     auto delta_ptr = reinterpret_cast<T *>(_deltas->buffer() + _deltas->info()->offset_first_element_in_bytes());
191 
192     Iterator box_it(_boxes, window);
193     execute_window_loop(window, [&](const Coordinates & id)
194     {
195         const auto ptr    = reinterpret_cast<T *>(box_it.ptr());
196         const auto b0     = *ptr;
197         const auto b1     = *(ptr + 1);
198         const auto b2     = *(ptr + 2);
199         const auto b3     = *(ptr + 3);
200         const T    width  = (b2 / scale_before) - (b0 / scale_before) + T(1.f);
201         const T    height = (b3 / scale_before) - (b1 / scale_before) + T(1.f);
202         const T    ctr_x  = (b0 / scale_before) + T(0.5f) * width;
203         const T    ctr_y  = (b1 / scale_before) + T(0.5f) * height;
204         for(size_t j = 0; j < num_classes; ++j)
205         {
206             // Extract deltas
207             const size_t delta_id = id.y() * deltas_width + 4u * j;
208             const T      dx       = delta_ptr[delta_id] / T(_bbinfo.weights()[0]);
209             const T      dy       = delta_ptr[delta_id + 1] / T(_bbinfo.weights()[1]);
210             T            dw       = delta_ptr[delta_id + 2] / T(_bbinfo.weights()[2]);
211             T            dh       = delta_ptr[delta_id + 3] / T(_bbinfo.weights()[3]);
212             // Clip dw and dh
213             dw = std::min(dw, T(_bbinfo.bbox_xform_clip()));
214             dh = std::min(dh, T(_bbinfo.bbox_xform_clip()));
215             // Determine the predictions
216             const T pred_ctr_x = dx * width + ctr_x;
217             const T pred_ctr_y = dy * height + ctr_y;
218             const T pred_w     = std::exp(dw) * width;
219             const T pred_h     = std::exp(dh) * height;
220             // Store the prediction into the output tensor
221             pred_ptr[delta_id]     = scale_after * utility::clamp<T>(pred_ctr_x - T(0.5f) * pred_w, T(0), T(img_w - 1));
222             pred_ptr[delta_id + 1] = scale_after * utility::clamp<T>(pred_ctr_y - T(0.5f) * pred_h, T(0), T(img_h - 1));
223             pred_ptr[delta_id + 2] = scale_after * utility::clamp<T>(pred_ctr_x + T(0.5f) * pred_w - offset, T(0), T(img_w - 1));
224             pred_ptr[delta_id + 3] = scale_after * utility::clamp<T>(pred_ctr_y + T(0.5f) * pred_h - offset, T(0), T(img_h - 1));
225         }
226     },
227     box_it);
228 }
229 
run(const Window & window,const ThreadInfo & info)230 void NEBoundingBoxTransformKernel::run(const Window &window, const ThreadInfo &info)
231 {
232     ARM_COMPUTE_UNUSED(info);
233     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
234     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
235     switch(_boxes->info()->data_type())
236     {
237         case DataType::F32:
238         {
239             internal_run<float>(window);
240             break;
241         }
242         case DataType::QASYMM16:
243         {
244             internal_run<uint16_t>(window);
245             break;
246         }
247 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
248         case DataType::F16:
249         {
250             internal_run<float16_t>(window);
251             break;
252         }
253 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
254         default:
255         {
256             ARM_COMPUTE_ERROR("Data type not supported");
257         }
258     }
259 }
260 } // namespace arm_compute
261