1 /*
2  * Copyright (c) 2018-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "arm_compute/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.h"
25 
26 #include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h"
27 #include "arm_compute/runtime/Scheduler.h"
28 
29 namespace arm_compute
30 {
31 namespace
32 {
dequantize_tensor(const ITensor * input,ITensor * output)33 void dequantize_tensor(const ITensor *input, ITensor *output)
34 {
35     const UniformQuantizationInfo qinfo     = input->info()->quantization_info().uniform();
36     const DataType                data_type = input->info()->data_type();
37 
38     Window window;
39     window.use_tensor_dimensions(input->info()->tensor_shape());
40     Iterator input_it(input, window);
41     Iterator output_it(output, window);
42 
43     switch(data_type)
44     {
45         case DataType::QASYMM8:
46             execute_window_loop(window, [&](const Coordinates &)
47             {
48                 *reinterpret_cast<float *>(output_it.ptr()) = dequantize(*reinterpret_cast<const uint8_t *>(input_it.ptr()), qinfo.scale, qinfo.offset);
49             },
50             input_it, output_it);
51             break;
52         case DataType::QASYMM8_SIGNED:
53             execute_window_loop(window, [&](const Coordinates &)
54             {
55                 *reinterpret_cast<float *>(output_it.ptr()) = dequantize_qasymm8_signed(*reinterpret_cast<const int8_t *>(input_it.ptr()), qinfo);
56             },
57             input_it, output_it);
58             break;
59         case DataType::QASYMM16:
60             execute_window_loop(window, [&](const Coordinates &)
61             {
62                 *reinterpret_cast<float *>(output_it.ptr()) = dequantize(*reinterpret_cast<const uint16_t *>(input_it.ptr()), qinfo.scale, qinfo.offset);
63             },
64             input_it, output_it);
65             break;
66         default:
67             ARM_COMPUTE_ERROR("Unsupported data type");
68     }
69 }
70 
quantize_tensor(const ITensor * input,ITensor * output)71 void quantize_tensor(const ITensor *input, ITensor *output)
72 {
73     const UniformQuantizationInfo qinfo     = output->info()->quantization_info().uniform();
74     const DataType                data_type = output->info()->data_type();
75 
76     Window window;
77     window.use_tensor_dimensions(input->info()->tensor_shape());
78     Iterator input_it(input, window);
79     Iterator output_it(output, window);
80 
81     switch(data_type)
82     {
83         case DataType::QASYMM8:
84             execute_window_loop(window, [&](const Coordinates &)
85             {
86                 *reinterpret_cast<uint8_t *>(output_it.ptr()) = quantize_qasymm8(*reinterpret_cast<const float *>(input_it.ptr()), qinfo);
87             },
88             input_it, output_it);
89             break;
90         case DataType::QASYMM8_SIGNED:
91             execute_window_loop(window, [&](const Coordinates &)
92             {
93                 *reinterpret_cast<int8_t *>(output_it.ptr()) = quantize_qasymm8_signed(*reinterpret_cast<const float *>(input_it.ptr()), qinfo);
94             },
95             input_it, output_it);
96             break;
97         case DataType::QASYMM16:
98             execute_window_loop(window, [&](const Coordinates &)
99             {
100                 *reinterpret_cast<uint16_t *>(output_it.ptr()) = quantize_qasymm16(*reinterpret_cast<const float *>(input_it.ptr()), qinfo);
101             },
102             input_it, output_it);
103             break;
104         default:
105             ARM_COMPUTE_ERROR("Unsupported data type");
106     }
107 }
108 } // namespace
109 
// Constructor: hands the (optional) memory manager to the internal memory group
// and default-initializes the kernel, the borrowed I/O tensor pointers and the
// intermediate F32 tensors used on the quantized path.
CPPBoxWithNonMaximaSuppressionLimit::CPPBoxWithNonMaximaSuppressionLimit(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)),
      _box_with_nms_limit_kernel(),
      // Borrowed input/output tensors, set in configure()
      _scores_in(),
      _boxes_in(),
      _batch_splits_in(),
      _scores_out(),
      _boxes_out(),
      _classes(),
      _batch_splits_out(),
      _keeps(),
      // Intermediate F32 tensors, only used when the inputs are quantized
      _scores_in_f32(),
      _boxes_in_f32(),
      _batch_splits_in_f32(),
      _scores_out_f32(),
      _boxes_out_f32(),
      _classes_f32(),
      _batch_splits_out_f32(),
      _keeps_f32(),
      _is_qasymm8(false)
{
}
132 
// Configures the function. On float inputs the kernel is configured directly on
// the user tensors. On quantized inputs (QASYMM8 / QASYMM8_SIGNED scores) the
// function sets up intermediate F32 tensors and configures the kernel on those;
// run() then dequantizes the inputs and re-quantizes the outputs around the
// kernel execution.
// NOTE: the memory-group protocol requires manage() before the kernel is
// configured and allocate() after — do not reorder these phases.
void CPPBoxWithNonMaximaSuppressionLimit::configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes,
                                                    ITensor *batch_splits_out, ITensor *keeps, ITensor *keeps_size, const BoxNMSLimitInfo info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(scores_in, boxes_in, scores_out, boxes_out, classes);

    _is_qasymm8 = scores_in->info()->data_type() == DataType::QASYMM8 || scores_in->info()->data_type() == DataType::QASYMM8_SIGNED;

    // Keep borrowed pointers for run(); batch_splits_in, batch_splits_out and keeps may be nullptr.
    _scores_in        = scores_in;
    _boxes_in         = boxes_in;
    _batch_splits_in  = batch_splits_in;
    _scores_out       = scores_out;
    _boxes_out        = boxes_out;
    _classes          = classes;
    _batch_splits_out = batch_splits_out;
    _keeps            = keeps;

    if(_is_qasymm8)
    {
        // Manage intermediate buffers
        _memory_group.manage(&_scores_in_f32);
        _memory_group.manage(&_boxes_in_f32);
        _memory_group.manage(&_scores_out_f32);
        _memory_group.manage(&_boxes_out_f32);
        _memory_group.manage(&_classes_f32);
        // Each intermediate tensor mirrors its quantized counterpart's shape, with F32 data type.
        _scores_in_f32.allocator()->init(scores_in->info()->clone()->set_data_type(DataType::F32));
        _boxes_in_f32.allocator()->init(boxes_in->info()->clone()->set_data_type(DataType::F32));
        if(batch_splits_in != nullptr)
        {
            _memory_group.manage(&_batch_splits_in_f32);
            _batch_splits_in_f32.allocator()->init(batch_splits_in->info()->clone()->set_data_type(DataType::F32));
        }
        _scores_out_f32.allocator()->init(scores_out->info()->clone()->set_data_type(DataType::F32));
        _boxes_out_f32.allocator()->init(boxes_out->info()->clone()->set_data_type(DataType::F32));
        _classes_f32.allocator()->init(classes->info()->clone()->set_data_type(DataType::F32));
        if(batch_splits_out != nullptr)
        {
            _memory_group.manage(&_batch_splits_out_f32);
            _batch_splits_out_f32.allocator()->init(batch_splits_out->info()->clone()->set_data_type(DataType::F32));
        }
        if(keeps != nullptr)
        {
            _memory_group.manage(&_keeps_f32);
            _keeps_f32.allocator()->init(keeps->info()->clone()->set_data_type(DataType::F32));
        }

        // The kernel always operates on the F32 intermediates on the quantized path.
        _box_with_nms_limit_kernel.configure(&_scores_in_f32, &_boxes_in_f32, (batch_splits_in != nullptr) ? &_batch_splits_in_f32 : nullptr,
                                             &_scores_out_f32, &_boxes_out_f32, &_classes_f32,
                                             (batch_splits_out != nullptr) ? &_batch_splits_out_f32 : nullptr, (keeps != nullptr) ? &_keeps_f32 : nullptr,
                                             keeps_size, info);
    }
    else
    {
        _box_with_nms_limit_kernel.configure(scores_in, boxes_in, batch_splits_in, scores_out, boxes_out, classes, batch_splits_out, keeps, keeps_size, info);
    }

    // Allocation must happen after the kernel configuration above.
    if(_is_qasymm8)
    {
        _scores_in_f32.allocator()->allocate();
        _boxes_in_f32.allocator()->allocate();
        if(_batch_splits_in != nullptr)
        {
            _batch_splits_in_f32.allocator()->allocate();
        }
        _scores_out_f32.allocator()->allocate();
        _boxes_out_f32.allocator()->allocate();
        _classes_f32.allocator()->allocate();
        if(batch_splits_out != nullptr)
        {
            _batch_splits_out_f32.allocator()->allocate();
        }
        if(keeps != nullptr)
        {
            _keeps_f32.allocator()->allocate();
        }
    }
}
209 
validate(const ITensorInfo * scores_in,const ITensorInfo * boxes_in,const ITensorInfo * batch_splits_in,const ITensorInfo * scores_out,const ITensorInfo * boxes_out,const ITensorInfo * classes,const ITensorInfo * batch_splits_out,const ITensorInfo * keeps,const ITensorInfo * keeps_size,const BoxNMSLimitInfo info)210 Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes,
211                 const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)
212 {
213     ARM_COMPUTE_UNUSED(batch_splits_in, batch_splits_out, keeps, keeps_size, info);
214     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(scores_in, boxes_in, scores_out, boxes_out, classes);
215     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scores_in, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
216 
217     const bool is_qasymm8 = scores_in->data_type() == DataType::QASYMM8 || scores_in->data_type() == DataType::QASYMM8_SIGNED;
218     if(is_qasymm8)
219     {
220         ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(boxes_in, 1, DataType::QASYMM16);
221         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(boxes_in, boxes_out);
222         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(boxes_in, boxes_out);
223         const UniformQuantizationInfo boxes_qinfo = boxes_in->quantization_info().uniform();
224         ARM_COMPUTE_RETURN_ERROR_ON(boxes_qinfo.scale != 0.125f);
225         ARM_COMPUTE_RETURN_ERROR_ON(boxes_qinfo.offset != 0);
226     }
227 
228     return Status{};
229 }
230 
run()231 void CPPBoxWithNonMaximaSuppressionLimit::run()
232 {
233     // Acquire all the temporaries
234     MemoryGroupResourceScope scope_mg(_memory_group);
235 
236     if(_is_qasymm8)
237     {
238         dequantize_tensor(_scores_in, &_scores_in_f32);
239         dequantize_tensor(_boxes_in, &_boxes_in_f32);
240         if(_batch_splits_in != nullptr)
241         {
242             dequantize_tensor(_batch_splits_in, &_batch_splits_in_f32);
243         }
244     }
245 
246     Scheduler::get().schedule(&_box_with_nms_limit_kernel, Window::DimY);
247 
248     if(_is_qasymm8)
249     {
250         quantize_tensor(&_scores_out_f32, _scores_out);
251         quantize_tensor(&_boxes_out_f32, _boxes_out);
252         quantize_tensor(&_classes_f32, _classes);
253         if(_batch_splits_out != nullptr)
254         {
255             quantize_tensor(&_batch_splits_out_f32, _batch_splits_out);
256         }
257         if(_keeps != nullptr)
258         {
259             quantize_tensor(&_keeps_f32, _keeps);
260         }
261     }
262 }
263 } // namespace arm_compute
264