1 /**
2 * Copyright 2020 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "src/runtime/kernel/arm/base/detection_post_process_base.h"
17 #include <cfloat>
18 #include <cmath>
19 #include <vector>
20 #include "schema/model_generated.h"
21 #include "src/kernel_registry.h"
22 #include "include/errorcode.h"
23 #include "nnacl/int8/quant_dtype_cast_int8.h"
24
25 using mindspore::kernel::KERNEL_ARCH;
26 using mindspore::lite::RET_ERROR;
27 using mindspore::lite::RET_NULL_PTR;
28 using mindspore::lite::RET_OK;
29 using mindspore::schema::PrimitiveType_DetectionPostProcess;
30
31 namespace mindspore::kernel {
// Partially sorts `indexes[0, num_values)` so that its first `num_to_sort` entries are the indices of the
// highest `scores`, in descending score order. Scores closer than FLT_EPSILON are treated as a tie and
// ordered by ascending index.
void PartialArgSort(const float *scores, int *indexes, int num_to_sort, int num_values) {
  const auto ranks_before = [scores](const int lhs, const int rhs) {
    const bool is_tie = std::abs(scores[lhs] - scores[rhs]) < FLT_EPSILON;
    return is_tie ? (lhs < rhs) : (scores[lhs] > scores[rhs]);
  };
  int *const sorted_end = indexes + num_to_sort;
  int *const range_end = indexes + num_values;
  std::partial_sort(indexes, sorted_end, range_end, ranks_before);
}
40
Init()41 int DetectionPostProcessBaseCPUKernel::Init() {
42 params_->decoded_boxes_ = nullptr;
43 params_->nms_candidate_ = nullptr;
44 params_->indexes_ = nullptr;
45 params_->scores_ = nullptr;
46 params_->all_class_indexes_ = nullptr;
47 params_->all_class_scores_ = nullptr;
48 params_->single_class_indexes_ = nullptr;
49 params_->selected_ = nullptr;
50 params_->anchors_ = nullptr;
51 auto anchor_tensor = in_tensors_.at(2);
52 CHECK_NULL_RETURN(anchor_tensor->data());
53 if (anchor_tensor->data_type() == kNumberTypeInt8) {
54 auto quant_param = anchor_tensor->quant_params().front();
55 auto anchor_int8 = reinterpret_cast<int8_t *>(anchor_tensor->data());
56 auto anchor_fp32 = new (std::nothrow) float[anchor_tensor->ElementsNum()];
57 if (anchor_fp32 == nullptr) {
58 MS_LOG(ERROR) << "Malloc anchor failed";
59 return RET_ERROR;
60 }
61 DoDequantizeInt8ToFp32(anchor_int8, anchor_fp32, quant_param.scale, quant_param.zeroPoint,
62 anchor_tensor->ElementsNum());
63 params_->anchors_ = anchor_fp32;
64 } else if (anchor_tensor->data_type() == kNumberTypeUInt8) {
65 auto quant_param = anchor_tensor->quant_params().front();
66 auto anchor_uint8 = reinterpret_cast<uint8_t *>(anchor_tensor->data());
67 auto anchor_fp32 = new (std::nothrow) float[anchor_tensor->ElementsNum()];
68 if (anchor_fp32 == nullptr) {
69 MS_LOG(ERROR) << "Malloc anchor failed";
70 return RET_ERROR;
71 }
72 DoDequantizeUInt8ToFp32(anchor_uint8, anchor_fp32, quant_param.scale, quant_param.zeroPoint,
73 anchor_tensor->ElementsNum());
74 params_->anchors_ = anchor_fp32;
75 } else if (anchor_tensor->data_type() == kNumberTypeFloat32 || anchor_tensor->data_type() == kNumberTypeFloat) {
76 params_->anchors_ = new (std::nothrow) float[anchor_tensor->ElementsNum()];
77 if (params_->anchors_ == nullptr) {
78 MS_LOG(ERROR) << "Malloc anchor failed";
79 return RET_ERROR;
80 }
81 memcpy(params_->anchors_, anchor_tensor->data(), anchor_tensor->Size());
82 } else {
83 MS_LOG(ERROR) << "unsupported anchor data type " << anchor_tensor->data_type();
84 return RET_ERROR;
85 }
86 return RET_OK;
87 }
88
~DetectionPostProcessBaseCPUKernel()89 DetectionPostProcessBaseCPUKernel::~DetectionPostProcessBaseCPUKernel() { delete[](params_->anchors_); }
90
ReSize()91 int DetectionPostProcessBaseCPUKernel::ReSize() { return RET_OK; }
92
NmsMultiClassesFastCoreRun(void * cdata,int task_id,float lhs_scale,float rhs_scale)93 int NmsMultiClassesFastCoreRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
94 auto KernelData = reinterpret_cast<DetectionPostProcessBaseCPUKernel *>(cdata);
95 int ret = NmsMultiClassesFastCore(KernelData->num_boxes_, KernelData->num_classes_with_bg_, KernelData->input_scores_,
96 PartialArgSort, KernelData->params_, task_id, KernelData->thread_num_);
97 if (ret != RET_OK) {
98 MS_LOG(ERROR) << "NmsMultiClassesFastCore error task_id[" << task_id << "] error_code[" << ret << "]";
99 return RET_ERROR;
100 }
101 return RET_OK;
102 }
103
FreeAllocatedBuffer()104 void DetectionPostProcessBaseCPUKernel::FreeAllocatedBuffer() {
105 if (params_->decoded_boxes_ != nullptr) {
106 ms_context_->allocator->Free(params_->decoded_boxes_);
107 params_->decoded_boxes_ = nullptr;
108 }
109 if (params_->nms_candidate_ != nullptr) {
110 ms_context_->allocator->Free(params_->nms_candidate_);
111 params_->nms_candidate_ = nullptr;
112 }
113 if (params_->indexes_ != nullptr) {
114 ms_context_->allocator->Free(params_->indexes_);
115 params_->indexes_ = nullptr;
116 }
117 if (params_->scores_ != nullptr) {
118 ms_context_->allocator->Free(params_->scores_);
119 params_->scores_ = nullptr;
120 }
121 if (params_->all_class_indexes_ != nullptr) {
122 ms_context_->allocator->Free(params_->all_class_indexes_);
123 params_->all_class_indexes_ = nullptr;
124 }
125 if (params_->all_class_scores_ != nullptr) {
126 ms_context_->allocator->Free(params_->all_class_scores_);
127 params_->all_class_scores_ = nullptr;
128 }
129 if (params_->single_class_indexes_ != nullptr) {
130 ms_context_->allocator->Free(params_->single_class_indexes_);
131 params_->single_class_indexes_ = nullptr;
132 }
133 if (params_->selected_ != nullptr) {
134 ms_context_->allocator->Free(params_->selected_);
135 params_->selected_ = nullptr;
136 }
137 }
138
ParamInit()139 int DetectionPostProcessBaseCPUKernel::ParamInit() {
140 num_boxes_ = in_tensors_.at(0)->shape().at(1);
141 num_classes_with_bg_ = in_tensors_.at(1)->shape().at(2);
142 params_->decoded_boxes_ = ms_context_->allocator->Malloc(num_boxes_ * DIMENSION_4D * sizeof(float));
143 if (params_->decoded_boxes_ == nullptr) {
144 MS_LOG(ERROR) << "malloc params->decoded_boxes_ failed.";
145 FreeAllocatedBuffer();
146 return RET_ERROR;
147 }
148 params_->nms_candidate_ = ms_context_->allocator->Malloc(num_boxes_ * sizeof(uint8_t));
149 if (params_->nms_candidate_ == nullptr) {
150 MS_LOG(ERROR) << "malloc params->nms_candidate_ failed.";
151 FreeAllocatedBuffer();
152 return RET_ERROR;
153 }
154 params_->selected_ = ms_context_->allocator->Malloc(num_boxes_ * sizeof(int));
155 if (params_->selected_ == nullptr) {
156 MS_LOG(ERROR) << "malloc params->selected_ failed.";
157 FreeAllocatedBuffer();
158 return RET_ERROR;
159 }
160 params_->single_class_indexes_ = ms_context_->allocator->Malloc(num_boxes_ * sizeof(int));
161 if (params_->single_class_indexes_ == nullptr) {
162 MS_LOG(ERROR) << "malloc params->single_class_indexes_ failed.";
163 FreeAllocatedBuffer();
164 return RET_ERROR;
165 }
166
167 if (params_->use_regular_nms_) {
168 params_->scores_ = ms_context_->allocator->Malloc((num_boxes_ + params_->max_detections_) * sizeof(float));
169 if (params_->scores_ == nullptr) {
170 MS_LOG(ERROR) << "malloc params->scores_ failed";
171 FreeAllocatedBuffer();
172 return RET_ERROR;
173 }
174 params_->indexes_ = ms_context_->allocator->Malloc((num_boxes_ + params_->max_detections_) * sizeof(int));
175 if (params_->indexes_ == nullptr) {
176 MS_LOG(ERROR) << "malloc params->indexes_ failed";
177 FreeAllocatedBuffer();
178 return RET_ERROR;
179 }
180 params_->all_class_scores_ =
181 ms_context_->allocator->Malloc((num_boxes_ + params_->max_detections_) * sizeof(float));
182 if (params_->all_class_scores_ == nullptr) {
183 MS_LOG(ERROR) << "malloc params->all_class_scores_ failed";
184 FreeAllocatedBuffer();
185 return RET_ERROR;
186 }
187 params_->all_class_indexes_ = ms_context_->allocator->Malloc((num_boxes_ + params_->max_detections_) * sizeof(int));
188 if (params_->all_class_indexes_ == nullptr) {
189 MS_LOG(ERROR) << "malloc params->all_class_indexes_ failed";
190 FreeAllocatedBuffer();
191 return RET_ERROR;
192 }
193 } else {
194 params_->scores_ = ms_context_->allocator->Malloc(num_boxes_ * sizeof(float));
195 if (params_->scores_ == nullptr) {
196 MS_LOG(ERROR) << "malloc params->scores_ failed";
197 FreeAllocatedBuffer();
198 return RET_ERROR;
199 }
200 params_->indexes_ = ms_context_->allocator->Malloc(num_boxes_ * params_->num_classes_ * sizeof(int));
201 if (!params_->indexes_) {
202 MS_LOG(ERROR) << "malloc params->indexes_ failed.";
203 FreeAllocatedBuffer();
204 return RET_ERROR;
205 }
206 }
207 return RET_OK;
208 }
209
Run()210 int DetectionPostProcessBaseCPUKernel::Run() {
211 MS_ASSERT(ms_context_->allocator != nullptr);
212 int status = GetInputData();
213 if (status != RET_OK) {
214 return status;
215 }
216 auto output_boxes = reinterpret_cast<float *>(out_tensors_.at(0)->data());
217 auto output_classes = reinterpret_cast<float *>(out_tensors_.at(1)->data());
218 auto output_scores = reinterpret_cast<float *>(out_tensors_.at(2)->data());
219 auto output_num = reinterpret_cast<float *>(out_tensors_.at(3)->data());
220 if (output_boxes == nullptr || output_classes == nullptr || output_scores == nullptr || output_num == nullptr) {
221 return RET_NULL_PTR;
222 }
223
224 if (ParamInit() != RET_OK) {
225 MS_LOG(ERROR) << "ParamInit error";
226 return status;
227 }
228
229 status = DecodeBoxes(num_boxes_, input_boxes_, params_->anchors_, params_);
230 if (status != RET_OK) {
231 MS_LOG(ERROR) << "DecodeBoxes error";
232 FreeAllocatedBuffer();
233 return status;
234 }
235
236 if (params_->use_regular_nms_) {
237 status = DetectionPostProcessRegular(num_boxes_, num_classes_with_bg_, input_scores_, output_boxes, output_classes,
238 output_scores, output_num, PartialArgSort, params_);
239 if (status != RET_OK) {
240 MS_LOG(ERROR) << "DetectionPostProcessRegular error error_code[" << status << "]";
241 FreeAllocatedBuffer();
242 return status;
243 }
244 } else {
245 status = ParallelLaunch(this->ms_context_, NmsMultiClassesFastCoreRun, this, op_parameter_->thread_num_);
246 if (status != RET_OK) {
247 MS_LOG(ERROR) << "NmsMultiClassesFastCoreRun error error_code[" << status << "]";
248 FreeAllocatedBuffer();
249 return status;
250 }
251 status = DetectionPostProcessFast(num_boxes_, num_classes_with_bg_, input_scores_,
252 reinterpret_cast<float *>(params_->decoded_boxes_), output_boxes, output_classes,
253 output_scores, output_num, PartialArgSort, params_);
254 if (status != RET_OK) {
255 MS_LOG(ERROR) << "DetectionPostProcessFast error error_code[" << status << "]";
256 FreeAllocatedBuffer();
257 return status;
258 }
259 }
260 FreeAllocatedBuffer();
261 return RET_OK;
262 }
263 } // namespace mindspore::kernel
264