• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "src/runtime/kernel/arm/base/detection_post_process_base.h"
17 #include <cfloat>
18 #include <cmath>
19 #include <vector>
20 #include "schema/model_generated.h"
21 #include "src/kernel_registry.h"
22 #include "include/errorcode.h"
23 #include "nnacl/int8/quant_dtype_cast_int8.h"
24 
25 using mindspore::kernel::KERNEL_ARCH;
26 using mindspore::lite::RET_ERROR;
27 using mindspore::lite::RET_NULL_PTR;
28 using mindspore::lite::RET_OK;
29 using mindspore::schema::PrimitiveType_DetectionPostProcess;
30 
31 namespace mindspore::kernel {
// Partially sorts `indexes` so that its first `num_to_sort` entries refer to the
// highest values in `scores`, in descending score order.
//
// scores      - score table, addressed through the values stored in `indexes`.
// indexes     - array of `num_values` indices into `scores`; reordered in place.
// num_to_sort - number of leading entries that must end up sorted. Values larger
//               than `num_values` are clamped, because std::partial_sort requires
//               the middle iterator to lie inside [first, last].
// num_values  - number of entries in `indexes`.
//
// Null pointers or non-positive counts leave `indexes` untouched.
void PartialArgSort(const float *scores, int *indexes, int num_to_sort, int num_values) {
  if (scores == nullptr || indexes == nullptr || num_to_sort <= 0 || num_values <= 0) {
    return;
  }
  if (num_to_sort > num_values) {
    num_to_sort = num_values;
  }
  // Scores closer than FLT_EPSILON are treated as ties and ordered by ascending
  // index so the result does not depend on the initial permutation.
  // NOTE(review): an epsilon-based tie is not transitive, so for chains of
  // near-equal scores this predicate is not a strict weak ordering; kept as-is
  // to preserve the established output ordering.
  std::partial_sort(indexes, indexes + num_to_sort, indexes + num_values, [scores](const int lhs, const int rhs) {
    const float lhs_score = scores[lhs];
    const float rhs_score = scores[rhs];
    if (std::abs(lhs_score - rhs_score) < FLT_EPSILON) {
      return lhs < rhs;
    }
    return lhs_score > rhs_score;
  });
}
40 
Init()41 int DetectionPostProcessBaseCPUKernel::Init() {
42   params_->decoded_boxes_ = nullptr;
43   params_->nms_candidate_ = nullptr;
44   params_->indexes_ = nullptr;
45   params_->scores_ = nullptr;
46   params_->all_class_indexes_ = nullptr;
47   params_->all_class_scores_ = nullptr;
48   params_->single_class_indexes_ = nullptr;
49   params_->selected_ = nullptr;
50   params_->anchors_ = nullptr;
51   auto anchor_tensor = in_tensors_.at(2);
52   CHECK_NULL_RETURN(anchor_tensor->data());
53   if (anchor_tensor->data_type() == kNumberTypeInt8) {
54     auto quant_param = anchor_tensor->quant_params().front();
55     auto anchor_int8 = reinterpret_cast<int8_t *>(anchor_tensor->data());
56     auto anchor_fp32 = new (std::nothrow) float[anchor_tensor->ElementsNum()];
57     if (anchor_fp32 == nullptr) {
58       MS_LOG(ERROR) << "Malloc anchor failed";
59       return RET_ERROR;
60     }
61     DoDequantizeInt8ToFp32(anchor_int8, anchor_fp32, quant_param.scale, quant_param.zeroPoint,
62                            anchor_tensor->ElementsNum());
63     params_->anchors_ = anchor_fp32;
64   } else if (anchor_tensor->data_type() == kNumberTypeUInt8) {
65     auto quant_param = anchor_tensor->quant_params().front();
66     auto anchor_uint8 = reinterpret_cast<uint8_t *>(anchor_tensor->data());
67     auto anchor_fp32 = new (std::nothrow) float[anchor_tensor->ElementsNum()];
68     if (anchor_fp32 == nullptr) {
69       MS_LOG(ERROR) << "Malloc anchor failed";
70       return RET_ERROR;
71     }
72     DoDequantizeUInt8ToFp32(anchor_uint8, anchor_fp32, quant_param.scale, quant_param.zeroPoint,
73                             anchor_tensor->ElementsNum());
74     params_->anchors_ = anchor_fp32;
75   } else if (anchor_tensor->data_type() == kNumberTypeFloat32 || anchor_tensor->data_type() == kNumberTypeFloat) {
76     params_->anchors_ = new (std::nothrow) float[anchor_tensor->ElementsNum()];
77     if (params_->anchors_ == nullptr) {
78       MS_LOG(ERROR) << "Malloc anchor failed";
79       return RET_ERROR;
80     }
81     memcpy(params_->anchors_, anchor_tensor->data(), anchor_tensor->Size());
82   } else {
83     MS_LOG(ERROR) << "unsupported anchor data type " << anchor_tensor->data_type();
84     return RET_ERROR;
85   }
86   return RET_OK;
87 }
88 
~DetectionPostProcessBaseCPUKernel()89 DetectionPostProcessBaseCPUKernel::~DetectionPostProcessBaseCPUKernel() { delete[](params_->anchors_); }
90 
ReSize()91 int DetectionPostProcessBaseCPUKernel::ReSize() { return RET_OK; }
92 
NmsMultiClassesFastCoreRun(void * cdata,int task_id,float lhs_scale,float rhs_scale)93 int NmsMultiClassesFastCoreRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
94   auto KernelData = reinterpret_cast<DetectionPostProcessBaseCPUKernel *>(cdata);
95   int ret = NmsMultiClassesFastCore(KernelData->num_boxes_, KernelData->num_classes_with_bg_, KernelData->input_scores_,
96                                     PartialArgSort, KernelData->params_, task_id, KernelData->thread_num_);
97   if (ret != RET_OK) {
98     MS_LOG(ERROR) << "NmsMultiClassesFastCore error task_id[" << task_id << "] error_code[" << ret << "]";
99     return RET_ERROR;
100   }
101   return RET_OK;
102 }
103 
FreeAllocatedBuffer()104 void DetectionPostProcessBaseCPUKernel::FreeAllocatedBuffer() {
105   if (params_->decoded_boxes_ != nullptr) {
106     ms_context_->allocator->Free(params_->decoded_boxes_);
107     params_->decoded_boxes_ = nullptr;
108   }
109   if (params_->nms_candidate_ != nullptr) {
110     ms_context_->allocator->Free(params_->nms_candidate_);
111     params_->nms_candidate_ = nullptr;
112   }
113   if (params_->indexes_ != nullptr) {
114     ms_context_->allocator->Free(params_->indexes_);
115     params_->indexes_ = nullptr;
116   }
117   if (params_->scores_ != nullptr) {
118     ms_context_->allocator->Free(params_->scores_);
119     params_->scores_ = nullptr;
120   }
121   if (params_->all_class_indexes_ != nullptr) {
122     ms_context_->allocator->Free(params_->all_class_indexes_);
123     params_->all_class_indexes_ = nullptr;
124   }
125   if (params_->all_class_scores_ != nullptr) {
126     ms_context_->allocator->Free(params_->all_class_scores_);
127     params_->all_class_scores_ = nullptr;
128   }
129   if (params_->single_class_indexes_ != nullptr) {
130     ms_context_->allocator->Free(params_->single_class_indexes_);
131     params_->single_class_indexes_ = nullptr;
132   }
133   if (params_->selected_ != nullptr) {
134     ms_context_->allocator->Free(params_->selected_);
135     params_->selected_ = nullptr;
136   }
137 }
138 
ParamInit()139 int DetectionPostProcessBaseCPUKernel::ParamInit() {
140   num_boxes_ = in_tensors_.at(0)->shape().at(1);
141   num_classes_with_bg_ = in_tensors_.at(1)->shape().at(2);
142   params_->decoded_boxes_ = ms_context_->allocator->Malloc(num_boxes_ * DIMENSION_4D * sizeof(float));
143   if (params_->decoded_boxes_ == nullptr) {
144     MS_LOG(ERROR) << "malloc params->decoded_boxes_ failed.";
145     FreeAllocatedBuffer();
146     return RET_ERROR;
147   }
148   params_->nms_candidate_ = ms_context_->allocator->Malloc(num_boxes_ * sizeof(uint8_t));
149   if (params_->nms_candidate_ == nullptr) {
150     MS_LOG(ERROR) << "malloc params->nms_candidate_ failed.";
151     FreeAllocatedBuffer();
152     return RET_ERROR;
153   }
154   params_->selected_ = ms_context_->allocator->Malloc(num_boxes_ * sizeof(int));
155   if (params_->selected_ == nullptr) {
156     MS_LOG(ERROR) << "malloc params->selected_ failed.";
157     FreeAllocatedBuffer();
158     return RET_ERROR;
159   }
160   params_->single_class_indexes_ = ms_context_->allocator->Malloc(num_boxes_ * sizeof(int));
161   if (params_->single_class_indexes_ == nullptr) {
162     MS_LOG(ERROR) << "malloc params->single_class_indexes_ failed.";
163     FreeAllocatedBuffer();
164     return RET_ERROR;
165   }
166 
167   if (params_->use_regular_nms_) {
168     params_->scores_ = ms_context_->allocator->Malloc((num_boxes_ + params_->max_detections_) * sizeof(float));
169     if (params_->scores_ == nullptr) {
170       MS_LOG(ERROR) << "malloc params->scores_ failed";
171       FreeAllocatedBuffer();
172       return RET_ERROR;
173     }
174     params_->indexes_ = ms_context_->allocator->Malloc((num_boxes_ + params_->max_detections_) * sizeof(int));
175     if (params_->indexes_ == nullptr) {
176       MS_LOG(ERROR) << "malloc params->indexes_ failed";
177       FreeAllocatedBuffer();
178       return RET_ERROR;
179     }
180     params_->all_class_scores_ =
181       ms_context_->allocator->Malloc((num_boxes_ + params_->max_detections_) * sizeof(float));
182     if (params_->all_class_scores_ == nullptr) {
183       MS_LOG(ERROR) << "malloc params->all_class_scores_ failed";
184       FreeAllocatedBuffer();
185       return RET_ERROR;
186     }
187     params_->all_class_indexes_ = ms_context_->allocator->Malloc((num_boxes_ + params_->max_detections_) * sizeof(int));
188     if (params_->all_class_indexes_ == nullptr) {
189       MS_LOG(ERROR) << "malloc params->all_class_indexes_ failed";
190       FreeAllocatedBuffer();
191       return RET_ERROR;
192     }
193   } else {
194     params_->scores_ = ms_context_->allocator->Malloc(num_boxes_ * sizeof(float));
195     if (params_->scores_ == nullptr) {
196       MS_LOG(ERROR) << "malloc params->scores_ failed";
197       FreeAllocatedBuffer();
198       return RET_ERROR;
199     }
200     params_->indexes_ = ms_context_->allocator->Malloc(num_boxes_ * params_->num_classes_ * sizeof(int));
201     if (!params_->indexes_) {
202       MS_LOG(ERROR) << "malloc params->indexes_ failed.";
203       FreeAllocatedBuffer();
204       return RET_ERROR;
205     }
206   }
207   return RET_OK;
208 }
209 
Run()210 int DetectionPostProcessBaseCPUKernel::Run() {
211   MS_ASSERT(ms_context_->allocator != nullptr);
212   int status = GetInputData();
213   if (status != RET_OK) {
214     return status;
215   }
216   auto output_boxes = reinterpret_cast<float *>(out_tensors_.at(0)->data());
217   auto output_classes = reinterpret_cast<float *>(out_tensors_.at(1)->data());
218   auto output_scores = reinterpret_cast<float *>(out_tensors_.at(2)->data());
219   auto output_num = reinterpret_cast<float *>(out_tensors_.at(3)->data());
220   if (output_boxes == nullptr || output_classes == nullptr || output_scores == nullptr || output_num == nullptr) {
221     return RET_NULL_PTR;
222   }
223 
224   if (ParamInit() != RET_OK) {
225     MS_LOG(ERROR) << "ParamInit error";
226     return status;
227   }
228 
229   status = DecodeBoxes(num_boxes_, input_boxes_, params_->anchors_, params_);
230   if (status != RET_OK) {
231     MS_LOG(ERROR) << "DecodeBoxes error";
232     FreeAllocatedBuffer();
233     return status;
234   }
235 
236   if (params_->use_regular_nms_) {
237     status = DetectionPostProcessRegular(num_boxes_, num_classes_with_bg_, input_scores_, output_boxes, output_classes,
238                                          output_scores, output_num, PartialArgSort, params_);
239     if (status != RET_OK) {
240       MS_LOG(ERROR) << "DetectionPostProcessRegular error error_code[" << status << "]";
241       FreeAllocatedBuffer();
242       return status;
243     }
244   } else {
245     status = ParallelLaunch(this->ms_context_, NmsMultiClassesFastCoreRun, this, op_parameter_->thread_num_);
246     if (status != RET_OK) {
247       MS_LOG(ERROR) << "NmsMultiClassesFastCoreRun error error_code[" << status << "]";
248       FreeAllocatedBuffer();
249       return status;
250     }
251     status = DetectionPostProcessFast(num_boxes_, num_classes_with_bg_, input_scores_,
252                                       reinterpret_cast<float *>(params_->decoded_boxes_), output_boxes, output_classes,
253                                       output_scores, output_num, PartialArgSort, params_);
254     if (status != RET_OK) {
255       MS_LOG(ERROR) << "DetectionPostProcessFast error error_code[" << status << "]";
256       FreeAllocatedBuffer();
257       return status;
258     }
259   }
260   FreeAllocatedBuffer();
261   return RET_OK;
262 }
263 }  // namespace mindspore::kernel
264