1 /**
2 * Copyright 2020 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "nnacl/fp32/detection_post_process_fp32.h"
18 #include <math.h>
19 #include "nnacl/errorcode.h"
20 #include "nnacl/op_base.h"
21 #include "nnacl/nnacl_utils.h"
22
IntersectionOverUnion(const BboxCorner * a,const BboxCorner * b)23 float IntersectionOverUnion(const BboxCorner *a, const BboxCorner *b) {
24 const float area_a = (a->ymax - a->ymin) * (a->xmax - a->xmin);
25 const float area_b = (b->ymax - b->ymin) * (b->xmax - b->xmin);
26 if (area_a <= 0 || area_b <= 0) {
27 return 0.0f;
28 }
29 const float ymin = a->ymin > b->ymin ? a->ymin : b->ymin;
30 const float xmin = a->xmin > b->xmin ? a->xmin : b->xmin;
31 const float ymax = a->ymax < b->ymax ? a->ymax : b->ymax;
32 const float xmax = a->xmax < b->xmax ? a->xmax : b->xmax;
33 const float h = ymax - ymin > 0.0f ? ymax - ymin : 0.0f;
34 const float w = xmax - xmin > 0.0f ? xmax - xmin : 0.0f;
35 const float inter = h * w;
36 return inter / (area_a + area_b - inter);
37 }
38
DecodeBoxes(int num_boxes,const float * input_boxes,const float * anchors,const DetectionPostProcessParameter * param)39 int DecodeBoxes(int num_boxes, const float *input_boxes, const float *anchors,
40 const DetectionPostProcessParameter *param) {
41 if (input_boxes == NULL || anchors == NULL || param == NULL) {
42 return NNACL_NULL_PTR;
43 }
44 float *decoded_boxes = (float *)param->decoded_boxes_;
45 BboxCenter scaler;
46 scaler.y = param->y_scale_;
47 scaler.x = param->x_scale_;
48 scaler.h = param->h_scale_;
49 scaler.w = param->w_scale_;
50 for (int i = 0; i < num_boxes; ++i) {
51 BboxCenter *box = (BboxCenter *)(input_boxes) + i;
52 BboxCenter *anchor = (BboxCenter *)(anchors) + i;
53 BboxCorner *decoded_box = (BboxCorner *)(decoded_boxes) + i;
54 float y_center = box->y / scaler.y * anchor->h + anchor->y;
55 float x_center = box->x / scaler.x * anchor->w + anchor->x;
56 const float h_half = 0.5f * expf(box->h / scaler.h) * anchor->h;
57 const float w_half = 0.5f * expf(box->w / scaler.w) * anchor->w;
58 decoded_box->ymin = y_center - h_half;
59 decoded_box->xmin = x_center - w_half;
60 decoded_box->ymax = y_center + h_half;
61 decoded_box->xmax = x_center + w_half;
62 }
63 return NNACL_OK;
64 }
65
NmsSingleClass(const int num_boxes,const float * decoded_boxes,const int max_detections,const float * scores,int * selected,void (* PartialArgSort)(const float *,int *,int,int),const DetectionPostProcessParameter * param)66 int NmsSingleClass(const int num_boxes, const float *decoded_boxes, const int max_detections, const float *scores,
67 int *selected, void (*PartialArgSort)(const float *, int *, int, int),
68 const DetectionPostProcessParameter *param) {
69 if (PartialArgSort == NULL) {
70 return NNACL_NULL_PTR;
71 }
72 uint8_t *nms_candidate = param->nms_candidate_;
73 const int output_num = num_boxes < max_detections ? num_boxes : max_detections;
74 int possible_candidate_num = num_boxes;
75 int selected_num = 0;
76 int *indexes = (int *)param->single_class_indexes_;
77 for (int i = 0; i < num_boxes; ++i) {
78 indexes[i] = i;
79 nms_candidate[i] = 1;
80 }
81 PartialArgSort(scores, indexes, num_boxes, num_boxes);
82 for (int i = 0; i < num_boxes; ++i) {
83 if (possible_candidate_num == 0 || selected_num >= output_num || scores[indexes[i]] < param->nms_score_threshold_) {
84 break;
85 }
86 if (nms_candidate[indexes[i]] == 0) {
87 continue;
88 }
89 selected[selected_num++] = indexes[i];
90 nms_candidate[indexes[i]] = 0;
91 possible_candidate_num--;
92 const BboxCorner *bbox_i = (BboxCorner *)(decoded_boxes) + indexes[i];
93 for (int t = i + 1; t < num_boxes; ++t) {
94 if (scores[indexes[t]] < param->nms_score_threshold_) break;
95 if (nms_candidate[indexes[t]] == 1) {
96 const BboxCorner *bbox_t = (BboxCorner *)(decoded_boxes) + indexes[t];
97 const float iou = IntersectionOverUnion(bbox_i, bbox_t);
98 if (iou > param->nms_iou_threshold_) {
99 nms_candidate[indexes[t]] = 0;
100 possible_candidate_num--;
101 }
102 }
103 }
104 }
105 return selected_num;
106 }
107
NmsMultiClassesFastCore(const int num_boxes,const int num_classes_with_bg,const float * input_scores,void (* PartialArgSort)(const float *,int *,int,int),const DetectionPostProcessParameter * param,const int task_id,const int thread_num)108 int NmsMultiClassesFastCore(const int num_boxes, const int num_classes_with_bg, const float *input_scores,
109 void (*PartialArgSort)(const float *, int *, int, int),
110 const DetectionPostProcessParameter *param, const int task_id, const int thread_num) {
111 if (input_scores == NULL || param == NULL || PartialArgSort == NULL) {
112 return NNACL_NULL_PTR;
113 }
114 if (thread_num == 0) {
115 return NNACL_PARAM_INVALID;
116 }
117 const int first_class_index = num_classes_with_bg - (int)(param->num_classes_);
118 const int64_t max_classes_per_anchor =
119 param->max_classes_per_detection_ < param->num_classes_ ? param->max_classes_per_detection_ : param->num_classes_;
120 float *scores = (float *)param->scores_;
121 for (int i = task_id; i < num_boxes; i += thread_num) {
122 int *indexes = (int *)param->indexes_ + i * param->num_classes_;
123 for (int j = 0; j < param->num_classes_; ++j) {
124 indexes[j] = i * num_classes_with_bg + first_class_index + j;
125 }
126 PartialArgSort(input_scores, indexes, max_classes_per_anchor, param->num_classes_);
127 scores[i] = input_scores[indexes[0]];
128 }
129 return NNACL_OK;
130 }
131
DetectionPostProcessFast(const int num_boxes,const int num_classes_with_bg,const float * input_scores,const float * decoded_boxes,float * output_boxes,float * output_classes,float * output_scores,float * output_num,void (* PartialArgSort)(const float *,int *,int,int),const DetectionPostProcessParameter * param)132 int DetectionPostProcessFast(const int num_boxes, const int num_classes_with_bg, const float *input_scores,
133 const float *decoded_boxes, float *output_boxes, float *output_classes,
134 float *output_scores, float *output_num,
135 void (*PartialArgSort)(const float *, int *, int, int),
136 const DetectionPostProcessParameter *param) {
137 if (input_scores == NULL || decoded_boxes == NULL || output_boxes == NULL || output_classes == NULL ||
138 output_scores == NULL || output_num == NULL || param == NULL || PartialArgSort == NULL) {
139 return NNACL_NULL_PTR;
140 }
141 int out_num = 0;
142 const int first_class_index = num_classes_with_bg - (int)(param->num_classes_);
143 const int64_t max_classes_per_anchor =
144 param->max_classes_per_detection_ < param->num_classes_ ? param->max_classes_per_detection_ : param->num_classes_;
145 int *selected = (int *)param->selected_;
146 int selected_num = NmsSingleClass(num_boxes, decoded_boxes, param->max_detections_, (float *)param->scores_, selected,
147 PartialArgSort, param);
148 for (int i = 0; i < selected_num; ++i) {
149 int *indexes = (int *)param->indexes_ + selected[i] * param->num_classes_;
150 BboxCorner *box = (BboxCorner *)(decoded_boxes) + selected[i];
151 for (int j = 0; j < max_classes_per_anchor; ++j) {
152 *((BboxCorner *)(output_boxes) + out_num) = *box;
153 output_scores[out_num] = input_scores[indexes[j]];
154 NNACL_ASSERT(num_classes_with_bg != 0);
155 output_classes[out_num++] = (float)(indexes[j] % num_classes_with_bg - first_class_index);
156 }
157 }
158 *output_num = (float)out_num;
159 for (int i = out_num; i < param->max_detections_ * param->max_classes_per_detection_; ++i) {
160 ((BboxCorner *)(output_boxes) + i)->ymin = 0;
161 ((BboxCorner *)(output_boxes) + i)->xmin = 0;
162 ((BboxCorner *)(output_boxes) + i)->ymax = 0;
163 ((BboxCorner *)(output_boxes) + i)->xmax = 0;
164 output_scores[i] = 0;
165 output_classes[i] = 0;
166 }
167 return NNACL_OK;
168 }
169
DetectionPostProcessRegular(const int num_boxes,const int num_classes_with_bg,const float * input_scores,float * output_boxes,float * output_classes,float * output_scores,float * output_num,void (* PartialArgSort)(const float *,int *,int,int),const DetectionPostProcessParameter * param)170 int DetectionPostProcessRegular(const int num_boxes, const int num_classes_with_bg, const float *input_scores,
171 float *output_boxes, float *output_classes, float *output_scores, float *output_num,
172 void (*PartialArgSort)(const float *, int *, int, int),
173 const DetectionPostProcessParameter *param) {
174 if (input_scores == NULL || output_boxes == NULL || output_classes == NULL || output_scores == NULL ||
175 output_num == NULL || param == NULL || PartialArgSort == NULL) {
176 return NNACL_NULL_PTR;
177 }
178 const int first_class_index = num_classes_with_bg - (int)(param->num_classes_);
179 float *decoded_boxes = (float *)param->decoded_boxes_;
180 int *selected = (int *)param->selected_;
181 float *scores = (float *)param->scores_;
182 float *all_scores = (float *)param->all_class_scores_;
183 int *indexes = (int *)(param->indexes_);
184 int *all_indexes = (int *)(param->all_class_indexes_);
185 int all_classes_sorted_num = 0;
186 int all_classes_output_num = 0;
187 for (int j = first_class_index; j < num_classes_with_bg; ++j) {
188 // process single class
189 for (int i = 0; i < num_boxes; ++i) {
190 scores[i] = input_scores[i * num_classes_with_bg + j];
191 }
192 int selected_num =
193 NmsSingleClass(num_boxes, decoded_boxes, param->detections_per_class_, scores, selected, PartialArgSort, param);
194 for (int i = 0; i < all_classes_sorted_num; ++i) {
195 indexes[i] = all_indexes[i];
196 all_indexes[i] = i;
197 }
198 // process all classes
199 for (int i = 0; i < selected_num; ++i) {
200 indexes[all_classes_sorted_num] = selected[i] * num_classes_with_bg + j;
201 all_indexes[all_classes_sorted_num] = all_classes_sorted_num;
202 all_scores[all_classes_sorted_num++] = scores[selected[i]];
203 }
204 all_classes_output_num =
205 all_classes_sorted_num < param->max_detections_ ? all_classes_sorted_num : param->max_detections_;
206 PartialArgSort(all_scores, all_indexes, all_classes_output_num, all_classes_sorted_num);
207 for (int i = 0; i < all_classes_output_num; ++i) {
208 scores[i] = all_scores[all_indexes[i]];
209 all_indexes[i] = indexes[all_indexes[i]];
210 }
211 for (int i = 0; i < all_classes_output_num; ++i) {
212 all_scores[i] = scores[i];
213 }
214 all_classes_sorted_num = all_classes_output_num;
215 }
216 for (int i = 0; i < param->max_detections_ * param->max_classes_per_detection_; ++i) {
217 if (i < all_classes_output_num) {
218 NNACL_CHECK_ZERO_RETURN_ERR(num_classes_with_bg);
219 const int box_index = all_indexes[i] / num_classes_with_bg;
220 const int class_index = all_indexes[i] % num_classes_with_bg - first_class_index;
221 *((BboxCorner *)(output_boxes) + i) = *((BboxCorner *)(decoded_boxes) + box_index);
222 output_classes[i] = (float)class_index;
223 output_scores[i] = all_scores[i];
224 } else {
225 ((BboxCorner *)(output_boxes) + i)->ymin = 0;
226 ((BboxCorner *)(output_boxes) + i)->xmin = 0;
227 ((BboxCorner *)(output_boxes) + i)->ymax = 0;
228 ((BboxCorner *)(output_boxes) + i)->xmax = 0;
229 output_classes[i] = 0.0f;
230 output_scores[i] = 0.0f;
231 }
232 }
233 *output_num = (float)all_classes_output_num;
234 return NNACL_OK;
235 }
236