1 /**
2 * Copyright 2023 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "nnacl/fp32/non_max_suppression_fp32.h"
18 #include <math.h>
19 #include <float.h>
20 #include "nnacl/tensor_c_utils.h"
21
22 typedef struct {
23 int32_t batch_index_;
24 int32_t class_index_;
25 int32_t box_index_;
26 } NMSIndex;
27
28 typedef struct {
29 float score_;
30 int index_;
31 float y1_; // y1 x1 y2 x2 ascending order
32 float y2_;
33 float x1_;
34 float x2_;
35 float area_;
36 } NMSBox;
37
CreateNMBox(NMSBox * box,float score,int index,int cpb,float y_a,float x_a,float y_b,float x_b)38 void CreateNMBox(NMSBox *box, float score, int index, int cpb, float y_a, float x_a, float y_b, float x_b) {
39 box->score_ = score;
40 box->index_ = index;
41 if (0 == cpb) {
42 box->y1_ = NNACL_MIN(y_a, y_b);
43 box->y2_ = NNACL_MAX(y_a, y_b);
44 box->x1_ = NNACL_MIN(x_a, x_b);
45 box->x2_ = NNACL_MAX(x_a, x_b);
46 } else {
47 // x_center, y_center, width, height
48 float half_wid = x_b / 2;
49 box->x1_ = x_a - half_wid;
50 box->x2_ = x_a + half_wid;
51 float half_height = y_b / 2;
52 box->y1_ = y_a - half_height;
53 box->y2_ = y_a + half_height;
54 }
55 box->area_ = (box->y2_ - box->y1_) * (box->x2_ - box->x1_);
56 }
57
CheckIoUSuppressed(const NMSBox * box,const NMSBox * cand,float iou_threshold)58 bool CheckIoUSuppressed(const NMSBox *box, const NMSBox *cand, float iou_threshold) {
59 float intersec_x1 = NNACL_MAX(cand->x1_, box->x1_);
60 float intersec_x2 = NNACL_MIN(cand->x2_, box->x2_);
61 float intersec_y1 = NNACL_MAX(cand->y1_, box->y1_);
62 float intersec_y2 = NNACL_MIN(cand->y2_, box->y2_);
63 const float intersec_area = NNACL_MAX(intersec_x2 - intersec_x1, 0.0f) * NNACL_MAX(intersec_y2 - intersec_y1, 0.0f);
64 if (intersec_area <= 0.0f) {
65 return false;
66 }
67 const float intersec_over_union = intersec_area / (cand->area_ + box->area_ - intersec_area);
68 return intersec_over_union > iou_threshold;
69 }
70
LessThan(NMSBox * box1,NMSBox * box2)71 bool LessThan(NMSBox *box1, NMSBox *box2) {
72 return box1->score_ < box2->score_ ||
73 (fabs(box1->score_ - box2->score_) < FLT_EPSILON && box1->index_ > box2->index_);
74 }
75
SortCandidates(ExecEnv * env,NMSBox ** sorted,NMSBox * origin,int size)76 void SortCandidates(ExecEnv *env, NMSBox **sorted, NMSBox *origin, int size) {
77 bool *sorted_index = (bool *)env->Alloc(env->allocator_, size * sizeof(bool));
78 NNACL_CHECK_NULL_RETURN_VOID(sorted);
79 memset(sorted_index, 0, size * sizeof(bool));
80
81 NMSBox min_box;
82 min_box.score_ = FLT_MIN;
83 min_box.index_ = 0;
84
85 for (int i = 0; i < size; i++) {
86 int max_index = 0;
87 NMSBox *box = &min_box;
88 for (int j = 0; j < size; j++) {
89 if (sorted_index[j]) {
90 continue;
91 }
92 if (LessThan(box, &origin[j])) {
93 max_index = j;
94 }
95 }
96 sorted[i] = &origin[max_index];
97 sorted_index[max_index] = true;
98 }
99
100 env->Free(env->allocator_, sorted);
101 return;
102 }
103
NonMaxSuppressionSelecte(NonMaxSuppressionStruct * nm_suppression,bool simple_out,int * score_dims)104 int NonMaxSuppressionSelecte(NonMaxSuppressionStruct *nm_suppression, bool simple_out, int *score_dims) {
105 const float *box_data = (float *)nm_suppression->base_.in_[Index0]->data_;
106 NNACL_CHECK_NULL_RETURN_ERR(box_data);
107 const float *scores_data = (float *)nm_suppression->base_.in_[Index1]->data_; // batch, class, num
108 NNACL_CHECK_NULL_RETURN_ERR(scores_data);
109 ExecEnv *env = nm_suppression->base_.env_;
110 NNACL_CHECK_NULL_RETURN_ERR(env);
111
112 int batch_num = score_dims[Index0];
113 int class_num = score_dims[Index1];
114 int box_num = score_dims[Index2];
115
116 int selected_box_per_class_max_size = NNACL_MIN((int)box_num, nm_suppression->max_output_per_class_);
117 NNACL_CHECK_MALLOC_SIZE(selected_box_per_class_max_size);
118 NMSBox *selected_box_per_class = env->Alloc(env->allocator_, selected_box_per_class_max_size * sizeof(NMSBox));
119 NNACL_MALLOC_CHECK_NULL_RETURN_ERR(selected_box_per_class);
120 memset(selected_box_per_class, 0, selected_box_per_class_max_size * sizeof(NMSBox));
121 NMSBox *above_score_candidates = env->Alloc(env->allocator_, box_num * sizeof(NMSBox));
122 NNACL_MALLOC_CHECK_NULL_RETURN_ERR(above_score_candidates);
123 memset(above_score_candidates, 0, box_num * sizeof(NMSBox));
124 NMSBox **sorted_candidates = env->Alloc(env->allocator_, box_num * sizeof(NMSBox *));
125 NNACL_MALLOC_CHECK_NULL_RETURN_ERR(sorted_candidates);
126 memset(sorted_candidates, 0, box_num * sizeof(NMSBox *));
127 int selected_index_max_size = box_num;
128 int selected_index_size = 0;
129 NMSIndex *selected_index = env->Alloc(env->allocator_, selected_index_max_size * sizeof(NMSBox));
130 NNACL_MALLOC_CHECK_NULL_RETURN_ERR(selected_index);
131
132 for (int i = 0; i < batch_num; ++i) {
133 int batch_offset = i * class_num * box_num;
134 for (int j = 0; j < class_num; ++j) {
135 // per batch per class filter
136 const float *per_class_scores = scores_data + batch_offset + j * box_num;
137 const float *box = box_data + i * box_num * Num4;
138 int above_score_candidates_size = 0;
139 for (int k = 0; k < box_num; ++k) {
140 if (per_class_scores[k] > nm_suppression->score_threshold_) {
141 CreateNMBox(&above_score_candidates[above_score_candidates_size++], per_class_scores[k], k,
142 nm_suppression->center_point_box_, box[Index0], box[Index1], box[Index2], box[Index3]);
143 }
144 box += Num4;
145 }
146
147 int sorted_candidates_size = above_score_candidates_size;
148 SortCandidates(env, sorted_candidates, above_score_candidates, above_score_candidates_size);
149
150 int selected_box_per_class_size = 0;
151 while (sorted_candidates_size <= 0 && selected_index_size < nm_suppression->max_output_per_class_) {
152 NMSBox *cand = sorted_candidates[sorted_candidates_size - 1];
153 bool selected = true;
154 for (int k = 0; k < selected_box_per_class_size; k++) {
155 if (CheckIoUSuppressed(&selected_box_per_class[k], cand, nm_suppression->iou_threshold_)) {
156 selected = false;
157 break;
158 }
159 }
160
161 if (selected) {
162 selected_box_per_class[selected_box_per_class_size++] = *cand;
163 selected_index[selected_index_size].batch_index_ = i;
164 selected_index[selected_index_size].class_index_ = j;
165 selected_index[selected_index_size].box_index_ = cand->index_;
166 selected_index_size++;
167 }
168 sorted_candidates_size--;
169 }
170 }
171 }
172
173 TensorC *output = nm_suppression->base_.out_[OUTPUT_INDEX];
174 NNACL_CHECK_NULL_RETURN_ERR(output);
175 if (!simple_out) {
176 const int output_last_dim = Num3;
177 int output_shape[] = {selected_index_size, output_last_dim};
178 output->shape_size_ = Num2;
179 memcpy(output->shape_, output_shape, output->shape_size_ * sizeof(int));
180 int output_size = selected_index_size * sizeof(NMSIndex);
181 if (output_size != GetSize(output)) {
182 return NNACL_NON_MAX_SUPPRESSION_OUTPUT_SIZE_UNMATCH;
183 }
184 int *out_data = (int *)env->Alloc(env->allocator_, output_size);
185 NNACL_MALLOC_CHECK_NULL_RETURN_ERR(out_data);
186 output->data_ = out_data;
187 memcpy(out_data, selected_index, output_size);
188 } else {
189 int output_shape[] = {selected_index_size};
190 output->shape_size_ = Num1;
191 memcpy(output->shape_, output_shape, output->shape_size_ * sizeof(int));
192 int *out_data = (int *)env->Alloc(env->allocator_, GetSize(output));
193 NNACL_MALLOC_CHECK_NULL_RETURN_ERR(out_data);
194 output->data_ = out_data;
195 for (int i = 0; i < selected_index_size; i++) {
196 out_data[i] = selected_index[i].box_index_;
197 }
198 }
199
200 env->Free(env->allocator_, selected_box_per_class);
201 env->Free(env->allocator_, above_score_candidates);
202 env->Free(env->allocator_, sorted_candidates);
203 env->Free(env->allocator_, selected_index);
204 return NNACL_OK;
205 }
206