1 /*
2 * Copyright (c) 2022 HiSilicon (Shanghai) Technologies CO., LIMITED.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "sample_svp_nnie_software.h"
16 #include <math.h>
17
18 #ifdef __cplusplus // If used by C++ code,
19 extern "C" { // we need to export the C interface
20 #endif
21
22 static HI_FLOAT s_af32ExpCoef[10][16] = {
23 {
24 1.0f, 1.00024f, 1.00049f, 1.00073f, 1.00098f, 1.00122f, 1.00147f, 1.00171f, 1.00196f,
25 1.0022f, 1.00244f, 1.00269f, 1.00293f, 1.00318f, 1.00342f, 1.00367f
26 },
27 {
28 1.0f, 1.00391f, 1.00784f, 1.01179f, 1.01575f, 1.01972f, 1.02371f, 1.02772f, 1.03174f,
29 1.03578f, 1.03984f, 1.04391f, 1.04799f, 1.05209f, 1.05621f, 1.06034f
30 },
31 {
32 1.0f, 1.06449f, 1.13315f, 1.20623f, 1.28403f, 1.36684f, 1.45499f, 1.54883f, 1.64872f,
33 1.75505f, 1.86825f, 1.98874f, 2.117f, 2.25353f, 2.39888f, 2.55359f
34 },
35 {
36 1.0f, 2.71828f, 7.38906f, 20.0855f, 54.5981f, 148.413f, 403.429f, 1096.63f, 2980.96f,
37 8103.08f, 22026.5f, 59874.1f, 162755.0f, 442413.0f, 1.2026e+006f, 3.26902e+006f
38 },
39 {
40 1.0f, 8.88611e+006f, 7.8963e+013f, 7.01674e+020f, 6.23515e+027f, 5.54062e+034f,
41 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f,
42 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f
43 },
44 {
45 1.0f, 0.999756f, 0.999512f, 0.999268f, 0.999024f, 0.99878f, 0.998536f, 0.998292f,
46 0.998049f, 0.997805f, 0.997562f, 0.997318f, 0.997075f, 0.996831f, 0.996588f, 0.996345f
47 },
48 {
49 1.0f, 0.996101f, 0.992218f, 0.98835f, 0.984496f, 0.980658f, 0.976835f, 0.973027f,
50 0.969233f, 0.965455f, 0.961691f, 0.957941f, 0.954207f, 0.950487f, 0.946781f, 0.94309f
51 },
52 {
53 1.0f, 0.939413f, 0.882497f, 0.829029f, 0.778801f, 0.731616f, 0.687289f, 0.645649f, 0.606531f,
54 0.569783f, 0.535261f, 0.502832f, 0.472367f, 0.443747f, 0.416862f, 0.391606f
55 },
56 {
57 1.0f, 0.367879f, 0.135335f, 0.0497871f, 0.0183156f, 0.00673795f, 0.00247875f, 0.000911882f,
58 0.000335463f, 0.00012341f, 4.53999e-005f, 1.67017e-005f, 6.14421e-006f, 2.26033e-006f,
59 8.31529e-007f, 3.05902e-007f
60 },
61 {
62 1.0f, 1.12535e-007f, 1.26642e-014f, 1.42516e-021f, 1.60381e-028f, 1.80485e-035f, 2.03048e-042f,
63 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
64 }
65 };
66
SVP_NNIE_QuickExp(HI_S32 s32Value)67 static HI_FLOAT SVP_NNIE_QuickExp(HI_S32 s32Value)
68 {
69 HI_U32 tmp_val;
70
71 if (s32Value < 0) {
72 tmp_val = *((HI_U32 *)&s32Value);
73 tmp_val = (~tmp_val + 0x00000001);
74 /* get each 4 bit */
75 return s_af32ExpCoef[5][tmp_val & 0x0000000F] * s_af32ExpCoef[6][(tmp_val >> 4) & 0x0000000F] *
76 s_af32ExpCoef[7][(tmp_val >> 8) & 0x0000000F] * s_af32ExpCoef[8][(tmp_val >> 12) & 0x0000000F] *
77 s_af32ExpCoef[9][(tmp_val >> 16) & 0x0000000F];
78 } else {
79 tmp_val = (HI_U32)s32Value;
80 /* get each 4 bit */
81 return s_af32ExpCoef[0][tmp_val & 0x0000000F] * s_af32ExpCoef[1][(tmp_val >> 4) & 0x0000000F] *
82 s_af32ExpCoef[2][(tmp_val >> 8) & 0x0000000F] * s_af32ExpCoef[3][(tmp_val >> 12) & 0x0000000F] *
83 s_af32ExpCoef[4][(tmp_val >> 16) & 0x0000000F];
84 }
85 }
86
SVP_NNIE_SoftMax(HI_FLOAT * pf32Src,HI_U32 u32Num)87 static HI_S32 SVP_NNIE_SoftMax(HI_FLOAT *pf32Src, HI_U32 u32Num)
88 {
89 HI_FLOAT f32Max = 0;
90 HI_FLOAT f32Sum = 0;
91 HI_U32 i = 0;
92
93 for (i = 0; i < u32Num; ++i) {
94 if (f32Max < pf32Src[i]) {
95 f32Max = pf32Src[i];
96 }
97 }
98
99 for (i = 0; i < u32Num; ++i) {
100 pf32Src[i] = (HI_FLOAT)SVP_NNIE_QuickExp((HI_S32)((pf32Src[i] - f32Max) * SAMPLE_SVP_NNIE_QUANT_BASE));
101 f32Sum += pf32Src[i];
102 }
103
104 for (i = 0; i < u32Num; ++i) {
105 pf32Src[i] /= f32Sum;
106 }
107 return HI_SUCCESS;
108 }
109
SVP_NNIE_Sigmoid(HI_FLOAT * pf32Src,HI_U32 u32Num)110 static HI_S32 SVP_NNIE_Sigmoid(HI_FLOAT *pf32Src, HI_U32 u32Num)
111 {
112 HI_U32 i = 0;
113
114 for (i = 0; i < u32Num; i++) {
115 pf32Src[i] = SAMPLE_SVP_NNIE_SIGMOID(pf32Src[i]);
116 }
117 return HI_SUCCESS;
118 }
119
/*
 * Prototype    : SVP_NNIE_SSD_SoftMax
 * Description  : softmax on fixed-point values scaled by SAMPLE_SVP_NNIE_QUANT_BASE.
 *                Every input is shifted by the maximum of the array, exponentiated with exp(),
 *                and the results are normalised so that they sum to (about)
 *                SAMPLE_SVP_NNIE_QUANT_BASE; fractions are truncated.
 * Input        : HI_S32 *ps32Src       [IN]  input values
 *                HI_S32 s32ArraySize   [IN]  number of values
 *                HI_S32 *ps32Dst       [OUT] softmax result
 * Return       : HI_SUCCESS, or HI_FAILURE when a pointer is NULL while s32ArraySize is positive.
 * Note         : the maximum is seeded from the first element (not from 0). The largest element
 *                then always contributes SAMPLE_SVP_NNIE_QUANT_BASE to the sum, so the
 *                normalisation can no longer divide by zero when all inputs are negative.
 */
static HI_S32 SVP_NNIE_SSD_SoftMax(HI_S32 *ps32Src, HI_S32 s32ArraySize, HI_S32 *ps32Dst)
{
    HI_S32 s32Max;
    HI_S32 s32Sum = 0;
    HI_S32 i;

    if (s32ArraySize <= 0) {
        return HI_SUCCESS; /* nothing to do */
    }
    if (ps32Src == NULL || ps32Dst == NULL) {
        return HI_FAILURE;
    }

    s32Max = ps32Src[0];
    for (i = 1; i < s32ArraySize; ++i) {
        if (s32Max < ps32Src[i]) {
            s32Max = ps32Src[i];
        }
    }

    for (i = 0; i < s32ArraySize; ++i) {
        ps32Dst[i] =
            (HI_S32)(SAMPLE_SVP_NNIE_QUANT_BASE * exp((HI_FLOAT)(ps32Src[i] - s32Max) / SAMPLE_SVP_NNIE_QUANT_BASE));
        s32Sum += ps32Dst[i];
    }

    for (i = 0; i < s32ArraySize; ++i) {
        ps32Dst[i] = (HI_S32)(((HI_FLOAT)ps32Dst[i] / (HI_FLOAT)s32Sum) * SAMPLE_SVP_NNIE_QUANT_BASE);
    }
    return HI_SUCCESS;
}
141
/*
 * Prototype    : SVP_NNIE_Argswap
 * Description  : exchanges two proposal records element by element.
 *                Exactly SAMPLE_SVP_NNIE_PROPOSAL_WIDTH values are swapped.
 * Input        : HI_S32 *ps32Src1   [IN/OUT] first record
 *                HI_S32 *ps32Src2   [IN/OUT] second record
 */
static void SVP_NNIE_Argswap(HI_S32 *ps32Src1, HI_S32 *ps32Src2)
{
    HI_S32 *ps32First = ps32Src1;
    HI_S32 *ps32Second = ps32Src2;
    HI_S32 s32Saved;
    HI_U32 u32Left;

    for (u32Left = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH; u32Left > 0; u32Left--) {
        s32Saved = *ps32First;
        *ps32First = *ps32Second;
        *ps32Second = s32Saved;
        ps32First++;
        ps32Second++;
    }
}
152
153 /*
154 * Prototype : SVP_NNIE_NonRecursiveArgQuickSort
155 * Description : this function is used to do quick sort
156 * Input : HI_S32* ps32Array [IN] the array need to be sorted
157 * HI_S32 s32Low [IN] the start position of quick sort
158 * HI_S32 s32High [IN] the end position of quick sort
159 * SAMPLE_SVP_NNIE_STACK_S * pstStack [IN] the buffer used to store start positions and end positions
160 */
SVP_NNIE_NonRecursiveArgQuickSort(HI_S32 * ps32Array,HI_S32 s32Low,HI_S32 s32High,SAMPLE_SVP_NNIE_STACK_S * pstStack,HI_U32 u32MaxNum)161 static HI_S32 SVP_NNIE_NonRecursiveArgQuickSort(HI_S32 *ps32Array, HI_S32 s32Low, HI_S32 s32High,
162 SAMPLE_SVP_NNIE_STACK_S *pstStack, HI_U32 u32MaxNum)
163 {
164 HI_S32 i = s32Low;
165 HI_S32 j = s32High;
166 HI_S32 s32Top = 0;
167 HI_S32 s32KeyConfidence = ps32Array[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * s32Low + SAMPLE_SVP_NNIE_SCORE_OFFSET];
168 pstStack[s32Top].s32Min = s32Low;
169 pstStack[s32Top].s32Max = s32High;
170
171 while (s32Top > -1) {
172 s32Low = pstStack[s32Top].s32Min;
173 s32High = pstStack[s32Top].s32Max;
174 i = s32Low;
175 j = s32High;
176 s32Top--;
177
178 s32KeyConfidence = ps32Array[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * s32Low + SAMPLE_SVP_NNIE_SCORE_OFFSET];
179
180 while (i < j) {
181 while ((i < j) &&
182 (s32KeyConfidence > ps32Array[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + SAMPLE_SVP_NNIE_SCORE_OFFSET])) {
183 j--;
184 }
185 if (i < j) {
186 SVP_NNIE_Argswap(&ps32Array[i * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH],
187 &ps32Array[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH]);
188 i++;
189 }
190
191 while ((i < j) &&
192 (s32KeyConfidence < ps32Array[i * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + SAMPLE_SVP_NNIE_SCORE_OFFSET])) {
193 i++;
194 }
195 if (i < j) {
196 SVP_NNIE_Argswap(&ps32Array[i * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH],
197 &ps32Array[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH]);
198 j--;
199 }
200 }
201
202 if ((hi_u32)s32Low <= u32MaxNum) {
203 if (s32Low < i - 1) {
204 s32Top++;
205 pstStack[s32Top].s32Min = s32Low;
206 pstStack[s32Top].s32Max = i - 1;
207 }
208
209 if (s32High > i + 1) {
210 s32Top++;
211 pstStack[s32Top].s32Min = i + 1;
212 pstStack[s32Top].s32Max = s32High;
213 }
214 }
215 }
216 return HI_SUCCESS;
217 }
218
219 /*
220 * Prototype : SVP_NNIE_Overlap
221 * Description : this function is used to calculate the overlap ratio of two proposals
222 * Input : HI_S32 s32XMin1 [IN] first input proposal's minimum value of x coordinate
223 * HI_S32 s32YMin1 [IN] first input proposal's minimum value of y coordinate of first input
224 * proposal HI_S32 s32XMax1 [IN] first input proposal's maximum value of x coordinate of first
225 * input proposal HI_S32 s32YMax1 [IN] first input proposal's maximum value of y coordinate of
226 * first input proposal HI_S32 s32XMin1 [IN] second input proposal's minimum value of x
227 * coordinate HI_S32 s32YMin1 [IN] second input proposal's minimum value of y coordinate of
228 * first input proposal HI_S32 s32XMax1 [IN] second input proposal's maximum value of x
229 * coordinate of first input proposal HI_S32 s32YMax1 [IN] second input proposal's maximum value
230 * of y coordinate of first input proposal HI_FLOAT *pf32IoU [IN OUT]the pointer of the IoU value
231 */
SVP_NNIE_Overlap(HI_S32 s32XMin1,HI_S32 s32YMin1,HI_S32 s32XMax1,HI_S32 s32YMax1,HI_S32 s32XMin2,HI_S32 s32YMin2,HI_S32 s32XMax2,HI_S32 s32YMax2,HI_S32 * s32AreaSum,HI_S32 * s32AreaInter)232 static HI_S32 SVP_NNIE_Overlap(HI_S32 s32XMin1, HI_S32 s32YMin1, HI_S32 s32XMax1, HI_S32 s32YMax1, HI_S32 s32XMin2,
233 HI_S32 s32YMin2, HI_S32 s32XMax2, HI_S32 s32YMax2, HI_S32 *s32AreaSum, HI_S32 *s32AreaInter)
234 {
235 HI_S32 s32Inter = 0;
236 HI_S32 s32Total = 0;
237 HI_S32 s32XMin = 0;
238 HI_S32 s32YMin = 0;
239 HI_S32 s32XMax = 0;
240 HI_S32 s32YMax = 0;
241 HI_S32 s32Area1 = 0;
242 HI_S32 s32Area2 = 0;
243 HI_S32 s32InterWidth = 0;
244 HI_S32 s32InterHeight = 0;
245
246 s32XMin = SAMPLE_SVP_NNIE_MAX(s32XMin1, s32XMin2);
247 s32YMin = SAMPLE_SVP_NNIE_MAX(s32YMin1, s32YMin2);
248 s32XMax = SAMPLE_SVP_NNIE_MIN(s32XMax1, s32XMax2);
249 s32YMax = SAMPLE_SVP_NNIE_MIN(s32YMax1, s32YMax2);
250
251 s32InterWidth = s32XMax - s32XMin + 1;
252 s32InterHeight = s32YMax - s32YMin + 1;
253
254 s32InterWidth = (s32InterWidth >= 0) ? s32InterWidth : 0;
255 s32InterHeight = (s32InterHeight >= 0) ? s32InterHeight : 0;
256
257 s32Inter = s32InterWidth * s32InterHeight;
258 s32Area1 = (s32XMax1 - s32XMin1 + 1) * (s32YMax1 - s32YMin1 + 1);
259 s32Area2 = (s32XMax2 - s32XMin2 + 1) * (s32YMax2 - s32YMin2 + 1);
260
261 s32Total = s32Area1 + s32Area2 - s32Inter;
262
263 *s32AreaSum = s32Total;
264 *s32AreaInter = s32Inter;
265 return HI_SUCCESS;
266 }
267
268 /*
269 * Prototype : SVP_NNIE_FilterLowScoreBbox
270 * Description : this function is used to remove low score bboxes, in order to speed-up Sort & RPN procedures.
271 * Input : HI_S32* ps32Proposals [IN] proposals
272 * HI_U32 u32NumAnchors [IN] input anchors' num
273 * HI_U32 u32FilterThresh [IN] rpn configuration
274 * HI_U32* u32NumAfterFilter [OUT] output num of anchors after low score filtering
275 */
SVP_NNIE_FilterLowScoreBbox(HI_S32 * ps32Proposals,HI_U32 u32AnchorsNum,HI_U32 u32FilterThresh,HI_U32 * u32NumAfterFilter)276 static HI_S32 SVP_NNIE_FilterLowScoreBbox(HI_S32 *ps32Proposals, HI_U32 u32AnchorsNum, HI_U32 u32FilterThresh,
277 HI_U32 *u32NumAfterFilter)
278 {
279 HI_U32 u32ProposalCnt = u32AnchorsNum;
280 HI_U32 i = 0;
281
282 if (u32FilterThresh > 0) {
283 for (i = 0; i < u32AnchorsNum; i++) {
284 if (ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_SCORE_OFFSET] <
285 (HI_S32)u32FilterThresh) {
286 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] = 1;
287 }
288 }
289
290 u32ProposalCnt = 0;
291 for (i = 0; i < u32AnchorsNum; i++) {
292 if (ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] == 0) {
293 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32ProposalCnt] =
294 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i];
295 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32ProposalCnt + SAMPLE_SVP_NNIE_Y_MIN_OFFSET] =
296 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_Y_MIN_OFFSET];
297 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32ProposalCnt + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
298 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_X_MAX_OFFSET];
299 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32ProposalCnt + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
300 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_Y_MAX_OFFSET];
301 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32ProposalCnt + SAMPLE_SVP_NNIE_SCORE_OFFSET] =
302 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_SCORE_OFFSET];
303 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32ProposalCnt + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] =
304 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET];
305 u32ProposalCnt++;
306 }
307 }
308 }
309 *u32NumAfterFilter = u32ProposalCnt;
310 return HI_SUCCESS;
311 }
312
313 /*
314 * Prototype : SVP_NNIE_NonMaxSuppression
315 * Description : this function is used to do non maximum suppression
316 * Input : HI_S32* ps32Proposals [IN] proposals
317 * HI_U32 u32AnchorsNum [IN] anchors num
318 * HI_U32 u32NmsThresh [IN] non maximum suppression threshold
319 * HI_U32 u32MaxRoiNum [IN] The max roi num for the roi pooling
320 */
SVP_NNIE_NonMaxSuppression(HI_S32 * ps32Proposals,HI_U32 u32AnchorsNum,HI_U32 u32NmsThresh,HI_U32 u32MaxRoiNum)321 static HI_S32 SVP_NNIE_NonMaxSuppression(HI_S32 *ps32Proposals, HI_U32 u32AnchorsNum, HI_U32 u32NmsThresh,
322 HI_U32 u32MaxRoiNum)
323 {
324 HI_S32 s32XMin1 = 0;
325 HI_S32 s32YMin1 = 0;
326 HI_S32 s32XMax1 = 0;
327 HI_S32 s32YMax1 = 0;
328 HI_S32 s32XMin2 = 0;
329 HI_S32 s32YMin2 = 0;
330 HI_S32 s32XMax2 = 0;
331 HI_S32 s32YMax2 = 0;
332 HI_S32 s32AreaTotal = 0;
333 HI_S32 s32AreaInter = 0;
334 HI_U32 i = 0;
335 HI_U32 j = 0;
336 HI_U32 u32Num = 0;
337 HI_BOOL bNoOverlap = HI_TRUE;
338
339 for (i = 0; i < u32AnchorsNum && u32Num < u32MaxRoiNum; i++) {
340 if (ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] == 0) {
341 u32Num++;
342 s32XMin1 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i];
343 s32YMin1 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_Y_MIN_OFFSET];
344 s32XMax1 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_X_MAX_OFFSET];
345 s32YMax1 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_Y_MAX_OFFSET];
346 for (j = i + 1; j < u32AnchorsNum; j++) {
347 if (ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * j + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] == 0) {
348 s32XMin2 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * j];
349 s32YMin2 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * j + SAMPLE_SVP_NNIE_Y_MIN_OFFSET];
350 s32XMax2 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * j + SAMPLE_SVP_NNIE_X_MAX_OFFSET];
351 s32YMax2 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * j + SAMPLE_SVP_NNIE_Y_MAX_OFFSET];
352 bNoOverlap = (s32XMin2 > s32XMax1) || (s32XMax2 < s32XMin1) || (s32YMin2 > s32YMax1) ||
353 (s32YMax2 < s32YMin1);
354 if (bNoOverlap) {
355 continue;
356 }
357 (void)SVP_NNIE_Overlap(s32XMin1, s32YMin1, s32XMax1, s32YMax1, s32XMin2, s32YMin2, s32XMax2,
358 s32YMax2, &s32AreaTotal, &s32AreaInter);
359 if (s32AreaInter * SAMPLE_SVP_NNIE_QUANT_BASE > ((HI_S32)u32NmsThresh * s32AreaTotal)) {
360 if (ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_SCORE_OFFSET] >=
361 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * j + SAMPLE_SVP_NNIE_SCORE_OFFSET]) {
362 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * j + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET]
363 = 1;
364 } else {
365 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET]
366 = 1;
367 }
368 }
369 }
370 }
371 }
372 }
373
374 return HI_SUCCESS;
375 }
376
377 /*
378 * Prototype : SVP_NNIE_Cnn_GetTopN
379 * Description : Cnn get top N
380 * Input : HI_S32 *ps32Fc [IN] FC data pointer
381 * HI_U32 u32FcStride [IN] FC stride
382 * HI_U32 u32ClassNum [IN] Class Num
383 * HI_U32 u32BatchNum [IN] Batch Num
384 * HI_U32 u32TopN [IN] TopN
385 * HI_S32 *ps32TmpBuf [IN] assist buffer pointer
386 * HI_U32 u32TopNStride [IN] TopN result stride
387 * HI_S32 *ps32GetTopN [OUT] TopN result
388 */
SVP_NNIE_Cnn_GetTopN(HI_S32 * ps32Fc,HI_U32 u32FcStride,HI_U32 u32ClassNum,HI_U32 u32BatchNum,HI_U32 u32TopN,HI_S32 * ps32TmpBuf,HI_U32 u32TopNStride,HI_S32 * ps32GetTopN)389 static HI_S32 SVP_NNIE_Cnn_GetTopN(HI_S32 *ps32Fc, HI_U32 u32FcStride, HI_U32 u32ClassNum, HI_U32 u32BatchNum,
390 HI_U32 u32TopN, HI_S32 *ps32TmpBuf, HI_U32 u32TopNStride, HI_S32 *ps32GetTopN)
391 {
392 HI_U32 i = 0, j = 0, n = 0;
393 HI_U32 u32Id = 0;
394 HI_S32 *ps32Score = NULL;
395 SAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S stTmp = { 0 };
396 SAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S *pstTopN = NULL;
397 SAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S *pstTmpBuf = (SAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S *)ps32TmpBuf;
398 for (n = 0; n < u32BatchNum; n++) {
399 ps32Score = (HI_S32 *)((HI_U8 *)ps32Fc + n * u32FcStride);
400 pstTopN = (SAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S *)((HI_U8 *)ps32GetTopN + n * u32TopNStride);
401 for (i = 0; i < u32ClassNum; i++) {
402 pstTmpBuf[i].u32ClassId = i;
403 pstTmpBuf[i].u32Confidence = (HI_U32)ps32Score[i];
404 }
405
406 for (i = 0; i < u32TopN; i++) {
407 u32Id = i;
408 pstTopN[i].u32ClassId = pstTmpBuf[i].u32ClassId;
409 pstTopN[i].u32Confidence = pstTmpBuf[i].u32Confidence;
410 for (j = i + 1; j < u32ClassNum; j++) {
411 if (pstTmpBuf[u32Id].u32Confidence < pstTmpBuf[j].u32Confidence) {
412 u32Id = j;
413 }
414 }
415
416 stTmp.u32ClassId = pstTmpBuf[u32Id].u32ClassId;
417 stTmp.u32Confidence = pstTmpBuf[u32Id].u32Confidence;
418
419 if (i != u32Id) {
420 pstTmpBuf[u32Id].u32ClassId = pstTmpBuf[i].u32ClassId;
421 pstTmpBuf[u32Id].u32Confidence = pstTmpBuf[i].u32Confidence;
422 pstTmpBuf[i].u32ClassId = stTmp.u32ClassId;
423 pstTmpBuf[i].u32Confidence = stTmp.u32Confidence;
424
425 pstTopN[i].u32ClassId = stTmp.u32ClassId;
426 pstTopN[i].u32Confidence = stTmp.u32Confidence;
427 }
428 }
429 }
430
431 return HI_SUCCESS;
432 }
433
434 /*
435 * Prototype : SVP_NNIE_Rpn
436 * Description : this function is used to do RPN
437 * Input : HI_S32** pps32Src [IN] convolution data
438 * HI_U32 u32NumRatioAnchors [IN] Ratio anchor num
439 * HI_U32 u32NumScaleAnchors [IN] scale anchor num
440 * HI_U32* au32Scales [IN] scale value
441 * HI_U32* au32Ratios [IN] ratio value
442 * HI_U32 u32OriImHeight [IN] input image height
443 * HI_U32 u32OriImWidth [IN] input image width
444 * HI_U32* pu32ConvHeight [IN] convolution height
445 * HI_U32* pu32ConvWidth [IN] convolution width
446 * HI_U32* pu32ConvChannel [IN] convolution channel
447 * HI_U32 u32ConvStride [IN] convolution stride
448 * HI_U32 u32MaxRois [IN] max roi num
449 * HI_U32 u32MinSize [IN] min size
450 * HI_U32 u32SpatialScale [IN] spatial scale
451 * HI_U32 u32NmsThresh [IN] NMS thresh
452 * HI_U32 u32FilterThresh [IN] filter thresh
453 * HI_U32 u32NumBeforeNms [IN] num before doing NMS
454 * HI_U32 *pu32MemPool [IN] assist buffer
455 * HI_S32 *ps32ProposalResult [OUT] proposal result
456 * HI_U32* pu32NumRois [OUT] proposal num
457 */
SVP_NNIE_Rpn(HI_S32 ** pps32Src,HI_U32 u32NumRatioAnchors,HI_U32 u32NumScaleAnchors,HI_U32 * au32Scales,HI_U32 * au32Ratios,HI_U32 u32OriImHeight,HI_U32 u32OriImWidth,HI_U32 * pu32ConvHeight,HI_U32 * pu32ConvWidth,HI_U32 * pu32ConvChannel,HI_U32 u32ConvStride,HI_U32 u32MaxRois,HI_U32 u32MinSize,HI_U32 u32SpatialScale,HI_U32 u32NmsThresh,HI_U32 u32FilterThresh,HI_U32 u32NumBeforeNms,HI_U32 * pu32MemPool,HI_S32 * ps32ProposalResult,HI_U32 u32DstStride,HI_U32 * pu32NumRois)458 static HI_S32 SVP_NNIE_Rpn(HI_S32 **pps32Src, HI_U32 u32NumRatioAnchors, HI_U32 u32NumScaleAnchors, HI_U32 *au32Scales,
459 HI_U32 *au32Ratios, HI_U32 u32OriImHeight, HI_U32 u32OriImWidth, HI_U32 *pu32ConvHeight, HI_U32 *pu32ConvWidth,
460 HI_U32 *pu32ConvChannel, HI_U32 u32ConvStride, HI_U32 u32MaxRois, HI_U32 u32MinSize, HI_U32 u32SpatialScale,
461 HI_U32 u32NmsThresh, HI_U32 u32FilterThresh, HI_U32 u32NumBeforeNms, HI_U32 *pu32MemPool,
462 HI_S32 *ps32ProposalResult, HI_U32 u32DstStride, HI_U32 *pu32NumRois)
463 {
464 HI_U32 u32Size = 0;
465 HI_S32 *ps32Anchors = NULL;
466 HI_S32 *ps32BboxDelta = NULL;
467 HI_S32 *ps32Proposals = NULL;
468 HI_U32 *pu32Ptr = NULL;
469 HI_S32 *ps32Ptr = NULL;
470 HI_U32 u32NumAfterFilter = 0;
471 HI_U32 u32NumAnchors = 0;
472 HI_FLOAT f32BaseW = 0;
473 HI_FLOAT f32BaseH = 0;
474 HI_FLOAT f32BaseXCtr = 0;
475 HI_FLOAT f32BaseYCtr = 0;
476 HI_FLOAT f32SizeRatios = 0;
477 HI_FLOAT *pf32RatioAnchors = NULL;
478 HI_FLOAT *pf32Ptr = NULL;
479 HI_FLOAT *pf32Ptr2 = NULL;
480 HI_FLOAT *pf32ScaleAnchors = NULL;
481 HI_FLOAT *pf32Scores = NULL;
482 HI_FLOAT f32Ratios = 0;
483 HI_FLOAT f32Size = 0;
484 HI_U32 u32PixelInterval = 0;
485 HI_U32 u32SrcBboxIndex = 0;
486 HI_U32 u32SrcFgProbIndex = 0;
487 HI_U32 u32SrcBgProbIndex = 0;
488 HI_U32 u32SrcBboxBias = 0;
489 HI_U32 u32SrcProbBias = 0;
490 HI_U32 u32DesBox = 0;
491 HI_U32 u32BgBlobSize = 0;
492 HI_U32 u32AnchorsPerPixel = 0;
493 HI_U32 u32MapSize = 0;
494 HI_U32 u32LineSize = 0;
495 HI_S32 *ps32Ptr2 = NULL;
496 HI_S32 *ps32Ptr3 = NULL;
497 HI_S32 s32ProposalWidth = 0;
498 HI_S32 s32ProposalHeight = 0;
499 HI_S32 s32ProposalCenterX = 0;
500 HI_S32 s32ProposalCenterY = 0;
501 HI_S32 s32PredW = 0;
502 HI_S32 s32PredH = 0;
503 HI_S32 s32PredCenterX = 0;
504 HI_S32 s32PredCenterY = 0;
505 HI_U32 u32DesBboxDeltaIndex = 0;
506 HI_U32 u32DesScoreIndex = 0;
507 HI_U32 u32RoiCount = 0;
508 SAMPLE_SVP_NNIE_STACK_S *pstStack = NULL;
509 HI_S32 s32Ret = HI_SUCCESS;
510 HI_U32 c = 0;
511 HI_U32 h = 0;
512 HI_U32 w = 0;
513 HI_U32 i = 0;
514 HI_U32 j = 0;
515 HI_U32 p = 0;
516 HI_U32 q = 0;
517 HI_U32 z = 0;
518 HI_U32 au32BaseAnchor[4] = {0, 0, (u32MinSize -1), (u32MinSize -1)};
519
520 /* Faster RCNN */
521 /* calculate the start pointer of each part in MemPool */
522 pu32Ptr = (HI_U32 *)pu32MemPool;
523 ps32Anchors = (HI_S32 *)pu32Ptr;
524 u32NumAnchors = u32NumRatioAnchors * u32NumScaleAnchors * (pu32ConvHeight[0] * pu32ConvWidth[0]);
525 u32Size = SAMPLE_SVP_NNIE_COORDI_NUM * u32NumAnchors;
526 pu32Ptr += u32Size;
527
528 ps32BboxDelta = (HI_S32 *)pu32Ptr;
529 pu32Ptr += u32Size;
530
531 ps32Proposals = (HI_S32 *)pu32Ptr;
532 u32Size = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32NumAnchors;
533 pu32Ptr += u32Size;
534
535 pf32RatioAnchors = (HI_FLOAT *)pu32Ptr;
536 pf32Ptr = (HI_FLOAT *)pu32Ptr;
537 u32Size = u32NumRatioAnchors * SAMPLE_SVP_NNIE_COORDI_NUM;
538 pf32Ptr = pf32Ptr + u32Size;
539
540 pf32ScaleAnchors = pf32Ptr;
541 u32Size = u32NumScaleAnchors * u32NumRatioAnchors * SAMPLE_SVP_NNIE_COORDI_NUM;
542 pf32Ptr = pf32Ptr + u32Size;
543
544 pf32Scores = pf32Ptr;
545 u32Size = u32NumAnchors * SAMPLE_SVP_NNIE_SCORE_NUM;
546 pf32Ptr = pf32Ptr + u32Size;
547
548 pstStack = (SAMPLE_SVP_NNIE_STACK_S *)pf32Ptr;
549
550 /* Generate the base anchor */
551 f32BaseW = (HI_FLOAT)(au32BaseAnchor[SAMPLE_SVP_NNIE_X_MAX_OFFSET] -
552 au32BaseAnchor[SAMPLE_SVP_NNIE_X_MIN_OFFSET] + 1);
553 f32BaseH = (HI_FLOAT)(au32BaseAnchor[SAMPLE_SVP_NNIE_Y_MAX_OFFSET] -
554 au32BaseAnchor[SAMPLE_SVP_NNIE_Y_MIN_OFFSET] + 1);
555 f32BaseXCtr = (HI_FLOAT)(au32BaseAnchor[0] + ((f32BaseW - 1) * SAMPLE_SVP_NNIE_HALF));
556 f32BaseYCtr = (HI_FLOAT)(au32BaseAnchor[1] + ((f32BaseH - 1) * SAMPLE_SVP_NNIE_HALF));
557
558 /* Generate Ratio Anchors for the base anchor */
559 pf32Ptr = pf32RatioAnchors;
560 f32Size = f32BaseW * f32BaseH;
561 for (i = 0; i < u32NumRatioAnchors; i++) {
562 f32Ratios = (HI_FLOAT)au32Ratios[i] / SAMPLE_SVP_NNIE_QUANT_BASE;
563 f32SizeRatios = f32Size / f32Ratios;
564 f32BaseW = sqrt(f32SizeRatios);
565 f32BaseW = (HI_FLOAT)(1.0 *
566 ((f32BaseW) >= 0 ? (HI_S32)(f32BaseW + SAMPLE_SVP_NNIE_HALF) : (HI_S32)(f32BaseW - SAMPLE_SVP_NNIE_HALF)));
567 f32BaseH = f32BaseW * f32Ratios;
568 f32BaseH = (HI_FLOAT)(1.0 *
569 ((f32BaseH) >= 0 ? (HI_S32)(f32BaseH + SAMPLE_SVP_NNIE_HALF) : (HI_S32)(f32BaseH - SAMPLE_SVP_NNIE_HALF)));
570
571 *pf32Ptr++ = (HI_FLOAT)(f32BaseXCtr - ((f32BaseW - 1) * SAMPLE_SVP_NNIE_HALF));
572 *(pf32Ptr++) = (HI_FLOAT)(f32BaseYCtr - ((f32BaseH - 1) * SAMPLE_SVP_NNIE_HALF));
573 *(pf32Ptr++) = (HI_FLOAT)(f32BaseXCtr + ((f32BaseW - 1) * SAMPLE_SVP_NNIE_HALF));
574 *(pf32Ptr++) = (HI_FLOAT)(f32BaseYCtr + ((f32BaseH - 1) * SAMPLE_SVP_NNIE_HALF));
575 }
576
577 /* Generate Scale Anchors for each Ratio Anchor */
578 pf32Ptr = pf32RatioAnchors;
579 pf32Ptr2 = pf32ScaleAnchors;
580 /* Generate Scale Anchors for one pixel */
581 for (i = 0; i < u32NumRatioAnchors; i++) {
582 for (j = 0; j < u32NumScaleAnchors; j++) {
583 f32BaseW = *(pf32Ptr + SAMPLE_SVP_NNIE_X_MAX_OFFSET) - *(pf32Ptr) + 1;
584 f32BaseH = *(pf32Ptr + SAMPLE_SVP_NNIE_Y_MAX_OFFSET) - *(pf32Ptr + SAMPLE_SVP_NNIE_Y_MIN_OFFSET) + 1;
585 f32BaseXCtr = (HI_FLOAT)(*(pf32Ptr) + ((f32BaseW - 1) * SAMPLE_SVP_NNIE_HALF));
586 f32BaseYCtr = (HI_FLOAT)(*(pf32Ptr + 1) + ((f32BaseH - 1) * SAMPLE_SVP_NNIE_HALF));
587
588 *(pf32Ptr2++) = (HI_FLOAT)(f32BaseXCtr -
589 ((f32BaseW * ((HI_FLOAT)au32Scales[j] / SAMPLE_SVP_NNIE_QUANT_BASE) - 1) * SAMPLE_SVP_NNIE_HALF));
590 *(pf32Ptr2++) = (HI_FLOAT)(f32BaseYCtr -
591 ((f32BaseH * ((HI_FLOAT)au32Scales[j] / SAMPLE_SVP_NNIE_QUANT_BASE) - 1) * SAMPLE_SVP_NNIE_HALF));
592 *(pf32Ptr2++) = (HI_FLOAT)(f32BaseXCtr +
593 ((f32BaseW * ((HI_FLOAT)au32Scales[j] / SAMPLE_SVP_NNIE_QUANT_BASE) - 1) * SAMPLE_SVP_NNIE_HALF));
594 *(pf32Ptr2++) = (HI_FLOAT)(f32BaseYCtr +
595 ((f32BaseH * ((HI_FLOAT)au32Scales[j] / SAMPLE_SVP_NNIE_QUANT_BASE) - 1) * SAMPLE_SVP_NNIE_HALF));
596 }
597 pf32Ptr += SAMPLE_SVP_NNIE_COORDI_NUM;
598 }
599
600 /* Copy the anchors to every pixel in the feature map */
601 ps32Ptr = ps32Anchors;
602 if (u32SpatialScale == 0) {
603 printf("Divisor u32SpatialScale cannot be 0!\n");
604 return HI_FAILURE;
605 }
606 u32PixelInterval = SAMPLE_SVP_NNIE_QUANT_BASE / u32SpatialScale;
607
608 for (p = 0; p < pu32ConvHeight[0]; p++) {
609 for (q = 0; q < pu32ConvWidth[0]; q++) {
610 pf32Ptr2 = pf32ScaleAnchors;
611 for (z = 0; z < u32NumScaleAnchors * u32NumRatioAnchors; z++) {
612 *(ps32Ptr++) = (HI_S32)(q * u32PixelInterval + *(pf32Ptr2++));
613 *(ps32Ptr++) = (HI_S32)(p * u32PixelInterval + *(pf32Ptr2++));
614 *(ps32Ptr++) = (HI_S32)(q * u32PixelInterval + *(pf32Ptr2++));
615 *(ps32Ptr++) = (HI_S32)(p * u32PixelInterval + *(pf32Ptr2++));
616 }
617 }
618 }
619
620 /* do transpose, convert the blob from (M,C,H,W) to (M,H,W,C) */
621 u32MapSize = pu32ConvHeight[1] * u32ConvStride / sizeof(HI_U32);
622 u32AnchorsPerPixel = u32NumRatioAnchors * u32NumScaleAnchors;
623 u32BgBlobSize = u32AnchorsPerPixel * u32MapSize;
624 u32LineSize = u32ConvStride / sizeof(HI_U32);
625 u32SrcProbBias = 0;
626 u32SrcBboxBias = 0;
627
628 for (c = 0; c < pu32ConvChannel[1]; c++) {
629 for (h = 0; h < pu32ConvHeight[1]; h++) {
630 for (w = 0; w < pu32ConvWidth[1]; w++) {
631 u32SrcBboxIndex = u32SrcBboxBias + c * u32MapSize + h * u32LineSize + w;
632 u32SrcBgProbIndex =
633 u32SrcProbBias + (c / SAMPLE_SVP_NNIE_COORDI_NUM) * u32MapSize + h * u32LineSize + w;
634 u32SrcFgProbIndex = u32BgBlobSize + u32SrcBgProbIndex;
635
636 u32DesBox = (u32AnchorsPerPixel) * (h * pu32ConvWidth[1] + w) + c / SAMPLE_SVP_NNIE_COORDI_NUM;
637
638 u32DesBboxDeltaIndex = SAMPLE_SVP_NNIE_COORDI_NUM * u32DesBox + c % SAMPLE_SVP_NNIE_COORDI_NUM;
639 ps32BboxDelta[u32DesBboxDeltaIndex] = (HI_S32)pps32Src[1][u32SrcBboxIndex];
640
641 u32DesScoreIndex = (SAMPLE_SVP_NNIE_SCORE_NUM)*u32DesBox;
642 pf32Scores[u32DesScoreIndex] =
643 (HI_FLOAT)((HI_S32)pps32Src[0][u32SrcBgProbIndex]) / SAMPLE_SVP_NNIE_QUANT_BASE;
644 pf32Scores[u32DesScoreIndex + 1] =
645 (HI_FLOAT)((HI_S32)pps32Src[0][u32SrcFgProbIndex]) / SAMPLE_SVP_NNIE_QUANT_BASE;
646 }
647 }
648 }
649
650 /* do softmax */
651 pf32Ptr = pf32Scores;
652 for (i = 0; i < u32NumAnchors; i++) {
653 s32Ret = SVP_NNIE_SoftMax(pf32Ptr, SAMPLE_SVP_NNIE_SCORE_NUM);
654 pf32Ptr += SAMPLE_SVP_NNIE_SCORE_NUM;
655 }
656
657 /* BBox Transform */
658 /* use parameters from Conv3 to adjust the coordinates of anchors */
659 ps32Ptr = ps32Anchors;
660 ps32Ptr2 = ps32Proposals;
661 ps32Ptr3 = ps32BboxDelta;
662 for (i = 0; i < u32NumAnchors; i++) {
663 ps32Ptr = ps32Anchors;
664 ps32Ptr = ps32Ptr + SAMPLE_SVP_NNIE_COORDI_NUM * i;
665 ps32Ptr2 = ps32Proposals;
666 ps32Ptr2 = ps32Ptr2 + SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
667 ps32Ptr3 = ps32BboxDelta;
668 ps32Ptr3 = ps32Ptr3 + SAMPLE_SVP_NNIE_COORDI_NUM * i;
669 pf32Ptr = pf32Scores;
670 pf32Ptr = pf32Ptr + i * (SAMPLE_SVP_NNIE_SCORE_NUM);
671
672 s32ProposalWidth = *(ps32Ptr + SAMPLE_SVP_NNIE_X_MAX_OFFSET) - *(ps32Ptr) + 1;
673 s32ProposalHeight = *(ps32Ptr + SAMPLE_SVP_NNIE_Y_MAX_OFFSET) - *(ps32Ptr + SAMPLE_SVP_NNIE_Y_MIN_OFFSET) + 1;
674 s32ProposalCenterX = *(ps32Ptr) + (HI_S32)(s32ProposalWidth * SAMPLE_SVP_NNIE_HALF);
675 s32ProposalCenterY = *(ps32Ptr + 1) + (HI_S32)(s32ProposalHeight * SAMPLE_SVP_NNIE_HALF);
676 s32PredCenterX =
677 (HI_S32)(((HI_FLOAT)(*(ps32Ptr3)) / SAMPLE_SVP_NNIE_QUANT_BASE) * s32ProposalWidth + s32ProposalCenterX);
678 s32PredCenterY = (HI_S32)(((HI_FLOAT)(*(ps32Ptr3 + 1)) / SAMPLE_SVP_NNIE_QUANT_BASE) * s32ProposalHeight +
679 s32ProposalCenterY);
680
681 s32PredW =
682 (HI_S32)(s32ProposalWidth * SVP_NNIE_QuickExp((HI_S32)(*(ps32Ptr3 + SAMPLE_SVP_NNIE_X_MAX_OFFSET))));
683 s32PredH =
684 (HI_S32)(s32ProposalHeight * SVP_NNIE_QuickExp((HI_S32)(*(ps32Ptr3 + SAMPLE_SVP_NNIE_Y_MAX_OFFSET))));
685 *(ps32Ptr2) = (HI_S32)(s32PredCenterX - SAMPLE_SVP_NNIE_HALF * s32PredW);
686 *(ps32Ptr2 + SAMPLE_SVP_NNIE_Y_MIN_OFFSET) = (HI_S32)(s32PredCenterY - SAMPLE_SVP_NNIE_HALF * s32PredH);
687 *(ps32Ptr2 + SAMPLE_SVP_NNIE_X_MAX_OFFSET) = (HI_S32)(s32PredCenterX + SAMPLE_SVP_NNIE_HALF * s32PredW);
688 *(ps32Ptr2 + SAMPLE_SVP_NNIE_Y_MAX_OFFSET) = (HI_S32)(s32PredCenterY + SAMPLE_SVP_NNIE_HALF * s32PredH);
689 *(ps32Ptr2 + SAMPLE_SVP_NNIE_SCORE_OFFSET) = (HI_S32)(*(pf32Ptr + 1) * SAMPLE_SVP_NNIE_QUANT_BASE);
690 *(ps32Ptr2 + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET) = 0;
691 }
692
693 /* clip bbox */
694 for (i = 0; i < u32NumAnchors; i++) {
695 ps32Ptr = ps32Proposals;
696 ps32Ptr = ps32Ptr + SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
697 *ps32Ptr = SAMPLE_SVP_NNIE_MAX(SAMPLE_SVP_NNIE_MIN(*ps32Ptr, (HI_S32)u32OriImWidth - 1), 0);
698 *(ps32Ptr + SAMPLE_SVP_NNIE_Y_MIN_OFFSET) = SAMPLE_SVP_NNIE_MAX(
699 SAMPLE_SVP_NNIE_MIN(*(ps32Ptr + SAMPLE_SVP_NNIE_Y_MIN_OFFSET), (HI_S32)u32OriImHeight - 1), 0);
700 *(ps32Ptr + SAMPLE_SVP_NNIE_X_MAX_OFFSET) = SAMPLE_SVP_NNIE_MAX(
701 SAMPLE_SVP_NNIE_MIN(*(ps32Ptr + SAMPLE_SVP_NNIE_X_MAX_OFFSET), (HI_S32)u32OriImWidth - 1), 0);
702 *(ps32Ptr + SAMPLE_SVP_NNIE_Y_MAX_OFFSET) = SAMPLE_SVP_NNIE_MAX(
703 SAMPLE_SVP_NNIE_MIN(*(ps32Ptr + SAMPLE_SVP_NNIE_Y_MAX_OFFSET), (HI_S32)u32OriImHeight - 1), 0);
704 }
705
706 /* remove the bboxes which are too small */
707 for (i = 0; i < u32NumAnchors; i++) {
708 ps32Ptr = ps32Proposals;
709 ps32Ptr = ps32Ptr + SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
710 s32ProposalWidth = *(ps32Ptr + SAMPLE_SVP_NNIE_X_MAX_OFFSET) - *(ps32Ptr) + 1;
711 s32ProposalHeight = *(ps32Ptr + SAMPLE_SVP_NNIE_Y_MAX_OFFSET) - *(ps32Ptr + 1) + 1;
712 if (s32ProposalWidth < (HI_S32)u32MinSize || s32ProposalHeight < (HI_S32)u32MinSize) {
713 *(ps32Ptr + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET) = 1;
714 }
715 }
716
717 /* remove low score bboxes */
718 (void)SVP_NNIE_FilterLowScoreBbox(ps32Proposals, u32NumAnchors, u32FilterThresh, &u32NumAfterFilter);
719
720 if (u32NumAfterFilter >= 1) {
721 (void)SVP_NNIE_NonRecursiveArgQuickSort(ps32Proposals, 0, u32NumAfterFilter - 1, pstStack, u32NumBeforeNms);
722 }
723 u32NumAfterFilter = (u32NumAfterFilter < u32NumBeforeNms) ? u32NumAfterFilter : u32NumBeforeNms;
724
725 /* do nms to remove highly overlapped bbox */
726 (void)SVP_NNIE_NonMaxSuppression(ps32Proposals, u32NumAfterFilter, u32NmsThresh, u32MaxRois); /* function NMS */
727
728 /* write the final result to output */
729 u32RoiCount = 0;
730 for (i = 0; i < u32NumAfterFilter; i++) {
731 ps32Ptr = ps32Proposals;
732 ps32Ptr = ps32Ptr + SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
733 if (*(ps32Ptr + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET) == 0) {
734 /* In this sample,the output Roi coordinates will be input in hardware,
735 so the type coordinates are convert to HI_S20Q12 */
736 ps32ProposalResult[u32DstStride / sizeof(HI_U32) * u32RoiCount] = *ps32Ptr * SAMPLE_SVP_NNIE_QUANT_BASE;
737 ps32ProposalResult[u32DstStride / sizeof(HI_U32) * u32RoiCount + 1] =
738 *(ps32Ptr + 1) * SAMPLE_SVP_NNIE_QUANT_BASE;
739 ps32ProposalResult[u32DstStride / sizeof(HI_U32) * u32RoiCount + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
740 *(ps32Ptr + SAMPLE_SVP_NNIE_X_MAX_OFFSET) * SAMPLE_SVP_NNIE_QUANT_BASE;
741 ps32ProposalResult[u32DstStride / sizeof(HI_U32) * u32RoiCount + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
742 *(ps32Ptr + SAMPLE_SVP_NNIE_Y_MAX_OFFSET) * SAMPLE_SVP_NNIE_QUANT_BASE;
743 u32RoiCount++;
744 }
745 if (u32RoiCount >= u32MaxRois) {
746 break;
747 }
748 }
749
750 *pu32NumRois = u32RoiCount;
751
752 return s32Ret;
753 }
754
755 /*
756 * Prototype : SVP_NNIE_FasterRcnn_GetResult
757 * Description : this function is used to get FasterRcnn result
758 * Input : HI_S32* ps32FcBbox [IN] Bbox for Roi
759 * HI_S32 *ps32FcScore [IN] Score for roi
760 * HI_S32 *ps32Proposals [IN] proposal
761 * HI_U32 u32RoiCnt [IN] Roi num
762 * HI_U32 *pu32ConfThresh [IN] each class confidence thresh
763 * HI_U32 u32NmsThresh [IN] Nms thresh
764 * HI_U32 u32MaxRoi [IN] max roi
765 * HI_U32 u32ClassNum [IN] class num
766 * HI_U32 u32OriImWidth [IN] input image width
767 * HI_U32 u32OriImHeight [IN] input image height
768 * HI_U32* pu32MemPool [IN] assist buffer
769 * HI_S32* ps32DstScore [OUT] result of score
770 * HI_S32* ps32DstRoi [OUT] result of Bbox
771 * HI_S32* ps32ClassRoiNum [OUT] result of the roi num of each class
772 */
SVP_NNIE_FasterRcnn_GetResult(HI_S32 * ps32FcBbox,HI_U32 u32BboxStride,HI_S32 * ps32FcScore,HI_U32 u32ScoreStride,HI_S32 * ps32Proposal,HI_U32 u32ProposalStride,HI_U32 u32RoiCnt,HI_U32 * pu32ConfThresh,HI_U32 u32NmsThresh,HI_U32 u32MaxRoi,HI_U32 u32ClassNum,HI_U32 u32OriImWidth,HI_U32 u32OriImHeight,HI_U32 * pu32MemPool,HI_S32 * ps32DstScore,HI_S32 * ps32DstBbox,HI_S32 * ps32ClassRoiNum)773 static HI_S32 SVP_NNIE_FasterRcnn_GetResult(HI_S32 *ps32FcBbox, HI_U32 u32BboxStride, HI_S32 *ps32FcScore,
774 HI_U32 u32ScoreStride, HI_S32 *ps32Proposal, HI_U32 u32ProposalStride, HI_U32 u32RoiCnt, HI_U32 *pu32ConfThresh,
775 HI_U32 u32NmsThresh, HI_U32 u32MaxRoi, HI_U32 u32ClassNum, HI_U32 u32OriImWidth, HI_U32 u32OriImHeight,
776 HI_U32 *pu32MemPool, HI_S32 *ps32DstScore, HI_S32 *ps32DstBbox, HI_S32 *ps32ClassRoiNum)
777 {
778 HI_U32 u32Size = 0;
779 HI_U32 u32ClsScoreChannels = 0;
780 HI_S32 *ps32Proposals = NULL;
781 HI_U32 u32FcScoreWidth = 0;
782 HI_U32 u32FcBboxWidth = 0;
783 HI_FLOAT f32ProposalWidth = 0.0;
784 HI_FLOAT f32ProposalHeight = 0.0;
785 HI_FLOAT f32ProposalCenterX = 0.0;
786 HI_FLOAT f32ProposalCenterY = 0.0;
787 HI_FLOAT f32PredW = 0.0;
788 HI_FLOAT f32PredH = 0.0;
789 HI_FLOAT f32PredCenterX = 0.0;
790 HI_FLOAT f32PredCenterY = 0.0;
791 HI_FLOAT *pf32FcScoresMemPool = NULL;
792 HI_S32 *ps32ProposalMemPool = NULL;
793 HI_S32 *ps32ProposalTmp = NULL;
794 HI_U32 u32FcBboxIndex = 0;
795 HI_U32 u32ProposalMemPoolIndex = 0;
796 HI_FLOAT *pf32Ptr = NULL;
797 HI_S32 *ps32Ptr = NULL;
798 HI_S32 *ps32Score = NULL;
799 HI_S32 *ps32Bbox = NULL;
800 HI_S32 *ps32RoiCnt = NULL;
801 HI_U32 u32RoiOutCnt = 0;
802 HI_U32 u32SrcIndex = 0;
803 HI_U32 u32DstIndex = 0;
804 HI_U32 i = 0;
805 HI_U32 j = 0;
806 HI_U32 k = 0;
807 SAMPLE_SVP_NNIE_STACK_S *pstStack = NULL;
808 HI_S32 s32Ret = HI_SUCCESS;
809 HI_U32 u32OffSet = 0;
810 HI_U32 u32ProposalOffset = u32ProposalStride / sizeof(HI_S32);
811
812 /* Get or calculate parameters */
813 u32ClsScoreChannels = u32ClassNum; /* channel num is equal to class size, cls_score class */
814 u32FcScoreWidth = u32ScoreStride / sizeof(HI_U32);
815 u32FcBboxWidth = u32BboxStride / sizeof(HI_U32);
816
817 /* Get Start Pointer of MemPool */
818 pf32FcScoresMemPool = (HI_FLOAT *)pu32MemPool;
819 pf32Ptr = pf32FcScoresMemPool;
820 u32Size = u32MaxRoi * u32ClsScoreChannels;
821 pf32Ptr += u32Size;
822
823 ps32ProposalMemPool = (HI_S32 *)pf32Ptr;
824 ps32Ptr = ps32ProposalMemPool;
825 u32Size = u32MaxRoi * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH;
826 ps32Ptr += u32Size;
827 pstStack = (SAMPLE_SVP_NNIE_STACK_S *)ps32Ptr;
828
829 u32DstIndex = 0;
830
831 for (i = 0; i < u32RoiCnt; i++) {
832 for (k = 0; k < u32ClsScoreChannels; k++) {
833 u32SrcIndex = i * u32FcScoreWidth + k;
834 pf32FcScoresMemPool[u32DstIndex++] =
835 (HI_FLOAT)((HI_S32)ps32FcScore[u32SrcIndex]) / SAMPLE_SVP_NNIE_QUANT_BASE;
836 }
837 }
838 ps32Proposals = (HI_S32 *)ps32Proposal;
839
840 /* bbox transform */
841 for (j = 0; j < u32ClsScoreChannels; j++) {
842 for (i = 0; i < u32RoiCnt; i++) {
843 f32ProposalWidth = (HI_FLOAT)(ps32Proposals[u32ProposalOffset * i + SAMPLE_SVP_NNIE_X_MAX_OFFSET] -
844 ps32Proposals[u32ProposalOffset * i] + 1);
845 f32ProposalHeight = (HI_FLOAT)(ps32Proposals[u32ProposalOffset * i + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] -
846 ps32Proposals[u32ProposalOffset * i + 1] + 1);
847 f32ProposalCenterX =
848 (HI_FLOAT)(ps32Proposals[u32ProposalOffset * i] + SAMPLE_SVP_NNIE_HALF * f32ProposalWidth);
849 f32ProposalCenterY =
850 (HI_FLOAT)(ps32Proposals[u32ProposalOffset * i + 1] + SAMPLE_SVP_NNIE_HALF * f32ProposalHeight);
851
852 u32FcBboxIndex = u32FcBboxWidth * i + SAMPLE_SVP_NNIE_COORDI_NUM * j;
853 f32PredCenterX = ((HI_FLOAT)ps32FcBbox[u32FcBboxIndex] / SAMPLE_SVP_NNIE_QUANT_BASE) * f32ProposalWidth +
854 f32ProposalCenterX;
855 f32PredCenterY =
856 ((HI_FLOAT)ps32FcBbox[u32FcBboxIndex + 1] / SAMPLE_SVP_NNIE_QUANT_BASE) * f32ProposalHeight +
857 f32ProposalCenterY;
858 f32PredW = f32ProposalWidth *
859 SVP_NNIE_QuickExp((HI_S32)(ps32FcBbox[u32FcBboxIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET]));
860 f32PredH = f32ProposalHeight *
861 SVP_NNIE_QuickExp((HI_S32)(ps32FcBbox[u32FcBboxIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET]));
862
863 u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
864 ps32ProposalMemPool[u32ProposalMemPoolIndex] = (HI_S32)(f32PredCenterX - SAMPLE_SVP_NNIE_HALF * f32PredW);
865 ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] =
866 (HI_S32)(f32PredCenterY - SAMPLE_SVP_NNIE_HALF * f32PredH);
867 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
868 (HI_S32)(f32PredCenterX + SAMPLE_SVP_NNIE_HALF * f32PredW);
869 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
870 (HI_S32)(f32PredCenterY + SAMPLE_SVP_NNIE_HALF * f32PredH);
871 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SCORE_OFFSET] =
872 (HI_S32)(pf32FcScoresMemPool[u32ClsScoreChannels * i + j] * SAMPLE_SVP_NNIE_QUANT_BASE);
873 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] = 0;
874 }
875
876 /* clip bbox */
877 for (i = 0; i < u32RoiCnt; i++) {
878 u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
879 ps32ProposalMemPool[u32ProposalMemPoolIndex] =
880 ((ps32ProposalMemPool[u32ProposalMemPoolIndex]) > ((HI_S32)u32OriImWidth - 1) ?
881 ((HI_S32)u32OriImWidth - 1) :
882 (ps32ProposalMemPool[u32ProposalMemPoolIndex])) > 0 ?
883 ((ps32ProposalMemPool[u32ProposalMemPoolIndex]) > ((HI_S32)u32OriImWidth) ?
884 ((HI_S32)u32OriImWidth - 1) :
885 (ps32ProposalMemPool[u32ProposalMemPoolIndex])) : 0;
886 ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] =
887 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + 1]) > ((HI_S32)u32OriImHeight - 1) ?
888 ((HI_S32)u32OriImHeight - 1) :
889 (ps32ProposalMemPool[u32ProposalMemPoolIndex + 1])) > 0 ?
890 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + 1]) > ((HI_S32)u32OriImHeight) ?
891 ((HI_S32)u32OriImHeight - 1) :
892 (ps32ProposalMemPool[u32ProposalMemPoolIndex + 1])) : 0;
893 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
894 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET]) >
895 ((HI_S32)u32OriImWidth - 1) ? ((HI_S32)u32OriImWidth - 1) :
896 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET])) > 0 ?
897 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET]) >
898 ((HI_S32)u32OriImWidth) ? ((HI_S32)u32OriImWidth - 1) :
899 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET])) : 0;
900 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
901 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET]) >
902 ((HI_S32)u32OriImHeight - 1) ? ((HI_S32)u32OriImHeight - 1) :
903 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET])) > 0 ?
904 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET]) >
905 ((HI_S32)u32OriImHeight) ? ((HI_S32)u32OriImHeight - 1) :
906 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET])) : 0;
907 }
908
909 ps32ProposalTmp = ps32ProposalMemPool;
910 if (u32RoiCnt >= 1) {
911 (void)SVP_NNIE_NonRecursiveArgQuickSort(ps32ProposalTmp, 0, u32RoiCnt - 1, pstStack, u32RoiCnt);
912 }
913 (void)SVP_NNIE_NonMaxSuppression(ps32ProposalTmp, u32RoiCnt, u32NmsThresh, u32RoiCnt);
914
915 ps32Score = (HI_S32 *)ps32DstScore;
916 ps32Bbox = (HI_S32 *)ps32DstBbox;
917 ps32RoiCnt = (HI_S32 *)ps32ClassRoiNum;
918
919 ps32Score += (HI_S32)(u32OffSet);
920 ps32Bbox += (HI_S32)(SAMPLE_SVP_NNIE_COORDI_NUM * u32OffSet);
921
922 u32RoiOutCnt = 0;
923 for (i = 0; i < u32RoiCnt; i++) {
924 u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
925 if ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] == 0) &&
926 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SCORE_OFFSET] >
927 (HI_S32)pu32ConfThresh[j])) {
928 ps32Score[u32RoiOutCnt] = ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SCORE_OFFSET];
929 ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM] = ps32ProposalMemPool[u32ProposalMemPoolIndex];
930 ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 1] =
931 ps32ProposalMemPool[u32ProposalMemPoolIndex + 1];
932 ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
933 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET];
934 ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
935 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET];
936 u32RoiOutCnt++;
937 }
938 if (u32RoiOutCnt >= u32RoiCnt)
939 break;
940 }
941 ps32RoiCnt[j] = (HI_S32)u32RoiOutCnt;
942 u32OffSet += u32RoiOutCnt;
943 }
944 return s32Ret;
945 }
946
947 /*
948 * Prototype : SVP_NNIE_Pvanet_GetResult
949 * Description : this function is used to get FasterRcnn result
950 * Input : HI_S32* ps32FcBbox [IN] Bbox for Roi
951 * HI_S32 *ps32FcScore [IN] Score for roi
952 * HI_S32 *ps32Proposals [IN] proposal
953 * HI_U32 u32RoiCnt [IN] Roi num
954 * HI_U32 *pu32ConfThresh [IN] each class confidence thresh
955 * HI_U32 u32NmsThresh [IN] Nms thresh
956 * HI_U32 u32MaxRoi [IN] max roi
957 * HI_U32 u32ClassNum [IN] class num
958 * HI_U32 u32OriImWidth [IN] input image width
959 * HI_U32 u32OriImHeight [IN] input image height
960 * HI_U32* pu32MemPool [IN] assist buffer
961 * HI_S32* ps32DstScore [OUT] result of score
962 * HI_S32* ps32DstRoi [OUT] result of Bbox
963 * HI_S32* ps32ClassRoiNum [OUT] result of the roi num of each class
964 */
SVP_NNIE_Pvanet_GetResult(HI_S32 * ps32FcBbox,HI_U32 u32BboxStride,HI_S32 * ps32FcScore,HI_U32 u32ScoreStride,HI_S32 * ps32Proposal,HI_U32 u32ProposalStride,HI_U32 u32RoiCnt,HI_U32 * pu32ConfThresh,HI_U32 u32NmsThresh,HI_U32 u32MaxRoi,HI_U32 u32ClassNum,HI_U32 u32OriImWidth,HI_U32 u32OriImHeight,HI_U32 * pu32MemPool,HI_S32 * ps32DstScore,HI_S32 * ps32DstBbox,HI_S32 * ps32ClassRoiNum)965 static HI_S32 SVP_NNIE_Pvanet_GetResult(HI_S32 *ps32FcBbox, HI_U32 u32BboxStride, HI_S32 *ps32FcScore,
966 HI_U32 u32ScoreStride, HI_S32 *ps32Proposal, HI_U32 u32ProposalStride, HI_U32 u32RoiCnt, HI_U32 *pu32ConfThresh,
967 HI_U32 u32NmsThresh, HI_U32 u32MaxRoi, HI_U32 u32ClassNum, HI_U32 u32OriImWidth, HI_U32 u32OriImHeight,
968 HI_U32 *pu32MemPool, HI_S32 *ps32DstScore, HI_S32 *ps32DstBbox, HI_S32 *ps32ClassRoiNum)
969 {
970 /* define variables */
971 HI_U32 u32Size = 0;
972 HI_U32 u32ClsScoreChannels = 0;
973 HI_S32 *ps32Proposals = NULL;
974 HI_U32 u32FcScoreWidth = 0;
975 HI_U32 u32FcBboxWidth = 0;
976 HI_FLOAT f32ProposalWidth = 0.0;
977 HI_FLOAT f32ProposalHeight = 0.0;
978 HI_FLOAT f32ProposalCenterX = 0.0;
979 HI_FLOAT f32ProposalCenterY = 0.0;
980 HI_FLOAT f32PredW = 0.0;
981 HI_FLOAT f32PredH = 0.0;
982 HI_FLOAT f32PredCenterX = 0.0;
983 HI_FLOAT f32PredCenterY = 0.0;
984 HI_FLOAT *pf32FcScoresMemPool = NULL;
985 HI_S32 *ps32ProposalMemPool = NULL;
986 HI_S32 *ps32ProposalTmp = NULL;
987 HI_U32 u32FcBboxIndex = 0;
988 HI_U32 u32ProposalMemPoolIndex = 0;
989 HI_FLOAT *pf32Ptr = NULL;
990 HI_S32 *ps32Ptr = NULL;
991 HI_S32 *ps32Score = NULL;
992 HI_S32 *ps32Bbox = NULL;
993 HI_S32 *ps32RoiCnt = NULL;
994 HI_U32 u32RoiOutCnt = 0;
995 HI_U32 u32SrcIndex = 0;
996 HI_U32 u32DstIndex = 0;
997 HI_U32 i = 0;
998 HI_U32 j = 0;
999 HI_U32 k = 0;
1000 SAMPLE_SVP_NNIE_STACK_S *pstStack = NULL;
1001 HI_S32 s32Ret = HI_SUCCESS;
1002 HI_U32 u32OffSet = 0;
1003 HI_U32 u32ProposalOffset = u32ProposalStride / sizeof(HI_S32);
1004
1005 /* Get or calculate parameters */
1006 u32ClsScoreChannels = u32ClassNum; /* channel num is equal to class size, cls_score class */
1007 u32FcScoreWidth = u32ScoreStride / sizeof(HI_U32);
1008 u32FcBboxWidth = u32BboxStride / sizeof(HI_U32);
1009
1010 /* Get Start Pointer of MemPool */
1011 pf32FcScoresMemPool = (HI_FLOAT *)pu32MemPool;
1012 pf32Ptr = pf32FcScoresMemPool;
1013 u32Size = u32MaxRoi * u32ClsScoreChannels;
1014 pf32Ptr += u32Size;
1015
1016 ps32ProposalMemPool = (HI_S32 *)pf32Ptr;
1017 ps32Ptr = ps32ProposalMemPool;
1018 u32Size = u32MaxRoi * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH;
1019 ps32Ptr += u32Size;
1020 pstStack = (SAMPLE_SVP_NNIE_STACK_S *)ps32Ptr;
1021
1022 u32DstIndex = 0;
1023
1024 for (i = 0; i < u32RoiCnt; i++) {
1025 for (k = 0; k < u32ClsScoreChannels; k++) {
1026 u32SrcIndex = i * u32FcScoreWidth + k;
1027 pf32FcScoresMemPool[u32DstIndex++] =
1028 (HI_FLOAT)((HI_S32)ps32FcScore[u32SrcIndex]) / SAMPLE_SVP_NNIE_QUANT_BASE;
1029 }
1030 }
1031 ps32Proposals = (HI_S32 *)ps32Proposal;
1032
1033 /* bbox transform */
1034 for (j = 0; j < u32ClsScoreChannels; j++) {
1035 for (i = 0; i < u32RoiCnt; i++) {
1036 f32ProposalWidth =
1037 (HI_FLOAT)(ps32Proposals[u32ProposalOffset * i + SAMPLE_SVP_NNIE_X_MAX_OFFSET] -
1038 ps32Proposals[u32ProposalOffset * i] + 1);
1039 f32ProposalHeight =
1040 (HI_FLOAT)(ps32Proposals[u32ProposalOffset * i + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] -
1041 ps32Proposals[u32ProposalOffset * i + 1] + 1);
1042 f32ProposalCenterX =
1043 (HI_FLOAT)(ps32Proposals[u32ProposalOffset * i] + SAMPLE_SVP_NNIE_HALF * f32ProposalWidth);
1044 f32ProposalCenterY =
1045 (HI_FLOAT)(ps32Proposals[u32ProposalOffset * i + 1] + SAMPLE_SVP_NNIE_HALF * f32ProposalHeight);
1046
1047 u32FcBboxIndex = u32FcBboxWidth * i + SAMPLE_SVP_NNIE_COORDI_NUM * j;
1048 f32PredCenterX = ((HI_FLOAT)ps32FcBbox[u32FcBboxIndex] / SAMPLE_SVP_NNIE_QUANT_BASE) * f32ProposalWidth +
1049 f32ProposalCenterX;
1050 f32PredCenterY =
1051 ((HI_FLOAT)ps32FcBbox[u32FcBboxIndex + 1] / SAMPLE_SVP_NNIE_QUANT_BASE) * f32ProposalHeight +
1052 f32ProposalCenterY;
1053 f32PredW = f32ProposalWidth * SVP_NNIE_QuickExp(
1054 (HI_S32)(ps32FcBbox[u32FcBboxIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET]));
1055 f32PredH = f32ProposalHeight * SVP_NNIE_QuickExp(
1056 (HI_S32)(ps32FcBbox[u32FcBboxIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET]));
1057
1058 u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
1059 ps32ProposalMemPool[u32ProposalMemPoolIndex] = (HI_S32)(f32PredCenterX - SAMPLE_SVP_NNIE_HALF * f32PredW);
1060 ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] =
1061 (HI_S32)(f32PredCenterY - SAMPLE_SVP_NNIE_HALF * f32PredH);
1062 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
1063 (HI_S32)(f32PredCenterX + SAMPLE_SVP_NNIE_HALF * f32PredW);
1064 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
1065 (HI_S32)(f32PredCenterY + SAMPLE_SVP_NNIE_HALF * f32PredH);
1066 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SCORE_OFFSET] =
1067 (HI_S32)(pf32FcScoresMemPool[u32ClsScoreChannels * i + j] * SAMPLE_SVP_NNIE_QUANT_BASE);
1068 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] = 0;
1069 }
1070
1071 /* clip bbox */
1072 for (i = 0; i < u32RoiCnt; i++) {
1073 u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
1074 ps32ProposalMemPool[u32ProposalMemPoolIndex] =
1075 ((ps32ProposalMemPool[u32ProposalMemPoolIndex]) > ((HI_S32)u32OriImWidth - 1) ?
1076 ((HI_S32)u32OriImWidth - 1) :
1077 (ps32ProposalMemPool[u32ProposalMemPoolIndex])) > 0 ?
1078 ((ps32ProposalMemPool[u32ProposalMemPoolIndex]) > ((HI_S32)u32OriImWidth) ?
1079 ((HI_S32)u32OriImWidth - 1) :
1080 (ps32ProposalMemPool[u32ProposalMemPoolIndex])) :
1081 0;
1082 ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] =
1083 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + 1]) > ((HI_S32)u32OriImHeight - 1) ?
1084 ((HI_S32)u32OriImHeight - 1) :
1085 (ps32ProposalMemPool[u32ProposalMemPoolIndex + 1])) > 0 ?
1086 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + 1]) > ((HI_S32)u32OriImHeight) ?
1087 ((HI_S32)u32OriImHeight - 1) :
1088 (ps32ProposalMemPool[u32ProposalMemPoolIndex + 1])) :
1089 0;
1090 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
1091 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET]) >
1092 ((HI_S32)u32OriImWidth - 1) ? ((HI_S32)u32OriImWidth - 1) :
1093 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET])) > 0 ?
1094 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET]) >
1095 ((HI_S32)u32OriImWidth) ? ((HI_S32)u32OriImWidth - 1) :
1096 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET])) :
1097 0;
1098 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
1099 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET]) >
1100 ((HI_S32)u32OriImHeight - 1) ?
1101 ((HI_S32)u32OriImHeight - 1) :
1102 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET])) > 0 ?
1103 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET]) >
1104 ((HI_S32)u32OriImHeight) ?
1105 ((HI_S32)u32OriImHeight - 1) :
1106 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET])) :
1107 0;
1108 }
1109
1110 ps32ProposalTmp = ps32ProposalMemPool;
1111 if (u32RoiCnt >= 1) {
1112 (void)SVP_NNIE_NonRecursiveArgQuickSort(ps32ProposalTmp, 0, u32RoiCnt - 1, pstStack, u32RoiCnt);
1113 }
1114 (void)SVP_NNIE_NonMaxSuppression(ps32ProposalTmp, u32RoiCnt, u32NmsThresh, u32RoiCnt);
1115
1116 ps32Score = (HI_S32 *)ps32DstScore;
1117 ps32Bbox = (HI_S32 *)ps32DstBbox;
1118 ps32RoiCnt = (HI_S32 *)ps32ClassRoiNum;
1119
1120 ps32Score += (HI_S32)(u32OffSet);
1121 ps32Bbox += (HI_S32)(SAMPLE_SVP_NNIE_COORDI_NUM * u32OffSet);
1122
1123 u32RoiOutCnt = 0;
1124 for (i = 0; i < u32RoiCnt; i++) {
1125 u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
1126 if ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] == 0) &&
1127 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SCORE_OFFSET] >
1128 (HI_S32)pu32ConfThresh[j])) {
1129 ps32Score[u32RoiOutCnt] = ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SCORE_OFFSET];
1130 ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM] = ps32ProposalMemPool[u32ProposalMemPoolIndex];
1131 ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 1] =
1132 ps32ProposalMemPool[u32ProposalMemPoolIndex + 1];
1133 ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
1134 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET];
1135 ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
1136 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET];
1137 u32RoiOutCnt++;
1138 }
1139 if (u32RoiOutCnt >= u32RoiCnt)
1140 break;
1141 }
1142 ps32RoiCnt[j] = (HI_S32)u32RoiOutCnt;
1143 u32OffSet += u32RoiOutCnt;
1144 }
1145 return s32Ret;
1146 }
1147
1148 /*
1149 * Prototype : SVP_NNIE_Rfcn_GetResult
1150 * Description : this function is used to get RFCN result
1151 * Input : HI_S32* ps32FcBbox [IN] Bbox for Roi
1152 * HI_U32 u32FcBboxStride [IN] Bbox stride
1153 * HI_S32 *ps32FcScore [IN] Score for roi
1154 * HI_U32 u32FcScoreStride [IN] Score stride
1155 * HI_S32 *ps32Proposals [IN] proposal
1156 * HI_U32 u32RoiCnt [IN] Roi num
1157 * HI_U32 *pu32ConfThresh [IN] each class confidence thresh
1158 * HI_U32 u32MaxRoi [IN] max roi
1159 * HI_U32 u32ClassNum [IN] class num
1160 * HI_U32 u32OriImWidth [IN] input image width
1161 * HI_U32 u32OriImHeight [IN] input image height
1162 * HI_U32 u32NmsThresh [IN] num thresh
1163 * HI_U32* pu32MemPool [IN] assist buffer
1164 * HI_S32* ps32DstScore [OUT]result of score
1165 * HI_S32* ps32DstRoi [OUT]result of Bbox
1166 * HI_S32* ps32ClassRoiNum [OUT]result of the roi num of each class
1167 */
SVP_NNIE_Rfcn_GetResult(HI_S32 * ps32FcScore,HI_U32 u32FcScoreStride,HI_S32 * ps32FcBbox,HI_U32 u32FcBboxStride,HI_S32 * ps32Proposals,HI_U32 u32ProposalStride,HI_U32 u32RoiCnt,HI_U32 * pu32ConfThresh,HI_U32 u32MaxRoi,HI_U32 u32ClassNum,HI_U32 u32OriImWidth,HI_U32 u32OriImHeight,HI_U32 u32NmsThresh,HI_U32 * pu32MemPool,HI_S32 * ps32DstScores,HI_S32 * ps32DstRoi,HI_S32 * ps32ClassRoiNum)1168 static HI_S32 SVP_NNIE_Rfcn_GetResult(HI_S32 *ps32FcScore, HI_U32 u32FcScoreStride, HI_S32 *ps32FcBbox,
1169 HI_U32 u32FcBboxStride, HI_S32 *ps32Proposals, HI_U32 u32ProposalStride, HI_U32 u32RoiCnt, HI_U32 *pu32ConfThresh,
1170 HI_U32 u32MaxRoi, HI_U32 u32ClassNum, HI_U32 u32OriImWidth, HI_U32 u32OriImHeight, HI_U32 u32NmsThresh,
1171 HI_U32 *pu32MemPool, HI_S32 *ps32DstScores, HI_S32 *ps32DstRoi, HI_S32 *ps32ClassRoiNum)
1172 {
1173 HI_U32 u32Size = 0;
1174 HI_U32 u32ClsScoreChannels = 0;
1175 HI_U32 u32FcScoreWidth = 0;
1176 HI_FLOAT f32ProposalWidth = 0.0;
1177 HI_FLOAT f32ProposalHeight = 0.0;
1178 HI_FLOAT f32ProposalCenterX = 0.0;
1179 HI_FLOAT f32ProposalCenterY = 0.0;
1180 HI_FLOAT f32PredW = 0.0;
1181 HI_FLOAT f32PredH = 0.0;
1182 HI_FLOAT f32PredCenterX = 0.0;
1183 HI_FLOAT f32PredCenterY = 0.0;
1184 HI_FLOAT *pf32FcScoresMemPool = NULL;
1185 HI_S32 *ps32FcBboxMemPool = NULL;
1186 HI_S32 *ps32ProposalMemPool = NULL;
1187 HI_S32 *ps32ProposalTmp = NULL;
1188 HI_U32 u32FcBboxIndex = 0;
1189 HI_U32 u32ProposalMemPoolIndex = 0;
1190 HI_FLOAT *pf32Ptr = NULL;
1191 HI_S32 *ps32Ptr = NULL;
1192 HI_S32 *ps32DstScore = NULL;
1193 HI_S32 *ps32DstBbox = NULL;
1194 HI_U32 u32RoiOutCnt = 0;
1195 HI_U32 u32SrcIndex = 0;
1196 HI_U32 u32DstIndex = 0;
1197 HI_U32 i = 0;
1198 HI_U32 j = 0;
1199 HI_U32 u32OffSet = 0;
1200 SAMPLE_SVP_NNIE_STACK_S *pstStack = NULL;
1201 HI_S32 s32Ret = HI_SUCCESS;
1202 HI_U32 u32ProposalOffset = u32ProposalStride / sizeof(HI_S32);
1203
1204 /* Get or calculate parameters */
1205 u32ClsScoreChannels = u32ClassNum; /* channel num is equal to class size, cls_score class */
1206 u32FcScoreWidth = u32ClsScoreChannels;
1207
1208 /* Get Start Pointer of MemPool */
1209 pf32FcScoresMemPool = (HI_FLOAT *)(pu32MemPool);
1210 pf32Ptr = pf32FcScoresMemPool;
1211 u32Size = u32MaxRoi * u32ClsScoreChannels;
1212 pf32Ptr += u32Size;
1213
1214 ps32FcBboxMemPool = (HI_S32 *)pf32Ptr;
1215 ps32Ptr = (HI_S32 *)pf32Ptr;
1216 u32Size = u32MaxRoi * SAMPLE_SVP_NNIE_COORDI_NUM;
1217 ps32Ptr += u32Size;
1218
1219 ps32ProposalMemPool = (HI_S32 *)ps32Ptr;
1220 ps32Ptr = ps32ProposalMemPool;
1221 u32Size = u32MaxRoi * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH;
1222 ps32Ptr += u32Size;
1223 pstStack = (SAMPLE_SVP_NNIE_STACK_S *)ps32Ptr;
1224
1225 // prepare input data
1226 for (i = 0; i < u32RoiCnt; i++) {
1227 for (j = 0; j < u32ClsScoreChannels; j++) {
1228 u32DstIndex = u32FcScoreWidth * i + j;
1229 u32SrcIndex = u32FcScoreStride / sizeof(HI_U32) * i + j;
1230 pf32FcScoresMemPool[u32DstIndex] = (HI_FLOAT)(ps32FcScore[u32SrcIndex]) / SAMPLE_SVP_NNIE_QUANT_BASE;
1231 }
1232 }
1233
1234 for (i = 0; i < u32RoiCnt; i++) {
1235 for (j = 0; j < SAMPLE_SVP_NNIE_COORDI_NUM; j++) {
1236 u32SrcIndex = u32FcBboxStride / sizeof(HI_U32) * i + SAMPLE_SVP_NNIE_COORDI_NUM + j;
1237 u32DstIndex = SAMPLE_SVP_NNIE_COORDI_NUM * i + j;
1238 ps32FcBboxMemPool[u32DstIndex] = ps32FcBbox[u32SrcIndex];
1239 }
1240 }
1241 /* bbox transform
1242 * change the fc output to Proposal temp MemPool.
1243 * Each Line of the Proposal has 6 bits.
1244 * The Format of the Proposal is:
1245 * 0-3: The four coordinate of the bbox, x1,y1,x2, y2
1246 * 4: The Confidence Score of the bbox
1247 * 5: The suprressed flag
1248 */
1249 for (j = 0; j < u32ClsScoreChannels; j++) {
1250 for (i = 0; i < u32RoiCnt; i++) {
1251 f32ProposalWidth = ps32Proposals[u32ProposalOffset * i + SAMPLE_SVP_NNIE_X_MAX_OFFSET] -
1252 ps32Proposals[u32ProposalOffset * i] + 1;
1253 f32ProposalHeight = ps32Proposals[u32ProposalOffset * i + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] -
1254 ps32Proposals[u32ProposalOffset * i + 1] + 1;
1255 f32ProposalCenterX = ps32Proposals[u32ProposalOffset * i] + 0.5 * f32ProposalWidth;
1256 f32ProposalCenterY = ps32Proposals[u32ProposalOffset * i + 1] + 0.5 * f32ProposalHeight;
1257
1258 u32FcBboxIndex = SAMPLE_SVP_NNIE_COORDI_NUM * i;
1259 f32PredCenterX = ((HI_FLOAT)ps32FcBboxMemPool[u32FcBboxIndex] / SAMPLE_SVP_NNIE_QUANT_BASE) *
1260 f32ProposalWidth + f32ProposalCenterX;
1261 f32PredCenterY = ((HI_FLOAT)ps32FcBboxMemPool[u32FcBboxIndex + 1] / SAMPLE_SVP_NNIE_QUANT_BASE) *
1262 f32ProposalHeight + f32ProposalCenterY;
1263 f32PredW =
1264 f32ProposalWidth * SVP_NNIE_QuickExp(ps32FcBboxMemPool[u32FcBboxIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET]);
1265 f32PredH =
1266 f32ProposalHeight * SVP_NNIE_QuickExp(ps32FcBboxMemPool[u32FcBboxIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET]);
1267
1268 u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
1269 ps32ProposalMemPool[u32ProposalMemPoolIndex] = (HI_S32)(f32PredCenterX - 0.5 * f32PredW);
1270 ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] = (HI_S32)(f32PredCenterY - 0.5 * f32PredH);
1271 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
1272 (HI_S32)(f32PredCenterX + 0.5 * f32PredW);
1273 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
1274 (HI_S32)(f32PredCenterY + 0.5 * f32PredH);
1275 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SCORE_OFFSET] =
1276 (HI_S32)(pf32FcScoresMemPool[u32ClsScoreChannels * i + j] * SAMPLE_SVP_NNIE_QUANT_BASE);
1277 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] = 0;
1278 }
1279
1280 /* clip bbox */
1281 for (i = 0; i < u32RoiCnt; i++) {
1282 u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
1283 ps32ProposalMemPool[u32ProposalMemPoolIndex] =
1284 ((ps32ProposalMemPool[u32ProposalMemPoolIndex]) > ((HI_S32)u32OriImWidth - 1) ?
1285 ((HI_S32)u32OriImWidth - 1) :
1286 (ps32ProposalMemPool[u32ProposalMemPoolIndex])) > 0 ?
1287 ((ps32ProposalMemPool[u32ProposalMemPoolIndex]) > ((HI_S32)u32OriImWidth) ?
1288 ((HI_S32)u32OriImWidth - 1) :
1289 (ps32ProposalMemPool[u32ProposalMemPoolIndex])) :
1290 0;
1291 ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] =
1292 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + 1]) > ((HI_S32)u32OriImHeight - 1) ?
1293 ((HI_S32)u32OriImHeight - 1) :
1294 (ps32ProposalMemPool[u32ProposalMemPoolIndex + 1])) > 0 ?
1295 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + 1]) > ((HI_S32)u32OriImHeight) ?
1296 ((HI_S32)u32OriImHeight - 1) :
1297 (ps32ProposalMemPool[u32ProposalMemPoolIndex + 1])) :
1298 0;
1299 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
1300 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET]) >
1301 ((HI_S32)u32OriImWidth - 1) ?
1302 ((HI_S32)u32OriImWidth - 1) :
1303 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET])) > 0 ?
1304 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET]) >
1305 ((HI_S32)u32OriImWidth) ?
1306 ((HI_S32)u32OriImWidth - 1) :
1307 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET])) :
1308 0;
1309 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
1310 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET]) >
1311 ((HI_S32)u32OriImHeight - 1) ?
1312 ((HI_S32)u32OriImHeight - 1) :
1313 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET])) > 0 ?
1314 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET]) >
1315 ((HI_S32)u32OriImHeight) ?
1316 ((HI_S32)u32OriImHeight - 1) :
1317 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET])) :
1318 0;
1319 }
1320
1321 ps32ProposalTmp = ps32ProposalMemPool;
1322 if (u32RoiCnt >= 1) {
1323 (hi_void)SVP_NNIE_NonRecursiveArgQuickSort(ps32ProposalTmp, 0, u32RoiCnt - 1, pstStack, u32RoiCnt);
1324 }
1325 s32Ret = SVP_NNIE_NonMaxSuppression(ps32ProposalTmp, u32RoiCnt, u32NmsThresh, u32RoiCnt);
1326 u32RoiOutCnt = 0;
1327
1328 ps32DstScore = (HI_S32 *)ps32DstScores;
1329 ps32DstBbox = (HI_S32 *)ps32DstRoi;
1330
1331 ps32DstScore += (HI_S32)u32OffSet;
1332 ps32DstBbox += (HI_S32)(SAMPLE_SVP_NNIE_COORDI_NUM * u32OffSet);
1333 for (i = 0; i < u32RoiCnt; i++) {
1334 u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
1335 if ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] == 0) &&
1336 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SCORE_OFFSET] >
1337 (HI_S32)pu32ConfThresh[j])) {
1338 ps32DstScore[u32RoiOutCnt] =
1339 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SCORE_OFFSET];
1340 ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM] = ps32ProposalMemPool[u32ProposalMemPoolIndex];
1341 ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 1] =
1342 ps32ProposalMemPool[u32ProposalMemPoolIndex + 1];
1343 ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
1344 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET];
1345 ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
1346 ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET];
1347 u32RoiOutCnt++;
1348 }
1349 if (u32RoiOutCnt >= u32RoiCnt) {
1350 break;
1351 }
1352 }
1353 ps32ClassRoiNum[j] = (HI_S32)u32RoiOutCnt;
1354 u32OffSet = u32OffSet + u32RoiOutCnt;
1355 }
1356
1357 return s32Ret;
1358 }
1359
1360 /*
1361 * Prototype : SVP_NNIE_Ssd_PriorBoxForward
1362 * Description : this function is used to get SSD priorbox
1363 * Input : HI_U32 u32PriorBoxWidth [IN] prior box width
1364 * HI_U32 u32PriorBoxHeight [IN] prior box height
1365 * HI_U32 u32OriImWidth [IN] input image width
1366 * HI_U32 u32OriImHeight [IN] input image height
1367 * HI_U32 f32PriorBoxMinSize [IN] prior box min size
1368 * HI_U32 u32MinSizeNum [IN] min size num
1369 * HI_U32 f32PriorBoxMaxSize [IN] prior box max size
1370 * HI_U32 u32MaxSizeNum [IN] max size num
1371 * HI_BOOL bFlip [IN] whether do Flip
1372 * HI_BOOL bClip [IN] whether do Clip
1373 * HI_U32 u32InputAspectRatioNum [IN] aspect ratio num
1374 * HI_FLOAT af32PriorBoxAspectRatio[] [IN] aspect ratio value
1375 * HI_FLOAT f32PriorBoxStepWidth [IN] prior box step width
1376 * HI_FLOAT f32PriorBoxStepHeight [IN] prior box step height
1377 * HI_FLOAT f32Offset [IN] offset value
1378 * HI_S32 as32PriorBoxVar[] [IN] prior box variance
1379 * HI_S32* ps32PriorboxOutputData [OUT] output result
1380 */
SVP_NNIE_Ssd_PriorBoxForward(HI_U32 u32PriorBoxWidth,HI_U32 u32PriorBoxHeight,HI_U32 u32OriImWidth,HI_U32 u32OriImHeight,HI_FLOAT * pf32PriorBoxMinSize,HI_U32 u32MinSizeNum,HI_FLOAT * pf32PriorBoxMaxSize,HI_U32 u32MaxSizeNum,HI_BOOL bFlip,HI_BOOL bClip,HI_U32 u32InputAspectRatioNum,HI_FLOAT af32PriorBoxAspectRatio[],HI_FLOAT f32PriorBoxStepWidth,HI_FLOAT f32PriorBoxStepHeight,HI_FLOAT f32Offset,HI_S32 as32PriorBoxVar[],HI_S32 * ps32PriorboxOutputData)1381 static HI_S32 SVP_NNIE_Ssd_PriorBoxForward(HI_U32 u32PriorBoxWidth, HI_U32 u32PriorBoxHeight, HI_U32 u32OriImWidth,
1382 HI_U32 u32OriImHeight, HI_FLOAT *pf32PriorBoxMinSize, HI_U32 u32MinSizeNum, HI_FLOAT *pf32PriorBoxMaxSize,
1383 HI_U32 u32MaxSizeNum, HI_BOOL bFlip, HI_BOOL bClip, HI_U32 u32InputAspectRatioNum,
1384 HI_FLOAT af32PriorBoxAspectRatio[], HI_FLOAT f32PriorBoxStepWidth, HI_FLOAT f32PriorBoxStepHeight,
1385 HI_FLOAT f32Offset, HI_S32 as32PriorBoxVar[], HI_S32 *ps32PriorboxOutputData)
1386 {
1387 HI_U32 u32AspectRatioNum = 0;
1388 HI_U32 u32Index = 0;
1389 HI_FLOAT af32AspectRatio[SAMPLE_SVP_NNIE_SSD_ASPECT_RATIO_NUM] = { 0 };
1390 HI_U32 u32NumPrior = 0;
1391 HI_FLOAT f32CenterX = 0;
1392 HI_FLOAT f32CenterY = 0;
1393 HI_FLOAT f32BoxHeight = 0;
1394 HI_FLOAT f32BoxWidth = 0;
1395 HI_FLOAT f32MaxBoxWidth = 0;
1396 HI_U32 i = 0;
1397 HI_U32 j = 0;
1398 HI_U32 n = 0;
1399 HI_U32 h = 0;
1400 HI_U32 w = 0;
1401 SAMPLE_SVP_CHECK_EXPR_RET(
1402 (HI_TRUE == bFlip && u32InputAspectRatioNum > (SAMPLE_SVP_NNIE_SSD_ASPECT_RATIO_NUM - 1) / 2), HI_INVALID_VALUE,
1403 SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,when bFlip is true, u32InputAspectRatioNum(%d) can't be greater than %d!\n",
1404 u32InputAspectRatioNum, (SAMPLE_SVP_NNIE_SSD_ASPECT_RATIO_NUM - 1) / 2);
1405 SAMPLE_SVP_CHECK_EXPR_RET(
1406 (HI_FALSE == bFlip && u32InputAspectRatioNum > (SAMPLE_SVP_NNIE_SSD_ASPECT_RATIO_NUM - 1)), HI_INVALID_VALUE,
1407 SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,when bFlip is false, u32InputAspectRatioNum(%d) can't be greater than %d!\n",
1408 u32InputAspectRatioNum, (SAMPLE_SVP_NNIE_SSD_ASPECT_RATIO_NUM - 1));
1409
1410 // generate aspect_ratios
1411 u32AspectRatioNum = 0;
1412 af32AspectRatio[0] = 1;
1413 u32AspectRatioNum++;
1414 for (i = 0; i < u32InputAspectRatioNum; i++) {
1415 af32AspectRatio[u32AspectRatioNum++] = af32PriorBoxAspectRatio[i];
1416 if (bFlip) {
1417 af32AspectRatio[u32AspectRatioNum++] = 1.0f / af32PriorBoxAspectRatio[i];
1418 }
1419 }
1420 u32NumPrior = u32MinSizeNum * u32AspectRatioNum + u32MaxSizeNum;
1421
1422 u32Index = 0;
1423 for (h = 0; h < u32PriorBoxHeight; h++) {
1424 for (w = 0; w < u32PriorBoxWidth; w++) {
1425 f32CenterX = (w + f32Offset) * f32PriorBoxStepWidth;
1426 f32CenterY = (h + f32Offset) * f32PriorBoxStepHeight;
1427 for (n = 0; n < u32MinSizeNum; n++) {
1428 /* ** first prior ** */
1429 f32BoxHeight = pf32PriorBoxMinSize[n];
1430 f32BoxWidth = pf32PriorBoxMinSize[n];
1431 ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterX - f32BoxWidth * SAMPLE_SVP_NNIE_HALF);
1432 ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterY - f32BoxHeight * SAMPLE_SVP_NNIE_HALF);
1433 ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterX + f32BoxWidth * SAMPLE_SVP_NNIE_HALF);
1434 ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterY + f32BoxHeight * SAMPLE_SVP_NNIE_HALF);
1435 /* ** second prior ** */
1436 if (u32MaxSizeNum > 0) {
1437 f32MaxBoxWidth = sqrt(pf32PriorBoxMinSize[n] * pf32PriorBoxMaxSize[n]);
1438 f32BoxHeight = f32MaxBoxWidth;
1439 f32BoxWidth = f32MaxBoxWidth;
1440 ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterX - f32BoxWidth * SAMPLE_SVP_NNIE_HALF);
1441 ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterY - f32BoxHeight * SAMPLE_SVP_NNIE_HALF);
1442 ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterX + f32BoxWidth * SAMPLE_SVP_NNIE_HALF);
1443 ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterY + f32BoxHeight * SAMPLE_SVP_NNIE_HALF);
1444 }
1445 /* *** rest of priors, skip AspectRatio == 1 *** */
1446 for (i = 1; i < u32AspectRatioNum; i++) {
1447 f32BoxWidth = (HI_FLOAT)(pf32PriorBoxMinSize[n] * sqrt(af32AspectRatio[i]));
1448 f32BoxHeight = (HI_FLOAT)(pf32PriorBoxMinSize[n] / sqrt(af32AspectRatio[i]));
1449 ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterX - f32BoxWidth * SAMPLE_SVP_NNIE_HALF);
1450 ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterY - f32BoxHeight * SAMPLE_SVP_NNIE_HALF);
1451 ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterX + f32BoxWidth * SAMPLE_SVP_NNIE_HALF);
1452 ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterY + f32BoxHeight * SAMPLE_SVP_NNIE_HALF);
1453 }
1454 }
1455 }
1456 }
1457 /* clip the priors' coordidates, within [0, u32ImgWidth] & [0, u32ImgHeight] */
1458 if (bClip) {
1459 for (i = 0; i < (HI_U32)(u32PriorBoxWidth * u32PriorBoxHeight * SAMPLE_SVP_NNIE_COORDI_NUM * u32NumPrior / 2);
1460 i++) {
1461 ps32PriorboxOutputData[2 * i] =
1462 SAMPLE_SVP_NNIE_MIN((HI_U32)SAMPLE_SVP_NNIE_MAX(ps32PriorboxOutputData[2 * i], 0), u32OriImWidth);
1463 ps32PriorboxOutputData[2 * i + 1] =
1464 SAMPLE_SVP_NNIE_MIN((HI_U32)SAMPLE_SVP_NNIE_MAX(ps32PriorboxOutputData[2 * i + 1], 0), u32OriImHeight);
1465 }
1466 }
1467
1468 for (h = 0; h < u32PriorBoxHeight; h++) {
1469 for (w = 0; w < u32PriorBoxWidth; w++) {
1470 for (i = 0; i < u32NumPrior; i++) {
1471 for (j = 0; j < SAMPLE_SVP_NNIE_COORDI_NUM; j++) {
1472 ps32PriorboxOutputData[u32Index++] = (HI_S32)as32PriorBoxVar[j];
1473 }
1474 }
1475 }
1476 }
1477 return HI_SUCCESS;
1478 }
1479
1480 /*
1481 * Prototype : SVP_NNIE_Ssd_SoftmaxForward
1482 * Description : this function is used to do SSD softmax
1483 * Input : HI_U32 u32SoftMaxInHeight [IN] softmax input height
1484 * HI_U32 au32SoftMaxInChn[] [IN] softmax input channel
1485 * HI_U32 u32ConcatNum [IN] concat num
1486 * HI_U32 au32ConvStride[] [IN] conv stride
1487 * HI_U32 u32SoftMaxOutWidth [IN] softmax output width
1488 * HI_U32 u32SoftMaxOutHeight [IN] softmax output height
1489 * HI_U32 u32SoftMaxOutChn [IN] softmax output channel
1490 * HI_S32* aps32SoftMaxInputData[] [IN] softmax input data
1491 * HI_S32* ps32SoftMaxOutputData [OUT]softmax output data
1492 */
SVP_NNIE_Ssd_SoftmaxForward(HI_U32 u32SoftMaxInHeight,HI_U32 au32SoftMaxInChn[],HI_U32 u32ConcatNum,HI_U32 au32ConvStride[],HI_U32 au32SoftMaxWidth[],HI_S32 * aps32SoftMaxInputData[],HI_S32 * ps32SoftMaxOutputData)1493 static HI_S32 SVP_NNIE_Ssd_SoftmaxForward(HI_U32 u32SoftMaxInHeight, HI_U32 au32SoftMaxInChn[], HI_U32 u32ConcatNum,
1494 HI_U32 au32ConvStride[], HI_U32 au32SoftMaxWidth[], HI_S32 *aps32SoftMaxInputData[], HI_S32 *ps32SoftMaxOutputData)
1495 {
1496 HI_S32 *ps32InputData = NULL;
1497 HI_S32 *ps32OutputTmp = NULL;
1498 HI_U32 u32OuterNum = 0;
1499 HI_U32 u32InnerNum = 0;
1500 HI_U32 u32InputChannel = 0;
1501 HI_U32 i = 0;
1502 HI_U32 u32ConcatCnt = 0;
1503 HI_S32 s32Ret = 0;
1504 HI_U32 u32Stride = 0;
1505 HI_U32 u32Skip = 0;
1506 HI_U32 u32Left = 0;
1507 ps32OutputTmp = ps32SoftMaxOutputData;
1508 for (u32ConcatCnt = 0; u32ConcatCnt < u32ConcatNum; u32ConcatCnt++) {
1509 ps32InputData = aps32SoftMaxInputData[u32ConcatCnt];
1510 u32Stride = au32ConvStride[u32ConcatCnt];
1511 u32InputChannel = au32SoftMaxInChn[u32ConcatCnt];
1512 if (u32SoftMaxInHeight == 0) {
1513 printf("Divisor u32SoftMaxInHeight cannot be 0!\n");
1514 return HI_FAILURE;
1515 }
1516 u32OuterNum = u32InputChannel / u32SoftMaxInHeight;
1517 u32InnerNum = u32SoftMaxInHeight;
1518 u32Skip = au32SoftMaxWidth[u32ConcatCnt] / u32InnerNum;
1519 u32Left = u32Stride - au32SoftMaxWidth[u32ConcatCnt];
1520 for (i = 0; i < u32OuterNum; i++) {
1521 s32Ret = SVP_NNIE_SSD_SoftMax(ps32InputData, (HI_S32)u32InnerNum, ps32OutputTmp);
1522 if ((i + 1) % u32Skip == 0) {
1523 ps32InputData += u32Left;
1524 }
1525 ps32InputData += u32InnerNum;
1526 ps32OutputTmp += u32InnerNum;
1527 }
1528 }
1529 return s32Ret;
1530 }
1531
1532 /*
1533 * Prototype : SVP_NNIE_Ssd_DetectionOutForward
1534 * Description : this function is used to get detection result of SSD
1535 * Input : HI_U32 u32ConcatNum [IN] SSD concat num
1536 * HI_U32 u32ConfThresh [IN] confidence thresh
1537 * HI_U32 u32ClassNum [IN] class num
1538 * HI_U32 u32TopK [IN] Topk value
1539 * HI_U32 u32KeepTopK [IN] KeepTopK value
1540 * HI_U32 u32NmsThresh [IN] NMS thresh
1541 * HI_U32 au32DetectInputChn[] [IN] detection input channel
1542 * HI_S32* aps32AllLocPreds[] [IN] Location prediction
1543 * HI_S32* aps32AllPriorBoxes[] [IN] prior box
1544 * HI_S32* ps32ConfScores [IN] confidence score
1545 * HI_S32* ps32AssistMemPool [IN] assist buffer
1546 * HI_S32* ps32DstScoreSrc [OUT] result of score
1547 * HI_S32* ps32DstBboxSrc [OUT] result of Bbox
1548 * HI_S32* ps32RoiOutCntSrc [OUT] result of the roi num of each class
1549 */
SVP_NNIE_Ssd_DetectionOutForward(HI_U32 u32ConcatNum,HI_U32 u32ConfThresh,HI_U32 u32ClassNum,HI_U32 u32TopK,HI_U32 u32KeepTopK,HI_U32 u32NmsThresh,HI_U32 au32DetectInputChn[],HI_S32 * aps32AllLocPreds[],HI_S32 * aps32AllPriorBoxes[],HI_S32 * ps32ConfScores,HI_S32 * ps32AssistMemPool,HI_S32 * ps32DstScoreSrc,HI_S32 * ps32DstBboxSrc,HI_S32 * ps32RoiOutCntSrc)1550 static HI_S32 SVP_NNIE_Ssd_DetectionOutForward(HI_U32 u32ConcatNum, HI_U32 u32ConfThresh, HI_U32 u32ClassNum,
1551 HI_U32 u32TopK, HI_U32 u32KeepTopK, HI_U32 u32NmsThresh, HI_U32 au32DetectInputChn[], HI_S32 *aps32AllLocPreds[],
1552 HI_S32 *aps32AllPriorBoxes[], HI_S32 *ps32ConfScores, HI_S32 *ps32AssistMemPool, HI_S32 *ps32DstScoreSrc,
1553 HI_S32 *ps32DstBboxSrc, HI_S32 *ps32RoiOutCntSrc)
1554 {
1555 HI_S32 *ps32LocPreds = NULL;
1556 HI_S32 *ps32PriorBoxes = NULL;
1557 HI_S32 *ps32PriorVar = NULL;
1558 HI_S32 *ps32AllDecodeBoxes = NULL;
1559 HI_S32 *ps32DstScore = NULL;
1560 HI_S32 *ps32DstBbox = NULL;
1561 HI_S32 *ps32ClassRoiNum = NULL;
1562 HI_U32 u32RoiOutCnt = 0;
1563 HI_S32 *ps32SingleProposal = NULL;
1564 HI_S32 *ps32AfterTopK = NULL;
1565 SAMPLE_SVP_NNIE_STACK_S *pstStack = NULL;
1566 HI_U32 u32PriorNum = 0;
1567 HI_U32 u32NumPredsPerClass = 0;
1568 HI_FLOAT f32PriorWidth = 0;
1569 HI_FLOAT f32PriorHeight = 0;
1570 HI_FLOAT f32PriorCenterX = 0;
1571 HI_FLOAT f32PriorCenterY = 0;
1572 HI_FLOAT f32DecodeBoxCenterX = 0;
1573 HI_FLOAT f32DecodeBoxCenterY = 0;
1574 HI_FLOAT f32DecodeBoxWidth = 0;
1575 HI_FLOAT f32DecodeBoxHeight = 0;
1576 HI_U32 u32SrcIdx = 0;
1577 HI_U32 u32AfterFilter = 0;
1578 HI_U32 u32AfterTopK = 0;
1579 HI_U32 u32KeepCnt = 0;
1580 HI_U32 i = 0;
1581 HI_U32 j = 0;
1582 HI_U32 u32Offset = 0;
1583 HI_S32 s32Ret = HI_SUCCESS;
1584 u32PriorNum = 0;
1585 for (i = 0; i < u32ConcatNum; i++) {
1586 u32PriorNum += au32DetectInputChn[i] / SAMPLE_SVP_NNIE_COORDI_NUM;
1587 }
1588 // prepare for Assist MemPool
1589 ps32AllDecodeBoxes = ps32AssistMemPool;
1590 ps32SingleProposal = ps32AllDecodeBoxes + u32PriorNum * SAMPLE_SVP_NNIE_COORDI_NUM;
1591 ps32AfterTopK = ps32SingleProposal + SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32PriorNum;
1592 pstStack = (SAMPLE_SVP_NNIE_STACK_S *)(ps32AfterTopK + u32PriorNum * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH);
1593 u32SrcIdx = 0;
1594 for (i = 0; i < u32ConcatNum; i++) {
1595 /* get loc predictions */
1596 ps32LocPreds = aps32AllLocPreds[i];
1597 u32NumPredsPerClass = au32DetectInputChn[i] / SAMPLE_SVP_NNIE_COORDI_NUM;
1598 /* get Prior Bboxes */
1599 ps32PriorBoxes = aps32AllPriorBoxes[i];
1600 ps32PriorVar = ps32PriorBoxes + u32NumPredsPerClass * SAMPLE_SVP_NNIE_COORDI_NUM;
1601 for (j = 0; j < u32NumPredsPerClass; j++) {
1602 f32PriorWidth = (HI_FLOAT)(ps32PriorBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 2] -
1603 ps32PriorBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM]);
1604 f32PriorHeight = (HI_FLOAT)(ps32PriorBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 3] -
1605 ps32PriorBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 1]);
1606 f32PriorCenterX =
1607 (ps32PriorBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 2] + ps32PriorBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM]) *
1608 SAMPLE_SVP_NNIE_HALF;
1609 f32PriorCenterY = (ps32PriorBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 3] +
1610 ps32PriorBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 1]) *
1611 SAMPLE_SVP_NNIE_HALF;
1612
1613 f32DecodeBoxCenterX =
1614 ((HI_FLOAT)ps32PriorVar[j * SAMPLE_SVP_NNIE_COORDI_NUM] / SAMPLE_SVP_NNIE_QUANT_BASE) *
1615 ((HI_FLOAT)ps32LocPreds[j * SAMPLE_SVP_NNIE_COORDI_NUM] / SAMPLE_SVP_NNIE_QUANT_BASE) * f32PriorWidth +
1616 f32PriorCenterX;
1617
1618 f32DecodeBoxCenterY =
1619 ((HI_FLOAT)ps32PriorVar[j * SAMPLE_SVP_NNIE_COORDI_NUM + 1] / SAMPLE_SVP_NNIE_QUANT_BASE) *
1620 ((HI_FLOAT)ps32LocPreds[j * SAMPLE_SVP_NNIE_COORDI_NUM + 1] / SAMPLE_SVP_NNIE_QUANT_BASE) *
1621 f32PriorHeight +
1622 f32PriorCenterY;
1623
1624 f32DecodeBoxWidth =
1625 exp(((HI_FLOAT)ps32PriorVar[j * SAMPLE_SVP_NNIE_COORDI_NUM + 2] / SAMPLE_SVP_NNIE_QUANT_BASE) *
1626 ((HI_FLOAT)ps32LocPreds[j * SAMPLE_SVP_NNIE_COORDI_NUM + 2] / SAMPLE_SVP_NNIE_QUANT_BASE)) *
1627 f32PriorWidth;
1628
1629 f32DecodeBoxHeight =
1630 exp(((HI_FLOAT)ps32PriorVar[j * SAMPLE_SVP_NNIE_COORDI_NUM + 3] / SAMPLE_SVP_NNIE_QUANT_BASE) *
1631 ((HI_FLOAT)ps32LocPreds[j * SAMPLE_SVP_NNIE_COORDI_NUM + 3] / SAMPLE_SVP_NNIE_QUANT_BASE)) *
1632 f32PriorHeight;
1633
1634 ps32AllDecodeBoxes[u32SrcIdx++] = (HI_S32)(f32DecodeBoxCenterX - f32DecodeBoxWidth * SAMPLE_SVP_NNIE_HALF);
1635 ps32AllDecodeBoxes[u32SrcIdx++] = (HI_S32)(f32DecodeBoxCenterY - f32DecodeBoxHeight * SAMPLE_SVP_NNIE_HALF);
1636 ps32AllDecodeBoxes[u32SrcIdx++] = (HI_S32)(f32DecodeBoxCenterX + f32DecodeBoxWidth * SAMPLE_SVP_NNIE_HALF);
1637 ps32AllDecodeBoxes[u32SrcIdx++] = (HI_S32)(f32DecodeBoxCenterY + f32DecodeBoxHeight * SAMPLE_SVP_NNIE_HALF);
1638 }
1639 }
1640 /* do NMS for each class */
1641 u32AfterTopK = 0;
1642 for (i = 0; i < u32ClassNum; i++) {
1643 for (j = 0; j < u32PriorNum; j++) {
1644 ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH] = ps32AllDecodeBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM];
1645 ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 1] =
1646 ps32AllDecodeBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 1];
1647 ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 2] =
1648 ps32AllDecodeBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 2];
1649 ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 3] =
1650 ps32AllDecodeBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 3];
1651 ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 4] = ps32ConfScores[j * u32ClassNum + i];
1652 ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 5] = 0;
1653 }
1654 s32Ret = SVP_NNIE_NonRecursiveArgQuickSort(ps32SingleProposal, 0, u32PriorNum - 1, pstStack, u32TopK);
1655 u32AfterFilter = (u32PriorNum < u32TopK) ? u32PriorNum : u32TopK;
1656 s32Ret = SVP_NNIE_NonMaxSuppression(ps32SingleProposal, u32AfterFilter, u32NmsThresh, u32AfterFilter);
1657 u32RoiOutCnt = 0;
1658 ps32DstScore = (HI_S32 *)ps32DstScoreSrc;
1659 ps32DstBbox = (HI_S32 *)ps32DstBboxSrc;
1660 ps32ClassRoiNum = (HI_S32 *)ps32RoiOutCntSrc;
1661 ps32DstScore += (HI_S32)u32AfterTopK;
1662 ps32DstBbox += (HI_S32)(u32AfterTopK * SAMPLE_SVP_NNIE_COORDI_NUM);
1663 for (j = 0; j < u32TopK; j++) {
1664 if (ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 5] == 0 &&
1665 ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 4] > (HI_S32)u32ConfThresh) {
1666 ps32DstScore[u32RoiOutCnt] = ps32SingleProposal[j * 6 + 4];
1667 ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM] =
1668 ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH];
1669 ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 1] =
1670 ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 1];
1671 ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 2] =
1672 ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 2];
1673 ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 3] =
1674 ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 3];
1675 u32RoiOutCnt++;
1676 }
1677 }
1678 ps32ClassRoiNum[i] = (HI_S32)u32RoiOutCnt;
1679 u32AfterTopK += u32RoiOutCnt;
1680 }
1681
1682 u32KeepCnt = 0;
1683 u32Offset = 0;
1684 if (u32AfterTopK > u32KeepTopK) {
1685 u32Offset = ps32ClassRoiNum[0];
1686 for (i = 1; i < u32ClassNum; i++) {
1687 ps32DstScore = (HI_S32 *)ps32DstScoreSrc;
1688 ps32DstBbox = (HI_S32 *)ps32DstBboxSrc;
1689 ps32ClassRoiNum = (HI_S32 *)ps32RoiOutCntSrc;
1690 ps32DstScore += (HI_S32)(u32Offset);
1691 ps32DstBbox += (HI_S32)(u32Offset * SAMPLE_SVP_NNIE_COORDI_NUM);
1692 for (j = 0; j < (HI_U32)ps32ClassRoiNum[i]; j++) {
1693 ps32AfterTopK[u32KeepCnt * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH] =
1694 ps32DstBbox[j * SAMPLE_SVP_NNIE_COORDI_NUM];
1695 ps32AfterTopK[u32KeepCnt * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 1] =
1696 ps32DstBbox[j * SAMPLE_SVP_NNIE_COORDI_NUM + 1];
1697 ps32AfterTopK[u32KeepCnt * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 2] =
1698 ps32DstBbox[j * SAMPLE_SVP_NNIE_COORDI_NUM + 2];
1699 ps32AfterTopK[u32KeepCnt * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 3] =
1700 ps32DstBbox[j * SAMPLE_SVP_NNIE_COORDI_NUM + 3];
1701 ps32AfterTopK[u32KeepCnt * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 4] = ps32DstScore[j];
1702 ps32AfterTopK[u32KeepCnt * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 5] = i;
1703 u32KeepCnt++;
1704 }
1705 u32Offset = u32Offset + ps32ClassRoiNum[i];
1706 }
1707 if (u32KeepCnt >= 1) {
1708 s32Ret = SVP_NNIE_NonRecursiveArgQuickSort(ps32AfterTopK, 0, u32KeepCnt - 1, pstStack, u32KeepCnt);
1709 }
1710
1711 u32Offset = ps32ClassRoiNum[0];
1712 for (i = 1; i < u32ClassNum; i++) {
1713 u32RoiOutCnt = 0;
1714 ps32DstScore = (HI_S32 *)ps32DstScoreSrc;
1715 ps32DstBbox = (HI_S32 *)ps32DstBboxSrc;
1716 ps32ClassRoiNum = (HI_S32 *)ps32RoiOutCntSrc;
1717 ps32DstScore += (HI_S32)(u32Offset);
1718 ps32DstBbox += (HI_S32)(u32Offset * SAMPLE_SVP_NNIE_COORDI_NUM);
1719 for (j = 0; j < u32KeepTopK; j++) {
1720 if (ps32AfterTopK[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 5] == (HI_S32)i) {
1721 ps32DstScore[u32RoiOutCnt] = ps32AfterTopK[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 4];
1722 ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM] =
1723 ps32AfterTopK[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH];
1724 ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 1] =
1725 ps32AfterTopK[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 1];
1726 ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 2] =
1727 ps32AfterTopK[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 2];
1728 ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 3] =
1729 ps32AfterTopK[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 3];
1730 u32RoiOutCnt++;
1731 }
1732 }
1733 ps32ClassRoiNum[i] = (HI_S32)u32RoiOutCnt;
1734 u32Offset += u32RoiOutCnt;
1735 }
1736 }
1737 return s32Ret;
1738 }
1739
SVP_NNIE_Yolov1_Iou(HI_FLOAT * pf32Bbox,HI_U32 u32Idx1,HI_U32 u32Idx2)1740 static HI_S32 SVP_NNIE_Yolov1_Iou(HI_FLOAT *pf32Bbox, HI_U32 u32Idx1, HI_U32 u32Idx2)
1741 {
1742 HI_FLOAT f32WidthDis = 0.0f, f32HeightDis = 0.0f;
1743 HI_FLOAT f32Intersection = 0.0f;
1744 HI_FLOAT f32Iou = 0.0f;
1745 f32WidthDis = SAMPLE_SVP_NNIE_MIN(pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM] +
1746 0.5f * pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM + 2],
1747 pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM] + 0.5f * pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM + 2]) -
1748 SAMPLE_SVP_NNIE_MAX(pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM] -
1749 0.5f * pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM + 2],
1750 pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM] - 0.5f * pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM + 2]);
1751
1752 f32HeightDis = SAMPLE_SVP_NNIE_MIN(pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM + 1] +
1753 0.5f * pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM + 3],
1754 pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM + 1] +
1755 0.5f * pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM + 3]) -
1756 SAMPLE_SVP_NNIE_MAX(pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM + 1] -
1757 0.5f * pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM + 3],
1758 pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM + 1] - 0.5f * pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM + 3]);
1759 if (f32WidthDis < 0 || f32HeightDis < 0) {
1760 f32Intersection = 0;
1761 } else {
1762 f32Intersection = f32WidthDis * f32HeightDis;
1763 }
1764 f32Iou = f32Intersection /
1765 (pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM + 2] * pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM + 3] +
1766 pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM + 2] * pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM + 3] -
1767 f32Intersection);
1768
1769 return (HI_S32)(f32Iou * SAMPLE_SVP_NNIE_QUANT_BASE);
1770 }
1771
1772 /*
1773 * Prototype : SVP_NNIE_Yolov1_Argswap
1774 * Description : this function is used to exchange data
1775 * Input : HI_S32* ps32Src1 [IN] first input array
1776 * HI_S32* ps32Src2 [IN] second input array
1777 * HI_U32 u32ArraySize [IN] array size
1778 */
SVP_NNIE_Yolov1_Argswap(HI_S32 * ps32Src1,HI_S32 * ps32Src2,HI_U32 u32ArraySize)1779 static void SVP_NNIE_Yolov1_Argswap(HI_S32 *ps32Src1, HI_S32 *ps32Src2, HI_U32 u32ArraySize)
1780 {
1781 HI_U32 i = 0;
1782 HI_S32 s32Tmp = 0;
1783 for (i = 0; i < u32ArraySize; i++) {
1784 s32Tmp = ps32Src1[i];
1785 ps32Src1[i] = ps32Src2[i];
1786 ps32Src2[i] = s32Tmp;
1787 }
1788 }
1789
1790 /*
1791 * Prototype : SVP_NNIE_Yolov1_NonRecursiveArgQuickSort
1792 * Description : this function is used to do quick sort
1793 * Input : HI_S32* ps32Array [IN] the array need to be sorted
1794 * HI_S32 s32Low [IN] the start position of quick sort
1795 * HI_S32 s32High [IN] the end position of quick sort
1796 * HI_U32 u32ArraySize [IN] the element size of input array
1797 * HI_U32 u32ScoreIdx [IN] the score index in array element
1798 * SAMPLE_SVP_NNIE_STACK_S *pstStack [IN] the buffer used to store start positions and end positions
1799 */
SVP_NNIE_Yolo_NonRecursiveArgQuickSort(HI_S32 * ps32Array,HI_S32 s32Low,HI_S32 s32High,HI_U32 u32ArraySize,HI_U32 u32ScoreIdx,SAMPLE_SVP_NNIE_STACK_S * pstStack)1800 static HI_S32 SVP_NNIE_Yolo_NonRecursiveArgQuickSort(HI_S32 *ps32Array, HI_S32 s32Low, HI_S32 s32High,
1801 HI_U32 u32ArraySize, HI_U32 u32ScoreIdx, SAMPLE_SVP_NNIE_STACK_S *pstStack)
1802 {
1803 HI_S32 i = s32Low;
1804 HI_S32 j = s32High;
1805 HI_S32 s32Top = 0;
1806 HI_S32 s32KeyConfidence = ps32Array[u32ArraySize * s32Low + u32ScoreIdx];
1807 pstStack[s32Top].s32Min = s32Low;
1808 pstStack[s32Top].s32Max = s32High;
1809
1810 while (s32Top > -1) {
1811 s32Low = pstStack[s32Top].s32Min;
1812 s32High = pstStack[s32Top].s32Max;
1813 i = s32Low;
1814 j = s32High;
1815 s32Top--;
1816
1817 s32KeyConfidence = ps32Array[u32ArraySize * s32Low + u32ScoreIdx];
1818
1819 while (i < j) {
1820 while ((i < j) && (s32KeyConfidence > ps32Array[j * u32ArraySize + u32ScoreIdx])) {
1821 j--;
1822 }
1823 if (i < j) {
1824 SVP_NNIE_Yolov1_Argswap(&ps32Array[i * u32ArraySize], &ps32Array[j * u32ArraySize], u32ArraySize);
1825 i++;
1826 }
1827
1828 while ((i < j) && (s32KeyConfidence < ps32Array[i * u32ArraySize + u32ScoreIdx])) {
1829 i++;
1830 }
1831 if (i < j) {
1832 SVP_NNIE_Yolov1_Argswap(&ps32Array[i * u32ArraySize], &ps32Array[j * u32ArraySize], u32ArraySize);
1833 j--;
1834 }
1835 }
1836
1837 if (s32Low < i - 1) {
1838 s32Top++;
1839 pstStack[s32Top].s32Min = s32Low;
1840 pstStack[s32Top].s32Max = i - 1;
1841 }
1842
1843 if (s32High > i + 1) {
1844 s32Top++;
1845 pstStack[s32Top].s32Min = i + 1;
1846 pstStack[s32Top].s32Max = s32High;
1847 }
1848 }
1849 return HI_SUCCESS;
1850 }
1851
1852 /*
1853 * Prototype : SVP_NNIE_Yolov1_Nms
1854 * Description : this function is used to do NMS
1855 * Input : HI_S32* ps32Score [IN] class score of each bbox
1856 * HI_FLOAT* pf32Bbox [IN] pointer to the Bbox memory
1857 * HI_U32 u32ConfThresh [IN] confidence thresh
1858 * HI_U32 u32NmsThresh [IN] NMS thresh
1859 * HI_U32* pu32TmpBuf [IN] assist buffer
1860 */
SVP_NNIE_Yolov1_Nms(HI_S32 * ps32Score,HI_FLOAT * pf32Bbox,HI_U32 u32BboxNum,HI_U32 u32ConfThresh,HI_U32 u32NmsThresh,HI_U32 * pu32TmpBuf)1861 static HI_S32 SVP_NNIE_Yolov1_Nms(HI_S32 *ps32Score, HI_FLOAT *pf32Bbox, HI_U32 u32BboxNum, HI_U32 u32ConfThresh,
1862 HI_U32 u32NmsThresh, HI_U32 *pu32TmpBuf)
1863 {
1864 HI_U32 i = 0, j = 0;
1865 HI_U32 u32Idx1 = 0, u32Idx2 = 0;
1866 SAMPLE_SVP_NNIE_YOLOV1_SCORE_S *pstScore = (SAMPLE_SVP_NNIE_YOLOV1_SCORE_S *)pu32TmpBuf;
1867 SAMPLE_SVP_NNIE_STACK_S *pstAssitBuf =
1868 (SAMPLE_SVP_NNIE_STACK_S *)((HI_U8 *)pu32TmpBuf + u32BboxNum * sizeof(SAMPLE_SVP_NNIE_YOLOV1_SCORE_S));
1869 for (i = 0; i < u32BboxNum; i++) {
1870 if (ps32Score[i] < (HI_S32)u32ConfThresh) {
1871 ps32Score[i] = 0;
1872 }
1873 }
1874
1875 for (i = 0; i < u32BboxNum; ++i) {
1876 pstScore[i].u32Idx = i;
1877 pstScore[i].s32Score = (ps32Score[i]);
1878 }
1879 /* quick sort */
1880 if (u32BboxNum >= 1) {
1881 (void)SVP_NNIE_Yolo_NonRecursiveArgQuickSort((HI_S32 *)pstScore, 0, u32BboxNum - 1,
1882 sizeof(SAMPLE_SVP_NNIE_YOLOV1_SCORE_S) / sizeof(HI_U32), 1, pstAssitBuf);
1883 }
1884 /* NMS */
1885 for (i = 0; i < u32BboxNum; i++) {
1886 u32Idx1 = pstScore[i].u32Idx;
1887 if (pstScore[i].s32Score == 0) {
1888 continue;
1889 }
1890 for (j = i + 1; j < u32BboxNum; j++) {
1891 u32Idx2 = pstScore[j].u32Idx;
1892 if (pstScore[j].s32Score == 0) {
1893 continue;
1894 }
1895 if (SVP_NNIE_Yolov1_Iou(pf32Bbox, u32Idx1, u32Idx2) > (HI_S32)u32NmsThresh) {
1896 pstScore[j].s32Score = 0;
1897 ps32Score[pstScore[j].u32Idx] = 0;
1898 }
1899 }
1900 }
1901
1902 return HI_SUCCESS;
1903 }
1904
1905 /*
1906 * Prototype : SVP_NNIE_Yolov1_ConvertPosition
1907 * Description : this function is used to do convert position coordinates
1908 * Input : HI_FLOAT* pf32Bbox [IN] pointer to the Bbox memory
1909 * HI_U32 u32OriImgWidth [IN] input image width
1910 * HI_U32 u32OriImagHeight [IN] input image height
1911 * HI_FLOAT af32Roi[] [OUT] converted position coordinates
1912 */
SVP_NNIE_Yolov1_ConvertPosition(HI_FLOAT * pf32Bbox,HI_U32 u32OriImgWidth,HI_U32 u32OriImagHeight,HI_FLOAT af32Roi[])1913 static void SVP_NNIE_Yolov1_ConvertPosition(HI_FLOAT *pf32Bbox, HI_U32 u32OriImgWidth, HI_U32 u32OriImagHeight,
1914 HI_FLOAT af32Roi[])
1915 {
1916 HI_FLOAT f32Xmin, f32Ymin, f32Xmax, f32Ymax;
1917 f32Xmin = *pf32Bbox - *(pf32Bbox + SAMPLE_SVP_NNIE_X_MAX_OFFSET) * SAMPLE_SVP_NNIE_HALF;
1918 f32Xmin = f32Xmin > 0 ? f32Xmin : 0;
1919 f32Ymin = *(pf32Bbox + 1) - *(pf32Bbox + SAMPLE_SVP_NNIE_Y_MAX_OFFSET) * SAMPLE_SVP_NNIE_HALF;
1920 f32Ymin = f32Ymin > 0 ? f32Ymin : 0;
1921 f32Xmax = *pf32Bbox + *(pf32Bbox + SAMPLE_SVP_NNIE_X_MAX_OFFSET) * SAMPLE_SVP_NNIE_HALF;
1922 f32Xmax = f32Xmax > u32OriImgWidth ? u32OriImgWidth : f32Xmax;
1923 f32Ymax = *(pf32Bbox + 1) + *(pf32Bbox + SAMPLE_SVP_NNIE_Y_MAX_OFFSET) * SAMPLE_SVP_NNIE_HALF;
1924 f32Ymax = f32Ymax > u32OriImagHeight ? u32OriImagHeight : f32Ymax;
1925
1926 af32Roi[0] = f32Xmin;
1927 af32Roi[1] = f32Ymin;
1928 af32Roi[2] = f32Xmax;
1929 af32Roi[3] = f32Ymax;
1930 }
1931
1932 /*
1933 * Prototype : SVP_NNIE_Yolov1_Detection
1934 * Description : Yolov1 detection
1935 * Input : HI_S32* ps32Score [IN] bbox each class score
1936 * HI_FLOAT* pf32Bbox [IN] bbox
1937 * HI_U32 u32ClassNum [IN] Class num
1938 * HI_U32 u32GridNum [IN] grid num
1939 * HI_U32 u32BboxNum [IN] bbox num
1940 * HI_U32 u32ConfThresh [IN] confidence thresh
1941 * HI_U32 u32NmsThresh [IN] Nms thresh
1942 * HI_U32 u32OriImgWidth [IN] input image width
1943 * HI_U32 u32OriImgHeight [IN] input image height
1944 * HI_U32* pu32MemPool [IN] assist buffer
1945 * HI_S32 *ps32DstScores [OUT] dst score of ROI
1946 * HI_S32 *ps32DstRoi [OUT] dst Roi
1947 * HI_S32 *ps32ClassRoiNum[OUT] dst roi num of each class
1948 */
SVP_NNIE_Yolov1_Detection(HI_S32 * ps32Score,HI_FLOAT * pf32Bbox,HI_U32 u32ClassNum,HI_U32 u32BboxNum,HI_U32 u32ConfThresh,HI_U32 u32NmsThresh,HI_U32 u32OriImgWidth,HI_U32 u32OriImgHeight,HI_U32 * pu32MemPool,HI_S32 * ps32DstScores,HI_S32 * ps32DstRoi,HI_S32 * ps32ClassRoiNum)1949 static HI_S32 SVP_NNIE_Yolov1_Detection(HI_S32 *ps32Score, HI_FLOAT *pf32Bbox, HI_U32 u32ClassNum, HI_U32 u32BboxNum,
1950 HI_U32 u32ConfThresh, HI_U32 u32NmsThresh, HI_U32 u32OriImgWidth, HI_U32 u32OriImgHeight, HI_U32 *pu32MemPool,
1951 HI_S32 *ps32DstScores, HI_S32 *ps32DstRoi, HI_S32 *ps32ClassRoiNum)
1952 {
1953 HI_U32 i = 0, j = 0;
1954 HI_U32 u32Idx = 0;
1955 HI_U32 u32RoiNum = 0;
1956 HI_S32 *ps32EachClassScore = NULL;
1957 HI_FLOAT af32Roi[SAMPLE_SVP_NNIE_COORDI_NUM] = {0.0f};
1958 SAMPLE_SVP_NNIE_YOLOV1_SCORE_S *pstScore = NULL;
1959 *(ps32ClassRoiNum++) = 0;
1960 for (i = 0; i < u32ClassNum; i++) {
1961 ps32EachClassScore = ps32Score + u32BboxNum * i;
1962 (void)SVP_NNIE_Yolov1_Nms(ps32EachClassScore, pf32Bbox, u32BboxNum, u32ConfThresh, u32NmsThresh, pu32MemPool);
1963
1964 pstScore = (SAMPLE_SVP_NNIE_YOLOV1_SCORE_S *)pu32MemPool;
1965 u32RoiNum = 0;
1966 for (j = 0; j < u32BboxNum; j++) {
1967 if (pstScore[j].s32Score != 0) {
1968 u32RoiNum++;
1969 *(ps32DstScores++) = pstScore[j].s32Score;
1970 u32Idx = pstScore[j].u32Idx;
1971 (void)SVP_NNIE_Yolov1_ConvertPosition((pf32Bbox + u32Idx * SAMPLE_SVP_NNIE_COORDI_NUM), u32OriImgWidth,
1972 u32OriImgHeight, af32Roi);
1973 *(ps32DstRoi++) = (HI_S32)af32Roi[0];
1974 *(ps32DstRoi++) = (HI_S32)af32Roi[1];
1975 *(ps32DstRoi++) = (HI_S32)af32Roi[2];
1976 *(ps32DstRoi++) = (HI_S32)af32Roi[3];
1977 } else {
1978 continue;
1979 }
1980 }
1981 *(ps32ClassRoiNum++) = u32RoiNum;
1982 }
1983 return HI_SUCCESS;
1984 }
1985
1986 /*
1987 * Prototype : SVP_NNIE_Yolov2_Iou
1988 * Description : Yolov2 IOU
1989 * Input : SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox1 [IN] first bbox
1990 * SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox2 [IN] second bbox
1991 * HI_U32 u32ClassNum [IN] Class num
1992 * HI_U32 u32GridNum [IN] grid num
1993 * HI_U32 u32BboxNum [IN] bbox num
1994 * HI_U32 u32ConfThresh [IN] confidence thresh
1995 * HI_U32 u32NmsThresh [IN] Nms thresh
1996 * HI_U32 u32OriImgWidth [IN] input image width
1997 * HI_U32 u32OriImgHeight [IN] input image height
1998 * HI_U32* pu32MemPool [IN] assist buffer
1999 * HI_S32 *ps32DstScores [OUT] dst score of ROI
2000 * HI_S32 *ps32DstRoi [OUT] dst Roi
2001 * HI_S32 *ps32ClassRoiNum[OUT] dst roi num of each class
2002 */
SVP_NNIE_Yolov2_Iou(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S * pstBbox1,SAMPLE_SVP_NNIE_YOLOV2_BBOX_S * pstBbox2)2003 static HI_DOUBLE SVP_NNIE_Yolov2_Iou(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox1, SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox2)
2004 {
2005 HI_FLOAT f32InterWidth = 0.0;
2006 HI_FLOAT f32InterHeight = 0.0;
2007 HI_DOUBLE f64InterArea = 0.0;
2008 HI_DOUBLE f64Box1Area = 0.0;
2009 HI_DOUBLE f64Box2Area = 0.0;
2010 HI_DOUBLE f64UnionArea = 0.0;
2011
2012 f32InterWidth = SAMPLE_SVP_NNIE_MIN(pstBbox1->f32Xmax, pstBbox2->f32Xmax) -
2013 SAMPLE_SVP_NNIE_MAX(pstBbox1->f32Xmin, pstBbox2->f32Xmin);
2014 f32InterHeight = SAMPLE_SVP_NNIE_MIN(pstBbox1->f32Ymax, pstBbox2->f32Ymax) -
2015 SAMPLE_SVP_NNIE_MAX(pstBbox1->f32Ymin, pstBbox2->f32Ymin);
2016 if (f32InterWidth <= 0 || f32InterHeight <= 0)
2017 return 0;
2018
2019 f64InterArea = f32InterWidth * f32InterHeight;
2020 f64Box1Area = (pstBbox1->f32Xmax - pstBbox1->f32Xmin) * (pstBbox1->f32Ymax - pstBbox1->f32Ymin);
2021 f64Box2Area = (pstBbox2->f32Xmax - pstBbox2->f32Xmin) * (pstBbox2->f32Ymax - pstBbox2->f32Ymin);
2022 f64UnionArea = f64Box1Area + f64Box2Area - f64InterArea;
2023
2024 return f64InterArea / f64UnionArea;
2025 }
2026
2027 /*
2028 * Prototype : SVP_NNIE_Yolov2_NonMaxSuppression
2029 * Description : Yolov2 NonMaxSuppression function
2030 * Input : SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox [IN] input bbox
2031 * HI_U32 u32BoxNum [IN] Bbox num
2032 * HI_U32 u32ClassNum [IN] Class num
2033 * HI_U32 u32NmsThresh [IN] NMS thresh
2034 * HI_U32 u32BboxNum [IN] bbox num
2035 * HI_U32 u32MaxRoiNum [IN] max roi num
2036 */
SVP_NNIE_Yolov2_NonMaxSuppression(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S * pstBbox,HI_U32 u32BboxNum,HI_U32 u32NmsThresh,HI_U32 u32MaxRoiNum)2037 static HI_S32 SVP_NNIE_Yolov2_NonMaxSuppression(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox, HI_U32 u32BboxNum,
2038 HI_U32 u32NmsThresh, HI_U32 u32MaxRoiNum)
2039 {
2040 HI_U32 i, j;
2041 HI_U32 u32Num = 0;
2042 HI_DOUBLE f64Iou = 0.0;
2043
2044 for (i = 0; i < u32BboxNum && u32Num < u32MaxRoiNum; i++) {
2045 if (pstBbox[i].u32Mask == 0) {
2046 u32Num++;
2047 for (j = i + 1; j < u32BboxNum; j++) {
2048 if (pstBbox[j].u32Mask == 0) {
2049 f64Iou = SVP_NNIE_Yolov2_Iou(&pstBbox[i], &pstBbox[j]);
2050 if (f64Iou >= (HI_DOUBLE)u32NmsThresh / SAMPLE_SVP_NNIE_QUANT_BASE) {
2051 pstBbox[j].u32Mask = 1;
2052 }
2053 }
2054 }
2055 }
2056 }
2057
2058 return HI_SUCCESS;
2059 }
2060
/*
 * Prototype    : SVP_NNIE_GetMaxVal
 * Description  : finds the largest value of an array; on ties the first occurrence wins
 * Input        : HI_FLOAT *pf32Val          [IN]  values to search, element 0 is always read
 *                HI_U32 u32Num              [IN]  number of values
 *                HI_U32 *pu32MaxValueIndex  [OUT] index of the largest value
 * Return       : the largest value
 */
static HI_FLOAT SVP_NNIE_GetMaxVal(HI_FLOAT *pf32Val, HI_U32 u32Num, HI_U32 *pu32MaxValueIndex)
{
    HI_U32 u32Best = 0;
    HI_U32 u32Cur = 0;

    for (u32Cur = 1; u32Cur < u32Num; u32Cur++) {
        if (pf32Val[u32Cur] > pf32Val[u32Best]) {
            u32Best = u32Cur;
        }
    }

    *pu32MaxValueIndex = u32Best;
    return pf32Val[u32Best];
}
2077
/*
 * Prototype    : SVP_NNIE_Yolov2_GetResult
 * Description  : Yolov2 GetResult function
 * Input        : HI_S32 *ps32InputData     [IN]  pointer to the input data memory
 *                HI_U32 u32GridNumWidth    [IN]  Grid num in width direction
 *                HI_U32 u32GridNumHeight   [IN]  Grid num in height direction
 *                HI_U32 u32EachGridBbox    [IN]  Bbox num of each grid
 *                HI_U32 u32ClassNum        [IN]  class num
 *                HI_U32 u32SrcWidth        [IN]  input image width
 *                HI_U32 u32SrcHeight       [IN]  input image height
 *                HI_U32 u32MaxRoiNum       [IN]  Max output roi num
 *                HI_U32 u32NmsThresh       [IN]  NMS thresh
 *                HI_U32 u32ConfThresh      [IN]  confidence thresh
 *                HI_FLOAT af32Bias[]       [IN]  bias values
 *                HI_U32* pu32TmpBuf        [IN]  assist buffer
 *                HI_S32 *ps32DstScores     [OUT] dst score
 *                HI_S32 *ps32DstRoi        [OUT] dst roi
 *                HI_S32 *ps32ClassRoiNum   [OUT] class roi num
 */
SVP_NNIE_Yolov2_GetResult(HI_S32 * ps32InputData,HI_U32 u32GridNumWidth,HI_U32 u32GridNumHeight,HI_U32 u32EachGridBbox,HI_U32 u32ClassNum,HI_U32 u32SrcWidth,HI_U32 u32SrcHeight,HI_U32 u32MaxRoiNum,HI_U32 u32NmsThresh,HI_U32 u32ConfThresh,HI_FLOAT af32Bias[],HI_U32 * pu32TmpBuf,HI_S32 * ps32DstScores,HI_S32 * ps32DstRoi,HI_S32 * ps32ClassRoiNum)2095 static HI_S32 SVP_NNIE_Yolov2_GetResult(HI_S32 *ps32InputData, HI_U32 u32GridNumWidth, HI_U32 u32GridNumHeight,
2096 HI_U32 u32EachGridBbox, HI_U32 u32ClassNum, HI_U32 u32SrcWidth, HI_U32 u32SrcHeight, HI_U32 u32MaxRoiNum,
2097 HI_U32 u32NmsThresh, HI_U32 u32ConfThresh, HI_FLOAT af32Bias[], HI_U32 *pu32TmpBuf, HI_S32 *ps32DstScores,
2098 HI_S32 *ps32DstRoi, HI_S32 *ps32ClassRoiNum)
2099 {
2100 HI_U32 u32GridNum = u32GridNumWidth * u32GridNumHeight;
2101 const HI_U32 u32ParaNum = (SAMPLE_SVP_NNIE_COORDI_NUM + 1 + u32ClassNum);
2102 HI_U32 u32TotalBboxNum = u32GridNum * u32EachGridBbox;
2103 HI_U32 u32CStep = u32GridNum;
2104 HI_U32 u32HStep = u32GridNumWidth;
2105 HI_U32 u32BoxsNum = 0;
2106 HI_FLOAT *pf32BoxTmp = NULL;
2107 HI_FLOAT *f32InputData = NULL;
2108 HI_FLOAT f32ObjScore = 0.0;
2109 HI_FLOAT f32MaxScore = 0.0;
2110 HI_S32 s32Score = 0;
2111 HI_U32 u32MaxValueIndex = 0;
2112 HI_U32 h = 0, w = 0, n = 0;
2113 HI_U32 c = 0, k = 0, i = 0;
2114 HI_U32 u32Index = 0;
2115 HI_FLOAT x, y, f32Width, f32Height;
2116 HI_U32 u32AssistBuffSize = u32TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_STACK_S);
2117 HI_U32 u32BoxBuffSize = u32TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S);
2118 HI_U32 u32BoxResultNum = 0;
2119 SAMPLE_SVP_NNIE_STACK_S *pstAssistStack = NULL;
2120 SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBox = NULL;
2121
2122 /* store float type data */
2123 f32InputData = (HI_FLOAT *)pu32TmpBuf;
2124 /* assist buffer for sort */
2125 pstAssistStack = (SAMPLE_SVP_NNIE_STACK_S *)(f32InputData + u32TotalBboxNum * u32ParaNum);
2126 /* assist box buffer */
2127 pstBox = (SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *)((HI_U8 *)pstAssistStack + u32AssistBuffSize);
2128 /* box tmp buffer */
2129 pf32BoxTmp = (HI_FLOAT *)((HI_U8 *)pstBox + u32BoxBuffSize);
2130
2131 for (i = 0; i < u32TotalBboxNum * u32ParaNum; i++) {
2132 f32InputData[i] = (HI_FLOAT)(ps32InputData[i]) / SAMPLE_SVP_NNIE_QUANT_BASE;
2133 }
2134
2135 // permute
2136 for (h = 0; h < u32GridNumHeight; h++) {
2137 for (w = 0; w < u32GridNumWidth; w++) {
2138 for (c = 0; c < u32EachGridBbox * u32ParaNum; c++) {
2139 pf32BoxTmp[n++] = f32InputData[c * u32CStep + h * u32HStep + w];
2140 }
2141 }
2142 }
2143
2144 for (n = 0; n < u32GridNum; n++) {
2145 // Grid
2146 w = n % u32GridNumWidth;
2147 h = n / u32GridNumWidth;
2148 for (k = 0; k < u32EachGridBbox; k++) {
2149 u32Index = (n * u32EachGridBbox + k) * u32ParaNum;
2150 x = (HI_FLOAT)((w + SAMPLE_SVP_NNIE_SIGMOID(pf32BoxTmp[u32Index + 0])) / u32GridNumWidth); // x
2151 y = (HI_FLOAT)((h + SAMPLE_SVP_NNIE_SIGMOID(pf32BoxTmp[u32Index + 1])) / u32GridNumHeight); // y
2152 f32Width = (HI_FLOAT)((exp(pf32BoxTmp[u32Index + 2]) * af32Bias[2 * k]) / u32GridNumWidth); // w
2153 f32Height = (HI_FLOAT)((exp(pf32BoxTmp[u32Index + 3]) * af32Bias[2 * k + 1]) / u32GridNumHeight); // h
2154
2155 f32ObjScore = SAMPLE_SVP_NNIE_SIGMOID(pf32BoxTmp[u32Index + 4]);
2156 SVP_NNIE_SoftMax(&pf32BoxTmp[u32Index + 5], u32ClassNum);
2157
2158 f32MaxScore = SVP_NNIE_GetMaxVal(&pf32BoxTmp[u32Index + 5], u32ClassNum, &u32MaxValueIndex);
2159
2160 s32Score = (HI_S32)(f32MaxScore * f32ObjScore * SAMPLE_SVP_NNIE_QUANT_BASE);
2161 if ((HI_U32)s32Score > u32ConfThresh) {
2162 pstBox[u32BoxsNum].f32Xmin = (HI_FLOAT)(x - f32Width * SAMPLE_SVP_NNIE_HALF);
2163 pstBox[u32BoxsNum].f32Xmax = (HI_FLOAT)(x + f32Width * SAMPLE_SVP_NNIE_HALF);
2164 pstBox[u32BoxsNum].f32Ymin = (HI_FLOAT)(y - f32Height * SAMPLE_SVP_NNIE_HALF);
2165 pstBox[u32BoxsNum].f32Ymax = (HI_FLOAT)(y + f32Height * SAMPLE_SVP_NNIE_HALF);
2166 pstBox[u32BoxsNum].s32ClsScore = s32Score;
2167 pstBox[u32BoxsNum].u32ClassIdx = u32MaxValueIndex + 1;
2168 pstBox[u32BoxsNum].u32Mask = 0;
2169 u32BoxsNum++;
2170 }
2171 }
2172 }
2173 // quick_sort
2174 if (u32BoxsNum > 1) {
2175 SVP_NNIE_Yolo_NonRecursiveArgQuickSort((HI_S32 *)pstBox, 0, u32BoxsNum - 1,
2176 sizeof(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S) / sizeof(HI_S32), 4, pstAssistStack);
2177 }
2178 // Nms
2179 SVP_NNIE_Yolov2_NonMaxSuppression(pstBox, u32BoxsNum, u32NmsThresh, u32BoxsNum);
2180 // Get the result
2181 (HI_VOID)
2182 memset_s((void *)ps32ClassRoiNum, (u32ClassNum + 1) * sizeof(HI_U32), 0, (u32ClassNum + 1) * sizeof(HI_U32));
2183 for (i = 1; i < u32ClassNum + 1; i++) {
2184 for (n = 0; n < u32BoxsNum && u32BoxResultNum < u32MaxRoiNum; n++) {
2185 if ((pstBox[n].u32Mask == 0) && (i == pstBox[n].u32ClassIdx)) {
2186 *(ps32DstRoi++) = (HI_S32)SAMPLE_SVP_NNIE_MAX(pstBox[n].f32Xmin * u32SrcWidth, 0);
2187 *(ps32DstRoi++) = (HI_S32)SAMPLE_SVP_NNIE_MAX(pstBox[n].f32Ymin * u32SrcHeight, 0);
2188 *(ps32DstRoi++) = (HI_S32)SAMPLE_SVP_NNIE_MIN(pstBox[n].f32Xmax * u32SrcWidth, u32SrcWidth);
2189 *(ps32DstRoi++) = (HI_S32)SAMPLE_SVP_NNIE_MIN(pstBox[n].f32Ymax * u32SrcHeight, u32SrcHeight);
2190 *(ps32DstScores++) = pstBox[n].s32ClsScore;
2191 *(ps32ClassRoiNum + pstBox[n].u32ClassIdx) = *(ps32ClassRoiNum + pstBox[n].u32ClassIdx) + 1;
2192 u32BoxResultNum++;
2193 }
2194 }
2195 }
2196 return HI_SUCCESS;
2197 }
2198
2199 /*
2200 * Prototype : SVP_NNIE_Yolov3_GetResult
2201 * Description : Yolov3 GetResult function
2202 * Input : HI_S32 **pps32InputData [IN] pointer to the input data
2203 * HI_U32 au32GridNumWidth[] [IN] Grid num in width direction
2204 * HI_U32 au32GridNumHeight[] [IN] Grid num in height direction
2205 * HI_U32 au32Stride[] [IN] stride of input data
2206 * HI_U32 u32EachGridBbox [IN] Bbox num of each grid
2207 * HI_U32 u32ClassNum [IN] class num
2208 * HI_U32 u32SrcWidth [IN] input image width
2209 * HI_U32 u32SrcHeight [IN] input image height
2210 * HI_U32 u32MaxRoiNum [IN] Max output roi num
2211 * HI_U32 u32NmsThresh [IN] NMS thresh
2212 * HI_U32 u32ConfThresh [IN] conf thresh
2213 * HI_U32 af32Bias[][] [IN] bias
2214 * HI_U32* pu32TmpBuf [IN] assist buffer
2215 * HI_S32 *ps32DstScores [OUT] dst score
2216 * HI_S32 *ps32DstRoi [OUT] dst roi
2217 * HI_S32 *ps32ClassRoiNum [OUT] class roi num
2218 */
SVP_NNIE_Yolov3_GetResult(HI_U64 au64InputBlobAddr[],HI_U32 au32GridNumWidth[],HI_U32 au32GridNumHeight[],HI_U32 au32Stride[],HI_U32 u32EachGridBbox,HI_U32 u32ClassNum,HI_U32 u32SrcWidth,HI_U32 u32SrcHeight,HI_U32 u32MaxRoiNum,HI_U32 u32NmsThresh,HI_U32 u32ConfThresh,HI_FLOAT af32Bias[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM][SAMPLE_SVP_NNIE_YOLOV3_EACH_GRID_BIAS_NUM],HI_S32 * ps32TmpBuf,HI_S32 * ps32DstScore,HI_S32 * ps32DstRoi,HI_S32 * ps32ClassRoiNum)2219 static HI_S32 SVP_NNIE_Yolov3_GetResult(HI_U64 au64InputBlobAddr[], HI_U32 au32GridNumWidth[],
2220 HI_U32 au32GridNumHeight[], HI_U32 au32Stride[], HI_U32 u32EachGridBbox, HI_U32 u32ClassNum, HI_U32 u32SrcWidth,
2221 HI_U32 u32SrcHeight, HI_U32 u32MaxRoiNum, HI_U32 u32NmsThresh, HI_U32 u32ConfThresh,
2222 HI_FLOAT af32Bias[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM][SAMPLE_SVP_NNIE_YOLOV3_EACH_GRID_BIAS_NUM],
2223 HI_S32 *ps32TmpBuf, HI_S32 *ps32DstScore, HI_S32 *ps32DstRoi, HI_S32 *ps32ClassRoiNum)
2224 {
2225 HI_S32 *ps32InputBlob = NULL;
2226 HI_FLOAT *pf32Permute = NULL;
2227 SAMPLE_SVP_NNIE_YOLOV3_BBOX_S *pstBbox = NULL;
2228 HI_S32 *ps32AssistBuf = NULL;
2229 HI_U32 u32TotalBboxNum = 0;
2230 HI_U32 u32ChnOffset = 0;
2231 HI_U32 u32HeightOffset = 0;
2232 HI_U32 u32BboxNum = 0;
2233 HI_U32 u32GridXIdx;
2234 HI_U32 u32GridYIdx;
2235 HI_U32 u32Offset;
2236 HI_FLOAT f32StartX;
2237 HI_FLOAT f32StartY;
2238 HI_FLOAT f32Width;
2239 HI_FLOAT f32Height;
2240 HI_FLOAT f32ObjScore;
2241 HI_U32 u32MaxValueIndex = 0;
2242 HI_FLOAT f32MaxScore;
2243 HI_S32 s32ClassScore;
2244 HI_U32 u32ClassRoiNum;
2245 HI_U32 i = 0, j = 0, k = 0, c = 0, h = 0, w = 0;
2246 HI_U32 u32BlobSize = 0;
2247 HI_U32 u32MaxBlobSize = 0;
2248
2249 for (i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++) {
2250 u32BlobSize = au32GridNumWidth[i] * au32GridNumHeight[i] * sizeof(HI_U32) *
2251 SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM * u32EachGridBbox;
2252 if (u32MaxBlobSize < u32BlobSize) {
2253 u32MaxBlobSize = u32BlobSize;
2254 }
2255 }
2256
2257 for (i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++) {
2258 u32TotalBboxNum += au32GridNumWidth[i] * au32GridNumHeight[i] * u32EachGridBbox;
2259 }
2260
2261 // get each tmpbuf addr
2262 pf32Permute = (HI_FLOAT *)ps32TmpBuf;
2263 pstBbox = (SAMPLE_SVP_NNIE_YOLOV3_BBOX_S *)(pf32Permute + u32MaxBlobSize / sizeof(HI_S32));
2264 ps32AssistBuf = (HI_S32 *)(pstBbox + u32TotalBboxNum);
2265
2266 for (i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++) {
2267 // permute
2268 u32Offset = 0;
2269 ps32InputBlob = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, au64InputBlobAddr[i]);
2270 u32ChnOffset = au32GridNumHeight[i] * au32Stride[i] / sizeof(HI_S32);
2271 u32HeightOffset = au32Stride[i] / sizeof(HI_S32);
2272 for (h = 0; h < au32GridNumHeight[i]; h++) {
2273 for (w = 0; w < au32GridNumWidth[i]; w++) {
2274 for (c = 0; c < SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM * u32EachGridBbox; c++) {
2275 pf32Permute[u32Offset++] = (HI_FLOAT)(ps32InputBlob[c * u32ChnOffset + h * u32HeightOffset + w]) /
2276 SAMPLE_SVP_NNIE_QUANT_BASE;
2277 }
2278 }
2279 }
2280
2281 // decode bbox and calculate score
2282 for (j = 0; j < au32GridNumWidth[i] * au32GridNumHeight[i]; j++) {
2283 u32GridXIdx = j % au32GridNumWidth[i];
2284 u32GridYIdx = j / au32GridNumWidth[i];
2285 for (k = 0; k < u32EachGridBbox; k++) {
2286 u32MaxValueIndex = 0;
2287 u32Offset = (j * u32EachGridBbox + k) * SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM;
2288 // decode bbox
2289 f32StartX =
2290 ((HI_FLOAT)u32GridXIdx + SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset + 0])) / au32GridNumWidth[i];
2291 f32StartY = ((HI_FLOAT)u32GridYIdx + SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset + 1])) /
2292 au32GridNumHeight[i];
2293 if (u32SrcWidth == 0 || u32SrcHeight == 0) {
2294 printf("Divisor u32SrcWidth or u32SrcHeight cannot be 0!\n");
2295 return HI_FAILURE;
2296 }
2297 f32Width = (HI_FLOAT)(exp(pf32Permute[u32Offset + SAMPLE_SVP_NNIE_X_MAX_OFFSET]) *
2298 af32Bias[i][2 * k]) / u32SrcWidth;
2299 f32Height = (HI_FLOAT)(exp(pf32Permute[u32Offset + SAMPLE_SVP_NNIE_Y_MAX_OFFSET]) *
2300 af32Bias[i][2 * k + 1]) / u32SrcHeight;
2301
2302 // calculate score
2303 (void)SVP_NNIE_Sigmoid(&pf32Permute[u32Offset + SAMPLE_SVP_NNIE_SCORE_OFFSET], (u32ClassNum + 1));
2304 f32ObjScore = pf32Permute[u32Offset + SAMPLE_SVP_NNIE_SCORE_OFFSET];
2305 f32MaxScore = SVP_NNIE_GetMaxVal(&pf32Permute[u32Offset + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET],
2306 u32ClassNum, &u32MaxValueIndex);
2307 s32ClassScore = (HI_S32)(f32MaxScore * f32ObjScore * SAMPLE_SVP_NNIE_QUANT_BASE);
2308
2309 // filter low score roi
2310 if ((HI_U32)s32ClassScore > u32ConfThresh) {
2311 pstBbox[u32BboxNum].f32Xmin = (HI_FLOAT)(f32StartX - f32Width * 0.5f);
2312 pstBbox[u32BboxNum].f32Ymin = (HI_FLOAT)(f32StartY - f32Height * 0.5f);
2313 pstBbox[u32BboxNum].f32Xmax = (HI_FLOAT)(f32StartX + f32Width * 0.5f);
2314 pstBbox[u32BboxNum].f32Ymax = (HI_FLOAT)(f32StartY + f32Height * 0.5f);
2315 pstBbox[u32BboxNum].s32ClsScore = s32ClassScore;
2316 pstBbox[u32BboxNum].u32Mask = 0;
2317 pstBbox[u32BboxNum].u32ClassIdx = (HI_S32)(u32MaxValueIndex + 1);
2318 u32BboxNum++;
2319 }
2320 }
2321 }
2322 }
2323
2324 // quick sort
2325 if (u32BboxNum >= 1) {
2326 (void)SVP_NNIE_Yolo_NonRecursiveArgQuickSort((HI_S32 *)pstBbox, 0, u32BboxNum - 1,
2327 sizeof(SAMPLE_SVP_NNIE_YOLOV3_BBOX_S) / sizeof(HI_U32), 4, (SAMPLE_SVP_NNIE_STACK_S *)ps32AssistBuf);
2328 }
2329 // Yolov3 and Yolov2 have the same Nms operation
2330 (void)SVP_NNIE_Yolov2_NonMaxSuppression(pstBbox, u32BboxNum, u32NmsThresh, u32BboxNum);
2331
2332 // Get result
2333 for (i = 1; i < u32ClassNum + 1; i++) {
2334 u32ClassRoiNum = 0;
2335 for (j = 0; j < u32BboxNum; j++) {
2336 if ((pstBbox[j].u32Mask == 0) && (i == pstBbox[j].u32ClassIdx) && (u32ClassRoiNum < u32MaxRoiNum)) {
2337 *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Xmin * u32SrcWidth), 0);
2338 *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Ymin * u32SrcHeight), 0);
2339 *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Xmax * u32SrcWidth), (HI_S32)u32SrcWidth);
2340 *(ps32DstRoi++) =
2341 SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Ymax * u32SrcHeight), (HI_S32)u32SrcHeight);
2342 *(ps32DstScore++) = pstBbox[j].s32ClsScore;
2343 u32ClassRoiNum++;
2344 }
2345 }
2346 *(ps32ClassRoiNum + i) = u32ClassRoiNum;
2347 }
2348
2349 return HI_SUCCESS;
2350 }
2351
SAMPLE_SVP_NNIE_Cnn_GetTopN(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_CNN_SOFTWARE_PARAM_S * pstSoftwareParam)2352 HI_S32 SAMPLE_SVP_NNIE_Cnn_GetTopN(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
2353 SAMPLE_SVP_NNIE_CNN_SOFTWARE_PARAM_S *pstSoftwareParam)
2354 {
2355 HI_S32 s32Ret = HI_SUCCESS;
2356 CHECK_NULL_PTR(pstNnieParam);
2357 CHECK_NULL_PTR(pstSoftwareParam);
2358 s32Ret = SVP_NNIE_Cnn_GetTopN(
2359 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[0].astDst[0].u64VirAddr),
2360 pstNnieParam->astSegData[0].astDst[0].u32Stride, pstNnieParam->astSegData[0].astDst[0].unShape.stWhc.u32Width,
2361 pstNnieParam->astSegData[0].astDst[0].u32Num, pstSoftwareParam->u32TopN,
2362 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stAssistBuf.u64VirAddr),
2363 pstSoftwareParam->stGetTopN.u32Stride,
2364 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stGetTopN.u64VirAddr));
2365 SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
2366 "Error,SVP_NNIE_Cnn_GetTopN failed!\n");
2367 return s32Ret;
2368 }
2369
2370 /*
2371 * Prototype : SAMPLE_SVP_NNIE_RpnTmpBufSize
2372 * Description : this function is used to get RPN func's assist buffer size
2373 * Input : HI_U32 u32NumRatioAnchors [IN] ratio anchor num
2374 * HI_U32 u32NumScaleAnchors [IN] scale anchor num
2375 * HI_U32 u32ConvHeight [IN] convolution height
2376 * HI_U32 u32ConvWidth [IN] convolution width
2377 */
SAMPLE_SVP_NNIE_RpnTmpBufSize(HI_U32 u32NumRatioAnchors,HI_U32 u32NumScaleAnchors,HI_U32 u32ConvHeight,HI_U32 u32ConvWidth)2378 HI_U32 SAMPLE_SVP_NNIE_RpnTmpBufSize(HI_U32 u32NumRatioAnchors, HI_U32 u32NumScaleAnchors, HI_U32 u32ConvHeight,
2379 HI_U32 u32ConvWidth)
2380 {
2381 HI_U64 u64AnchorsNum, u64BboxDeltaSize, u64AnchorsSize, u64ProposalSize, u64RatioAnchorsSize, u64ScaleAnchorsSize;
2382 HI_U64 u64ScoreSize, u64StackSize;
2383 HI_U64 u64TotalSize = 0;
2384
2385 SAMPLE_SVP_CHECK_EXPR_RET((HI_U64)u32NumRatioAnchors * u32NumScaleAnchors > SAMPLE_SVP_NNIE_MAX_MEM, 0,
2386 SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,u32NumRatioAnchors * u32NumScaleAnchors should be less than %u!\n",
2387 SAMPLE_SVP_NNIE_MAX_MEM);
2388 SAMPLE_SVP_CHECK_EXPR_RET((HI_U64)u32ConvHeight * u32ConvWidth > SAMPLE_SVP_NNIE_MAX_MEM, 0,
2389 SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,u32ConvHeight*u32ConvWidth should be less than %u!\n",
2390 SAMPLE_SVP_NNIE_MAX_MEM);
2391 u64AnchorsNum = (HI_U64)u32NumRatioAnchors * u32NumScaleAnchors * u32ConvHeight * u32ConvWidth;
2392 SAMPLE_SVP_CHECK_EXPR_RET(u64AnchorsNum > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2393 "Error,u64AnchorsNum should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2394 u64AnchorsSize = sizeof(HI_U32) * SAMPLE_SVP_NNIE_COORDI_NUM * u64AnchorsNum;
2395 SAMPLE_SVP_CHECK_EXPR_RET(u64AnchorsSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2396 "Error,u64AnchorsSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2397
2398 u64BboxDeltaSize = u64AnchorsSize;
2399 u64ProposalSize = sizeof(HI_U32) * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u64AnchorsNum;
2400 SAMPLE_SVP_CHECK_EXPR_RET(u64ProposalSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2401 "Error,u64ProposalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2402
2403 u64RatioAnchorsSize = sizeof(HI_FLOAT) * u32NumRatioAnchors * SAMPLE_SVP_NNIE_COORDI_NUM;
2404 SAMPLE_SVP_CHECK_EXPR_RET(u64RatioAnchorsSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2405 "Error,u64RatioAnchorsSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2406 u64ScaleAnchorsSize = sizeof(HI_FLOAT) * u32NumRatioAnchors * u32NumScaleAnchors * SAMPLE_SVP_NNIE_COORDI_NUM;
2407 SAMPLE_SVP_CHECK_EXPR_RET(u64ScaleAnchorsSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2408 "Error,u64ScaleAnchorsSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2409 u64ScoreSize = sizeof(HI_FLOAT) * u64AnchorsNum * 2;
2410 SAMPLE_SVP_CHECK_EXPR_RET(u64ScoreSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2411 "Error,u64ScoreSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2412 u64StackSize = sizeof(SAMPLE_SVP_NNIE_STACK_S) * u64AnchorsNum;
2413 SAMPLE_SVP_CHECK_EXPR_RET(u64StackSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2414 "Error,u64StackSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2415 u64TotalSize = u64AnchorsSize + u64BboxDeltaSize + u64ProposalSize + u64RatioAnchorsSize + u64ScaleAnchorsSize +
2416 u64ScoreSize + u64StackSize;
2417 SAMPLE_SVP_CHECK_EXPR_RET(u64TotalSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2418 "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2419 return (HI_U32)u64TotalSize;
2420 }
2421
SAMPLE_SVP_NNIE_FasterRcnn_Rpn(SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S * pstSoftwareParam)2422 HI_S32 SAMPLE_SVP_NNIE_FasterRcnn_Rpn(SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S *pstSoftwareParam)
2423 {
2424 HI_S32 s32Ret = HI_SUCCESS;
2425 CHECK_NULL_PTR(pstSoftwareParam);
2426 s32Ret = SVP_NNIE_Rpn(pstSoftwareParam->aps32Conv, pstSoftwareParam->u32NumRatioAnchors,
2427 pstSoftwareParam->u32NumScaleAnchors, pstSoftwareParam->au32Scales, pstSoftwareParam->au32Ratios,
2428 pstSoftwareParam->u32OriImHeight, pstSoftwareParam->u32OriImWidth, pstSoftwareParam->au32ConvHeight,
2429 pstSoftwareParam->au32ConvWidth, pstSoftwareParam->au32ConvChannel, pstSoftwareParam->u32ConvStride,
2430 pstSoftwareParam->u32MaxRoiNum, pstSoftwareParam->u32MinSize, pstSoftwareParam->u32SpatialScale,
2431 pstSoftwareParam->u32NmsThresh, pstSoftwareParam->u32FilterThresh, pstSoftwareParam->u32NumBeforeNms,
2432 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32, pstSoftwareParam->stRpnTmpBuf.u64VirAddr),
2433 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stRpnBbox.u64VirAddr),
2434 pstSoftwareParam->stRpnBbox.u32Stride, &pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height);
2435 SAMPLE_COMM_SVP_FlushCache(pstSoftwareParam->stRpnBbox.u64PhyAddr,
2436 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID, pstSoftwareParam->stRpnBbox.u64VirAddr),
2437 pstSoftwareParam->stRpnBbox.u32Num * pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Chn *
2438 pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height * pstSoftwareParam->stRpnBbox.u32Stride);
2439 SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,SVP_NNIE_Rpn failed!\n");
2440 return s32Ret;
2441 }
2442
SAMPLE_SVP_NNIE_Pvanet_Rpn(SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S * pstSoftwareParam)2443 HI_S32 SAMPLE_SVP_NNIE_Pvanet_Rpn(SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S *pstSoftwareParam)
2444 {
2445 HI_S32 s32Ret = HI_SUCCESS;
2446
2447 CHECK_NULL_PTR(pstSoftwareParam);
2448 s32Ret = SVP_NNIE_Rpn(pstSoftwareParam->aps32Conv, pstSoftwareParam->u32NumRatioAnchors,
2449 pstSoftwareParam->u32NumScaleAnchors, pstSoftwareParam->au32Scales, pstSoftwareParam->au32Ratios,
2450 pstSoftwareParam->u32OriImHeight, pstSoftwareParam->u32OriImWidth, pstSoftwareParam->au32ConvHeight,
2451 pstSoftwareParam->au32ConvWidth, pstSoftwareParam->au32ConvChannel, pstSoftwareParam->u32ConvStride,
2452 pstSoftwareParam->u32MaxRoiNum, pstSoftwareParam->u32MinSize, pstSoftwareParam->u32SpatialScale,
2453 pstSoftwareParam->u32NmsThresh, pstSoftwareParam->u32FilterThresh, pstSoftwareParam->u32NumBeforeNms,
2454 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32, pstSoftwareParam->stRpnTmpBuf.u64VirAddr),
2455 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stRpnBbox.u64VirAddr),
2456 pstSoftwareParam->stRpnBbox.u32Stride, &pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height);
2457 SAMPLE_COMM_SVP_FlushCache(pstSoftwareParam->stRpnBbox.u64PhyAddr,
2458 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID, pstSoftwareParam->stRpnBbox.u64VirAddr),
2459 pstSoftwareParam->stRpnBbox.u32Num * pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Chn *
2460 pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height * pstSoftwareParam->stRpnBbox.u32Stride);
2461 SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,SVP_NNIE_Rpn failed!\n");
2462 return s32Ret;
2463 }
2464
/*
 * Prototype    : SAMPLE_SVP_NNIE_FasterRcnn_GetResultTmpBufSize
 * Description  : Compute the size in bytes of the assist buffer for the FasterRcnn GetResult
 *                step: a float score per (roi, class), a proposal record per roi and a
 *                sort-stack entry per roi. Every value is computed in 64 bits and checked
 *                against SAMPLE_SVP_NNIE_MAX_MEM.
 * Input        : HI_U32 u32MaxRoiNum [IN] max roi num
 *                HI_U32 u32ClassNum  [IN] class num
 * Return       : total size in bytes; each guard is given 0 as its return value for the case
 *                where a limit is exceeded
 */
HI_U32 SAMPLE_SVP_NNIE_FasterRcnn_GetResultTmpBufSize(HI_U32 u32MaxRoiNum, HI_U32 u32ClassNum)
{
    const HI_U64 u64RoiNum = (HI_U64)u32MaxRoiNum;
    HI_U64 u64Scores = 0;
    HI_U64 u64Proposals = 0;
    HI_U64 u64Stack = 0;
    HI_U64 u64Sum = 0;

    SAMPLE_SVP_CHECK_EXPR_RET(u64RoiNum * u32ClassNum > SAMPLE_SVP_NNIE_MAX_MEM, 0,
        SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,u32MaxRoiNum * u32ClassNum should be less than %u!\n",
        SAMPLE_SVP_NNIE_MAX_MEM);

    /* one float per (roi, class) */
    u64Scores = sizeof(HI_FLOAT) * u64RoiNum * u32ClassNum;
    SAMPLE_SVP_CHECK_EXPR_RET(u64Scores > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64ScoreSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    /* one proposal record per roi */
    u64Proposals = sizeof(HI_U32) * u64RoiNum * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH;
    SAMPLE_SVP_CHECK_EXPR_RET(u64Proposals > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64ProposalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    /* one sort-stack entry per roi */
    u64Stack = sizeof(SAMPLE_SVP_NNIE_STACK_S) * u64RoiNum;
    SAMPLE_SVP_CHECK_EXPR_RET(u64Stack > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64StackSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    u64Sum = u64Scores + u64Proposals + u64Stack;
    SAMPLE_SVP_CHECK_EXPR_RET(u64Sum > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
    return (HI_U32)u64Sum;
}
2489
/*
 * Prototype    : SAMPLE_SVP_NNIE_Pvanet_GetResultTmpBufSize
 * Description  : Compute the size in bytes of the assist buffer for the Pvanet GetResult step:
 *                a float score per (roi, class), a proposal record per roi and a sort-stack
 *                entry per roi. Every value is computed in 64 bits and checked against
 *                SAMPLE_SVP_NNIE_MAX_MEM.
 * Input        : HI_U32 u32MaxRoiNum [IN] max roi num
 *                HI_U32 u32ClassNum  [IN] class num
 * Return       : total size in bytes; each guard is given 0 as its return value for the case
 *                where a limit is exceeded
 */
HI_U32 SAMPLE_SVP_NNIE_Pvanet_GetResultTmpBufSize(HI_U32 u32MaxRoiNum, HI_U32 u32ClassNum)
{
    const HI_U64 u64RoiNum = (HI_U64)u32MaxRoiNum;
    HI_U64 u64Scores = 0;
    HI_U64 u64Proposals = 0;
    HI_U64 u64Stack = 0;
    HI_U64 u64Sum = 0;

    SAMPLE_SVP_CHECK_EXPR_RET(u64RoiNum * u32ClassNum > SAMPLE_SVP_NNIE_MAX_MEM, 0,
        SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,u32MaxRoiNum * u32ClassNum should be less than %u!\n",
        SAMPLE_SVP_NNIE_MAX_MEM);

    /* one float per (roi, class) */
    u64Scores = sizeof(HI_FLOAT) * u64RoiNum * u32ClassNum;
    SAMPLE_SVP_CHECK_EXPR_RET(u64Scores > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64ScoreSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    /* one proposal record per roi */
    u64Proposals = sizeof(HI_U32) * u64RoiNum * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH;
    SAMPLE_SVP_CHECK_EXPR_RET(u64Proposals > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64ProposalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    /* one sort-stack entry per roi */
    u64Stack = sizeof(SAMPLE_SVP_NNIE_STACK_S) * u64RoiNum;
    SAMPLE_SVP_CHECK_EXPR_RET(u64Stack > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64StackSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    u64Sum = u64Scores + u64Proposals + u64Stack;
    SAMPLE_SVP_CHECK_EXPR_RET(u64Sum > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
    return (HI_U32)u64Sum;
}
2514
SAMPLE_SVP_NNIE_FasterRcnn_GetResult(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S * pstSoftwareParam)2515 HI_S32 SAMPLE_SVP_NNIE_FasterRcnn_GetResult(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
2516 SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S *pstSoftwareParam)
2517 {
2518 HI_S32 s32Ret = HI_SUCCESS;
2519 HI_U32 i = 0;
2520 HI_U32 u32Offset;
2521 HI_S32 *ps32Proposal = HI_NULL;
2522
2523 CHECK_NULL_PTR(pstNnieParam);
2524 CHECK_NULL_PTR(pstSoftwareParam);
2525 SAMPLE_SVP_CHECK_EXPR_RET(pstSoftwareParam->stRpnBbox.u64VirAddr == 0, HI_INVALID_VALUE, SAMPLE_SVP_ERR_LEVEL_ERROR,
2526 "Error,pstSoftwareParam->stRpnBbox.u64VirAddr can't be 0!\n");
2527 u32Offset = pstSoftwareParam->stRpnBbox.u32Stride / sizeof(HI_S32);
2528 ps32Proposal = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stRpnBbox.u64VirAddr);
2529 for (i = 0; i < pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height; i++) {
2530 *(ps32Proposal + u32Offset * i) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2531 *(ps32Proposal + u32Offset * i + 1) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2532 *(ps32Proposal + u32Offset * i + 2) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2533 *(ps32Proposal + u32Offset * i + 3) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2534 }
2535 s32Ret = SVP_NNIE_FasterRcnn_GetResult(
2536 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[1].astDst[0].u64VirAddr),
2537 pstNnieParam->astSegData[1].astDst[0].u32Stride,
2538 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[1].astDst[1].u64VirAddr),
2539 pstNnieParam->astSegData[1].astDst[1].u32Stride,
2540 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stRpnBbox.u64VirAddr),
2541 pstSoftwareParam->stRpnBbox.u32Stride, pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height,
2542 pstSoftwareParam->au32ConfThresh, pstSoftwareParam->u32ValidNmsThresh, pstSoftwareParam->u32MaxRoiNum,
2543 pstSoftwareParam->u32ClassNum, pstSoftwareParam->u32OriImWidth, pstSoftwareParam->u32OriImHeight,
2544 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32, pstSoftwareParam->stGetResultTmpBuf.u64VirAddr),
2545 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstScore.u64VirAddr),
2546 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstRoi.u64VirAddr),
2547 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stClassRoiNum.u64VirAddr));
2548
2549 return s32Ret;
2550 }
2551
SAMPLE_SVP_NNIE_Pvanet_GetResult(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S * pstSoftwareParam)2552 HI_S32 SAMPLE_SVP_NNIE_Pvanet_GetResult(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
2553 SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S *pstSoftwareParam)
2554 {
2555 HI_S32 s32Ret = HI_SUCCESS;
2556 HI_U32 i;
2557 HI_U32 u32Offset;
2558 HI_S32 *ps32Proposal = HI_NULL;
2559
2560 CHECK_NULL_PTR(pstNnieParam);
2561 CHECK_NULL_PTR(pstSoftwareParam);
2562 SAMPLE_SVP_CHECK_EXPR_RET(pstSoftwareParam->stRpnBbox.u64VirAddr == 0, HI_INVALID_VALUE, SAMPLE_SVP_ERR_LEVEL_ERROR,
2563 "Error,pstSoftwareParam->stRpnBbox.u64VirAddr can't be 0!\n");
2564 u32Offset = pstSoftwareParam->stRpnBbox.u32Stride / sizeof(HI_S32);
2565 ps32Proposal = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stRpnBbox.u64VirAddr);
2566 for (i = 0; i < pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height; i++) {
2567 *(ps32Proposal + u32Offset * i) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2568 *(ps32Proposal + u32Offset * i + 1) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2569 *(ps32Proposal + u32Offset * i + 2) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2570 *(ps32Proposal + u32Offset * i + 3) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2571 }
2572 s32Ret = SVP_NNIE_Pvanet_GetResult(
2573 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[1].astDst[0].u64VirAddr),
2574 pstNnieParam->astSegData[1].astDst[0].u32Stride,
2575 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[1].astDst[1].u64VirAddr),
2576 pstNnieParam->astSegData[1].astDst[1].u32Stride,
2577 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stRpnBbox.u64VirAddr),
2578 pstSoftwareParam->stRpnBbox.u32Stride, pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height,
2579 pstSoftwareParam->au32ConfThresh, pstSoftwareParam->u32ValidNmsThresh, pstSoftwareParam->u32MaxRoiNum,
2580 pstSoftwareParam->u32ClassNum, pstSoftwareParam->u32OriImWidth, pstSoftwareParam->u32OriImHeight,
2581 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32, pstSoftwareParam->stGetResultTmpBuf.u64VirAddr),
2582 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstScore.u64VirAddr),
2583 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstRoi.u64VirAddr),
2584 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stClassRoiNum.u64VirAddr));
2585
2586 return s32Ret;
2587 }
2588
/*
 * Prototype    : SAMPLE_SVP_NNIE_Rfcn_GetResultTmpBuf
 * Description  : Compute the size in bytes of the assist buffer for the Rfcn GetResult step:
 *                a float score per (roi, class), a proposal record per roi, a bbox record per
 *                roi and a sort-stack entry per roi. Every value is computed in 64 bits and
 *                checked against SAMPLE_SVP_NNIE_MAX_MEM.
 * Input        : HI_U32 u32MaxRoiNum [IN] max roi num
 *                HI_U32 u32ClassNum  [IN] class num
 * Return       : total size in bytes; each guard is given 0 as its return value for the case
 *                where a limit is exceeded
 */
HI_U32 SAMPLE_SVP_NNIE_Rfcn_GetResultTmpBuf(HI_U32 u32MaxRoiNum, HI_U32 u32ClassNum)
{
    const HI_U64 u64RoiNum = (HI_U64)u32MaxRoiNum;
    HI_U64 u64Scores = 0;
    HI_U64 u64Proposals = 0;
    HI_U64 u64Bboxes = 0;
    HI_U64 u64Stack = 0;
    HI_U64 u64Sum = 0;

    SAMPLE_SVP_CHECK_EXPR_RET(u64RoiNum * u32ClassNum > SAMPLE_SVP_NNIE_MAX_MEM, 0,
        SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,u32MaxRoiNum * u32ClassNum should be less than %u!\n",
        SAMPLE_SVP_NNIE_MAX_MEM);

    /* one float per (roi, class) */
    u64Scores = sizeof(HI_FLOAT) * u64RoiNum * u32ClassNum;
    SAMPLE_SVP_CHECK_EXPR_RET(u64Scores > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64ScoreSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    /* one proposal record per roi */
    u64Proposals = sizeof(HI_U32) * u64RoiNum * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH;
    SAMPLE_SVP_CHECK_EXPR_RET(u64Proposals > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64ProposalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    /* one coordinate record per roi */
    u64Bboxes = sizeof(HI_U32) * u64RoiNum * SAMPLE_SVP_NNIE_COORDI_NUM;
    SAMPLE_SVP_CHECK_EXPR_RET(u64Bboxes > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64BboxSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    /* one sort-stack entry per roi */
    u64Stack = sizeof(SAMPLE_SVP_NNIE_STACK_S) * u64RoiNum;
    SAMPLE_SVP_CHECK_EXPR_RET(u64Stack > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64StackSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    u64Sum = u64Scores + u64Proposals + u64Bboxes + u64Stack;
    SAMPLE_SVP_CHECK_EXPR_RET(u64Sum > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
    return (HI_U32)u64Sum;
}
2617
SAMPLE_SVP_NNIE_Rfcn_Rpn(SAMPLE_SVP_NNIE_RFCN_SOFTWARE_PARAM_S * pstSoftwareParam)2618 HI_S32 SAMPLE_SVP_NNIE_Rfcn_Rpn(SAMPLE_SVP_NNIE_RFCN_SOFTWARE_PARAM_S *pstSoftwareParam)
2619 {
2620 HI_S32 s32Ret = HI_SUCCESS;
2621 CHECK_NULL_PTR(pstSoftwareParam);
2622 s32Ret = SVP_NNIE_Rpn(pstSoftwareParam->aps32Conv, pstSoftwareParam->u32NumRatioAnchors,
2623 pstSoftwareParam->u32NumScaleAnchors, pstSoftwareParam->au32Scales, pstSoftwareParam->au32Ratios,
2624 pstSoftwareParam->u32OriImHeight, pstSoftwareParam->u32OriImWidth, pstSoftwareParam->au32ConvHeight,
2625 pstSoftwareParam->au32ConvWidth, pstSoftwareParam->au32ConvChannel, pstSoftwareParam->u32ConvStride,
2626 pstSoftwareParam->u32MaxRoiNum, pstSoftwareParam->u32MinSize, pstSoftwareParam->u32SpatialScale,
2627 pstSoftwareParam->u32NmsThresh, pstSoftwareParam->u32FilterThresh, pstSoftwareParam->u32NumBeforeNms,
2628 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32, pstSoftwareParam->stRpnTmpBuf.u64VirAddr),
2629 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stRpnBbox.u64VirAddr),
2630 pstSoftwareParam->stRpnBbox.u32Stride, &pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height);
2631 SAMPLE_COMM_SVP_FlushCache(pstSoftwareParam->stRpnBbox.u64PhyAddr,
2632 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID, pstSoftwareParam->stRpnBbox.u64VirAddr),
2633 pstSoftwareParam->stRpnBbox.u32Num * pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Chn *
2634 pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height * pstSoftwareParam->stRpnBbox.u32Stride);
2635 SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,SVP_NNIE_Rpn failed!\n");
2636 return s32Ret;
2637 }
2638
SAMPLE_SVP_NNIE_Rfcn_GetResult(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_RFCN_SOFTWARE_PARAM_S * pstSoftwareParam)2639 HI_S32 SAMPLE_SVP_NNIE_Rfcn_GetResult(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
2640 SAMPLE_SVP_NNIE_RFCN_SOFTWARE_PARAM_S *pstSoftwareParam)
2641 {
2642 HI_S32 s32Ret = HI_SUCCESS;
2643 HI_U32 i = 0;
2644 HI_U32 u32Offset;
2645 HI_S32 *ps32Proposal = HI_NULL;
2646
2647 CHECK_NULL_PTR(pstNnieParam);
2648 CHECK_NULL_PTR(pstSoftwareParam);
2649 u32Offset = pstSoftwareParam->stRpnBbox.u32Stride / sizeof(HI_S32);
2650 ps32Proposal = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stRpnBbox.u64VirAddr);
2651 SAMPLE_SVP_CHECK_EXPR_RET(pstSoftwareParam->stRpnBbox.u64VirAddr == 0, HI_INVALID_VALUE, SAMPLE_SVP_ERR_LEVEL_ERROR,
2652 "Error,pstSoftwareParam->stRpnBbox.u64VirAddr can't be 0!\n");
2653 for (i = 0; i < pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height; i++) {
2654 *(ps32Proposal + u32Offset * i) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2655 *(ps32Proposal + u32Offset * i + 1) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2656 *(ps32Proposal + u32Offset * i + 2) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2657 *(ps32Proposal + u32Offset * i + 3) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2658 }
2659 s32Ret = SVP_NNIE_Rfcn_GetResult(
2660 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[1].astDst[0].u64VirAddr),
2661 pstNnieParam->astSegData[1].astDst[0].u32Stride,
2662 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[2].astDst[0].u64VirAddr),
2663 pstNnieParam->astSegData[2].astDst[0].u32Stride,
2664 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stRpnBbox.u64VirAddr),
2665 pstSoftwareParam->stRpnBbox.u32Stride, pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height,
2666 pstSoftwareParam->au32ConfThresh, pstSoftwareParam->u32MaxRoiNum, pstSoftwareParam->u32ClassNum,
2667 pstSoftwareParam->u32OriImWidth, pstSoftwareParam->u32OriImHeight, pstSoftwareParam->u32ValidNmsThresh,
2668 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32, pstSoftwareParam->stGetResultTmpBuf.u64VirAddr),
2669 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstScore.u64VirAddr),
2670 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstRoi.u64VirAddr),
2671 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stClassRoiNum.u64VirAddr));
2672 SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
2673 "Error,SVP_NNIE_Rfcn_GetResult failed!\n");
2674 return s32Ret;
2675 }
2676
SAMPLE_SVP_NNIE_Ssd_GetResultTmpBuf(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_SSD_SOFTWARE_PARAM_S * pstSoftwareParam)2677 HI_U32 SAMPLE_SVP_NNIE_Ssd_GetResultTmpBuf(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
2678 SAMPLE_SVP_NNIE_SSD_SOFTWARE_PARAM_S *pstSoftwareParam)
2679 {
2680 HI_U64 u64PriorBoxSize = 0;
2681 HI_U64 u64SoftMaxSize = 0;
2682 HI_U64 u64DetectionSize = 0;
2683 HI_U64 u64TotalSize = 0;
2684 HI_U64 u64PriorNum = 0;
2685 HI_U64 u64Tmp;
2686 HI_U32 i;
2687
2688 CHECK_NULL_PTR(pstNnieParam);
2689 CHECK_NULL_PTR(pstSoftwareParam);
2690 /* priorbox size */
2691 for (i = 0; i < pstNnieParam->pstModel->astSeg[0].u16DstNum / 2; i++) {
2692 u64Tmp = (HI_U64)pstSoftwareParam->au32PriorBoxHeight[i] * pstSoftwareParam->au32PriorBoxWidth[i];
2693 SAMPLE_SVP_CHECK_EXPR_RET(u64Tmp > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2694 "Error,u64Tmp should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2695
2696 u64Tmp *= SAMPLE_SVP_NNIE_COORDI_NUM * 2 * sizeof(HI_U32);
2697 SAMPLE_SVP_CHECK_EXPR_RET(u64Tmp > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2698 "Error,u64Tmp should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2699
2700 u64Tmp *= ((HI_U64)pstSoftwareParam->u32MaxSizeNum + pstSoftwareParam->u32MinSizeNum +
2701 (HI_U64)pstSoftwareParam->au32InputAspectRatioNum[i] * 2 * pstSoftwareParam->u32MinSizeNum);
2702 SAMPLE_SVP_CHECK_EXPR_RET(u64Tmp > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2703 "Error,u64Tmp should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2704
2705 u64PriorBoxSize += u64Tmp;
2706 SAMPLE_SVP_CHECK_EXPR_RET(u64PriorBoxSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2707 "Error,u64Tmp should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2708 }
2709 pstSoftwareParam->stPriorBoxTmpBuf.u32Size = (HI_U32)u64PriorBoxSize;
2710 u64TotalSize += u64PriorBoxSize;
2711
2712 /* softmax size */
2713 for (i = 0; i < pstSoftwareParam->u32ConcatNum; i++) {
2714 u64Tmp = (HI_U64)pstSoftwareParam->au32SoftMaxInChn[i] * sizeof(HI_U32);
2715 SAMPLE_SVP_CHECK_EXPR_RET(u64Tmp > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2716 "Error,u64Tmp should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2717
2718 u64SoftMaxSize += u64Tmp;
2719 SAMPLE_SVP_CHECK_EXPR_RET(u64SoftMaxSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2720 "Error,u64SoftMaxSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2721 }
2722 pstSoftwareParam->stSoftMaxTmpBuf.u32Size = (HI_U32)u64SoftMaxSize;
2723 u64TotalSize += u64SoftMaxSize;
2724 SAMPLE_SVP_CHECK_EXPR_RET(u64TotalSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2725 "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2726
2727 /* detection size */
2728 for (i = 0; i < pstSoftwareParam->u32ConcatNum; i++) {
2729 u64PriorNum += pstSoftwareParam->au32DetectInputChn[i] / SAMPLE_SVP_NNIE_COORDI_NUM;
2730 SAMPLE_SVP_CHECK_EXPR_RET(u64PriorNum > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2731 "Error,u64PriorNum should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2732 }
2733 u64DetectionSize += u64PriorNum * SAMPLE_SVP_NNIE_COORDI_NUM * sizeof(HI_U32);
2734 SAMPLE_SVP_CHECK_EXPR_RET(u64DetectionSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2735 "Error,u64DetectionSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2736
2737 u64DetectionSize += u64PriorNum * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * sizeof(HI_U32) * 2;
2738 SAMPLE_SVP_CHECK_EXPR_RET(u64DetectionSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2739 "Error,u64DetectionSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2740
2741 u64DetectionSize += u64PriorNum * 2 * sizeof(HI_U32);
2742 SAMPLE_SVP_CHECK_EXPR_RET(u64DetectionSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2743 "Error,u64DetectionSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2744 pstSoftwareParam->stGetResultTmpBuf.u32Size = (HI_U32)u64DetectionSize;
2745
2746 u64TotalSize += u64DetectionSize;
2747 SAMPLE_SVP_CHECK_EXPR_RET(u64TotalSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2748 "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2749 return (HI_U32)u64TotalSize;
2750 }
2751
SAMPLE_SVP_NNIE_Ssd_GetResult(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_SSD_SOFTWARE_PARAM_S * pstSoftwareParam)2752 HI_S32 SAMPLE_SVP_NNIE_Ssd_GetResult(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
2753 SAMPLE_SVP_NNIE_SSD_SOFTWARE_PARAM_S *pstSoftwareParam)
2754 {
2755 HI_S32 *aps32PermuteResult[SAMPLE_SVP_NNIE_SSD_REPORT_NODE_NUM];
2756 HI_S32 *aps32PriorboxOutputData[SAMPLE_SVP_NNIE_SSD_PRIORBOX_NUM];
2757 HI_S32 *aps32SoftMaxInputData[SAMPLE_SVP_NNIE_SSD_SOFTMAX_NUM];
2758 HI_S32 *aps32DetectionLocData[SAMPLE_SVP_NNIE_SSD_SOFTMAX_NUM];
2759 HI_S32 *ps32SoftMaxOutputData = NULL;
2760 HI_S32 *ps32DetectionOutTmpBuf = NULL;
2761 HI_U32 au32SoftMaxWidth[SAMPLE_SVP_NNIE_SSD_SOFTMAX_NUM];
2762 HI_U32 u32Size = 0;
2763 HI_S32 s32Ret = HI_SUCCESS;
2764 HI_U32 i = 0;
2765
2766 CHECK_NULL_PTR(pstNnieParam);
2767 CHECK_NULL_PTR(pstSoftwareParam);
2768 /* get permut result */
2769 for (i = 0; i < SAMPLE_SVP_NNIE_SSD_REPORT_NODE_NUM; i++) {
2770 aps32PermuteResult[i] =
2771 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[0].astDst[i].u64VirAddr);
2772 }
2773
2774 /* priorbox */
2775 aps32PriorboxOutputData[0] =
2776 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stPriorBoxTmpBuf.u64VirAddr);
2777 for (i = 1; i < SAMPLE_SVP_NNIE_SSD_PRIORBOX_NUM; i++) {
2778 u32Size = pstSoftwareParam->au32PriorBoxHeight[i - 1] * pstSoftwareParam->au32PriorBoxWidth[i - 1] *
2779 SAMPLE_SVP_NNIE_COORDI_NUM * 2 *
2780 (pstSoftwareParam->u32MaxSizeNum + pstSoftwareParam->u32MinSizeNum +
2781 pstSoftwareParam->au32InputAspectRatioNum[i - 1] * 2 * pstSoftwareParam->u32MinSizeNum);
2782 aps32PriorboxOutputData[i] = aps32PriorboxOutputData[i - 1] + u32Size;
2783 }
2784
2785 for (i = 0; i < SAMPLE_SVP_NNIE_SSD_PRIORBOX_NUM; i++) {
2786 s32Ret = SVP_NNIE_Ssd_PriorBoxForward(pstSoftwareParam->au32PriorBoxWidth[i],
2787 pstSoftwareParam->au32PriorBoxHeight[i], pstSoftwareParam->u32OriImWidth, pstSoftwareParam->u32OriImHeight,
2788 pstSoftwareParam->af32PriorBoxMinSize[i], pstSoftwareParam->u32MinSizeNum,
2789 pstSoftwareParam->af32PriorBoxMaxSize[i], pstSoftwareParam->u32MaxSizeNum, pstSoftwareParam->bFlip,
2790 pstSoftwareParam->bClip, pstSoftwareParam->au32InputAspectRatioNum[i],
2791 pstSoftwareParam->af32PriorBoxAspectRatio[i], pstSoftwareParam->af32PriorBoxStepWidth[i],
2792 pstSoftwareParam->af32PriorBoxStepHeight[i], pstSoftwareParam->f32Offset, pstSoftwareParam->as32PriorBoxVar,
2793 aps32PriorboxOutputData[i]);
2794 SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
2795 "Error,SVP_NNIE_Ssd_PriorBoxForward failed!\n");
2796 }
2797
2798 /* softmax */
2799 ps32SoftMaxOutputData = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stSoftMaxTmpBuf.u64VirAddr);
2800 for (i = 0; i < SAMPLE_SVP_NNIE_SSD_SOFTMAX_NUM; i++) {
2801 aps32SoftMaxInputData[i] = aps32PermuteResult[i * 2 + 1];
2802 au32SoftMaxWidth[i] = pstSoftwareParam->au32ConvChannel[i * 2 + 1];
2803 }
2804
2805 (void)SVP_NNIE_Ssd_SoftmaxForward(pstSoftwareParam->u32SoftMaxInHeight, pstSoftwareParam->au32SoftMaxInChn,
2806 pstSoftwareParam->u32ConcatNum, pstSoftwareParam->au32ConvStride, au32SoftMaxWidth, aps32SoftMaxInputData,
2807 ps32SoftMaxOutputData);
2808
2809 /* detection */
2810 ps32DetectionOutTmpBuf = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stGetResultTmpBuf.u64VirAddr);
2811 for (i = 0; i < SAMPLE_SVP_NNIE_SSD_PRIORBOX_NUM; i++) {
2812 aps32DetectionLocData[i] = aps32PermuteResult[i * 2];
2813 }
2814
2815 (void)SVP_NNIE_Ssd_DetectionOutForward(pstSoftwareParam->u32ConcatNum, pstSoftwareParam->u32ConfThresh,
2816 pstSoftwareParam->u32ClassNum, pstSoftwareParam->u32TopK, pstSoftwareParam->u32KeepTopK,
2817 pstSoftwareParam->u32NmsThresh, pstSoftwareParam->au32DetectInputChn, aps32DetectionLocData,
2818 aps32PriorboxOutputData, ps32SoftMaxOutputData, ps32DetectionOutTmpBuf,
2819 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstScore.u64VirAddr),
2820 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstRoi.u64VirAddr),
2821 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stClassRoiNum.u64VirAddr));
2822
2823 return s32Ret;
2824 }
2825
SAMPLE_SVP_NNIE_Yolov1_GetResultTmpBuf(SAMPLE_SVP_NNIE_YOLOV1_SOFTWARE_PARAM_S * pstSoftwareParam)2826 HI_U32 SAMPLE_SVP_NNIE_Yolov1_GetResultTmpBuf(SAMPLE_SVP_NNIE_YOLOV1_SOFTWARE_PARAM_S *pstSoftwareParam)
2827 {
2828 HI_U64 u64TotalGridNum, u64TotalBboxNum, u64TransSize, u64Probsize, u64ScoreSize, u64StackSize, u64TotalSize;
2829 HI_U32 u32ClassNum;
2830 HI_U32 u32EachGridBboxNum;
2831 HI_U64 u64EachVecSize;
2832
2833 CHECK_NULL_PTR(pstSoftwareParam);
2834 u32ClassNum = pstSoftwareParam->u32ClassNum;
2835 u32EachGridBboxNum = pstSoftwareParam->u32BboxNumEachGrid;
2836 u64TotalGridNum = (HI_U64)pstSoftwareParam->u32GridNumHeight * pstSoftwareParam->u32GridNumWidth;
2837 SAMPLE_SVP_CHECK_EXPR_RET(u64TotalGridNum > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2838 "Error,u64TotalGridNum should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2839
2840 u64TotalBboxNum = (HI_U64)u64TotalGridNum * u32EachGridBboxNum;
2841 SAMPLE_SVP_CHECK_EXPR_RET(u64TotalBboxNum > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2842 "Error,u64TotalBboxNum should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2843
2844 u64EachVecSize = (u32ClassNum + (HI_U64)u32EachGridBboxNum * (SAMPLE_SVP_NNIE_COORDI_NUM + 1)) * sizeof(HI_U32);
2845 SAMPLE_SVP_CHECK_EXPR_RET(u64EachVecSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2846 "Error,u64EachVecSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2847
2848 u64TransSize = u64EachVecSize * u64TotalGridNum;
2849 SAMPLE_SVP_CHECK_EXPR_RET(u64TransSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2850 "Error,u64TransSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2851
2852 u64Probsize = u32ClassNum * u64TotalBboxNum * sizeof(HI_U32);
2853 SAMPLE_SVP_CHECK_EXPR_RET(u64Probsize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2854 "Error,u64Probsize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2855
2856 u64ScoreSize = u64TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_YOLOV1_SCORE_S);
2857 SAMPLE_SVP_CHECK_EXPR_RET(u64ScoreSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2858 "Error,u64ScoreSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2859
2860 u64StackSize = u64TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_STACK_S);
2861 SAMPLE_SVP_CHECK_EXPR_RET(u64StackSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2862 "Error,u64StackSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2863
2864 u64TotalSize = u64TransSize + u64Probsize + u64ScoreSize + u64StackSize;
2865 SAMPLE_SVP_CHECK_EXPR_RET(u64TotalSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2866 "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2867 return (HI_U64)u64TotalSize;
2868 }
2869
SAMPLE_SVP_NNIE_Yolov1_GetResult(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_YOLOV1_SOFTWARE_PARAM_S * pstSoftwareParam)2870 HI_S32 SAMPLE_SVP_NNIE_Yolov1_GetResult(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
2871 SAMPLE_SVP_NNIE_YOLOV1_SOFTWARE_PARAM_S *pstSoftwareParam)
2872 {
2873 HI_FLOAT *pf32ClassProb = NULL;
2874 HI_FLOAT *pf32Confidence = NULL;
2875 HI_FLOAT *pf32Bbox = NULL;
2876 HI_S32 *ps32Score = NULL;
2877 HI_U32 *pu32AssistBuf = NULL;
2878 HI_U32 u32Offset = 0;
2879 HI_U32 u32Index = 0;
2880 HI_U32 u32GridNum;
2881 HI_U32 i, j, k;
2882 HI_U8 *pu8Tmp = NULL;
2883 HI_FLOAT f32Score = 0.0f;
2884
2885 CHECK_NULL_PTR(pstNnieParam);
2886 CHECK_NULL_PTR(pstSoftwareParam);
2887 u32GridNum = pstSoftwareParam->u32GridNumHeight * pstSoftwareParam->u32GridNumWidth;
2888 pu8Tmp = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U8, pstSoftwareParam->stGetResultTmpBuf.u64VirAddr);
2889 u32Offset = u32GridNum * (pstSoftwareParam->u32BboxNumEachGrid * SAMPLE_SVP_NNIE_BBOX_AND_CONFIDENCE +
2890 pstSoftwareParam->u32ClassNum);
2891 pf32ClassProb = (HI_FLOAT *)pu8Tmp;
2892 pf32Confidence = pf32ClassProb + u32GridNum * pstSoftwareParam->u32ClassNum;
2893 pf32Bbox = pf32Confidence + u32GridNum * pstSoftwareParam->u32BboxNumEachGrid;
2894
2895 ps32Score = (HI_S32 *)(pf32ClassProb + u32Offset);
2896 pu32AssistBuf =
2897 (HI_U32 *)(ps32Score + u32GridNum * pstSoftwareParam->u32BboxNumEachGrid * pstSoftwareParam->u32ClassNum);
2898
2899 for (i = 0; i < u32Offset; i++) {
2900 ((HI_FLOAT *)pu8Tmp)[i] =
2901 (SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[0].astDst[0].u64VirAddr))[i] /
2902 ((HI_FLOAT)SAMPLE_SVP_NNIE_QUANT_BASE);
2903 }
2904 for (i = 0; i < u32GridNum; i++) {
2905 for (j = 0; j < pstSoftwareParam->u32BboxNumEachGrid; j++) {
2906 for (k = 0; k < pstSoftwareParam->u32ClassNum; k++) {
2907 u32Offset = k * u32GridNum * pstSoftwareParam->u32BboxNumEachGrid;
2908 f32Score = *(pf32ClassProb + i * pstSoftwareParam->u32ClassNum + k) *
2909 *(pf32Confidence + i * pstSoftwareParam->u32BboxNumEachGrid + j);
2910 *(ps32Score + u32Offset + u32Index) = (HI_S32)(f32Score * SAMPLE_SVP_NNIE_QUANT_BASE);
2911 }
2912 u32Index++;
2913 }
2914 }
2915
2916 for (i = 0; i < u32GridNum; i++) {
2917 for (j = 0; j < pstSoftwareParam->u32BboxNumEachGrid; j++) {
2918 pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2919 SAMPLE_SVP_NNIE_X_MIN_OFFSET] =
2920 (pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2921 SAMPLE_SVP_NNIE_X_MIN_OFFSET] +
2922 i % pstSoftwareParam->u32GridNumWidth) /
2923 pstSoftwareParam->u32GridNumWidth * pstSoftwareParam->u32OriImWidth;
2924 pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2925 SAMPLE_SVP_NNIE_Y_MIN_OFFSET] =
2926 (pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2927 SAMPLE_SVP_NNIE_Y_MIN_OFFSET] +
2928 i / pstSoftwareParam->u32GridNumWidth) /
2929 pstSoftwareParam->u32GridNumHeight * pstSoftwareParam->u32OriImHeight;
2930 pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2931 SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
2932 pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2933 SAMPLE_SVP_NNIE_X_MAX_OFFSET] *
2934 pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2935 SAMPLE_SVP_NNIE_X_MAX_OFFSET] *
2936 pstSoftwareParam->u32OriImWidth;
2937 pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2938 SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
2939 pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2940 SAMPLE_SVP_NNIE_Y_MAX_OFFSET] *
2941 pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2942 SAMPLE_SVP_NNIE_Y_MAX_OFFSET] *
2943 pstSoftwareParam->u32OriImHeight;
2944 }
2945 }
2946
2947 (void)SVP_NNIE_Yolov1_Detection(ps32Score, pf32Bbox, pstSoftwareParam->u32ClassNum,
2948 u32GridNum * pstSoftwareParam->u32BboxNumEachGrid, pstSoftwareParam->u32ConfThresh,
2949 pstSoftwareParam->u32NmsThresh, pstSoftwareParam->u32OriImWidth, pstSoftwareParam->u32OriImHeight,
2950 pu32AssistBuf, SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstScore.u64VirAddr),
2951 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstRoi.u64VirAddr),
2952 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stClassRoiNum.u64VirAddr));
2953 return HI_SUCCESS;
2954 }
2955
SAMPLE_SVP_NNIE_Yolov2_GetResultTmpBuf(SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S * pstSoftwareParam)2956 HI_U32 SAMPLE_SVP_NNIE_Yolov2_GetResultTmpBuf(SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S *pstSoftwareParam)
2957 {
2958 HI_U64 u64TotalGridNum, u64ParaLength, u64TotalBboxNum, u64TransSize, u64BboxBufSize, u64BboxTmpBufSize;
2959 HI_U64 u64StackSize, u64TotalSize;
2960
2961 CHECK_NULL_PTR(pstSoftwareParam);
2962 u64TotalGridNum = (HI_U64)pstSoftwareParam->u32GridNumHeight * pstSoftwareParam->u32GridNumWidth;
2963 SAMPLE_SVP_CHECK_EXPR_RET(u64TotalGridNum > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2964 "Error,u64TotalGridNum should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2965
2966 u64ParaLength =
2967 pstSoftwareParam->u32BboxNumEachGrid * (SAMPLE_SVP_NNIE_COORDI_NUM + 1 + (HI_U64)pstSoftwareParam->u32ClassNum);
2968 SAMPLE_SVP_CHECK_EXPR_RET(u64ParaLength > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2969 "Error,u64ParaLength should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2970
2971 u64TotalBboxNum = u64TotalGridNum * pstSoftwareParam->u32BboxNumEachGrid;
2972 SAMPLE_SVP_CHECK_EXPR_RET(u64TotalBboxNum > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2973 "Error,u64TotalBboxNum should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2974
2975 u64TransSize = u64TotalGridNum * u64ParaLength * sizeof(HI_U32);
2976 SAMPLE_SVP_CHECK_EXPR_RET(u64TransSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2977 "Error,u64TransSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2978
2979 u64StackSize = u64TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_STACK_S);
2980 SAMPLE_SVP_CHECK_EXPR_RET(u64StackSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2981 "Error,u64StackSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2982
2983 u64BboxBufSize = u64TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S);
2984 SAMPLE_SVP_CHECK_EXPR_RET(u64BboxBufSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2985 "Error,u64BboxBufSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2986
2987 u64BboxTmpBufSize = u64TotalGridNum * u64ParaLength * sizeof(HI_FLOAT);
2988 SAMPLE_SVP_CHECK_EXPR_RET(u64BboxTmpBufSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2989 "Error,u64BboxTmpBufSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2990
2991 u64TotalSize = u64TransSize + u64StackSize + u64BboxBufSize + u64BboxTmpBufSize;
2992 SAMPLE_SVP_CHECK_EXPR_RET(u64TotalSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2993 "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2994 return (HI_U32)u64TotalSize;
2995 }
2996
SAMPLE_SVP_NNIE_Yolov2_GetResult(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S * pstSoftwareParam)2997 HI_S32 SAMPLE_SVP_NNIE_Yolov2_GetResult(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
2998 SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S *pstSoftwareParam)
2999 {
3000 CHECK_NULL_PTR(pstNnieParam);
3001 CHECK_NULL_PTR(pstSoftwareParam);
3002 return SVP_NNIE_Yolov2_GetResult(
3003 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[0].astDst[0].u64VirAddr),
3004 pstSoftwareParam->u32GridNumWidth, pstSoftwareParam->u32GridNumHeight, pstSoftwareParam->u32BboxNumEachGrid,
3005 pstSoftwareParam->u32ClassNum, pstSoftwareParam->u32OriImWidth, pstSoftwareParam->u32OriImHeight,
3006 pstSoftwareParam->u32MaxRoiNum, pstSoftwareParam->u32NmsThresh, pstSoftwareParam->u32ConfThresh,
3007 pstSoftwareParam->af32Bias,
3008 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32, pstSoftwareParam->stGetResultTmpBuf.u64VirAddr),
3009 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstScore.u64VirAddr),
3010 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstRoi.u64VirAddr),
3011 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stClassRoiNum.u64VirAddr));
3012 }
3013
SAMPLE_SVP_NNIE_Yolov3_GetResultTmpBuf(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S * pstSoftwareParam)3014 HI_U32 SAMPLE_SVP_NNIE_Yolov3_GetResultTmpBuf(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
3015 SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S *pstSoftwareParam)
3016 {
3017 HI_U64 u64TotalSize, u64AssistStackSize, u64TotalBboxSize, u64DstBlobSize, u64Tmp;
3018 HI_U64 u64TotalBboxNum = 0;
3019 HI_U64 u64MaxBlobSize = 0;
3020 HI_U32 i;
3021
3022 CHECK_NULL_PTR(pstNnieParam);
3023 CHECK_NULL_PTR(pstSoftwareParam);
3024 for (i = 0; i < pstNnieParam->pstModel->astSeg[0].u16DstNum; i++) {
3025 u64DstBlobSize = pstNnieParam->pstModel->astSeg[0].astDstNode[i].unShape.stWhc.u32Width * sizeof(HI_U32);
3026 SAMPLE_SVP_CHECK_EXPR_RET(u64DstBlobSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
3027 "Error,u64DstBlobSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
3028
3029 u64DstBlobSize *= pstNnieParam->pstModel->astSeg[0].astDstNode[i].unShape.stWhc.u32Height;
3030 SAMPLE_SVP_CHECK_EXPR_RET(u64DstBlobSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
3031 "Error,u64DstBlobSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
3032
3033 u64DstBlobSize *= pstNnieParam->pstModel->astSeg[0].astDstNode[i].unShape.stWhc.u32Chn;
3034 SAMPLE_SVP_CHECK_EXPR_RET(u64DstBlobSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
3035 "Error,u64DstBlobSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
3036
3037 if (u64MaxBlobSize < u64DstBlobSize) {
3038 u64MaxBlobSize = u64DstBlobSize;
3039 }
3040
3041 u64Tmp = (HI_U64)pstSoftwareParam->au32GridNumWidth[i] * pstSoftwareParam->au32GridNumHeight[i];
3042 SAMPLE_SVP_CHECK_EXPR_RET(u64Tmp > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
3043 "Error, %u-th au32GridNumWidth * au32GridNumHeight should be less than %u!\n", i, SAMPLE_SVP_NNIE_MAX_MEM);
3044 u64Tmp *= pstSoftwareParam->u32BboxNumEachGrid;
3045 SAMPLE_SVP_CHECK_EXPR_RET(u64Tmp > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
3046 "Error,u64Tmp should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
3047
3048 u64TotalBboxNum += u64Tmp;
3049 SAMPLE_SVP_CHECK_EXPR_RET(u64TotalBboxNum > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
3050 "Error,u64TotalBboxNum should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
3051 }
3052 u64AssistStackSize = u64TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_STACK_S);
3053 SAMPLE_SVP_CHECK_EXPR_RET(u64AssistStackSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
3054 "Error,u64TotalBboxSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
3055
3056 u64TotalBboxSize = u64TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_YOLOV3_BBOX_S);
3057 SAMPLE_SVP_CHECK_EXPR_RET(u64TotalBboxSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
3058 "Error,u64TotalBboxSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
3059
3060 u64TotalSize = (u64MaxBlobSize + u64AssistStackSize + u64TotalBboxSize);
3061 SAMPLE_SVP_CHECK_EXPR_RET(u64TotalSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
3062 "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
3063
3064 return (HI_U32)u64TotalSize;
3065 }
3066
SAMPLE_SVP_NNIE_Yolov3_GetResult(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S * pstSoftwareParam)3067 HI_S32 SAMPLE_SVP_NNIE_Yolov3_GetResult(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
3068 SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S *pstSoftwareParam)
3069 {
3070 HI_U32 i = 0;
3071 HI_U64 au64InputBlobAddr[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM] = {0};
3072 HI_U32 au32Stride[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM] = {0};
3073
3074 CHECK_NULL_PTR(pstNnieParam);
3075 CHECK_NULL_PTR(pstSoftwareParam);
3076 for (i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++) {
3077 au64InputBlobAddr[i] = pstNnieParam->astSegData[0].astDst[i].u64VirAddr;
3078 au32Stride[i] = pstNnieParam->astSegData[0].astDst[i].u32Stride;
3079 }
3080 return SVP_NNIE_Yolov3_GetResult(au64InputBlobAddr, pstSoftwareParam->au32GridNumWidth,
3081 pstSoftwareParam->au32GridNumHeight, au32Stride, pstSoftwareParam->u32BboxNumEachGrid,
3082 pstSoftwareParam->u32ClassNum, pstSoftwareParam->u32OriImWidth, pstSoftwareParam->u32OriImHeight,
3083 pstSoftwareParam->u32MaxRoiNum, pstSoftwareParam->u32NmsThresh, pstSoftwareParam->u32ConfThresh,
3084 pstSoftwareParam->af32Bias,
3085 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stGetResultTmpBuf.u64VirAddr),
3086 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstScore.u64VirAddr),
3087 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstRoi.u64VirAddr),
3088 SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stClassRoiNum.u64VirAddr));
3089 }
3090
3091 #ifdef __cplusplus
3092 }
3093 #endif
3094