• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2022 HiSilicon (Shanghai) Technologies CO., LIMITED.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include "sample_svp_nnie_software.h"
16 #include <math.h>
17 
18 #ifdef __cplusplus // If used by C++ code,
19 extern "C" {       // we need to export the C interface
20 #endif
21 
22 static HI_FLOAT s_af32ExpCoef[10][16] = {
23     {
24         1.0f, 1.00024f, 1.00049f, 1.00073f, 1.00098f, 1.00122f, 1.00147f, 1.00171f, 1.00196f,
25         1.0022f, 1.00244f, 1.00269f, 1.00293f, 1.00318f, 1.00342f, 1.00367f
26     },
27     {
28         1.0f, 1.00391f, 1.00784f, 1.01179f, 1.01575f, 1.01972f, 1.02371f, 1.02772f, 1.03174f,
29         1.03578f, 1.03984f, 1.04391f, 1.04799f, 1.05209f, 1.05621f, 1.06034f
30     },
31     {
32         1.0f, 1.06449f, 1.13315f, 1.20623f, 1.28403f, 1.36684f, 1.45499f, 1.54883f, 1.64872f,
33         1.75505f, 1.86825f, 1.98874f, 2.117f, 2.25353f, 2.39888f, 2.55359f
34     },
35     {
36         1.0f, 2.71828f, 7.38906f, 20.0855f, 54.5981f, 148.413f, 403.429f, 1096.63f, 2980.96f,
37         8103.08f, 22026.5f, 59874.1f, 162755.0f, 442413.0f, 1.2026e+006f, 3.26902e+006f
38     },
39     {
40         1.0f, 8.88611e+006f, 7.8963e+013f, 7.01674e+020f, 6.23515e+027f, 5.54062e+034f,
41         5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f,
42         5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f
43     },
44     {
45         1.0f, 0.999756f, 0.999512f, 0.999268f, 0.999024f, 0.99878f, 0.998536f, 0.998292f,
46         0.998049f, 0.997805f, 0.997562f, 0.997318f, 0.997075f, 0.996831f, 0.996588f, 0.996345f
47     },
48     {
49         1.0f, 0.996101f, 0.992218f, 0.98835f, 0.984496f, 0.980658f, 0.976835f, 0.973027f,
50         0.969233f, 0.965455f, 0.961691f, 0.957941f, 0.954207f, 0.950487f, 0.946781f, 0.94309f
51     },
52     {
53         1.0f, 0.939413f, 0.882497f, 0.829029f, 0.778801f, 0.731616f, 0.687289f, 0.645649f, 0.606531f,
54         0.569783f, 0.535261f, 0.502832f, 0.472367f, 0.443747f, 0.416862f, 0.391606f
55     },
56     {
57         1.0f, 0.367879f, 0.135335f, 0.0497871f, 0.0183156f, 0.00673795f, 0.00247875f, 0.000911882f,
58         0.000335463f, 0.00012341f, 4.53999e-005f, 1.67017e-005f, 6.14421e-006f, 2.26033e-006f,
59         8.31529e-007f, 3.05902e-007f
60     },
61     {
62         1.0f, 1.12535e-007f, 1.26642e-014f, 1.42516e-021f, 1.60381e-028f, 1.80485e-035f, 2.03048e-042f,
63         0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
64     }
65 };
66 
SVP_NNIE_QuickExp(HI_S32 s32Value)67 static HI_FLOAT SVP_NNIE_QuickExp(HI_S32 s32Value)
68 {
69     HI_U32 tmp_val;
70 
71     if (s32Value < 0) {
72         tmp_val = *((HI_U32 *)&s32Value);
73         tmp_val = (~tmp_val + 0x00000001);
74         /* get each 4 bit */
75         return s_af32ExpCoef[5][tmp_val & 0x0000000F] * s_af32ExpCoef[6][(tmp_val >> 4) & 0x0000000F] *
76             s_af32ExpCoef[7][(tmp_val >> 8) & 0x0000000F] * s_af32ExpCoef[8][(tmp_val >> 12) & 0x0000000F] *
77             s_af32ExpCoef[9][(tmp_val >> 16) & 0x0000000F];
78     } else {
79         tmp_val = (HI_U32)s32Value;
80         /* get each 4 bit */
81         return s_af32ExpCoef[0][tmp_val & 0x0000000F] * s_af32ExpCoef[1][(tmp_val >> 4) & 0x0000000F] *
82             s_af32ExpCoef[2][(tmp_val >> 8) & 0x0000000F] * s_af32ExpCoef[3][(tmp_val >> 12) & 0x0000000F] *
83             s_af32ExpCoef[4][(tmp_val >> 16) & 0x0000000F];
84     }
85 }
86 
SVP_NNIE_SoftMax(HI_FLOAT * pf32Src,HI_U32 u32Num)87 static HI_S32 SVP_NNIE_SoftMax(HI_FLOAT *pf32Src, HI_U32 u32Num)
88 {
89     HI_FLOAT f32Max = 0;
90     HI_FLOAT f32Sum = 0;
91     HI_U32 i = 0;
92 
93     for (i = 0; i < u32Num; ++i) {
94         if (f32Max < pf32Src[i]) {
95             f32Max = pf32Src[i];
96         }
97     }
98 
99     for (i = 0; i < u32Num; ++i) {
100         pf32Src[i] = (HI_FLOAT)SVP_NNIE_QuickExp((HI_S32)((pf32Src[i] - f32Max) * SAMPLE_SVP_NNIE_QUANT_BASE));
101         f32Sum += pf32Src[i];
102     }
103 
104     for (i = 0; i < u32Num; ++i) {
105         pf32Src[i] /= f32Sum;
106     }
107     return HI_SUCCESS;
108 }
109 
SVP_NNIE_Sigmoid(HI_FLOAT * pf32Src,HI_U32 u32Num)110 static HI_S32 SVP_NNIE_Sigmoid(HI_FLOAT *pf32Src, HI_U32 u32Num)
111 {
112     HI_U32 i = 0;
113 
114     for (i = 0; i < u32Num; i++) {
115         pf32Src[i] = SAMPLE_SVP_NNIE_SIGMOID(pf32Src[i]);
116     }
117     return HI_SUCCESS;
118 }
119 
/*
 * Prototype :   SVP_NNIE_SSD_SoftMax
 * Description : softmax on fixed-point values. Inputs and outputs are integers scaled by
 *               SAMPLE_SVP_NNIE_QUANT_BASE; the exponentials are computed with exp().
 * Input :       HI_S32*  ps32Src       [IN]   scaled input values
 *               HI_S32   s32ArraySize  [IN]   number of elements in ps32Src / ps32Dst
 *               HI_S32*  ps32Dst       [OUT]  scaled softmax result
 * Return :      HI_SUCCESS
 * Note :        the maximum is seeded with the first element, not with 0. With a 0 seed,
 *               strongly negative inputs all truncated to 0, the sum became 0 and the final
 *               division produced a NaN that was then converted to an integer.
 */
static HI_S32 SVP_NNIE_SSD_SoftMax(HI_S32 *ps32Src, HI_S32 s32ArraySize, HI_S32 *ps32Dst)
{
    HI_S32 s32Max;
    HI_S32 s32Sum = 0;
    HI_S32 i = 0;

    if (s32ArraySize <= 0) {
        return HI_SUCCESS; /* nothing to do, and ps32Src[0] must not be read */
    }

    s32Max = ps32Src[0];
    for (i = 1; i < s32ArraySize; ++i) {
        if (s32Max < ps32Src[i]) {
            s32Max = ps32Src[i];
        }
    }

    /* the maximum element maps to QUANT_BASE * exp(0), so the sum is never zero */
    for (i = 0; i < s32ArraySize; ++i) {
        ps32Dst[i] =
            (HI_S32)(SAMPLE_SVP_NNIE_QUANT_BASE * exp((HI_FLOAT)(ps32Src[i] - s32Max) / SAMPLE_SVP_NNIE_QUANT_BASE));
        s32Sum += ps32Dst[i];
    }

    for (i = 0; i < s32ArraySize; ++i) {
        ps32Dst[i] = (HI_S32)(((HI_FLOAT)ps32Dst[i] / (HI_FLOAT)s32Sum) * SAMPLE_SVP_NNIE_QUANT_BASE);
    }
    return HI_SUCCESS;
}
141 
/*
 * Prototype :   SVP_NNIE_Argswap
 * Description : exchanges two proposal records element by element
 * Input :       HI_S32*  ps32Src1  [IN/OUT]  first record, SAMPLE_SVP_NNIE_PROPOSAL_WIDTH values
 *               HI_S32*  ps32Src2  [IN/OUT]  second record, SAMPLE_SVP_NNIE_PROPOSAL_WIDTH values
 */
static void SVP_NNIE_Argswap(HI_S32 *ps32Src1, HI_S32 *ps32Src2)
{
    HI_U32 u32Col;

    for (u32Col = 0; u32Col < SAMPLE_SVP_NNIE_PROPOSAL_WIDTH; u32Col++) {
        HI_S32 s32Held = ps32Src1[u32Col];

        ps32Src1[u32Col] = ps32Src2[u32Col];
        ps32Src2[u32Col] = s32Held;
    }
}
152 
153 /*
154  * Prototype :   SVP_NNIE_NonRecursiveArgQuickSort
155  * Description : this function is used to do quick sort
156  * Input :       HI_S32*             ps32Array         [IN]   the array need to be sorted
157  * HI_S32              s32Low            [IN]   the start position of quick sort
158  * HI_S32              s32High           [IN]   the end position of quick sort
159  * SAMPLE_SVP_NNIE_STACK_S *  pstStack   [IN]   the buffer used to store start positions and end positions
160  */
SVP_NNIE_NonRecursiveArgQuickSort(HI_S32 * ps32Array,HI_S32 s32Low,HI_S32 s32High,SAMPLE_SVP_NNIE_STACK_S * pstStack,HI_U32 u32MaxNum)161 static HI_S32 SVP_NNIE_NonRecursiveArgQuickSort(HI_S32 *ps32Array, HI_S32 s32Low, HI_S32 s32High,
162     SAMPLE_SVP_NNIE_STACK_S *pstStack, HI_U32 u32MaxNum)
163 {
164     HI_S32 i = s32Low;
165     HI_S32 j = s32High;
166     HI_S32 s32Top = 0;
167     HI_S32 s32KeyConfidence = ps32Array[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * s32Low + SAMPLE_SVP_NNIE_SCORE_OFFSET];
168     pstStack[s32Top].s32Min = s32Low;
169     pstStack[s32Top].s32Max = s32High;
170 
171     while (s32Top > -1) {
172         s32Low = pstStack[s32Top].s32Min;
173         s32High = pstStack[s32Top].s32Max;
174         i = s32Low;
175         j = s32High;
176         s32Top--;
177 
178         s32KeyConfidence = ps32Array[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * s32Low + SAMPLE_SVP_NNIE_SCORE_OFFSET];
179 
180         while (i < j) {
181             while ((i < j) &&
182                    (s32KeyConfidence > ps32Array[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + SAMPLE_SVP_NNIE_SCORE_OFFSET])) {
183                 j--;
184             }
185             if (i < j) {
186                 SVP_NNIE_Argswap(&ps32Array[i * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH],
187                     &ps32Array[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH]);
188                 i++;
189             }
190 
191             while ((i < j) &&
192                    (s32KeyConfidence < ps32Array[i * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + SAMPLE_SVP_NNIE_SCORE_OFFSET])) {
193                 i++;
194             }
195             if (i < j) {
196                 SVP_NNIE_Argswap(&ps32Array[i * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH],
197                     &ps32Array[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH]);
198                 j--;
199             }
200         }
201 
202         if ((hi_u32)s32Low <= u32MaxNum) {
203             if (s32Low < i - 1) {
204                 s32Top++;
205                 pstStack[s32Top].s32Min = s32Low;
206                 pstStack[s32Top].s32Max = i - 1;
207             }
208 
209             if (s32High > i + 1) {
210                 s32Top++;
211                 pstStack[s32Top].s32Min = i + 1;
212                 pstStack[s32Top].s32Max = s32High;
213             }
214         }
215     }
216     return HI_SUCCESS;
217 }
218 
219 /*
220  * Prototype :   SVP_NNIE_Overlap
221  * Description : this function is used to calculate the overlap ratio of two proposals
222  * Input :     HI_S32              s32XMin1          [IN]   first input proposal's minimum value of x coordinate
223  * HI_S32              s32YMin1          [IN]   first input proposal's minimum value of y coordinate of first input
224  * proposal HI_S32              s32XMax1          [IN]   first input proposal's maximum value of x coordinate of first
225  * input proposal HI_S32              s32YMax1          [IN]   first input proposal's maximum value of y coordinate of
226  * first input proposal HI_S32              s32XMin1          [IN]   second input proposal's minimum value of x
227  * coordinate HI_S32              s32YMin1          [IN]   second input proposal's minimum value of y coordinate of
228  * first input proposal HI_S32              s32XMax1          [IN]   second input proposal's maximum value of x
229  * coordinate of first input proposal HI_S32              s32YMax1        [IN]   second input proposal's maximum value
230  * of y coordinate of first input proposal HI_FLOAT            *pf32IoU          [IN OUT]the pointer of the IoU value
231  */
SVP_NNIE_Overlap(HI_S32 s32XMin1,HI_S32 s32YMin1,HI_S32 s32XMax1,HI_S32 s32YMax1,HI_S32 s32XMin2,HI_S32 s32YMin2,HI_S32 s32XMax2,HI_S32 s32YMax2,HI_S32 * s32AreaSum,HI_S32 * s32AreaInter)232 static HI_S32 SVP_NNIE_Overlap(HI_S32 s32XMin1, HI_S32 s32YMin1, HI_S32 s32XMax1, HI_S32 s32YMax1, HI_S32 s32XMin2,
233     HI_S32 s32YMin2, HI_S32 s32XMax2, HI_S32 s32YMax2, HI_S32 *s32AreaSum, HI_S32 *s32AreaInter)
234 {
235     HI_S32 s32Inter = 0;
236     HI_S32 s32Total = 0;
237     HI_S32 s32XMin = 0;
238     HI_S32 s32YMin = 0;
239     HI_S32 s32XMax = 0;
240     HI_S32 s32YMax = 0;
241     HI_S32 s32Area1 = 0;
242     HI_S32 s32Area2 = 0;
243     HI_S32 s32InterWidth = 0;
244     HI_S32 s32InterHeight = 0;
245 
246     s32XMin = SAMPLE_SVP_NNIE_MAX(s32XMin1, s32XMin2);
247     s32YMin = SAMPLE_SVP_NNIE_MAX(s32YMin1, s32YMin2);
248     s32XMax = SAMPLE_SVP_NNIE_MIN(s32XMax1, s32XMax2);
249     s32YMax = SAMPLE_SVP_NNIE_MIN(s32YMax1, s32YMax2);
250 
251     s32InterWidth = s32XMax - s32XMin + 1;
252     s32InterHeight = s32YMax - s32YMin + 1;
253 
254     s32InterWidth = (s32InterWidth >= 0) ? s32InterWidth : 0;
255     s32InterHeight = (s32InterHeight >= 0) ? s32InterHeight : 0;
256 
257     s32Inter = s32InterWidth * s32InterHeight;
258     s32Area1 = (s32XMax1 - s32XMin1 + 1) * (s32YMax1 - s32YMin1 + 1);
259     s32Area2 = (s32XMax2 - s32XMin2 + 1) * (s32YMax2 - s32YMin2 + 1);
260 
261     s32Total = s32Area1 + s32Area2 - s32Inter;
262 
263     *s32AreaSum = s32Total;
264     *s32AreaInter = s32Inter;
265     return HI_SUCCESS;
266 }
267 
268 /*
269  * Prototype :   SVP_NNIE_FilterLowScoreBbox
270  * Description : this function is used to remove low score bboxes, in order to speed-up Sort & RPN procedures.
271  * Input :      HI_S32*         ps32Proposals     [IN]   proposals
272  * HI_U32          u32NumAnchors     [IN]   input anchors' num
273  * HI_U32          u32FilterThresh   [IN]   rpn configuration
274  * HI_U32*         u32NumAfterFilter [OUT]  output num of anchors after low score filtering
275  */
SVP_NNIE_FilterLowScoreBbox(HI_S32 * ps32Proposals,HI_U32 u32AnchorsNum,HI_U32 u32FilterThresh,HI_U32 * u32NumAfterFilter)276 static HI_S32 SVP_NNIE_FilterLowScoreBbox(HI_S32 *ps32Proposals, HI_U32 u32AnchorsNum, HI_U32 u32FilterThresh,
277     HI_U32 *u32NumAfterFilter)
278 {
279     HI_U32 u32ProposalCnt = u32AnchorsNum;
280     HI_U32 i = 0;
281 
282     if (u32FilterThresh > 0) {
283         for (i = 0; i < u32AnchorsNum; i++) {
284             if (ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_SCORE_OFFSET] <
285                 (HI_S32)u32FilterThresh) {
286                 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] = 1;
287             }
288         }
289 
290         u32ProposalCnt = 0;
291         for (i = 0; i < u32AnchorsNum; i++) {
292             if (ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] == 0) {
293                 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32ProposalCnt] =
294                     ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i];
295                 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32ProposalCnt + SAMPLE_SVP_NNIE_Y_MIN_OFFSET] =
296                     ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_Y_MIN_OFFSET];
297                 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32ProposalCnt + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
298                     ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_X_MAX_OFFSET];
299                 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32ProposalCnt + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
300                     ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_Y_MAX_OFFSET];
301                 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32ProposalCnt + SAMPLE_SVP_NNIE_SCORE_OFFSET] =
302                     ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_SCORE_OFFSET];
303                 ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32ProposalCnt + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] =
304                     ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET];
305                 u32ProposalCnt++;
306             }
307         }
308     }
309     *u32NumAfterFilter = u32ProposalCnt;
310     return HI_SUCCESS;
311 }
312 
313 /*
314  * Prototype :   SVP_NNIE_NonMaxSuppression
315  * Description : this function is used to do non maximum suppression
316  * Input :       HI_S32*           ps32Proposals     [IN]   proposals
317  * HI_U32            u32AnchorsNum     [IN]   anchors num
318  * HI_U32            u32NmsThresh      [IN]   non maximum suppression threshold
319  * HI_U32            u32MaxRoiNum      [IN]  The max roi num for the roi pooling
320  */
SVP_NNIE_NonMaxSuppression(HI_S32 * ps32Proposals,HI_U32 u32AnchorsNum,HI_U32 u32NmsThresh,HI_U32 u32MaxRoiNum)321 static HI_S32 SVP_NNIE_NonMaxSuppression(HI_S32 *ps32Proposals, HI_U32 u32AnchorsNum, HI_U32 u32NmsThresh,
322     HI_U32 u32MaxRoiNum)
323 {
324     HI_S32 s32XMin1 = 0;
325     HI_S32 s32YMin1 = 0;
326     HI_S32 s32XMax1 = 0;
327     HI_S32 s32YMax1 = 0;
328     HI_S32 s32XMin2 = 0;
329     HI_S32 s32YMin2 = 0;
330     HI_S32 s32XMax2 = 0;
331     HI_S32 s32YMax2 = 0;
332     HI_S32 s32AreaTotal = 0;
333     HI_S32 s32AreaInter = 0;
334     HI_U32 i = 0;
335     HI_U32 j = 0;
336     HI_U32 u32Num = 0;
337     HI_BOOL bNoOverlap = HI_TRUE;
338 
339     for (i = 0; i < u32AnchorsNum && u32Num < u32MaxRoiNum; i++) {
340         if (ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] == 0) {
341             u32Num++;
342             s32XMin1 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i];
343             s32YMin1 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_Y_MIN_OFFSET];
344             s32XMax1 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_X_MAX_OFFSET];
345             s32YMax1 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_Y_MAX_OFFSET];
346             for (j = i + 1; j < u32AnchorsNum; j++) {
347                 if (ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * j + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] == 0) {
348                     s32XMin2 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * j];
349                     s32YMin2 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * j + SAMPLE_SVP_NNIE_Y_MIN_OFFSET];
350                     s32XMax2 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * j + SAMPLE_SVP_NNIE_X_MAX_OFFSET];
351                     s32YMax2 = ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * j + SAMPLE_SVP_NNIE_Y_MAX_OFFSET];
352                     bNoOverlap = (s32XMin2 > s32XMax1) || (s32XMax2 < s32XMin1) || (s32YMin2 > s32YMax1) ||
353                         (s32YMax2 < s32YMin1);
354                     if (bNoOverlap) {
355                         continue;
356                     }
357                     (void)SVP_NNIE_Overlap(s32XMin1, s32YMin1, s32XMax1, s32YMax1, s32XMin2, s32YMin2, s32XMax2,
358                         s32YMax2, &s32AreaTotal, &s32AreaInter);
359                     if (s32AreaInter * SAMPLE_SVP_NNIE_QUANT_BASE > ((HI_S32)u32NmsThresh * s32AreaTotal)) {
360                         if (ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_SCORE_OFFSET] >=
361                             ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * j + SAMPLE_SVP_NNIE_SCORE_OFFSET]) {
362                             ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * j + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET]
363                                 = 1;
364                         } else {
365                             ps32Proposals[SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET]
366                                 = 1;
367                         }
368                     }
369                 }
370             }
371         }
372     }
373 
374     return HI_SUCCESS;
375 }
376 
377 /*
378  * Prototype    : SVP_NNIE_Cnn_GetTopN
379  * Description  : Cnn get top N
380  * Input        : HI_S32   *ps32Fc       [IN]  FC data pointer
381  * HI_U32   u32FcStride   [IN]  FC stride
382  * HI_U32   u32ClassNum   [IN]  Class Num
383  * HI_U32   u32BatchNum   [IN]  Batch Num
384  * HI_U32   u32TopN       [IN]  TopN
385  * HI_S32   *ps32TmpBuf   [IN]  assist buffer pointer
386  * HI_U32   u32TopNStride [IN]  TopN result stride
387  * HI_S32   *ps32GetTopN  [OUT]  TopN result
388  */
SVP_NNIE_Cnn_GetTopN(HI_S32 * ps32Fc,HI_U32 u32FcStride,HI_U32 u32ClassNum,HI_U32 u32BatchNum,HI_U32 u32TopN,HI_S32 * ps32TmpBuf,HI_U32 u32TopNStride,HI_S32 * ps32GetTopN)389 static HI_S32 SVP_NNIE_Cnn_GetTopN(HI_S32 *ps32Fc, HI_U32 u32FcStride, HI_U32 u32ClassNum, HI_U32 u32BatchNum,
390     HI_U32 u32TopN, HI_S32 *ps32TmpBuf, HI_U32 u32TopNStride, HI_S32 *ps32GetTopN)
391 {
392     HI_U32 i = 0, j = 0, n = 0;
393     HI_U32 u32Id = 0;
394     HI_S32 *ps32Score = NULL;
395     SAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S stTmp = { 0 };
396     SAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S *pstTopN = NULL;
397     SAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S *pstTmpBuf = (SAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S *)ps32TmpBuf;
398     for (n = 0; n < u32BatchNum; n++) {
399         ps32Score = (HI_S32 *)((HI_U8 *)ps32Fc + n * u32FcStride);
400         pstTopN = (SAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S *)((HI_U8 *)ps32GetTopN + n * u32TopNStride);
401         for (i = 0; i < u32ClassNum; i++) {
402             pstTmpBuf[i].u32ClassId = i;
403             pstTmpBuf[i].u32Confidence = (HI_U32)ps32Score[i];
404         }
405 
406         for (i = 0; i < u32TopN; i++) {
407             u32Id = i;
408             pstTopN[i].u32ClassId = pstTmpBuf[i].u32ClassId;
409             pstTopN[i].u32Confidence = pstTmpBuf[i].u32Confidence;
410             for (j = i + 1; j < u32ClassNum; j++) {
411                 if (pstTmpBuf[u32Id].u32Confidence < pstTmpBuf[j].u32Confidence) {
412                     u32Id = j;
413                 }
414             }
415 
416             stTmp.u32ClassId = pstTmpBuf[u32Id].u32ClassId;
417             stTmp.u32Confidence = pstTmpBuf[u32Id].u32Confidence;
418 
419             if (i != u32Id) {
420                 pstTmpBuf[u32Id].u32ClassId = pstTmpBuf[i].u32ClassId;
421                 pstTmpBuf[u32Id].u32Confidence = pstTmpBuf[i].u32Confidence;
422                 pstTmpBuf[i].u32ClassId = stTmp.u32ClassId;
423                 pstTmpBuf[i].u32Confidence = stTmp.u32Confidence;
424 
425                 pstTopN[i].u32ClassId = stTmp.u32ClassId;
426                 pstTopN[i].u32Confidence = stTmp.u32Confidence;
427             }
428         }
429     }
430 
431     return HI_SUCCESS;
432 }
433 
434 /*
435  * Prototype :   SVP_NNIE_Rpn
436  * Description : this function is used to do RPN
437  * Input :     HI_S32** pps32Src              [IN] convolution data
438  * HI_U32 u32NumRatioAnchors      [IN] Ratio anchor num
439  * HI_U32 u32NumScaleAnchors      [IN] scale anchor num
440  * HI_U32* au32Scales             [IN] scale value
441  * HI_U32* au32Ratios             [IN] ratio value
442  * HI_U32 u32OriImHeight          [IN] input image height
443  * HI_U32 u32OriImWidth           [IN] input image width
444  * HI_U32* pu32ConvHeight         [IN] convolution height
445  * HI_U32* pu32ConvWidth          [IN] convolution width
446  * HI_U32* pu32ConvChannel        [IN] convolution channel
447  * HI_U32  u32ConvStride          [IN] convolution stride
448  * HI_U32 u32MaxRois              [IN] max roi num
449  * HI_U32 u32MinSize              [IN] min size
450  * HI_U32 u32SpatialScale         [IN] spatial scale
451  * HI_U32 u32NmsThresh            [IN] NMS thresh
452  * HI_U32 u32FilterThresh         [IN] filter thresh
453  * HI_U32 u32NumBeforeNms         [IN] num before doing NMS
454  * HI_U32 *pu32MemPool            [IN] assist buffer
455  * HI_S32 *ps32ProposalResult     [OUT] proposal result
456  * HI_U32* pu32NumRois            [OUT] proposal num
457  */
SVP_NNIE_Rpn(HI_S32 ** pps32Src,HI_U32 u32NumRatioAnchors,HI_U32 u32NumScaleAnchors,HI_U32 * au32Scales,HI_U32 * au32Ratios,HI_U32 u32OriImHeight,HI_U32 u32OriImWidth,HI_U32 * pu32ConvHeight,HI_U32 * pu32ConvWidth,HI_U32 * pu32ConvChannel,HI_U32 u32ConvStride,HI_U32 u32MaxRois,HI_U32 u32MinSize,HI_U32 u32SpatialScale,HI_U32 u32NmsThresh,HI_U32 u32FilterThresh,HI_U32 u32NumBeforeNms,HI_U32 * pu32MemPool,HI_S32 * ps32ProposalResult,HI_U32 u32DstStride,HI_U32 * pu32NumRois)458 static HI_S32 SVP_NNIE_Rpn(HI_S32 **pps32Src, HI_U32 u32NumRatioAnchors, HI_U32 u32NumScaleAnchors, HI_U32 *au32Scales,
459     HI_U32 *au32Ratios, HI_U32 u32OriImHeight, HI_U32 u32OriImWidth, HI_U32 *pu32ConvHeight, HI_U32 *pu32ConvWidth,
460     HI_U32 *pu32ConvChannel, HI_U32 u32ConvStride, HI_U32 u32MaxRois, HI_U32 u32MinSize, HI_U32 u32SpatialScale,
461     HI_U32 u32NmsThresh, HI_U32 u32FilterThresh, HI_U32 u32NumBeforeNms, HI_U32 *pu32MemPool,
462     HI_S32 *ps32ProposalResult, HI_U32 u32DstStride, HI_U32 *pu32NumRois)
463 {
464     HI_U32 u32Size = 0;
465     HI_S32 *ps32Anchors = NULL;
466     HI_S32 *ps32BboxDelta = NULL;
467     HI_S32 *ps32Proposals = NULL;
468     HI_U32 *pu32Ptr = NULL;
469     HI_S32 *ps32Ptr = NULL;
470     HI_U32 u32NumAfterFilter = 0;
471     HI_U32 u32NumAnchors = 0;
472     HI_FLOAT f32BaseW = 0;
473     HI_FLOAT f32BaseH = 0;
474     HI_FLOAT f32BaseXCtr = 0;
475     HI_FLOAT f32BaseYCtr = 0;
476     HI_FLOAT f32SizeRatios = 0;
477     HI_FLOAT *pf32RatioAnchors = NULL;
478     HI_FLOAT *pf32Ptr = NULL;
479     HI_FLOAT *pf32Ptr2 = NULL;
480     HI_FLOAT *pf32ScaleAnchors = NULL;
481     HI_FLOAT *pf32Scores = NULL;
482     HI_FLOAT f32Ratios = 0;
483     HI_FLOAT f32Size = 0;
484     HI_U32 u32PixelInterval = 0;
485     HI_U32 u32SrcBboxIndex = 0;
486     HI_U32 u32SrcFgProbIndex = 0;
487     HI_U32 u32SrcBgProbIndex = 0;
488     HI_U32 u32SrcBboxBias = 0;
489     HI_U32 u32SrcProbBias = 0;
490     HI_U32 u32DesBox = 0;
491     HI_U32 u32BgBlobSize = 0;
492     HI_U32 u32AnchorsPerPixel = 0;
493     HI_U32 u32MapSize = 0;
494     HI_U32 u32LineSize = 0;
495     HI_S32 *ps32Ptr2 = NULL;
496     HI_S32 *ps32Ptr3 = NULL;
497     HI_S32 s32ProposalWidth = 0;
498     HI_S32 s32ProposalHeight = 0;
499     HI_S32 s32ProposalCenterX = 0;
500     HI_S32 s32ProposalCenterY = 0;
501     HI_S32 s32PredW = 0;
502     HI_S32 s32PredH = 0;
503     HI_S32 s32PredCenterX = 0;
504     HI_S32 s32PredCenterY = 0;
505     HI_U32 u32DesBboxDeltaIndex = 0;
506     HI_U32 u32DesScoreIndex = 0;
507     HI_U32 u32RoiCount = 0;
508     SAMPLE_SVP_NNIE_STACK_S *pstStack = NULL;
509     HI_S32 s32Ret = HI_SUCCESS;
510     HI_U32 c = 0;
511     HI_U32 h = 0;
512     HI_U32 w = 0;
513     HI_U32 i = 0;
514     HI_U32 j = 0;
515     HI_U32 p = 0;
516     HI_U32 q = 0;
517     HI_U32 z = 0;
518     HI_U32 au32BaseAnchor[4] = {0, 0, (u32MinSize -1), (u32MinSize -1)};
519 
520     /* Faster RCNN */
521     /* calculate the start pointer of each part in MemPool */
522     pu32Ptr = (HI_U32 *)pu32MemPool;
523     ps32Anchors = (HI_S32 *)pu32Ptr;
524     u32NumAnchors = u32NumRatioAnchors * u32NumScaleAnchors * (pu32ConvHeight[0] * pu32ConvWidth[0]);
525     u32Size = SAMPLE_SVP_NNIE_COORDI_NUM * u32NumAnchors;
526     pu32Ptr += u32Size;
527 
528     ps32BboxDelta = (HI_S32 *)pu32Ptr;
529     pu32Ptr += u32Size;
530 
531     ps32Proposals = (HI_S32 *)pu32Ptr;
532     u32Size = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32NumAnchors;
533     pu32Ptr += u32Size;
534 
535     pf32RatioAnchors = (HI_FLOAT *)pu32Ptr;
536     pf32Ptr = (HI_FLOAT *)pu32Ptr;
537     u32Size = u32NumRatioAnchors * SAMPLE_SVP_NNIE_COORDI_NUM;
538     pf32Ptr = pf32Ptr + u32Size;
539 
540     pf32ScaleAnchors = pf32Ptr;
541     u32Size = u32NumScaleAnchors * u32NumRatioAnchors * SAMPLE_SVP_NNIE_COORDI_NUM;
542     pf32Ptr = pf32Ptr + u32Size;
543 
544     pf32Scores = pf32Ptr;
545     u32Size = u32NumAnchors * SAMPLE_SVP_NNIE_SCORE_NUM;
546     pf32Ptr = pf32Ptr + u32Size;
547 
548     pstStack = (SAMPLE_SVP_NNIE_STACK_S *)pf32Ptr;
549 
550     /* Generate the base anchor */
551     f32BaseW = (HI_FLOAT)(au32BaseAnchor[SAMPLE_SVP_NNIE_X_MAX_OFFSET] -
552         au32BaseAnchor[SAMPLE_SVP_NNIE_X_MIN_OFFSET] + 1);
553     f32BaseH = (HI_FLOAT)(au32BaseAnchor[SAMPLE_SVP_NNIE_Y_MAX_OFFSET] -
554         au32BaseAnchor[SAMPLE_SVP_NNIE_Y_MIN_OFFSET] + 1);
555     f32BaseXCtr = (HI_FLOAT)(au32BaseAnchor[0] + ((f32BaseW - 1) * SAMPLE_SVP_NNIE_HALF));
556     f32BaseYCtr = (HI_FLOAT)(au32BaseAnchor[1] + ((f32BaseH - 1) * SAMPLE_SVP_NNIE_HALF));
557 
558     /* Generate Ratio Anchors for the base anchor */
559     pf32Ptr = pf32RatioAnchors;
560     f32Size = f32BaseW * f32BaseH;
561     for (i = 0; i < u32NumRatioAnchors; i++) {
562         f32Ratios = (HI_FLOAT)au32Ratios[i] / SAMPLE_SVP_NNIE_QUANT_BASE;
563         f32SizeRatios = f32Size / f32Ratios;
564         f32BaseW = sqrt(f32SizeRatios);
565         f32BaseW = (HI_FLOAT)(1.0 *
566             ((f32BaseW) >= 0 ? (HI_S32)(f32BaseW + SAMPLE_SVP_NNIE_HALF) : (HI_S32)(f32BaseW - SAMPLE_SVP_NNIE_HALF)));
567         f32BaseH = f32BaseW * f32Ratios;
568         f32BaseH = (HI_FLOAT)(1.0 *
569             ((f32BaseH) >= 0 ? (HI_S32)(f32BaseH + SAMPLE_SVP_NNIE_HALF) : (HI_S32)(f32BaseH - SAMPLE_SVP_NNIE_HALF)));
570 
571         *pf32Ptr++ = (HI_FLOAT)(f32BaseXCtr - ((f32BaseW - 1) * SAMPLE_SVP_NNIE_HALF));
572         *(pf32Ptr++) = (HI_FLOAT)(f32BaseYCtr - ((f32BaseH - 1) * SAMPLE_SVP_NNIE_HALF));
573         *(pf32Ptr++) = (HI_FLOAT)(f32BaseXCtr + ((f32BaseW - 1) * SAMPLE_SVP_NNIE_HALF));
574         *(pf32Ptr++) = (HI_FLOAT)(f32BaseYCtr + ((f32BaseH - 1) * SAMPLE_SVP_NNIE_HALF));
575     }
576 
577     /* Generate Scale Anchors for each Ratio Anchor */
578     pf32Ptr = pf32RatioAnchors;
579     pf32Ptr2 = pf32ScaleAnchors;
580     /* Generate Scale Anchors for one pixel */
581     for (i = 0; i < u32NumRatioAnchors; i++) {
582         for (j = 0; j < u32NumScaleAnchors; j++) {
583             f32BaseW = *(pf32Ptr + SAMPLE_SVP_NNIE_X_MAX_OFFSET) - *(pf32Ptr) + 1;
584             f32BaseH = *(pf32Ptr + SAMPLE_SVP_NNIE_Y_MAX_OFFSET) - *(pf32Ptr + SAMPLE_SVP_NNIE_Y_MIN_OFFSET) + 1;
585             f32BaseXCtr = (HI_FLOAT)(*(pf32Ptr) + ((f32BaseW - 1) * SAMPLE_SVP_NNIE_HALF));
586             f32BaseYCtr = (HI_FLOAT)(*(pf32Ptr + 1) + ((f32BaseH - 1) * SAMPLE_SVP_NNIE_HALF));
587 
588             *(pf32Ptr2++) = (HI_FLOAT)(f32BaseXCtr -
589                 ((f32BaseW * ((HI_FLOAT)au32Scales[j] / SAMPLE_SVP_NNIE_QUANT_BASE) - 1) * SAMPLE_SVP_NNIE_HALF));
590             *(pf32Ptr2++) = (HI_FLOAT)(f32BaseYCtr -
591                 ((f32BaseH * ((HI_FLOAT)au32Scales[j] / SAMPLE_SVP_NNIE_QUANT_BASE) - 1) * SAMPLE_SVP_NNIE_HALF));
592             *(pf32Ptr2++) = (HI_FLOAT)(f32BaseXCtr +
593                 ((f32BaseW * ((HI_FLOAT)au32Scales[j] / SAMPLE_SVP_NNIE_QUANT_BASE) - 1) * SAMPLE_SVP_NNIE_HALF));
594             *(pf32Ptr2++) = (HI_FLOAT)(f32BaseYCtr +
595                 ((f32BaseH * ((HI_FLOAT)au32Scales[j] / SAMPLE_SVP_NNIE_QUANT_BASE) - 1) * SAMPLE_SVP_NNIE_HALF));
596         }
597         pf32Ptr += SAMPLE_SVP_NNIE_COORDI_NUM;
598     }
599 
600     /* Copy the anchors to every pixel in the feature map */
601     ps32Ptr = ps32Anchors;
602     if (u32SpatialScale == 0) {
603         printf("Divisor u32SpatialScale cannot be 0!\n");
604         return HI_FAILURE;
605     }
606     u32PixelInterval = SAMPLE_SVP_NNIE_QUANT_BASE / u32SpatialScale;
607 
608     for (p = 0; p < pu32ConvHeight[0]; p++) {
609         for (q = 0; q < pu32ConvWidth[0]; q++) {
610             pf32Ptr2 = pf32ScaleAnchors;
611             for (z = 0; z < u32NumScaleAnchors * u32NumRatioAnchors; z++) {
612                 *(ps32Ptr++) = (HI_S32)(q * u32PixelInterval + *(pf32Ptr2++));
613                 *(ps32Ptr++) = (HI_S32)(p * u32PixelInterval + *(pf32Ptr2++));
614                 *(ps32Ptr++) = (HI_S32)(q * u32PixelInterval + *(pf32Ptr2++));
615                 *(ps32Ptr++) = (HI_S32)(p * u32PixelInterval + *(pf32Ptr2++));
616             }
617         }
618     }
619 
620     /* do transpose, convert the blob from (M,C,H,W) to (M,H,W,C) */
621     u32MapSize = pu32ConvHeight[1] * u32ConvStride / sizeof(HI_U32);
622     u32AnchorsPerPixel = u32NumRatioAnchors * u32NumScaleAnchors;
623     u32BgBlobSize = u32AnchorsPerPixel * u32MapSize;
624     u32LineSize = u32ConvStride / sizeof(HI_U32);
625     u32SrcProbBias = 0;
626     u32SrcBboxBias = 0;
627 
628     for (c = 0; c < pu32ConvChannel[1]; c++) {
629         for (h = 0; h < pu32ConvHeight[1]; h++) {
630             for (w = 0; w < pu32ConvWidth[1]; w++) {
631                 u32SrcBboxIndex = u32SrcBboxBias + c * u32MapSize + h * u32LineSize + w;
632                 u32SrcBgProbIndex =
633                     u32SrcProbBias + (c / SAMPLE_SVP_NNIE_COORDI_NUM) * u32MapSize + h * u32LineSize + w;
634                 u32SrcFgProbIndex = u32BgBlobSize + u32SrcBgProbIndex;
635 
636                 u32DesBox = (u32AnchorsPerPixel) * (h * pu32ConvWidth[1] + w) + c / SAMPLE_SVP_NNIE_COORDI_NUM;
637 
638                 u32DesBboxDeltaIndex = SAMPLE_SVP_NNIE_COORDI_NUM * u32DesBox + c % SAMPLE_SVP_NNIE_COORDI_NUM;
639                 ps32BboxDelta[u32DesBboxDeltaIndex] = (HI_S32)pps32Src[1][u32SrcBboxIndex];
640 
641                 u32DesScoreIndex = (SAMPLE_SVP_NNIE_SCORE_NUM)*u32DesBox;
642                 pf32Scores[u32DesScoreIndex] =
643                     (HI_FLOAT)((HI_S32)pps32Src[0][u32SrcBgProbIndex]) / SAMPLE_SVP_NNIE_QUANT_BASE;
644                 pf32Scores[u32DesScoreIndex + 1] =
645                     (HI_FLOAT)((HI_S32)pps32Src[0][u32SrcFgProbIndex]) / SAMPLE_SVP_NNIE_QUANT_BASE;
646             }
647         }
648     }
649 
650     /* do softmax */
651     pf32Ptr = pf32Scores;
652     for (i = 0; i < u32NumAnchors; i++) {
653         s32Ret = SVP_NNIE_SoftMax(pf32Ptr, SAMPLE_SVP_NNIE_SCORE_NUM);
654         pf32Ptr += SAMPLE_SVP_NNIE_SCORE_NUM;
655     }
656 
657     /* BBox Transform */
658     /* use parameters from Conv3 to adjust the coordinates of anchors */
659     ps32Ptr = ps32Anchors;
660     ps32Ptr2 = ps32Proposals;
661     ps32Ptr3 = ps32BboxDelta;
662     for (i = 0; i < u32NumAnchors; i++) {
663         ps32Ptr = ps32Anchors;
664         ps32Ptr = ps32Ptr + SAMPLE_SVP_NNIE_COORDI_NUM * i;
665         ps32Ptr2 = ps32Proposals;
666         ps32Ptr2 = ps32Ptr2 + SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
667         ps32Ptr3 = ps32BboxDelta;
668         ps32Ptr3 = ps32Ptr3 + SAMPLE_SVP_NNIE_COORDI_NUM * i;
669         pf32Ptr = pf32Scores;
670         pf32Ptr = pf32Ptr + i * (SAMPLE_SVP_NNIE_SCORE_NUM);
671 
672         s32ProposalWidth = *(ps32Ptr + SAMPLE_SVP_NNIE_X_MAX_OFFSET) - *(ps32Ptr) + 1;
673         s32ProposalHeight = *(ps32Ptr + SAMPLE_SVP_NNIE_Y_MAX_OFFSET) - *(ps32Ptr + SAMPLE_SVP_NNIE_Y_MIN_OFFSET) + 1;
674         s32ProposalCenterX = *(ps32Ptr) + (HI_S32)(s32ProposalWidth * SAMPLE_SVP_NNIE_HALF);
675         s32ProposalCenterY = *(ps32Ptr + 1) + (HI_S32)(s32ProposalHeight * SAMPLE_SVP_NNIE_HALF);
676         s32PredCenterX =
677             (HI_S32)(((HI_FLOAT)(*(ps32Ptr3)) / SAMPLE_SVP_NNIE_QUANT_BASE) * s32ProposalWidth + s32ProposalCenterX);
678         s32PredCenterY = (HI_S32)(((HI_FLOAT)(*(ps32Ptr3 + 1)) / SAMPLE_SVP_NNIE_QUANT_BASE) * s32ProposalHeight +
679             s32ProposalCenterY);
680 
681         s32PredW =
682             (HI_S32)(s32ProposalWidth * SVP_NNIE_QuickExp((HI_S32)(*(ps32Ptr3 + SAMPLE_SVP_NNIE_X_MAX_OFFSET))));
683         s32PredH =
684             (HI_S32)(s32ProposalHeight * SVP_NNIE_QuickExp((HI_S32)(*(ps32Ptr3 + SAMPLE_SVP_NNIE_Y_MAX_OFFSET))));
685         *(ps32Ptr2) = (HI_S32)(s32PredCenterX - SAMPLE_SVP_NNIE_HALF * s32PredW);
686         *(ps32Ptr2 + SAMPLE_SVP_NNIE_Y_MIN_OFFSET) = (HI_S32)(s32PredCenterY - SAMPLE_SVP_NNIE_HALF * s32PredH);
687         *(ps32Ptr2 + SAMPLE_SVP_NNIE_X_MAX_OFFSET) = (HI_S32)(s32PredCenterX + SAMPLE_SVP_NNIE_HALF * s32PredW);
688         *(ps32Ptr2 + SAMPLE_SVP_NNIE_Y_MAX_OFFSET) = (HI_S32)(s32PredCenterY + SAMPLE_SVP_NNIE_HALF * s32PredH);
689         *(ps32Ptr2 + SAMPLE_SVP_NNIE_SCORE_OFFSET) = (HI_S32)(*(pf32Ptr + 1) * SAMPLE_SVP_NNIE_QUANT_BASE);
690         *(ps32Ptr2 + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET) = 0;
691     }
692 
693     /* clip bbox */
694     for (i = 0; i < u32NumAnchors; i++) {
695         ps32Ptr = ps32Proposals;
696         ps32Ptr = ps32Ptr + SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
697         *ps32Ptr = SAMPLE_SVP_NNIE_MAX(SAMPLE_SVP_NNIE_MIN(*ps32Ptr, (HI_S32)u32OriImWidth - 1), 0);
698         *(ps32Ptr + SAMPLE_SVP_NNIE_Y_MIN_OFFSET) = SAMPLE_SVP_NNIE_MAX(
699             SAMPLE_SVP_NNIE_MIN(*(ps32Ptr + SAMPLE_SVP_NNIE_Y_MIN_OFFSET), (HI_S32)u32OriImHeight - 1), 0);
700         *(ps32Ptr + SAMPLE_SVP_NNIE_X_MAX_OFFSET) = SAMPLE_SVP_NNIE_MAX(
701             SAMPLE_SVP_NNIE_MIN(*(ps32Ptr + SAMPLE_SVP_NNIE_X_MAX_OFFSET), (HI_S32)u32OriImWidth - 1), 0);
702         *(ps32Ptr + SAMPLE_SVP_NNIE_Y_MAX_OFFSET) = SAMPLE_SVP_NNIE_MAX(
703             SAMPLE_SVP_NNIE_MIN(*(ps32Ptr + SAMPLE_SVP_NNIE_Y_MAX_OFFSET), (HI_S32)u32OriImHeight - 1), 0);
704     }
705 
706     /* remove the bboxes which are too small */
707     for (i = 0; i < u32NumAnchors; i++) {
708         ps32Ptr = ps32Proposals;
709         ps32Ptr = ps32Ptr + SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
710         s32ProposalWidth = *(ps32Ptr + SAMPLE_SVP_NNIE_X_MAX_OFFSET) - *(ps32Ptr) + 1;
711         s32ProposalHeight = *(ps32Ptr + SAMPLE_SVP_NNIE_Y_MAX_OFFSET) - *(ps32Ptr + 1) + 1;
712         if (s32ProposalWidth < (HI_S32)u32MinSize || s32ProposalHeight < (HI_S32)u32MinSize) {
713             *(ps32Ptr + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET) = 1;
714         }
715     }
716 
717     /* remove low score bboxes */
718     (void)SVP_NNIE_FilterLowScoreBbox(ps32Proposals, u32NumAnchors, u32FilterThresh, &u32NumAfterFilter);
719 
720     if (u32NumAfterFilter >= 1) {
721         (void)SVP_NNIE_NonRecursiveArgQuickSort(ps32Proposals, 0, u32NumAfterFilter - 1, pstStack, u32NumBeforeNms);
722     }
723     u32NumAfterFilter = (u32NumAfterFilter < u32NumBeforeNms) ? u32NumAfterFilter : u32NumBeforeNms;
724 
725     /* do nms to remove highly overlapped bbox */
726     (void)SVP_NNIE_NonMaxSuppression(ps32Proposals, u32NumAfterFilter, u32NmsThresh, u32MaxRois); /* function NMS */
727 
728     /* write the final result to output */
729     u32RoiCount = 0;
730     for (i = 0; i < u32NumAfterFilter; i++) {
731         ps32Ptr = ps32Proposals;
732         ps32Ptr = ps32Ptr + SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
733         if (*(ps32Ptr + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET) == 0) {
734             /* In this sample,the output Roi coordinates will be input in hardware,
735             so the type coordinates are convert to HI_S20Q12 */
736             ps32ProposalResult[u32DstStride / sizeof(HI_U32) * u32RoiCount] = *ps32Ptr * SAMPLE_SVP_NNIE_QUANT_BASE;
737             ps32ProposalResult[u32DstStride / sizeof(HI_U32) * u32RoiCount + 1] =
738                 *(ps32Ptr + 1) * SAMPLE_SVP_NNIE_QUANT_BASE;
739             ps32ProposalResult[u32DstStride / sizeof(HI_U32) * u32RoiCount + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
740                 *(ps32Ptr + SAMPLE_SVP_NNIE_X_MAX_OFFSET) * SAMPLE_SVP_NNIE_QUANT_BASE;
741             ps32ProposalResult[u32DstStride / sizeof(HI_U32) * u32RoiCount + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
742                 *(ps32Ptr + SAMPLE_SVP_NNIE_Y_MAX_OFFSET) * SAMPLE_SVP_NNIE_QUANT_BASE;
743             u32RoiCount++;
744         }
745         if (u32RoiCount >= u32MaxRois) {
746             break;
747         }
748     }
749 
750     *pu32NumRois = u32RoiCount;
751 
752     return s32Ret;
753 }
754 
755 /*
756  * Prototype :   SVP_NNIE_FasterRcnn_GetResult
757  * Description : this function is used to get FasterRcnn result
758  * Input :     HI_S32* ps32FcBbox             [IN] Bbox for Roi
759  * HI_S32 *ps32FcScore            [IN] Score for roi
760  * HI_S32 *ps32Proposals          [IN] proposal
761  * HI_U32 u32RoiCnt               [IN] Roi num
762  * HI_U32 *pu32ConfThresh         [IN] each class confidence thresh
763  * HI_U32 u32NmsThresh            [IN] Nms thresh
764  * HI_U32 u32MaxRoi               [IN] max roi
765  * HI_U32 u32ClassNum             [IN] class num
766  * HI_U32 u32OriImWidth           [IN] input image width
767  * HI_U32 u32OriImHeight          [IN] input image height
768  * HI_U32* pu32MemPool            [IN] assist buffer
769  * HI_S32* ps32DstScore           [OUT] result of score
770  * HI_S32* ps32DstRoi             [OUT] result of Bbox
771  * HI_S32* ps32ClassRoiNum        [OUT] result of the roi num of each class
772  */
SVP_NNIE_FasterRcnn_GetResult(HI_S32 * ps32FcBbox,HI_U32 u32BboxStride,HI_S32 * ps32FcScore,HI_U32 u32ScoreStride,HI_S32 * ps32Proposal,HI_U32 u32ProposalStride,HI_U32 u32RoiCnt,HI_U32 * pu32ConfThresh,HI_U32 u32NmsThresh,HI_U32 u32MaxRoi,HI_U32 u32ClassNum,HI_U32 u32OriImWidth,HI_U32 u32OriImHeight,HI_U32 * pu32MemPool,HI_S32 * ps32DstScore,HI_S32 * ps32DstBbox,HI_S32 * ps32ClassRoiNum)773 static HI_S32 SVP_NNIE_FasterRcnn_GetResult(HI_S32 *ps32FcBbox, HI_U32 u32BboxStride, HI_S32 *ps32FcScore,
774     HI_U32 u32ScoreStride, HI_S32 *ps32Proposal, HI_U32 u32ProposalStride, HI_U32 u32RoiCnt, HI_U32 *pu32ConfThresh,
775     HI_U32 u32NmsThresh, HI_U32 u32MaxRoi, HI_U32 u32ClassNum, HI_U32 u32OriImWidth, HI_U32 u32OriImHeight,
776     HI_U32 *pu32MemPool, HI_S32 *ps32DstScore, HI_S32 *ps32DstBbox, HI_S32 *ps32ClassRoiNum)
777 {
778     HI_U32 u32Size = 0;
779     HI_U32 u32ClsScoreChannels = 0;
780     HI_S32 *ps32Proposals = NULL;
781     HI_U32 u32FcScoreWidth = 0;
782     HI_U32 u32FcBboxWidth = 0;
783     HI_FLOAT f32ProposalWidth = 0.0;
784     HI_FLOAT f32ProposalHeight = 0.0;
785     HI_FLOAT f32ProposalCenterX = 0.0;
786     HI_FLOAT f32ProposalCenterY = 0.0;
787     HI_FLOAT f32PredW = 0.0;
788     HI_FLOAT f32PredH = 0.0;
789     HI_FLOAT f32PredCenterX = 0.0;
790     HI_FLOAT f32PredCenterY = 0.0;
791     HI_FLOAT *pf32FcScoresMemPool = NULL;
792     HI_S32 *ps32ProposalMemPool = NULL;
793     HI_S32 *ps32ProposalTmp = NULL;
794     HI_U32 u32FcBboxIndex = 0;
795     HI_U32 u32ProposalMemPoolIndex = 0;
796     HI_FLOAT *pf32Ptr = NULL;
797     HI_S32 *ps32Ptr = NULL;
798     HI_S32 *ps32Score = NULL;
799     HI_S32 *ps32Bbox = NULL;
800     HI_S32 *ps32RoiCnt = NULL;
801     HI_U32 u32RoiOutCnt = 0;
802     HI_U32 u32SrcIndex = 0;
803     HI_U32 u32DstIndex = 0;
804     HI_U32 i = 0;
805     HI_U32 j = 0;
806     HI_U32 k = 0;
807     SAMPLE_SVP_NNIE_STACK_S *pstStack = NULL;
808     HI_S32 s32Ret = HI_SUCCESS;
809     HI_U32 u32OffSet = 0;
810     HI_U32 u32ProposalOffset = u32ProposalStride / sizeof(HI_S32);
811 
812     /* Get or calculate parameters */
813     u32ClsScoreChannels = u32ClassNum; /* channel num is equal to class size, cls_score class */
814     u32FcScoreWidth = u32ScoreStride / sizeof(HI_U32);
815     u32FcBboxWidth = u32BboxStride / sizeof(HI_U32);
816 
817     /* Get Start Pointer of MemPool */
818     pf32FcScoresMemPool = (HI_FLOAT *)pu32MemPool;
819     pf32Ptr = pf32FcScoresMemPool;
820     u32Size = u32MaxRoi * u32ClsScoreChannels;
821     pf32Ptr += u32Size;
822 
823     ps32ProposalMemPool = (HI_S32 *)pf32Ptr;
824     ps32Ptr = ps32ProposalMemPool;
825     u32Size = u32MaxRoi * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH;
826     ps32Ptr += u32Size;
827     pstStack = (SAMPLE_SVP_NNIE_STACK_S *)ps32Ptr;
828 
829     u32DstIndex = 0;
830 
831     for (i = 0; i < u32RoiCnt; i++) {
832         for (k = 0; k < u32ClsScoreChannels; k++) {
833             u32SrcIndex = i * u32FcScoreWidth + k;
834             pf32FcScoresMemPool[u32DstIndex++] =
835                 (HI_FLOAT)((HI_S32)ps32FcScore[u32SrcIndex]) / SAMPLE_SVP_NNIE_QUANT_BASE;
836         }
837     }
838     ps32Proposals = (HI_S32 *)ps32Proposal;
839 
840     /* bbox transform */
841     for (j = 0; j < u32ClsScoreChannels; j++) {
842         for (i = 0; i < u32RoiCnt; i++) {
843             f32ProposalWidth = (HI_FLOAT)(ps32Proposals[u32ProposalOffset * i + SAMPLE_SVP_NNIE_X_MAX_OFFSET] -
844                 ps32Proposals[u32ProposalOffset * i] + 1);
845             f32ProposalHeight = (HI_FLOAT)(ps32Proposals[u32ProposalOffset * i + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] -
846                 ps32Proposals[u32ProposalOffset * i + 1] + 1);
847             f32ProposalCenterX =
848                 (HI_FLOAT)(ps32Proposals[u32ProposalOffset * i] + SAMPLE_SVP_NNIE_HALF * f32ProposalWidth);
849             f32ProposalCenterY =
850                 (HI_FLOAT)(ps32Proposals[u32ProposalOffset * i + 1] + SAMPLE_SVP_NNIE_HALF * f32ProposalHeight);
851 
852             u32FcBboxIndex = u32FcBboxWidth * i + SAMPLE_SVP_NNIE_COORDI_NUM * j;
853             f32PredCenterX = ((HI_FLOAT)ps32FcBbox[u32FcBboxIndex] / SAMPLE_SVP_NNIE_QUANT_BASE) * f32ProposalWidth +
854                 f32ProposalCenterX;
855             f32PredCenterY =
856                 ((HI_FLOAT)ps32FcBbox[u32FcBboxIndex + 1] / SAMPLE_SVP_NNIE_QUANT_BASE) * f32ProposalHeight +
857                 f32ProposalCenterY;
858             f32PredW = f32ProposalWidth *
859                 SVP_NNIE_QuickExp((HI_S32)(ps32FcBbox[u32FcBboxIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET]));
860             f32PredH = f32ProposalHeight *
861                 SVP_NNIE_QuickExp((HI_S32)(ps32FcBbox[u32FcBboxIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET]));
862 
863             u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
864             ps32ProposalMemPool[u32ProposalMemPoolIndex] = (HI_S32)(f32PredCenterX - SAMPLE_SVP_NNIE_HALF * f32PredW);
865             ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] =
866                 (HI_S32)(f32PredCenterY - SAMPLE_SVP_NNIE_HALF * f32PredH);
867             ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
868                 (HI_S32)(f32PredCenterX + SAMPLE_SVP_NNIE_HALF * f32PredW);
869             ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
870                 (HI_S32)(f32PredCenterY + SAMPLE_SVP_NNIE_HALF * f32PredH);
871             ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SCORE_OFFSET] =
872                 (HI_S32)(pf32FcScoresMemPool[u32ClsScoreChannels * i + j] * SAMPLE_SVP_NNIE_QUANT_BASE);
873             ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] = 0;
874         }
875 
876         /* clip bbox */
877         for (i = 0; i < u32RoiCnt; i++) {
878             u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
879             ps32ProposalMemPool[u32ProposalMemPoolIndex] =
880                 ((ps32ProposalMemPool[u32ProposalMemPoolIndex]) > ((HI_S32)u32OriImWidth - 1) ?
881                 ((HI_S32)u32OriImWidth - 1) :
882                 (ps32ProposalMemPool[u32ProposalMemPoolIndex])) > 0 ?
883                 ((ps32ProposalMemPool[u32ProposalMemPoolIndex]) > ((HI_S32)u32OriImWidth) ?
884                 ((HI_S32)u32OriImWidth - 1) :
885                 (ps32ProposalMemPool[u32ProposalMemPoolIndex])) : 0;
886             ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] =
887                 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + 1]) > ((HI_S32)u32OriImHeight - 1) ?
888                 ((HI_S32)u32OriImHeight - 1) :
889                 (ps32ProposalMemPool[u32ProposalMemPoolIndex + 1])) > 0 ?
890                 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + 1]) > ((HI_S32)u32OriImHeight) ?
891                 ((HI_S32)u32OriImHeight - 1) :
892                 (ps32ProposalMemPool[u32ProposalMemPoolIndex + 1])) : 0;
893             ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
894                 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET]) >
895                 ((HI_S32)u32OriImWidth - 1) ? ((HI_S32)u32OriImWidth - 1) :
896                 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET])) > 0 ?
897                 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET]) >
898                 ((HI_S32)u32OriImWidth) ? ((HI_S32)u32OriImWidth - 1) :
899                 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET])) : 0;
900             ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
901                 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET]) >
902                 ((HI_S32)u32OriImHeight - 1) ? ((HI_S32)u32OriImHeight - 1) :
903                 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET])) > 0 ?
904                 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET]) >
905                 ((HI_S32)u32OriImHeight) ? ((HI_S32)u32OriImHeight - 1) :
906                 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET])) : 0;
907         }
908 
909         ps32ProposalTmp = ps32ProposalMemPool;
910         if (u32RoiCnt >= 1) {
911             (void)SVP_NNIE_NonRecursiveArgQuickSort(ps32ProposalTmp, 0, u32RoiCnt - 1, pstStack, u32RoiCnt);
912         }
913         (void)SVP_NNIE_NonMaxSuppression(ps32ProposalTmp, u32RoiCnt, u32NmsThresh, u32RoiCnt);
914 
915         ps32Score = (HI_S32 *)ps32DstScore;
916         ps32Bbox = (HI_S32 *)ps32DstBbox;
917         ps32RoiCnt = (HI_S32 *)ps32ClassRoiNum;
918 
919         ps32Score += (HI_S32)(u32OffSet);
920         ps32Bbox += (HI_S32)(SAMPLE_SVP_NNIE_COORDI_NUM * u32OffSet);
921 
922         u32RoiOutCnt = 0;
923         for (i = 0; i < u32RoiCnt; i++) {
924             u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
925             if ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] == 0) &&
926                 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SCORE_OFFSET] >
927                 (HI_S32)pu32ConfThresh[j])) {
928                 ps32Score[u32RoiOutCnt] = ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SCORE_OFFSET];
929                 ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM] = ps32ProposalMemPool[u32ProposalMemPoolIndex];
930                 ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 1] =
931                     ps32ProposalMemPool[u32ProposalMemPoolIndex + 1];
932                 ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
933                     ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET];
934                 ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
935                     ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET];
936                 u32RoiOutCnt++;
937             }
938             if (u32RoiOutCnt >= u32RoiCnt)
939                 break;
940         }
941         ps32RoiCnt[j] = (HI_S32)u32RoiOutCnt;
942         u32OffSet += u32RoiOutCnt;
943     }
944     return s32Ret;
945 }
946 
947 /*
948  * Prototype :   SVP_NNIE_Pvanet_GetResult
949  * Description : this function is used to get FasterRcnn result
950  * Input :     HI_S32* ps32FcBbox             [IN] Bbox for Roi
951  * HI_S32 *ps32FcScore            [IN] Score for roi
952  * HI_S32 *ps32Proposals          [IN] proposal
953  * HI_U32 u32RoiCnt               [IN] Roi num
954  * HI_U32 *pu32ConfThresh         [IN] each class confidence thresh
955  * HI_U32 u32NmsThresh            [IN] Nms thresh
956  * HI_U32 u32MaxRoi               [IN] max roi
957  * HI_U32 u32ClassNum             [IN] class num
958  * HI_U32 u32OriImWidth           [IN] input image width
959  * HI_U32 u32OriImHeight          [IN] input image height
960  * HI_U32* pu32MemPool            [IN] assist buffer
961  * HI_S32* ps32DstScore           [OUT] result of score
962  * HI_S32* ps32DstRoi             [OUT] result of Bbox
963  * HI_S32* ps32ClassRoiNum        [OUT] result of the roi num of each class
964  */
SVP_NNIE_Pvanet_GetResult(HI_S32 * ps32FcBbox,HI_U32 u32BboxStride,HI_S32 * ps32FcScore,HI_U32 u32ScoreStride,HI_S32 * ps32Proposal,HI_U32 u32ProposalStride,HI_U32 u32RoiCnt,HI_U32 * pu32ConfThresh,HI_U32 u32NmsThresh,HI_U32 u32MaxRoi,HI_U32 u32ClassNum,HI_U32 u32OriImWidth,HI_U32 u32OriImHeight,HI_U32 * pu32MemPool,HI_S32 * ps32DstScore,HI_S32 * ps32DstBbox,HI_S32 * ps32ClassRoiNum)965 static HI_S32 SVP_NNIE_Pvanet_GetResult(HI_S32 *ps32FcBbox, HI_U32 u32BboxStride, HI_S32 *ps32FcScore,
966     HI_U32 u32ScoreStride, HI_S32 *ps32Proposal, HI_U32 u32ProposalStride, HI_U32 u32RoiCnt, HI_U32 *pu32ConfThresh,
967     HI_U32 u32NmsThresh, HI_U32 u32MaxRoi, HI_U32 u32ClassNum, HI_U32 u32OriImWidth, HI_U32 u32OriImHeight,
968     HI_U32 *pu32MemPool, HI_S32 *ps32DstScore, HI_S32 *ps32DstBbox, HI_S32 *ps32ClassRoiNum)
969 {
970     /* define variables */
971     HI_U32 u32Size = 0;
972     HI_U32 u32ClsScoreChannels = 0;
973     HI_S32 *ps32Proposals = NULL;
974     HI_U32 u32FcScoreWidth = 0;
975     HI_U32 u32FcBboxWidth = 0;
976     HI_FLOAT f32ProposalWidth = 0.0;
977     HI_FLOAT f32ProposalHeight = 0.0;
978     HI_FLOAT f32ProposalCenterX = 0.0;
979     HI_FLOAT f32ProposalCenterY = 0.0;
980     HI_FLOAT f32PredW = 0.0;
981     HI_FLOAT f32PredH = 0.0;
982     HI_FLOAT f32PredCenterX = 0.0;
983     HI_FLOAT f32PredCenterY = 0.0;
984     HI_FLOAT *pf32FcScoresMemPool = NULL;
985     HI_S32 *ps32ProposalMemPool = NULL;
986     HI_S32 *ps32ProposalTmp = NULL;
987     HI_U32 u32FcBboxIndex = 0;
988     HI_U32 u32ProposalMemPoolIndex = 0;
989     HI_FLOAT *pf32Ptr = NULL;
990     HI_S32 *ps32Ptr = NULL;
991     HI_S32 *ps32Score = NULL;
992     HI_S32 *ps32Bbox = NULL;
993     HI_S32 *ps32RoiCnt = NULL;
994     HI_U32 u32RoiOutCnt = 0;
995     HI_U32 u32SrcIndex = 0;
996     HI_U32 u32DstIndex = 0;
997     HI_U32 i = 0;
998     HI_U32 j = 0;
999     HI_U32 k = 0;
1000     SAMPLE_SVP_NNIE_STACK_S *pstStack = NULL;
1001     HI_S32 s32Ret = HI_SUCCESS;
1002     HI_U32 u32OffSet = 0;
1003     HI_U32 u32ProposalOffset = u32ProposalStride / sizeof(HI_S32);
1004 
1005     /* Get or calculate parameters */
1006     u32ClsScoreChannels = u32ClassNum; /* channel num is equal to class size, cls_score class */
1007     u32FcScoreWidth = u32ScoreStride / sizeof(HI_U32);
1008     u32FcBboxWidth = u32BboxStride / sizeof(HI_U32);
1009 
1010     /* Get Start Pointer of MemPool */
1011     pf32FcScoresMemPool = (HI_FLOAT *)pu32MemPool;
1012     pf32Ptr = pf32FcScoresMemPool;
1013     u32Size = u32MaxRoi * u32ClsScoreChannels;
1014     pf32Ptr += u32Size;
1015 
1016     ps32ProposalMemPool = (HI_S32 *)pf32Ptr;
1017     ps32Ptr = ps32ProposalMemPool;
1018     u32Size = u32MaxRoi * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH;
1019     ps32Ptr += u32Size;
1020     pstStack = (SAMPLE_SVP_NNIE_STACK_S *)ps32Ptr;
1021 
1022     u32DstIndex = 0;
1023 
1024     for (i = 0; i < u32RoiCnt; i++) {
1025         for (k = 0; k < u32ClsScoreChannels; k++) {
1026             u32SrcIndex = i * u32FcScoreWidth + k;
1027             pf32FcScoresMemPool[u32DstIndex++] =
1028                 (HI_FLOAT)((HI_S32)ps32FcScore[u32SrcIndex]) / SAMPLE_SVP_NNIE_QUANT_BASE;
1029         }
1030     }
1031     ps32Proposals = (HI_S32 *)ps32Proposal;
1032 
1033     /* bbox transform */
1034     for (j = 0; j < u32ClsScoreChannels; j++) {
1035         for (i = 0; i < u32RoiCnt; i++) {
1036             f32ProposalWidth =
1037                 (HI_FLOAT)(ps32Proposals[u32ProposalOffset * i + SAMPLE_SVP_NNIE_X_MAX_OFFSET] -
1038                 ps32Proposals[u32ProposalOffset * i] + 1);
1039             f32ProposalHeight =
1040                 (HI_FLOAT)(ps32Proposals[u32ProposalOffset * i + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] -
1041                 ps32Proposals[u32ProposalOffset * i + 1] + 1);
1042             f32ProposalCenterX =
1043                 (HI_FLOAT)(ps32Proposals[u32ProposalOffset * i] + SAMPLE_SVP_NNIE_HALF * f32ProposalWidth);
1044             f32ProposalCenterY =
1045                 (HI_FLOAT)(ps32Proposals[u32ProposalOffset * i + 1] + SAMPLE_SVP_NNIE_HALF * f32ProposalHeight);
1046 
1047             u32FcBboxIndex = u32FcBboxWidth * i + SAMPLE_SVP_NNIE_COORDI_NUM * j;
1048             f32PredCenterX = ((HI_FLOAT)ps32FcBbox[u32FcBboxIndex] / SAMPLE_SVP_NNIE_QUANT_BASE) * f32ProposalWidth +
1049                 f32ProposalCenterX;
1050             f32PredCenterY =
1051                 ((HI_FLOAT)ps32FcBbox[u32FcBboxIndex + 1] / SAMPLE_SVP_NNIE_QUANT_BASE) * f32ProposalHeight +
1052                 f32ProposalCenterY;
1053             f32PredW = f32ProposalWidth * SVP_NNIE_QuickExp(
1054                 (HI_S32)(ps32FcBbox[u32FcBboxIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET]));
1055             f32PredH = f32ProposalHeight * SVP_NNIE_QuickExp(
1056                 (HI_S32)(ps32FcBbox[u32FcBboxIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET]));
1057 
1058             u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
1059             ps32ProposalMemPool[u32ProposalMemPoolIndex] = (HI_S32)(f32PredCenterX - SAMPLE_SVP_NNIE_HALF * f32PredW);
1060             ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] =
1061                 (HI_S32)(f32PredCenterY - SAMPLE_SVP_NNIE_HALF * f32PredH);
1062             ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
1063                 (HI_S32)(f32PredCenterX + SAMPLE_SVP_NNIE_HALF * f32PredW);
1064             ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
1065                 (HI_S32)(f32PredCenterY + SAMPLE_SVP_NNIE_HALF * f32PredH);
1066             ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SCORE_OFFSET] =
1067                 (HI_S32)(pf32FcScoresMemPool[u32ClsScoreChannels * i + j] * SAMPLE_SVP_NNIE_QUANT_BASE);
1068             ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] = 0;
1069         }
1070 
1071         /* clip bbox */
1072         for (i = 0; i < u32RoiCnt; i++) {
1073             u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
1074             ps32ProposalMemPool[u32ProposalMemPoolIndex] =
1075                 ((ps32ProposalMemPool[u32ProposalMemPoolIndex]) > ((HI_S32)u32OriImWidth - 1) ?
1076                 ((HI_S32)u32OriImWidth - 1) :
1077                 (ps32ProposalMemPool[u32ProposalMemPoolIndex])) > 0 ?
1078                 ((ps32ProposalMemPool[u32ProposalMemPoolIndex]) > ((HI_S32)u32OriImWidth) ?
1079                 ((HI_S32)u32OriImWidth - 1) :
1080                 (ps32ProposalMemPool[u32ProposalMemPoolIndex])) :
1081                 0;
1082             ps32ProposalMemPool[u32ProposalMemPoolIndex + 1] =
1083                 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + 1]) > ((HI_S32)u32OriImHeight - 1) ?
1084                 ((HI_S32)u32OriImHeight - 1) :
1085                 (ps32ProposalMemPool[u32ProposalMemPoolIndex + 1])) > 0 ?
1086                 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + 1]) > ((HI_S32)u32OriImHeight) ?
1087                 ((HI_S32)u32OriImHeight - 1) :
1088                 (ps32ProposalMemPool[u32ProposalMemPoolIndex + 1])) :
1089                 0;
1090             ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
1091                 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET]) >
1092                 ((HI_S32)u32OriImWidth - 1) ? ((HI_S32)u32OriImWidth - 1) :
1093                 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET])) > 0 ?
1094                 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET]) >
1095                 ((HI_S32)u32OriImWidth) ? ((HI_S32)u32OriImWidth - 1) :
1096                 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET])) :
1097                 0;
1098             ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
1099                 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET]) >
1100                 ((HI_S32)u32OriImHeight - 1) ?
1101                 ((HI_S32)u32OriImHeight - 1) :
1102                 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET])) > 0 ?
1103                 ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET]) >
1104                 ((HI_S32)u32OriImHeight) ?
1105                 ((HI_S32)u32OriImHeight - 1) :
1106                 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET])) :
1107                 0;
1108         }
1109 
1110         ps32ProposalTmp = ps32ProposalMemPool;
1111         if (u32RoiCnt >= 1) {
1112             (void)SVP_NNIE_NonRecursiveArgQuickSort(ps32ProposalTmp, 0, u32RoiCnt - 1, pstStack, u32RoiCnt);
1113         }
1114         (void)SVP_NNIE_NonMaxSuppression(ps32ProposalTmp, u32RoiCnt, u32NmsThresh, u32RoiCnt);
1115 
1116         ps32Score = (HI_S32 *)ps32DstScore;
1117         ps32Bbox = (HI_S32 *)ps32DstBbox;
1118         ps32RoiCnt = (HI_S32 *)ps32ClassRoiNum;
1119 
1120         ps32Score += (HI_S32)(u32OffSet);
1121         ps32Bbox += (HI_S32)(SAMPLE_SVP_NNIE_COORDI_NUM * u32OffSet);
1122 
1123         u32RoiOutCnt = 0;
1124         for (i = 0; i < u32RoiCnt; i++) {
1125             u32ProposalMemPoolIndex = SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
1126             if ((ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] == 0) &&
1127                 (ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SCORE_OFFSET] >
1128                 (HI_S32)pu32ConfThresh[j])) {
1129                 ps32Score[u32RoiOutCnt] = ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_SCORE_OFFSET];
1130                 ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM] = ps32ProposalMemPool[u32ProposalMemPoolIndex];
1131                 ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 1] =
1132                     ps32ProposalMemPool[u32ProposalMemPoolIndex + 1];
1133                 ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
1134                     ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_X_MAX_OFFSET];
1135                 ps32Bbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
1136                     ps32ProposalMemPool[u32ProposalMemPoolIndex + SAMPLE_SVP_NNIE_Y_MAX_OFFSET];
1137                 u32RoiOutCnt++;
1138             }
1139             if (u32RoiOutCnt >= u32RoiCnt)
1140                 break;
1141         }
1142         ps32RoiCnt[j] = (HI_S32)u32RoiOutCnt;
1143         u32OffSet += u32RoiOutCnt;
1144     }
1145     return s32Ret;
1146 }
1147 
1148 /*
1149  * Prototype :   SVP_NNIE_Rfcn_GetResult
1150  * Description : this function is used to get RFCN result
1151  * Input :     HI_S32* ps32FcBbox             [IN] Bbox for Roi
1152  * HI_U32 u32FcBboxStride         [IN] Bbox stride
1153  * HI_S32 *ps32FcScore            [IN] Score for roi
1154  * HI_U32 u32FcScoreStride        [IN] Score stride
1155  * HI_S32 *ps32Proposals          [IN] proposal
1156  * HI_U32 u32RoiCnt               [IN] Roi num
1157  * HI_U32 *pu32ConfThresh         [IN] each class confidence thresh
1158  * HI_U32 u32MaxRoi               [IN] max roi
1159  * HI_U32 u32ClassNum             [IN] class num
1160  * HI_U32 u32OriImWidth           [IN] input image width
1161  * HI_U32 u32OriImHeight          [IN] input image height
1162  * HI_U32 u32NmsThresh            [IN] Nms thresh
1163  * HI_U32* pu32MemPool            [IN] assist buffer
1164  * HI_S32* ps32DstScore           [OUT]result of score
1165  * HI_S32* ps32DstRoi             [OUT]result of Bbox
1166  * HI_S32* ps32ClassRoiNum        [OUT]result of the roi num of each class
1167  */
static HI_S32 SVP_NNIE_Rfcn_GetResult(HI_S32 *ps32FcScore, HI_U32 u32FcScoreStride, HI_S32 *ps32FcBbox,
    HI_U32 u32FcBboxStride, HI_S32 *ps32Proposals, HI_U32 u32ProposalStride, HI_U32 u32RoiCnt, HI_U32 *pu32ConfThresh,
    HI_U32 u32MaxRoi, HI_U32 u32ClassNum, HI_U32 u32OriImWidth, HI_U32 u32OriImHeight, HI_U32 u32NmsThresh,
    HI_U32 *pu32MemPool, HI_S32 *ps32DstScores, HI_S32 *ps32DstRoi, HI_S32 *ps32ClassRoiNum)
{
    /*
     * Overview:
     * 1. copy the scores (de-quantized) and the bbox deltas into the assist pool;
     * 2. for every class: decode the deltas against the proposals, clip the boxes
     *    to the image, sort, run NMS and append the surviving boxes whose score
     *    exceeds pu32ConfThresh[class] to ps32DstScores / ps32DstRoi;
     * 3. store the number of boxes kept for the class in ps32ClassRoiNum[class].
     * Returns HI_SUCCESS, or the first non-success status reported by
     * SVP_NNIE_NonMaxSuppression.
     */

    /* The strides are divided by the element size to get per-row element steps. */
    HI_U32 u32ScoreStep = u32FcScoreStride / sizeof(HI_U32);
    HI_U32 u32BboxStep = u32FcBboxStride / sizeof(HI_U32);
    HI_U32 u32ProposalStep = u32ProposalStride / sizeof(HI_S32);

    /* Largest coordinate a clipped box may take on each axis. */
    HI_S32 s32MaxX = (HI_S32)u32OriImWidth - 1;
    HI_S32 s32MaxY = (HI_S32)u32OriImHeight - 1;

    /*
     * Assist pool layout, in order:
     *   float scores   : u32MaxRoi * u32ClassNum
     *   bbox deltas    : u32MaxRoi * SAMPLE_SVP_NNIE_COORDI_NUM
     *   work proposals : u32MaxRoi * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH
     *   sort stack     : everything after that
     * NOTE(review): the regions are sized by u32MaxRoi while the loops below run
     * to u32RoiCnt, so the caller presumably guarantees u32RoiCnt <= u32MaxRoi -
     * confirm.
     */
    HI_FLOAT *pf32Scores = (HI_FLOAT *)pu32MemPool;
    HI_S32 *ps32Deltas = (HI_S32 *)(pf32Scores + u32MaxRoi * u32ClassNum);
    HI_S32 *ps32Boxes = ps32Deltas + u32MaxRoi * SAMPLE_SVP_NNIE_COORDI_NUM;
    SAMPLE_SVP_NNIE_STACK_S *pstStack =
        (SAMPLE_SVP_NNIE_STACK_S *)(ps32Boxes + u32MaxRoi * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH);

    HI_FLOAT f32ProposalWidth = 0.0;
    HI_FLOAT f32ProposalHeight = 0.0;
    HI_FLOAT f32ProposalCenterX = 0.0;
    HI_FLOAT f32ProposalCenterY = 0.0;
    HI_FLOAT f32PredW = 0.0;
    HI_FLOAT f32PredH = 0.0;
    HI_FLOAT f32PredCenterX = 0.0;
    HI_FLOAT f32PredCenterY = 0.0;
    HI_S32 *ps32Src = NULL;
    HI_S32 *ps32Delta = NULL;
    HI_S32 *ps32Box = NULL;
    HI_S32 *ps32DstScore = NULL;
    HI_S32 *ps32DstBbox = NULL;
    HI_S32 s32X1 = 0;
    HI_S32 s32Y1 = 0;
    HI_S32 s32X2 = 0;
    HI_S32 s32Y2 = 0;
    HI_U32 u32Kept = 0;    /* boxes emitted for the current class */
    HI_U32 u32OutBase = 0; /* boxes emitted for all previous classes */
    HI_U32 i = 0;
    HI_U32 j = 0;
    HI_S32 s32NmsRet = HI_SUCCESS;
    HI_S32 s32Ret = HI_SUCCESS;

    /* De-quantize the class scores into a densely packed float table. */
    for (i = 0; i < u32RoiCnt; i++) {
        for (j = 0; j < u32ClassNum; j++) {
            pf32Scores[u32ClassNum * i + j] =
                (HI_FLOAT)(ps32FcScore[u32ScoreStep * i + j]) / SAMPLE_SVP_NNIE_QUANT_BASE;
        }
    }

    /*
     * Copy the bbox deltas. The first SAMPLE_SVP_NNIE_COORDI_NUM values of each
     * input row are skipped; only the following group is used.
     */
    for (i = 0; i < u32RoiCnt; i++) {
        for (j = 0; j < SAMPLE_SVP_NNIE_COORDI_NUM; j++) {
            ps32Deltas[SAMPLE_SVP_NNIE_COORDI_NUM * i + j] =
                ps32FcBbox[u32BboxStep * i + SAMPLE_SVP_NNIE_COORDI_NUM + j];
        }
    }

    /*
     * Each work proposal holds SAMPLE_SVP_NNIE_PROPOSAL_WIDTH elements:
     *   0-3: box corners x1, y1, x2, y2
     *   4:   confidence score
     *   5:   suppressed flag
     * The table is rebuilt for every class because the sort below reorders it.
     */
    for (j = 0; j < u32ClassNum; j++) {
        for (i = 0; i < u32RoiCnt; i++) {
            ps32Src = ps32Proposals + u32ProposalStep * i;
            ps32Delta = ps32Deltas + SAMPLE_SVP_NNIE_COORDI_NUM * i;
            ps32Box = ps32Boxes + SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;

            /* bbox transform: shift the centre and scale the size of the proposal */
            f32ProposalWidth = ps32Src[SAMPLE_SVP_NNIE_X_MAX_OFFSET] - ps32Src[0] + 1;
            f32ProposalHeight = ps32Src[SAMPLE_SVP_NNIE_Y_MAX_OFFSET] - ps32Src[1] + 1;
            f32ProposalCenterX = ps32Src[0] + 0.5 * f32ProposalWidth;
            f32ProposalCenterY = ps32Src[1] + 0.5 * f32ProposalHeight;

            f32PredCenterX = ((HI_FLOAT)ps32Delta[0] / SAMPLE_SVP_NNIE_QUANT_BASE) * f32ProposalWidth +
                f32ProposalCenterX;
            f32PredCenterY = ((HI_FLOAT)ps32Delta[1] / SAMPLE_SVP_NNIE_QUANT_BASE) * f32ProposalHeight +
                f32ProposalCenterY;
            f32PredW = f32ProposalWidth * SVP_NNIE_QuickExp(ps32Delta[SAMPLE_SVP_NNIE_X_MAX_OFFSET]);
            f32PredH = f32ProposalHeight * SVP_NNIE_QuickExp(ps32Delta[SAMPLE_SVP_NNIE_Y_MAX_OFFSET]);

            s32X1 = (HI_S32)(f32PredCenterX - 0.5 * f32PredW);
            s32Y1 = (HI_S32)(f32PredCenterY - 0.5 * f32PredH);
            s32X2 = (HI_S32)(f32PredCenterX + 0.5 * f32PredW);
            s32Y2 = (HI_S32)(f32PredCenterY + 0.5 * f32PredH);

            /*
             * Clip to [0, width - 1] x [0, height - 1]. The upper bound is applied
             * first so that a zero-sized image still yields 0. A coordinate equal
             * to the width/height is clipped as well.
             */
            s32X1 = (s32X1 > s32MaxX) ? s32MaxX : s32X1;
            s32X1 = (s32X1 < 0) ? 0 : s32X1;
            s32Y1 = (s32Y1 > s32MaxY) ? s32MaxY : s32Y1;
            s32Y1 = (s32Y1 < 0) ? 0 : s32Y1;
            s32X2 = (s32X2 > s32MaxX) ? s32MaxX : s32X2;
            s32X2 = (s32X2 < 0) ? 0 : s32X2;
            s32Y2 = (s32Y2 > s32MaxY) ? s32MaxY : s32Y2;
            s32Y2 = (s32Y2 < 0) ? 0 : s32Y2;

            ps32Box[0] = s32X1;
            ps32Box[1] = s32Y1;
            ps32Box[SAMPLE_SVP_NNIE_X_MAX_OFFSET] = s32X2;
            ps32Box[SAMPLE_SVP_NNIE_Y_MAX_OFFSET] = s32Y2;
            ps32Box[SAMPLE_SVP_NNIE_SCORE_OFFSET] =
                (HI_S32)(pf32Scores[u32ClassNum * i + j] * SAMPLE_SVP_NNIE_QUANT_BASE);
            ps32Box[SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] = 0;
        }

        /* The sort status is deliberately ignored; u32RoiCnt - 1 would wrap for 0. */
        if (u32RoiCnt >= 1) {
            (hi_void)SVP_NNIE_NonRecursiveArgQuickSort(ps32Boxes, 0, u32RoiCnt - 1, pstStack, u32RoiCnt);
        }
        s32NmsRet = SVP_NNIE_NonMaxSuppression(ps32Boxes, u32RoiCnt, u32NmsThresh, u32RoiCnt);
        if (s32Ret == HI_SUCCESS) {
            /* keep the first failure instead of letting a later class overwrite it */
            s32Ret = s32NmsRet;
        }

        /* Append this class's surviving boxes right after the previous classes'. */
        ps32DstScore = ps32DstScores + (HI_S32)u32OutBase;
        ps32DstBbox = ps32DstRoi + (HI_S32)(SAMPLE_SVP_NNIE_COORDI_NUM * u32OutBase);
        u32Kept = 0;
        for (i = 0; i < u32RoiCnt; i++) {
            ps32Box = ps32Boxes + SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * i;
            if ((ps32Box[SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET] != 0) ||
                (ps32Box[SAMPLE_SVP_NNIE_SCORE_OFFSET] <= (HI_S32)pu32ConfThresh[j])) {
                continue;
            }
            ps32DstScore[u32Kept] = ps32Box[SAMPLE_SVP_NNIE_SCORE_OFFSET];
            ps32DstBbox[u32Kept * SAMPLE_SVP_NNIE_COORDI_NUM] = ps32Box[0];
            ps32DstBbox[u32Kept * SAMPLE_SVP_NNIE_COORDI_NUM + 1] = ps32Box[1];
            ps32DstBbox[u32Kept * SAMPLE_SVP_NNIE_COORDI_NUM + SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
                ps32Box[SAMPLE_SVP_NNIE_X_MAX_OFFSET];
            ps32DstBbox[u32Kept * SAMPLE_SVP_NNIE_COORDI_NUM + SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
                ps32Box[SAMPLE_SVP_NNIE_Y_MAX_OFFSET];
            u32Kept++;
        }
        ps32ClassRoiNum[j] = (HI_S32)u32Kept;
        u32OutBase += u32Kept;
    }

    return s32Ret;
}
1359 
1360 /*
1361  * Prototype :   SVP_NNIE_Ssd_PriorBoxForward
1362  * Description : this function is used to get SSD priorbox
1363  * Input :     HI_U32 u32PriorBoxWidth            [IN] prior box width
1364  * HI_U32 u32PriorBoxHeight           [IN] prior box height
1365  * HI_U32 u32OriImWidth               [IN] input image width
1366  * HI_U32 u32OriImHeight              [IN] input image height
1367  * HI_U32 f32PriorBoxMinSize          [IN] prior box min size
1368  * HI_U32 u32MinSizeNum               [IN] min size num
1369  * HI_U32 f32PriorBoxMaxSize          [IN] prior box max size
1370  * HI_U32 u32MaxSizeNum               [IN] max size num
1371  * HI_BOOL bFlip                      [IN] whether do Flip
1372  * HI_BOOL bClip                      [IN] whether do Clip
1373  * HI_U32  u32InputAspectRatioNum     [IN] aspect ratio num
1374  * HI_FLOAT af32PriorBoxAspectRatio[] [IN] aspect ratio value
1375  * HI_FLOAT f32PriorBoxStepWidth      [IN] prior box step width
1376  * HI_FLOAT f32PriorBoxStepHeight     [IN] prior box step height
1377  * HI_FLOAT f32Offset                 [IN] offset value
1378  * HI_S32   as32PriorBoxVar[]         [IN] prior box variance
1379  * HI_S32*  ps32PriorboxOutputData    [OUT] output result
1380  */
SVP_NNIE_Ssd_PriorBoxForward(HI_U32 u32PriorBoxWidth,HI_U32 u32PriorBoxHeight,HI_U32 u32OriImWidth,HI_U32 u32OriImHeight,HI_FLOAT * pf32PriorBoxMinSize,HI_U32 u32MinSizeNum,HI_FLOAT * pf32PriorBoxMaxSize,HI_U32 u32MaxSizeNum,HI_BOOL bFlip,HI_BOOL bClip,HI_U32 u32InputAspectRatioNum,HI_FLOAT af32PriorBoxAspectRatio[],HI_FLOAT f32PriorBoxStepWidth,HI_FLOAT f32PriorBoxStepHeight,HI_FLOAT f32Offset,HI_S32 as32PriorBoxVar[],HI_S32 * ps32PriorboxOutputData)1381 static HI_S32 SVP_NNIE_Ssd_PriorBoxForward(HI_U32 u32PriorBoxWidth, HI_U32 u32PriorBoxHeight, HI_U32 u32OriImWidth,
1382     HI_U32 u32OriImHeight, HI_FLOAT *pf32PriorBoxMinSize, HI_U32 u32MinSizeNum, HI_FLOAT *pf32PriorBoxMaxSize,
1383     HI_U32 u32MaxSizeNum, HI_BOOL bFlip, HI_BOOL bClip, HI_U32 u32InputAspectRatioNum,
1384     HI_FLOAT af32PriorBoxAspectRatio[], HI_FLOAT f32PriorBoxStepWidth, HI_FLOAT f32PriorBoxStepHeight,
1385     HI_FLOAT f32Offset, HI_S32 as32PriorBoxVar[], HI_S32 *ps32PriorboxOutputData)
1386 {
1387     HI_U32 u32AspectRatioNum = 0;
1388     HI_U32 u32Index = 0;
1389     HI_FLOAT af32AspectRatio[SAMPLE_SVP_NNIE_SSD_ASPECT_RATIO_NUM] = { 0 };
1390     HI_U32 u32NumPrior = 0;
1391     HI_FLOAT f32CenterX = 0;
1392     HI_FLOAT f32CenterY = 0;
1393     HI_FLOAT f32BoxHeight = 0;
1394     HI_FLOAT f32BoxWidth = 0;
1395     HI_FLOAT f32MaxBoxWidth = 0;
1396     HI_U32 i = 0;
1397     HI_U32 j = 0;
1398     HI_U32 n = 0;
1399     HI_U32 h = 0;
1400     HI_U32 w = 0;
1401     SAMPLE_SVP_CHECK_EXPR_RET(
1402         (HI_TRUE == bFlip && u32InputAspectRatioNum > (SAMPLE_SVP_NNIE_SSD_ASPECT_RATIO_NUM - 1) / 2), HI_INVALID_VALUE,
1403         SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,when bFlip is true, u32InputAspectRatioNum(%d) can't be greater than %d!\n",
1404         u32InputAspectRatioNum, (SAMPLE_SVP_NNIE_SSD_ASPECT_RATIO_NUM - 1) / 2);
1405     SAMPLE_SVP_CHECK_EXPR_RET(
1406         (HI_FALSE == bFlip && u32InputAspectRatioNum > (SAMPLE_SVP_NNIE_SSD_ASPECT_RATIO_NUM - 1)), HI_INVALID_VALUE,
1407         SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,when bFlip is false, u32InputAspectRatioNum(%d) can't be greater than %d!\n",
1408         u32InputAspectRatioNum, (SAMPLE_SVP_NNIE_SSD_ASPECT_RATIO_NUM - 1));
1409 
1410     // generate aspect_ratios
1411     u32AspectRatioNum = 0;
1412     af32AspectRatio[0] = 1;
1413     u32AspectRatioNum++;
1414     for (i = 0; i < u32InputAspectRatioNum; i++) {
1415         af32AspectRatio[u32AspectRatioNum++] = af32PriorBoxAspectRatio[i];
1416         if (bFlip) {
1417             af32AspectRatio[u32AspectRatioNum++] = 1.0f / af32PriorBoxAspectRatio[i];
1418         }
1419     }
1420     u32NumPrior = u32MinSizeNum * u32AspectRatioNum + u32MaxSizeNum;
1421 
1422     u32Index = 0;
1423     for (h = 0; h < u32PriorBoxHeight; h++) {
1424         for (w = 0; w < u32PriorBoxWidth; w++) {
1425             f32CenterX = (w + f32Offset) * f32PriorBoxStepWidth;
1426             f32CenterY = (h + f32Offset) * f32PriorBoxStepHeight;
1427             for (n = 0; n < u32MinSizeNum; n++) {
1428                 /* ** first prior ** */
1429                 f32BoxHeight = pf32PriorBoxMinSize[n];
1430                 f32BoxWidth = pf32PriorBoxMinSize[n];
1431                 ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterX - f32BoxWidth * SAMPLE_SVP_NNIE_HALF);
1432                 ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterY - f32BoxHeight * SAMPLE_SVP_NNIE_HALF);
1433                 ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterX + f32BoxWidth * SAMPLE_SVP_NNIE_HALF);
1434                 ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterY + f32BoxHeight * SAMPLE_SVP_NNIE_HALF);
1435                 /* ** second prior ** */
1436                 if (u32MaxSizeNum > 0) {
1437                     f32MaxBoxWidth = sqrt(pf32PriorBoxMinSize[n] * pf32PriorBoxMaxSize[n]);
1438                     f32BoxHeight = f32MaxBoxWidth;
1439                     f32BoxWidth = f32MaxBoxWidth;
1440                     ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterX - f32BoxWidth * SAMPLE_SVP_NNIE_HALF);
1441                     ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterY - f32BoxHeight * SAMPLE_SVP_NNIE_HALF);
1442                     ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterX + f32BoxWidth * SAMPLE_SVP_NNIE_HALF);
1443                     ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterY + f32BoxHeight * SAMPLE_SVP_NNIE_HALF);
1444                 }
1445                 /* *** rest of priors, skip AspectRatio == 1 *** */
1446                 for (i = 1; i < u32AspectRatioNum; i++) {
1447                     f32BoxWidth = (HI_FLOAT)(pf32PriorBoxMinSize[n] * sqrt(af32AspectRatio[i]));
1448                     f32BoxHeight = (HI_FLOAT)(pf32PriorBoxMinSize[n] / sqrt(af32AspectRatio[i]));
1449                     ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterX - f32BoxWidth * SAMPLE_SVP_NNIE_HALF);
1450                     ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterY - f32BoxHeight * SAMPLE_SVP_NNIE_HALF);
1451                     ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterX + f32BoxWidth * SAMPLE_SVP_NNIE_HALF);
1452                     ps32PriorboxOutputData[u32Index++] = (HI_S32)(f32CenterY + f32BoxHeight * SAMPLE_SVP_NNIE_HALF);
1453                 }
1454             }
1455         }
1456     }
1457     /* clip the priors' coordidates, within [0, u32ImgWidth] & [0, u32ImgHeight] */
1458     if (bClip) {
1459         for (i = 0; i < (HI_U32)(u32PriorBoxWidth * u32PriorBoxHeight * SAMPLE_SVP_NNIE_COORDI_NUM * u32NumPrior / 2);
1460             i++) {
1461             ps32PriorboxOutputData[2 * i] =
1462                 SAMPLE_SVP_NNIE_MIN((HI_U32)SAMPLE_SVP_NNIE_MAX(ps32PriorboxOutputData[2 * i], 0), u32OriImWidth);
1463             ps32PriorboxOutputData[2 * i + 1] =
1464                 SAMPLE_SVP_NNIE_MIN((HI_U32)SAMPLE_SVP_NNIE_MAX(ps32PriorboxOutputData[2 * i + 1], 0), u32OriImHeight);
1465         }
1466     }
1467 
1468     for (h = 0; h < u32PriorBoxHeight; h++) {
1469         for (w = 0; w < u32PriorBoxWidth; w++) {
1470             for (i = 0; i < u32NumPrior; i++) {
1471                 for (j = 0; j < SAMPLE_SVP_NNIE_COORDI_NUM; j++) {
1472                     ps32PriorboxOutputData[u32Index++] = (HI_S32)as32PriorBoxVar[j];
1473                 }
1474             }
1475         }
1476     }
1477     return HI_SUCCESS;
1478 }
1479 
1480 /*
1481  * Prototype :   SVP_NNIE_Ssd_SoftmaxForward
1482  * Description : this function is used to do SSD softmax
1483  * Input :     HI_U32 u32SoftMaxInHeight          [IN] softmax input height
1484  * HI_U32 au32SoftMaxInChn[]          [IN] softmax input channel
1485  * HI_U32 u32ConcatNum                [IN] concat num
1486  * HI_U32 au32ConvStride[]            [IN] conv stride
1487  * HI_U32 u32SoftMaxOutWidth          [IN] softmax output width
1488  * HI_U32 u32SoftMaxOutHeight         [IN] softmax output height
1489  * HI_U32 u32SoftMaxOutChn            [IN] softmax output channel
1490  * HI_S32* aps32SoftMaxInputData[]    [IN] softmax input data
1491  * HI_S32* ps32SoftMaxOutputData      [OUT]softmax output data
1492  */
SVP_NNIE_Ssd_SoftmaxForward(HI_U32 u32SoftMaxInHeight,HI_U32 au32SoftMaxInChn[],HI_U32 u32ConcatNum,HI_U32 au32ConvStride[],HI_U32 au32SoftMaxWidth[],HI_S32 * aps32SoftMaxInputData[],HI_S32 * ps32SoftMaxOutputData)1493 static HI_S32 SVP_NNIE_Ssd_SoftmaxForward(HI_U32 u32SoftMaxInHeight, HI_U32 au32SoftMaxInChn[], HI_U32 u32ConcatNum,
1494     HI_U32 au32ConvStride[], HI_U32 au32SoftMaxWidth[], HI_S32 *aps32SoftMaxInputData[], HI_S32 *ps32SoftMaxOutputData)
1495 {
1496     HI_S32 *ps32InputData = NULL;
1497     HI_S32 *ps32OutputTmp = NULL;
1498     HI_U32 u32OuterNum = 0;
1499     HI_U32 u32InnerNum = 0;
1500     HI_U32 u32InputChannel = 0;
1501     HI_U32 i = 0;
1502     HI_U32 u32ConcatCnt = 0;
1503     HI_S32 s32Ret = 0;
1504     HI_U32 u32Stride = 0;
1505     HI_U32 u32Skip = 0;
1506     HI_U32 u32Left = 0;
1507     ps32OutputTmp = ps32SoftMaxOutputData;
1508     for (u32ConcatCnt = 0; u32ConcatCnt < u32ConcatNum; u32ConcatCnt++) {
1509         ps32InputData = aps32SoftMaxInputData[u32ConcatCnt];
1510         u32Stride = au32ConvStride[u32ConcatCnt];
1511         u32InputChannel = au32SoftMaxInChn[u32ConcatCnt];
1512         if (u32SoftMaxInHeight == 0) {
1513             printf("Divisor u32SoftMaxInHeight cannot be 0!\n");
1514             return HI_FAILURE;
1515         }
1516         u32OuterNum = u32InputChannel / u32SoftMaxInHeight;
1517         u32InnerNum = u32SoftMaxInHeight;
1518         u32Skip = au32SoftMaxWidth[u32ConcatCnt] / u32InnerNum;
1519         u32Left = u32Stride - au32SoftMaxWidth[u32ConcatCnt];
1520         for (i = 0; i < u32OuterNum; i++) {
1521             s32Ret = SVP_NNIE_SSD_SoftMax(ps32InputData, (HI_S32)u32InnerNum, ps32OutputTmp);
1522             if ((i + 1) % u32Skip == 0) {
1523                 ps32InputData += u32Left;
1524             }
1525             ps32InputData += u32InnerNum;
1526             ps32OutputTmp += u32InnerNum;
1527         }
1528     }
1529     return s32Ret;
1530 }
1531 
1532 /*
1533  * Prototype :   SVP_NNIE_Ssd_DetectionOutForward
1534  * Description : this function is used to get detection result of SSD
1535  * Input :     HI_U32 u32ConcatNum            [IN] SSD concat num
1536  * HI_U32 u32ConfThresh           [IN] confidence thresh
1537  * HI_U32 u32ClassNum             [IN] class num
1538  * HI_U32 u32TopK                 [IN] Topk value
1539  * HI_U32 u32KeepTopK             [IN] KeepTopK value
1540  * HI_U32 u32NmsThresh            [IN] NMS thresh
1541  * HI_U32 au32DetectInputChn[]    [IN] detection input channel
1542  * HI_S32* aps32AllLocPreds[]     [IN] Location prediction
1543  * HI_S32* aps32AllPriorBoxes[]   [IN] prior box
1544  * HI_S32* ps32ConfScores         [IN] confidence score
1545  * HI_S32* ps32AssistMemPool      [IN] assist buffer
1546  * HI_S32* ps32DstScoreSrc        [OUT] result of score
1547  * HI_S32* ps32DstBboxSrc         [OUT] result of Bbox
1548  * HI_S32* ps32RoiOutCntSrc       [OUT] result of the roi num of each class
1549  */
SVP_NNIE_Ssd_DetectionOutForward(HI_U32 u32ConcatNum,HI_U32 u32ConfThresh,HI_U32 u32ClassNum,HI_U32 u32TopK,HI_U32 u32KeepTopK,HI_U32 u32NmsThresh,HI_U32 au32DetectInputChn[],HI_S32 * aps32AllLocPreds[],HI_S32 * aps32AllPriorBoxes[],HI_S32 * ps32ConfScores,HI_S32 * ps32AssistMemPool,HI_S32 * ps32DstScoreSrc,HI_S32 * ps32DstBboxSrc,HI_S32 * ps32RoiOutCntSrc)1550 static HI_S32 SVP_NNIE_Ssd_DetectionOutForward(HI_U32 u32ConcatNum, HI_U32 u32ConfThresh, HI_U32 u32ClassNum,
1551     HI_U32 u32TopK, HI_U32 u32KeepTopK, HI_U32 u32NmsThresh, HI_U32 au32DetectInputChn[], HI_S32 *aps32AllLocPreds[],
1552     HI_S32 *aps32AllPriorBoxes[], HI_S32 *ps32ConfScores, HI_S32 *ps32AssistMemPool, HI_S32 *ps32DstScoreSrc,
1553     HI_S32 *ps32DstBboxSrc, HI_S32 *ps32RoiOutCntSrc)
1554 {
1555     HI_S32 *ps32LocPreds = NULL;
1556     HI_S32 *ps32PriorBoxes = NULL;
1557     HI_S32 *ps32PriorVar = NULL;
1558     HI_S32 *ps32AllDecodeBoxes = NULL;
1559     HI_S32 *ps32DstScore = NULL;
1560     HI_S32 *ps32DstBbox = NULL;
1561     HI_S32 *ps32ClassRoiNum = NULL;
1562     HI_U32 u32RoiOutCnt = 0;
1563     HI_S32 *ps32SingleProposal = NULL;
1564     HI_S32 *ps32AfterTopK = NULL;
1565     SAMPLE_SVP_NNIE_STACK_S *pstStack = NULL;
1566     HI_U32 u32PriorNum = 0;
1567     HI_U32 u32NumPredsPerClass = 0;
1568     HI_FLOAT f32PriorWidth = 0;
1569     HI_FLOAT f32PriorHeight = 0;
1570     HI_FLOAT f32PriorCenterX = 0;
1571     HI_FLOAT f32PriorCenterY = 0;
1572     HI_FLOAT f32DecodeBoxCenterX = 0;
1573     HI_FLOAT f32DecodeBoxCenterY = 0;
1574     HI_FLOAT f32DecodeBoxWidth = 0;
1575     HI_FLOAT f32DecodeBoxHeight = 0;
1576     HI_U32 u32SrcIdx = 0;
1577     HI_U32 u32AfterFilter = 0;
1578     HI_U32 u32AfterTopK = 0;
1579     HI_U32 u32KeepCnt = 0;
1580     HI_U32 i = 0;
1581     HI_U32 j = 0;
1582     HI_U32 u32Offset = 0;
1583     HI_S32 s32Ret = HI_SUCCESS;
1584     u32PriorNum = 0;
1585     for (i = 0; i < u32ConcatNum; i++) {
1586         u32PriorNum += au32DetectInputChn[i] / SAMPLE_SVP_NNIE_COORDI_NUM;
1587     }
1588     // prepare for Assist MemPool
1589     ps32AllDecodeBoxes = ps32AssistMemPool;
1590     ps32SingleProposal = ps32AllDecodeBoxes + u32PriorNum * SAMPLE_SVP_NNIE_COORDI_NUM;
1591     ps32AfterTopK = ps32SingleProposal + SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u32PriorNum;
1592     pstStack = (SAMPLE_SVP_NNIE_STACK_S *)(ps32AfterTopK + u32PriorNum * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH);
1593     u32SrcIdx = 0;
1594     for (i = 0; i < u32ConcatNum; i++) {
1595         /* get loc predictions */
1596         ps32LocPreds = aps32AllLocPreds[i];
1597         u32NumPredsPerClass = au32DetectInputChn[i] / SAMPLE_SVP_NNIE_COORDI_NUM;
1598         /* get Prior Bboxes */
1599         ps32PriorBoxes = aps32AllPriorBoxes[i];
1600         ps32PriorVar = ps32PriorBoxes + u32NumPredsPerClass * SAMPLE_SVP_NNIE_COORDI_NUM;
1601         for (j = 0; j < u32NumPredsPerClass; j++) {
1602             f32PriorWidth = (HI_FLOAT)(ps32PriorBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 2] -
1603                 ps32PriorBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM]);
1604             f32PriorHeight = (HI_FLOAT)(ps32PriorBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 3] -
1605                 ps32PriorBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 1]);
1606             f32PriorCenterX =
1607                 (ps32PriorBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 2] + ps32PriorBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM]) *
1608                 SAMPLE_SVP_NNIE_HALF;
1609             f32PriorCenterY = (ps32PriorBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 3] +
1610                 ps32PriorBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 1]) *
1611                 SAMPLE_SVP_NNIE_HALF;
1612 
1613             f32DecodeBoxCenterX =
1614                 ((HI_FLOAT)ps32PriorVar[j * SAMPLE_SVP_NNIE_COORDI_NUM] / SAMPLE_SVP_NNIE_QUANT_BASE) *
1615                 ((HI_FLOAT)ps32LocPreds[j * SAMPLE_SVP_NNIE_COORDI_NUM] / SAMPLE_SVP_NNIE_QUANT_BASE) * f32PriorWidth +
1616                 f32PriorCenterX;
1617 
1618             f32DecodeBoxCenterY =
1619                 ((HI_FLOAT)ps32PriorVar[j * SAMPLE_SVP_NNIE_COORDI_NUM + 1] / SAMPLE_SVP_NNIE_QUANT_BASE) *
1620                 ((HI_FLOAT)ps32LocPreds[j * SAMPLE_SVP_NNIE_COORDI_NUM + 1] / SAMPLE_SVP_NNIE_QUANT_BASE) *
1621                 f32PriorHeight +
1622                 f32PriorCenterY;
1623 
1624             f32DecodeBoxWidth =
1625                 exp(((HI_FLOAT)ps32PriorVar[j * SAMPLE_SVP_NNIE_COORDI_NUM + 2] / SAMPLE_SVP_NNIE_QUANT_BASE) *
1626                 ((HI_FLOAT)ps32LocPreds[j * SAMPLE_SVP_NNIE_COORDI_NUM + 2] / SAMPLE_SVP_NNIE_QUANT_BASE)) *
1627                 f32PriorWidth;
1628 
1629             f32DecodeBoxHeight =
1630                 exp(((HI_FLOAT)ps32PriorVar[j * SAMPLE_SVP_NNIE_COORDI_NUM + 3] / SAMPLE_SVP_NNIE_QUANT_BASE) *
1631                 ((HI_FLOAT)ps32LocPreds[j * SAMPLE_SVP_NNIE_COORDI_NUM + 3] / SAMPLE_SVP_NNIE_QUANT_BASE)) *
1632                 f32PriorHeight;
1633 
1634             ps32AllDecodeBoxes[u32SrcIdx++] = (HI_S32)(f32DecodeBoxCenterX - f32DecodeBoxWidth * SAMPLE_SVP_NNIE_HALF);
1635             ps32AllDecodeBoxes[u32SrcIdx++] = (HI_S32)(f32DecodeBoxCenterY - f32DecodeBoxHeight * SAMPLE_SVP_NNIE_HALF);
1636             ps32AllDecodeBoxes[u32SrcIdx++] = (HI_S32)(f32DecodeBoxCenterX + f32DecodeBoxWidth * SAMPLE_SVP_NNIE_HALF);
1637             ps32AllDecodeBoxes[u32SrcIdx++] = (HI_S32)(f32DecodeBoxCenterY + f32DecodeBoxHeight * SAMPLE_SVP_NNIE_HALF);
1638         }
1639     }
1640     /* do NMS for each class */
1641     u32AfterTopK = 0;
1642     for (i = 0; i < u32ClassNum; i++) {
1643         for (j = 0; j < u32PriorNum; j++) {
1644             ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH] = ps32AllDecodeBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM];
1645             ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 1] =
1646                 ps32AllDecodeBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 1];
1647             ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 2] =
1648                 ps32AllDecodeBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 2];
1649             ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 3] =
1650                 ps32AllDecodeBoxes[j * SAMPLE_SVP_NNIE_COORDI_NUM + 3];
1651             ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 4] = ps32ConfScores[j * u32ClassNum + i];
1652             ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 5] = 0;
1653         }
1654         s32Ret = SVP_NNIE_NonRecursiveArgQuickSort(ps32SingleProposal, 0, u32PriorNum - 1, pstStack, u32TopK);
1655         u32AfterFilter = (u32PriorNum < u32TopK) ? u32PriorNum : u32TopK;
1656         s32Ret = SVP_NNIE_NonMaxSuppression(ps32SingleProposal, u32AfterFilter, u32NmsThresh, u32AfterFilter);
1657         u32RoiOutCnt = 0;
1658         ps32DstScore = (HI_S32 *)ps32DstScoreSrc;
1659         ps32DstBbox = (HI_S32 *)ps32DstBboxSrc;
1660         ps32ClassRoiNum = (HI_S32 *)ps32RoiOutCntSrc;
1661         ps32DstScore += (HI_S32)u32AfterTopK;
1662         ps32DstBbox += (HI_S32)(u32AfterTopK * SAMPLE_SVP_NNIE_COORDI_NUM);
1663         for (j = 0; j < u32TopK; j++) {
1664             if (ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 5] == 0 &&
1665                 ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 4] > (HI_S32)u32ConfThresh) {
1666                 ps32DstScore[u32RoiOutCnt] = ps32SingleProposal[j * 6 + 4];
1667                 ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM] =
1668                     ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH];
1669                 ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 1] =
1670                     ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 1];
1671                 ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 2] =
1672                     ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 2];
1673                 ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 3] =
1674                     ps32SingleProposal[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 3];
1675                 u32RoiOutCnt++;
1676             }
1677         }
1678         ps32ClassRoiNum[i] = (HI_S32)u32RoiOutCnt;
1679         u32AfterTopK += u32RoiOutCnt;
1680     }
1681 
1682     u32KeepCnt = 0;
1683     u32Offset = 0;
1684     if (u32AfterTopK > u32KeepTopK) {
1685         u32Offset = ps32ClassRoiNum[0];
1686         for (i = 1; i < u32ClassNum; i++) {
1687             ps32DstScore = (HI_S32 *)ps32DstScoreSrc;
1688             ps32DstBbox = (HI_S32 *)ps32DstBboxSrc;
1689             ps32ClassRoiNum = (HI_S32 *)ps32RoiOutCntSrc;
1690             ps32DstScore += (HI_S32)(u32Offset);
1691             ps32DstBbox += (HI_S32)(u32Offset * SAMPLE_SVP_NNIE_COORDI_NUM);
1692             for (j = 0; j < (HI_U32)ps32ClassRoiNum[i]; j++) {
1693                 ps32AfterTopK[u32KeepCnt * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH] =
1694                     ps32DstBbox[j * SAMPLE_SVP_NNIE_COORDI_NUM];
1695                 ps32AfterTopK[u32KeepCnt * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 1] =
1696                     ps32DstBbox[j * SAMPLE_SVP_NNIE_COORDI_NUM + 1];
1697                 ps32AfterTopK[u32KeepCnt * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 2] =
1698                     ps32DstBbox[j * SAMPLE_SVP_NNIE_COORDI_NUM + 2];
1699                 ps32AfterTopK[u32KeepCnt * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 3] =
1700                     ps32DstBbox[j * SAMPLE_SVP_NNIE_COORDI_NUM + 3];
1701                 ps32AfterTopK[u32KeepCnt * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 4] = ps32DstScore[j];
1702                 ps32AfterTopK[u32KeepCnt * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 5] = i;
1703                 u32KeepCnt++;
1704             }
1705             u32Offset = u32Offset + ps32ClassRoiNum[i];
1706         }
1707         if (u32KeepCnt >= 1) {
1708             s32Ret = SVP_NNIE_NonRecursiveArgQuickSort(ps32AfterTopK, 0, u32KeepCnt - 1, pstStack, u32KeepCnt);
1709         }
1710 
1711         u32Offset = ps32ClassRoiNum[0];
1712         for (i = 1; i < u32ClassNum; i++) {
1713             u32RoiOutCnt = 0;
1714             ps32DstScore = (HI_S32 *)ps32DstScoreSrc;
1715             ps32DstBbox = (HI_S32 *)ps32DstBboxSrc;
1716             ps32ClassRoiNum = (HI_S32 *)ps32RoiOutCntSrc;
1717             ps32DstScore += (HI_S32)(u32Offset);
1718             ps32DstBbox += (HI_S32)(u32Offset * SAMPLE_SVP_NNIE_COORDI_NUM);
1719             for (j = 0; j < u32KeepTopK; j++) {
1720                 if (ps32AfterTopK[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 5] == (HI_S32)i) {
1721                     ps32DstScore[u32RoiOutCnt] = ps32AfterTopK[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 4];
1722                     ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM] =
1723                         ps32AfterTopK[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH];
1724                     ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 1] =
1725                         ps32AfterTopK[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 1];
1726                     ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 2] =
1727                         ps32AfterTopK[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 2];
1728                     ps32DstBbox[u32RoiOutCnt * SAMPLE_SVP_NNIE_COORDI_NUM + 3] =
1729                         ps32AfterTopK[j * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH + 3];
1730                     u32RoiOutCnt++;
1731                 }
1732             }
1733             ps32ClassRoiNum[i] = (HI_S32)u32RoiOutCnt;
1734             u32Offset += u32RoiOutCnt;
1735         }
1736     }
1737     return s32Ret;
1738 }
1739 
SVP_NNIE_Yolov1_Iou(HI_FLOAT * pf32Bbox,HI_U32 u32Idx1,HI_U32 u32Idx2)1740 static HI_S32 SVP_NNIE_Yolov1_Iou(HI_FLOAT *pf32Bbox, HI_U32 u32Idx1, HI_U32 u32Idx2)
1741 {
1742     HI_FLOAT f32WidthDis = 0.0f, f32HeightDis = 0.0f;
1743     HI_FLOAT f32Intersection = 0.0f;
1744     HI_FLOAT f32Iou = 0.0f;
1745     f32WidthDis = SAMPLE_SVP_NNIE_MIN(pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM] +
1746         0.5f * pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM + 2],
1747         pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM] + 0.5f * pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM + 2]) -
1748         SAMPLE_SVP_NNIE_MAX(pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM] -
1749         0.5f * pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM + 2],
1750         pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM] - 0.5f * pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM + 2]);
1751 
1752     f32HeightDis = SAMPLE_SVP_NNIE_MIN(pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM + 1] +
1753         0.5f * pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM + 3],
1754         pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM + 1] +
1755         0.5f * pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM + 3]) -
1756         SAMPLE_SVP_NNIE_MAX(pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM + 1] -
1757         0.5f * pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM + 3],
1758         pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM + 1] - 0.5f * pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM + 3]);
1759     if (f32WidthDis < 0 || f32HeightDis < 0) {
1760         f32Intersection = 0;
1761     } else {
1762         f32Intersection = f32WidthDis * f32HeightDis;
1763     }
1764     f32Iou = f32Intersection /
1765         (pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM + 2] * pf32Bbox[u32Idx1 * SAMPLE_SVP_NNIE_COORDI_NUM + 3] +
1766         pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM + 2] * pf32Bbox[u32Idx2 * SAMPLE_SVP_NNIE_COORDI_NUM + 3] -
1767         f32Intersection);
1768 
1769     return (HI_S32)(f32Iou * SAMPLE_SVP_NNIE_QUANT_BASE);
1770 }
1771 
1772 /*
1773  * Prototype :   SVP_NNIE_Yolov1_Argswap
1774  * Description : this function is used to exchange data
1775  * Input :     HI_S32*  ps32Src1           [IN] first input array
1776  * HI_S32*  ps32Src2           [IN] second input array
1777  * HI_U32  u32ArraySize        [IN] array size
1778  */
SVP_NNIE_Yolov1_Argswap(HI_S32 * ps32Src1,HI_S32 * ps32Src2,HI_U32 u32ArraySize)1779 static void SVP_NNIE_Yolov1_Argswap(HI_S32 *ps32Src1, HI_S32 *ps32Src2, HI_U32 u32ArraySize)
1780 {
1781     HI_U32 i = 0;
1782     HI_S32 s32Tmp = 0;
1783     for (i = 0; i < u32ArraySize; i++) {
1784         s32Tmp = ps32Src1[i];
1785         ps32Src1[i] = ps32Src2[i];
1786         ps32Src2[i] = s32Tmp;
1787     }
1788 }
1789 
1790 /*
1791  * Prototype :   SVP_NNIE_Yolov1_NonRecursiveArgQuickSort
1792  * Description : this function is used to do quick sort
1793  * Input :     HI_S32*  ps32Array          [IN] the array need to be sorted
1794  * HI_S32   s32Low             [IN] the start position of quick sort
1795  * HI_S32   s32High            [IN] the end position of quick sort
1796  * HI_U32   u32ArraySize       [IN] the element size of input array
1797  * HI_U32   u32ScoreIdx        [IN] the score index in array element
1798  * SAMPLE_SVP_NNIE_STACK_S *pstStack [IN] the buffer used to store start positions and end positions
1799  */
SVP_NNIE_Yolo_NonRecursiveArgQuickSort(HI_S32 * ps32Array,HI_S32 s32Low,HI_S32 s32High,HI_U32 u32ArraySize,HI_U32 u32ScoreIdx,SAMPLE_SVP_NNIE_STACK_S * pstStack)1800 static HI_S32 SVP_NNIE_Yolo_NonRecursiveArgQuickSort(HI_S32 *ps32Array, HI_S32 s32Low, HI_S32 s32High,
1801     HI_U32 u32ArraySize, HI_U32 u32ScoreIdx, SAMPLE_SVP_NNIE_STACK_S *pstStack)
1802 {
1803     HI_S32 i = s32Low;
1804     HI_S32 j = s32High;
1805     HI_S32 s32Top = 0;
1806     HI_S32 s32KeyConfidence = ps32Array[u32ArraySize * s32Low + u32ScoreIdx];
1807     pstStack[s32Top].s32Min = s32Low;
1808     pstStack[s32Top].s32Max = s32High;
1809 
1810     while (s32Top > -1) {
1811         s32Low = pstStack[s32Top].s32Min;
1812         s32High = pstStack[s32Top].s32Max;
1813         i = s32Low;
1814         j = s32High;
1815         s32Top--;
1816 
1817         s32KeyConfidence = ps32Array[u32ArraySize * s32Low + u32ScoreIdx];
1818 
1819         while (i < j) {
1820             while ((i < j) && (s32KeyConfidence > ps32Array[j * u32ArraySize + u32ScoreIdx])) {
1821                 j--;
1822             }
1823             if (i < j) {
1824                 SVP_NNIE_Yolov1_Argswap(&ps32Array[i * u32ArraySize], &ps32Array[j * u32ArraySize], u32ArraySize);
1825                 i++;
1826             }
1827 
1828             while ((i < j) && (s32KeyConfidence < ps32Array[i * u32ArraySize + u32ScoreIdx])) {
1829                 i++;
1830             }
1831             if (i < j) {
1832                 SVP_NNIE_Yolov1_Argswap(&ps32Array[i * u32ArraySize], &ps32Array[j * u32ArraySize], u32ArraySize);
1833                 j--;
1834             }
1835         }
1836 
1837         if (s32Low < i - 1) {
1838             s32Top++;
1839             pstStack[s32Top].s32Min = s32Low;
1840             pstStack[s32Top].s32Max = i - 1;
1841         }
1842 
1843         if (s32High > i + 1) {
1844             s32Top++;
1845             pstStack[s32Top].s32Min = i + 1;
1846             pstStack[s32Top].s32Max = s32High;
1847         }
1848     }
1849     return HI_SUCCESS;
1850 }
1851 
1852 /*
1853  * Prototype :   SVP_NNIE_Yolov1_Nms
1854  * Description : this function is used to do NMS
1855  * Input :     HI_S32*   ps32Score          [IN] class score of each bbox
1856  * HI_FLOAT* pf32Bbox           [IN] pointer to the Bbox memory
1857  * HI_U32    u32ConfThresh      [IN] confidence thresh
1858  * HI_U32    u32NmsThresh       [IN] NMS thresh
1859  * HI_U32*   pu32TmpBuf         [IN] assist buffer
1860  */
SVP_NNIE_Yolov1_Nms(HI_S32 * ps32Score,HI_FLOAT * pf32Bbox,HI_U32 u32BboxNum,HI_U32 u32ConfThresh,HI_U32 u32NmsThresh,HI_U32 * pu32TmpBuf)1861 static HI_S32 SVP_NNIE_Yolov1_Nms(HI_S32 *ps32Score, HI_FLOAT *pf32Bbox, HI_U32 u32BboxNum, HI_U32 u32ConfThresh,
1862     HI_U32 u32NmsThresh, HI_U32 *pu32TmpBuf)
1863 {
1864     HI_U32 i = 0, j = 0;
1865     HI_U32 u32Idx1 = 0, u32Idx2 = 0;
1866     SAMPLE_SVP_NNIE_YOLOV1_SCORE_S *pstScore = (SAMPLE_SVP_NNIE_YOLOV1_SCORE_S *)pu32TmpBuf;
1867     SAMPLE_SVP_NNIE_STACK_S *pstAssitBuf =
1868         (SAMPLE_SVP_NNIE_STACK_S *)((HI_U8 *)pu32TmpBuf + u32BboxNum * sizeof(SAMPLE_SVP_NNIE_YOLOV1_SCORE_S));
1869     for (i = 0; i < u32BboxNum; i++) {
1870         if (ps32Score[i] < (HI_S32)u32ConfThresh) {
1871             ps32Score[i] = 0;
1872         }
1873     }
1874 
1875     for (i = 0; i < u32BboxNum; ++i) {
1876         pstScore[i].u32Idx = i;
1877         pstScore[i].s32Score = (ps32Score[i]);
1878     }
1879     /* quick sort */
1880     if (u32BboxNum >= 1) {
1881         (void)SVP_NNIE_Yolo_NonRecursiveArgQuickSort((HI_S32 *)pstScore, 0, u32BboxNum - 1,
1882             sizeof(SAMPLE_SVP_NNIE_YOLOV1_SCORE_S) / sizeof(HI_U32), 1, pstAssitBuf);
1883     }
1884     /* NMS */
1885     for (i = 0; i < u32BboxNum; i++) {
1886         u32Idx1 = pstScore[i].u32Idx;
1887         if (pstScore[i].s32Score == 0) {
1888             continue;
1889         }
1890         for (j = i + 1; j < u32BboxNum; j++) {
1891             u32Idx2 = pstScore[j].u32Idx;
1892             if (pstScore[j].s32Score == 0) {
1893                 continue;
1894             }
1895             if (SVP_NNIE_Yolov1_Iou(pf32Bbox, u32Idx1, u32Idx2) > (HI_S32)u32NmsThresh) {
1896                 pstScore[j].s32Score = 0;
1897                 ps32Score[pstScore[j].u32Idx] = 0;
1898             }
1899         }
1900     }
1901 
1902     return HI_SUCCESS;
1903 }
1904 
1905 /*
1906  * Prototype :   SVP_NNIE_Yolov1_ConvertPosition
1907  * Description : this function is used to do convert position coordinates
1908  * Input :     HI_FLOAT* pf32Bbox           [IN] pointer to the Bbox memory
1909  * HI_U32    u32OriImgWidth     [IN] input image width
1910  * HI_U32    u32OriImagHeight   [IN] input image height
1911  * HI_FLOAT  af32Roi[]          [OUT] converted position coordinates
1912  */
SVP_NNIE_Yolov1_ConvertPosition(HI_FLOAT * pf32Bbox,HI_U32 u32OriImgWidth,HI_U32 u32OriImagHeight,HI_FLOAT af32Roi[])1913 static void SVP_NNIE_Yolov1_ConvertPosition(HI_FLOAT *pf32Bbox, HI_U32 u32OriImgWidth, HI_U32 u32OriImagHeight,
1914     HI_FLOAT af32Roi[])
1915 {
1916     HI_FLOAT f32Xmin, f32Ymin, f32Xmax, f32Ymax;
1917     f32Xmin = *pf32Bbox - *(pf32Bbox + SAMPLE_SVP_NNIE_X_MAX_OFFSET) * SAMPLE_SVP_NNIE_HALF;
1918     f32Xmin = f32Xmin > 0 ? f32Xmin : 0;
1919     f32Ymin = *(pf32Bbox + 1) - *(pf32Bbox + SAMPLE_SVP_NNIE_Y_MAX_OFFSET) * SAMPLE_SVP_NNIE_HALF;
1920     f32Ymin = f32Ymin > 0 ? f32Ymin : 0;
1921     f32Xmax = *pf32Bbox + *(pf32Bbox + SAMPLE_SVP_NNIE_X_MAX_OFFSET) * SAMPLE_SVP_NNIE_HALF;
1922     f32Xmax = f32Xmax > u32OriImgWidth ? u32OriImgWidth : f32Xmax;
1923     f32Ymax = *(pf32Bbox + 1) + *(pf32Bbox + SAMPLE_SVP_NNIE_Y_MAX_OFFSET) * SAMPLE_SVP_NNIE_HALF;
1924     f32Ymax = f32Ymax > u32OriImagHeight ? u32OriImagHeight : f32Ymax;
1925 
1926     af32Roi[0] = f32Xmin;
1927     af32Roi[1] = f32Ymin;
1928     af32Roi[2] = f32Xmax;
1929     af32Roi[3] = f32Ymax;
1930 }
1931 
1932 /*
1933  * Prototype    : SVP_NNIE_Yolov1_Detection
1934  * Description  : Yolov1 detection
1935  * Input :     HI_S32*   ps32Score       [IN]  bbox each class score
1936  * HI_FLOAT* pf32Bbox        [IN]  bbox
1937  * HI_U32    u32ClassNum     [IN]  Class num
1938  * HI_U32    u32GridNum      [IN]  grid num
1939  * HI_U32    u32BboxNum      [IN]  bbox num
1940  * HI_U32    u32ConfThresh   [IN]  confidence thresh
1941  * HI_U32    u32NmsThresh    [IN]  Nms thresh
1942  * HI_U32    u32OriImgWidth  [IN]  input image width
1943  * HI_U32    u32OriImgHeight [IN]  input image height
1944  * HI_U32*   pu32MemPool     [IN]  assist buffer
1945  * HI_S32    *ps32DstScores  [OUT]  dst score of ROI
1946  * HI_S32    *ps32DstRoi     [OUT]  dst Roi
1947  * HI_S32    *ps32ClassRoiNum[OUT]  dst roi num of each class
1948  */
SVP_NNIE_Yolov1_Detection(HI_S32 * ps32Score,HI_FLOAT * pf32Bbox,HI_U32 u32ClassNum,HI_U32 u32BboxNum,HI_U32 u32ConfThresh,HI_U32 u32NmsThresh,HI_U32 u32OriImgWidth,HI_U32 u32OriImgHeight,HI_U32 * pu32MemPool,HI_S32 * ps32DstScores,HI_S32 * ps32DstRoi,HI_S32 * ps32ClassRoiNum)1949 static HI_S32 SVP_NNIE_Yolov1_Detection(HI_S32 *ps32Score, HI_FLOAT *pf32Bbox, HI_U32 u32ClassNum, HI_U32 u32BboxNum,
1950     HI_U32 u32ConfThresh, HI_U32 u32NmsThresh, HI_U32 u32OriImgWidth, HI_U32 u32OriImgHeight, HI_U32 *pu32MemPool,
1951     HI_S32 *ps32DstScores, HI_S32 *ps32DstRoi, HI_S32 *ps32ClassRoiNum)
1952 {
1953     HI_U32 i = 0, j = 0;
1954     HI_U32 u32Idx = 0;
1955     HI_U32 u32RoiNum = 0;
1956     HI_S32 *ps32EachClassScore = NULL;
1957     HI_FLOAT af32Roi[SAMPLE_SVP_NNIE_COORDI_NUM] = {0.0f};
1958     SAMPLE_SVP_NNIE_YOLOV1_SCORE_S *pstScore = NULL;
1959     *(ps32ClassRoiNum++) = 0;
1960     for (i = 0; i < u32ClassNum; i++) {
1961         ps32EachClassScore = ps32Score + u32BboxNum * i;
1962         (void)SVP_NNIE_Yolov1_Nms(ps32EachClassScore, pf32Bbox, u32BboxNum, u32ConfThresh, u32NmsThresh, pu32MemPool);
1963 
1964         pstScore = (SAMPLE_SVP_NNIE_YOLOV1_SCORE_S *)pu32MemPool;
1965         u32RoiNum = 0;
1966         for (j = 0; j < u32BboxNum; j++) {
1967             if (pstScore[j].s32Score != 0) {
1968                 u32RoiNum++;
1969                 *(ps32DstScores++) = pstScore[j].s32Score;
1970                 u32Idx = pstScore[j].u32Idx;
1971                 (void)SVP_NNIE_Yolov1_ConvertPosition((pf32Bbox + u32Idx * SAMPLE_SVP_NNIE_COORDI_NUM), u32OriImgWidth,
1972                     u32OriImgHeight, af32Roi);
1973                 *(ps32DstRoi++) = (HI_S32)af32Roi[0];
1974                 *(ps32DstRoi++) = (HI_S32)af32Roi[1];
1975                 *(ps32DstRoi++) = (HI_S32)af32Roi[2];
1976                 *(ps32DstRoi++) = (HI_S32)af32Roi[3];
1977             } else {
1978                 continue;
1979             }
1980         }
1981         *(ps32ClassRoiNum++) = u32RoiNum;
1982     }
1983     return HI_SUCCESS;
1984 }
1985 
1986 /*
1987  * Prototype    : SVP_NNIE_Yolov2_Iou
1988  * Description  : Yolov2 IOU
1989  * Input :     SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox1 [IN]  first bbox
1990  * SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox2 [IN]  second bbox
1991  * HI_U32    u32ClassNum     [IN]  Class num
1992  * HI_U32    u32GridNum      [IN]  grid num
1993  * HI_U32    u32BboxNum      [IN]  bbox num
1994  * HI_U32    u32ConfThresh   [IN]  confidence thresh
1995  * HI_U32    u32NmsThresh    [IN]  Nms thresh
1996  * HI_U32    u32OriImgWidth  [IN]  input image width
1997  * HI_U32    u32OriImgHeight [IN]  input image height
1998  * HI_U32*   pu32MemPool     [IN]  assist buffer
1999  * HI_S32    *ps32DstScores  [OUT]  dst score of ROI
2000  * HI_S32    *ps32DstRoi     [OUT]  dst Roi
2001  * HI_S32    *ps32ClassRoiNum[OUT]  dst roi num of each class
2002  */
SVP_NNIE_Yolov2_Iou(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S * pstBbox1,SAMPLE_SVP_NNIE_YOLOV2_BBOX_S * pstBbox2)2003 static HI_DOUBLE SVP_NNIE_Yolov2_Iou(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox1, SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox2)
2004 {
2005     HI_FLOAT f32InterWidth = 0.0;
2006     HI_FLOAT f32InterHeight = 0.0;
2007     HI_DOUBLE f64InterArea = 0.0;
2008     HI_DOUBLE f64Box1Area = 0.0;
2009     HI_DOUBLE f64Box2Area = 0.0;
2010     HI_DOUBLE f64UnionArea = 0.0;
2011 
2012     f32InterWidth = SAMPLE_SVP_NNIE_MIN(pstBbox1->f32Xmax, pstBbox2->f32Xmax) -
2013         SAMPLE_SVP_NNIE_MAX(pstBbox1->f32Xmin, pstBbox2->f32Xmin);
2014     f32InterHeight = SAMPLE_SVP_NNIE_MIN(pstBbox1->f32Ymax, pstBbox2->f32Ymax) -
2015         SAMPLE_SVP_NNIE_MAX(pstBbox1->f32Ymin, pstBbox2->f32Ymin);
2016     if (f32InterWidth <= 0 || f32InterHeight <= 0)
2017         return 0;
2018 
2019     f64InterArea = f32InterWidth * f32InterHeight;
2020     f64Box1Area = (pstBbox1->f32Xmax - pstBbox1->f32Xmin) * (pstBbox1->f32Ymax - pstBbox1->f32Ymin);
2021     f64Box2Area = (pstBbox2->f32Xmax - pstBbox2->f32Xmin) * (pstBbox2->f32Ymax - pstBbox2->f32Ymin);
2022     f64UnionArea = f64Box1Area + f64Box2Area - f64InterArea;
2023 
2024     return f64InterArea / f64UnionArea;
2025 }
2026 
2027 /*
2028  * Prototype    : SVP_NNIE_Yolov2_NonMaxSuppression
2029  * Description  : Yolov2 NonMaxSuppression function
2030  * Input :     SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox [IN]  input bbox
2031  * HI_U32    u32BoxNum       [IN]  Bbox num
2032  * HI_U32    u32ClassNum     [IN]  Class num
2033  * HI_U32    u32NmsThresh    [IN]  NMS thresh
2034  * HI_U32    u32BboxNum      [IN]  bbox num
2035  * HI_U32    u32MaxRoiNum    [IN]  max roi num
2036  */
SVP_NNIE_Yolov2_NonMaxSuppression(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S * pstBbox,HI_U32 u32BboxNum,HI_U32 u32NmsThresh,HI_U32 u32MaxRoiNum)2037 static HI_S32 SVP_NNIE_Yolov2_NonMaxSuppression(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox, HI_U32 u32BboxNum,
2038     HI_U32 u32NmsThresh, HI_U32 u32MaxRoiNum)
2039 {
2040     HI_U32 i, j;
2041     HI_U32 u32Num = 0;
2042     HI_DOUBLE f64Iou = 0.0;
2043 
2044     for (i = 0; i < u32BboxNum && u32Num < u32MaxRoiNum; i++) {
2045         if (pstBbox[i].u32Mask == 0) {
2046             u32Num++;
2047             for (j = i + 1; j < u32BboxNum; j++) {
2048                 if (pstBbox[j].u32Mask == 0) {
2049                     f64Iou = SVP_NNIE_Yolov2_Iou(&pstBbox[i], &pstBbox[j]);
2050                     if (f64Iou >= (HI_DOUBLE)u32NmsThresh / SAMPLE_SVP_NNIE_QUANT_BASE) {
2051                         pstBbox[j].u32Mask = 1;
2052                     }
2053                 }
2054             }
2055         }
2056     }
2057 
2058     return HI_SUCCESS;
2059 }
2060 
/*
 * Prototype    : SVP_NNIE_GetMaxVal
 * Description  : find the largest value of a float array; on ties the first occurrence wins
 * Input :     HI_FLOAT* pf32Val            [IN]  values to scan; element 0 is always read
 *             HI_U32    u32Num             [IN]  number of values
 *             HI_U32*   pu32MaxValueIndex  [OUT] index of the largest value
 * Return :    the largest value
 */
static HI_FLOAT SVP_NNIE_GetMaxVal(HI_FLOAT *pf32Val, HI_U32 u32Num, HI_U32 *pu32MaxValueIndex)
{
    HI_U32 u32Best = 0;
    HI_U32 u32Pos = 0;

    for (u32Pos = 1; u32Pos < u32Num; u32Pos++) {
        /* strict comparison keeps the earliest index among equal values */
        if (pf32Val[u32Pos] > pf32Val[u32Best]) {
            u32Best = u32Pos;
        }
    }

    *pu32MaxValueIndex = u32Best;
    return pf32Val[u32Best];
}
2077 
2078 /*
2079  * Prototype    : SVP_NNIE_Yolov2_GetResult
2080  * Description  : Yolov2 GetResult function
2081  * Input :     HI_S32    *ps32InputData    [IN]  pointer to the input data memory
2082  * HI_U32    u32GridNumWidth   [IN]  Grid num in width direction
2083  * HI_U32    u32GridNumHeight  [IN]  Grid num in height direction
2084  * HI_U32    u32EachGridBbox   [IN]  Bbox num of each grid
2085  * HI_U32    u32ClassNum       [IN]  class num
2086  * HI_U32    u32SrcWidth       [IN]  input image width
2087  * HI_U32    u32SrcHeight      [IN]  input image height
2088  * HI_U32    u32MaxRoiNum      [IN]  Max output roi num
2089  * HI_U32    u32NmsThresh      [IN]  NMS thresh
2090  * HI_U32*   pu32TmpBuf        [IN]  assist buffer
2091  * HI_S32    *ps32DstScores    [OUT] dst score
2092  * HI_S32    *ps32DstRoi       [OUT] dst roi
2093  * HI_S32    *ps32ClassRoiNum  [OUT] class roi num
2094  */
SVP_NNIE_Yolov2_GetResult(HI_S32 * ps32InputData,HI_U32 u32GridNumWidth,HI_U32 u32GridNumHeight,HI_U32 u32EachGridBbox,HI_U32 u32ClassNum,HI_U32 u32SrcWidth,HI_U32 u32SrcHeight,HI_U32 u32MaxRoiNum,HI_U32 u32NmsThresh,HI_U32 u32ConfThresh,HI_FLOAT af32Bias[],HI_U32 * pu32TmpBuf,HI_S32 * ps32DstScores,HI_S32 * ps32DstRoi,HI_S32 * ps32ClassRoiNum)2095 static HI_S32 SVP_NNIE_Yolov2_GetResult(HI_S32 *ps32InputData, HI_U32 u32GridNumWidth, HI_U32 u32GridNumHeight,
2096     HI_U32 u32EachGridBbox, HI_U32 u32ClassNum, HI_U32 u32SrcWidth, HI_U32 u32SrcHeight, HI_U32 u32MaxRoiNum,
2097     HI_U32 u32NmsThresh, HI_U32 u32ConfThresh, HI_FLOAT af32Bias[], HI_U32 *pu32TmpBuf, HI_S32 *ps32DstScores,
2098     HI_S32 *ps32DstRoi, HI_S32 *ps32ClassRoiNum)
2099 {
2100     HI_U32 u32GridNum = u32GridNumWidth * u32GridNumHeight;
2101     const HI_U32 u32ParaNum = (SAMPLE_SVP_NNIE_COORDI_NUM + 1 + u32ClassNum);
2102     HI_U32 u32TotalBboxNum = u32GridNum * u32EachGridBbox;
2103     HI_U32 u32CStep = u32GridNum;
2104     HI_U32 u32HStep = u32GridNumWidth;
2105     HI_U32 u32BoxsNum = 0;
2106     HI_FLOAT *pf32BoxTmp = NULL;
2107     HI_FLOAT *f32InputData = NULL;
2108     HI_FLOAT f32ObjScore = 0.0;
2109     HI_FLOAT f32MaxScore = 0.0;
2110     HI_S32 s32Score = 0;
2111     HI_U32 u32MaxValueIndex = 0;
2112     HI_U32 h = 0, w = 0, n = 0;
2113     HI_U32 c = 0, k = 0, i = 0;
2114     HI_U32 u32Index = 0;
2115     HI_FLOAT x, y, f32Width, f32Height;
2116     HI_U32 u32AssistBuffSize = u32TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_STACK_S);
2117     HI_U32 u32BoxBuffSize = u32TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S);
2118     HI_U32 u32BoxResultNum = 0;
2119     SAMPLE_SVP_NNIE_STACK_S *pstAssistStack = NULL;
2120     SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBox = NULL;
2121 
2122     /* store float type data */
2123     f32InputData = (HI_FLOAT *)pu32TmpBuf;
2124     /* assist buffer for sort */
2125     pstAssistStack = (SAMPLE_SVP_NNIE_STACK_S *)(f32InputData + u32TotalBboxNum * u32ParaNum);
2126     /* assist box buffer */
2127     pstBox = (SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *)((HI_U8 *)pstAssistStack + u32AssistBuffSize);
2128     /* box tmp buffer */
2129     pf32BoxTmp = (HI_FLOAT *)((HI_U8 *)pstBox + u32BoxBuffSize);
2130 
2131     for (i = 0; i < u32TotalBboxNum * u32ParaNum; i++) {
2132         f32InputData[i] = (HI_FLOAT)(ps32InputData[i]) / SAMPLE_SVP_NNIE_QUANT_BASE;
2133     }
2134 
2135     // permute
2136     for (h = 0; h < u32GridNumHeight; h++) {
2137         for (w = 0; w < u32GridNumWidth; w++) {
2138             for (c = 0; c < u32EachGridBbox * u32ParaNum; c++) {
2139                 pf32BoxTmp[n++] = f32InputData[c * u32CStep + h * u32HStep + w];
2140             }
2141         }
2142     }
2143 
2144     for (n = 0; n < u32GridNum; n++) {
2145         // Grid
2146         w = n % u32GridNumWidth;
2147         h = n / u32GridNumWidth;
2148         for (k = 0; k < u32EachGridBbox; k++) {
2149             u32Index = (n * u32EachGridBbox + k) * u32ParaNum;
2150             x = (HI_FLOAT)((w + SAMPLE_SVP_NNIE_SIGMOID(pf32BoxTmp[u32Index + 0])) / u32GridNumWidth);        // x
2151             y = (HI_FLOAT)((h + SAMPLE_SVP_NNIE_SIGMOID(pf32BoxTmp[u32Index + 1])) / u32GridNumHeight);       // y
2152             f32Width = (HI_FLOAT)((exp(pf32BoxTmp[u32Index + 2]) * af32Bias[2 * k]) / u32GridNumWidth);       // w
2153             f32Height = (HI_FLOAT)((exp(pf32BoxTmp[u32Index + 3]) * af32Bias[2 * k + 1]) / u32GridNumHeight); // h
2154 
2155             f32ObjScore = SAMPLE_SVP_NNIE_SIGMOID(pf32BoxTmp[u32Index + 4]);
2156             SVP_NNIE_SoftMax(&pf32BoxTmp[u32Index + 5], u32ClassNum);
2157 
2158             f32MaxScore = SVP_NNIE_GetMaxVal(&pf32BoxTmp[u32Index + 5], u32ClassNum, &u32MaxValueIndex);
2159 
2160             s32Score = (HI_S32)(f32MaxScore * f32ObjScore * SAMPLE_SVP_NNIE_QUANT_BASE);
2161             if ((HI_U32)s32Score > u32ConfThresh) {
2162                 pstBox[u32BoxsNum].f32Xmin = (HI_FLOAT)(x - f32Width * SAMPLE_SVP_NNIE_HALF);
2163                 pstBox[u32BoxsNum].f32Xmax = (HI_FLOAT)(x + f32Width * SAMPLE_SVP_NNIE_HALF);
2164                 pstBox[u32BoxsNum].f32Ymin = (HI_FLOAT)(y - f32Height * SAMPLE_SVP_NNIE_HALF);
2165                 pstBox[u32BoxsNum].f32Ymax = (HI_FLOAT)(y + f32Height * SAMPLE_SVP_NNIE_HALF);
2166                 pstBox[u32BoxsNum].s32ClsScore = s32Score;
2167                 pstBox[u32BoxsNum].u32ClassIdx = u32MaxValueIndex + 1;
2168                 pstBox[u32BoxsNum].u32Mask = 0;
2169                 u32BoxsNum++;
2170             }
2171         }
2172     }
2173     // quick_sort
2174     if (u32BoxsNum > 1) {
2175         SVP_NNIE_Yolo_NonRecursiveArgQuickSort((HI_S32 *)pstBox, 0, u32BoxsNum - 1,
2176             sizeof(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S) / sizeof(HI_S32), 4, pstAssistStack);
2177     }
2178     // Nms
2179     SVP_NNIE_Yolov2_NonMaxSuppression(pstBox, u32BoxsNum, u32NmsThresh, u32BoxsNum);
2180     // Get the result
2181     (HI_VOID)
2182         memset_s((void *)ps32ClassRoiNum, (u32ClassNum + 1) * sizeof(HI_U32), 0, (u32ClassNum + 1) * sizeof(HI_U32));
2183     for (i = 1; i < u32ClassNum + 1; i++) {
2184         for (n = 0; n < u32BoxsNum && u32BoxResultNum < u32MaxRoiNum; n++) {
2185             if ((pstBox[n].u32Mask == 0) && (i == pstBox[n].u32ClassIdx)) {
2186                 *(ps32DstRoi++) = (HI_S32)SAMPLE_SVP_NNIE_MAX(pstBox[n].f32Xmin * u32SrcWidth, 0);
2187                 *(ps32DstRoi++) = (HI_S32)SAMPLE_SVP_NNIE_MAX(pstBox[n].f32Ymin * u32SrcHeight, 0);
2188                 *(ps32DstRoi++) = (HI_S32)SAMPLE_SVP_NNIE_MIN(pstBox[n].f32Xmax * u32SrcWidth, u32SrcWidth);
2189                 *(ps32DstRoi++) = (HI_S32)SAMPLE_SVP_NNIE_MIN(pstBox[n].f32Ymax * u32SrcHeight, u32SrcHeight);
2190                 *(ps32DstScores++) = pstBox[n].s32ClsScore;
2191                 *(ps32ClassRoiNum + pstBox[n].u32ClassIdx) = *(ps32ClassRoiNum + pstBox[n].u32ClassIdx) + 1;
2192                 u32BoxResultNum++;
2193             }
2194         }
2195     }
2196     return HI_SUCCESS;
2197 }
2198 
2199 /*
2200  * Prototype    : SVP_NNIE_Yolov3_GetResult
2201  * Description  : Yolov3 GetResult function
2202  * Input :      HI_S32    **pps32InputData     [IN]  pointer to the input data
2203  * HI_U32    au32GridNumWidth[]   [IN]  Grid num in width direction
2204  * HI_U32    au32GridNumHeight[]  [IN]  Grid num in height direction
2205  * HI_U32    au32Stride[]         [IN]  stride of input data
2206  * HI_U32    u32EachGridBbox      [IN]  Bbox num of each grid
2207  * HI_U32    u32ClassNum          [IN]  class num
2208  * HI_U32    u32SrcWidth          [IN]  input image width
2209  * HI_U32    u32SrcHeight         [IN]  input image height
2210  * HI_U32    u32MaxRoiNum         [IN]  Max output roi num
2211  * HI_U32    u32NmsThresh         [IN]  NMS thresh
2212  * HI_U32    u32ConfThresh        [IN]  conf thresh
2213  * HI_U32    af32Bias[][]         [IN]  bias
2214  * HI_U32*   pu32TmpBuf           [IN]  assist buffer
2215  * HI_S32    *ps32DstScores       [OUT] dst score
2216  * HI_S32    *ps32DstRoi          [OUT] dst roi
2217  * HI_S32    *ps32ClassRoiNum     [OUT] class roi num
2218  */
SVP_NNIE_Yolov3_GetResult(HI_U64 au64InputBlobAddr[],HI_U32 au32GridNumWidth[],HI_U32 au32GridNumHeight[],HI_U32 au32Stride[],HI_U32 u32EachGridBbox,HI_U32 u32ClassNum,HI_U32 u32SrcWidth,HI_U32 u32SrcHeight,HI_U32 u32MaxRoiNum,HI_U32 u32NmsThresh,HI_U32 u32ConfThresh,HI_FLOAT af32Bias[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM][SAMPLE_SVP_NNIE_YOLOV3_EACH_GRID_BIAS_NUM],HI_S32 * ps32TmpBuf,HI_S32 * ps32DstScore,HI_S32 * ps32DstRoi,HI_S32 * ps32ClassRoiNum)2219 static HI_S32 SVP_NNIE_Yolov3_GetResult(HI_U64 au64InputBlobAddr[], HI_U32 au32GridNumWidth[],
2220     HI_U32 au32GridNumHeight[], HI_U32 au32Stride[], HI_U32 u32EachGridBbox, HI_U32 u32ClassNum, HI_U32 u32SrcWidth,
2221     HI_U32 u32SrcHeight, HI_U32 u32MaxRoiNum, HI_U32 u32NmsThresh, HI_U32 u32ConfThresh,
2222     HI_FLOAT af32Bias[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM][SAMPLE_SVP_NNIE_YOLOV3_EACH_GRID_BIAS_NUM],
2223     HI_S32 *ps32TmpBuf, HI_S32 *ps32DstScore, HI_S32 *ps32DstRoi, HI_S32 *ps32ClassRoiNum)
2224 {
2225     HI_S32 *ps32InputBlob = NULL;
2226     HI_FLOAT *pf32Permute = NULL;
2227     SAMPLE_SVP_NNIE_YOLOV3_BBOX_S *pstBbox = NULL;
2228     HI_S32 *ps32AssistBuf = NULL;
2229     HI_U32 u32TotalBboxNum = 0;
2230     HI_U32 u32ChnOffset = 0;
2231     HI_U32 u32HeightOffset = 0;
2232     HI_U32 u32BboxNum = 0;
2233     HI_U32 u32GridXIdx;
2234     HI_U32 u32GridYIdx;
2235     HI_U32 u32Offset;
2236     HI_FLOAT f32StartX;
2237     HI_FLOAT f32StartY;
2238     HI_FLOAT f32Width;
2239     HI_FLOAT f32Height;
2240     HI_FLOAT f32ObjScore;
2241     HI_U32 u32MaxValueIndex = 0;
2242     HI_FLOAT f32MaxScore;
2243     HI_S32 s32ClassScore;
2244     HI_U32 u32ClassRoiNum;
2245     HI_U32 i = 0, j = 0, k = 0, c = 0, h = 0, w = 0;
2246     HI_U32 u32BlobSize = 0;
2247     HI_U32 u32MaxBlobSize = 0;
2248 
2249     for (i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++) {
2250         u32BlobSize = au32GridNumWidth[i] * au32GridNumHeight[i] * sizeof(HI_U32) *
2251             SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM * u32EachGridBbox;
2252         if (u32MaxBlobSize < u32BlobSize) {
2253             u32MaxBlobSize = u32BlobSize;
2254         }
2255     }
2256 
2257     for (i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++) {
2258         u32TotalBboxNum += au32GridNumWidth[i] * au32GridNumHeight[i] * u32EachGridBbox;
2259     }
2260 
2261     // get each tmpbuf addr
2262     pf32Permute = (HI_FLOAT *)ps32TmpBuf;
2263     pstBbox = (SAMPLE_SVP_NNIE_YOLOV3_BBOX_S *)(pf32Permute + u32MaxBlobSize / sizeof(HI_S32));
2264     ps32AssistBuf = (HI_S32 *)(pstBbox + u32TotalBboxNum);
2265 
2266     for (i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++) {
2267         // permute
2268         u32Offset = 0;
2269         ps32InputBlob = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, au64InputBlobAddr[i]);
2270         u32ChnOffset = au32GridNumHeight[i] * au32Stride[i] / sizeof(HI_S32);
2271         u32HeightOffset = au32Stride[i] / sizeof(HI_S32);
2272         for (h = 0; h < au32GridNumHeight[i]; h++) {
2273             for (w = 0; w < au32GridNumWidth[i]; w++) {
2274                 for (c = 0; c < SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM * u32EachGridBbox; c++) {
2275                     pf32Permute[u32Offset++] = (HI_FLOAT)(ps32InputBlob[c * u32ChnOffset + h * u32HeightOffset + w]) /
2276                         SAMPLE_SVP_NNIE_QUANT_BASE;
2277                 }
2278             }
2279         }
2280 
2281         // decode bbox and calculate score
2282         for (j = 0; j < au32GridNumWidth[i] * au32GridNumHeight[i]; j++) {
2283             u32GridXIdx = j % au32GridNumWidth[i];
2284             u32GridYIdx = j / au32GridNumWidth[i];
2285             for (k = 0; k < u32EachGridBbox; k++) {
2286                 u32MaxValueIndex = 0;
2287                 u32Offset = (j * u32EachGridBbox + k) * SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM;
2288                 // decode bbox
2289                 f32StartX =
2290                     ((HI_FLOAT)u32GridXIdx + SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset + 0])) / au32GridNumWidth[i];
2291                 f32StartY = ((HI_FLOAT)u32GridYIdx + SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset + 1])) /
2292                     au32GridNumHeight[i];
2293                 if (u32SrcWidth == 0 || u32SrcHeight == 0) {
2294                     printf("Divisor u32SrcWidth or u32SrcHeight cannot be 0!\n");
2295                     return HI_FAILURE;
2296                 }
2297                 f32Width = (HI_FLOAT)(exp(pf32Permute[u32Offset + SAMPLE_SVP_NNIE_X_MAX_OFFSET]) *
2298                     af32Bias[i][2 * k]) / u32SrcWidth;
2299                 f32Height = (HI_FLOAT)(exp(pf32Permute[u32Offset + SAMPLE_SVP_NNIE_Y_MAX_OFFSET]) *
2300                     af32Bias[i][2 * k + 1]) / u32SrcHeight;
2301 
2302                 // calculate score
2303                 (void)SVP_NNIE_Sigmoid(&pf32Permute[u32Offset + SAMPLE_SVP_NNIE_SCORE_OFFSET], (u32ClassNum + 1));
2304                 f32ObjScore = pf32Permute[u32Offset + SAMPLE_SVP_NNIE_SCORE_OFFSET];
2305                 f32MaxScore = SVP_NNIE_GetMaxVal(&pf32Permute[u32Offset + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET],
2306                     u32ClassNum, &u32MaxValueIndex);
2307                 s32ClassScore = (HI_S32)(f32MaxScore * f32ObjScore * SAMPLE_SVP_NNIE_QUANT_BASE);
2308 
2309                 // filter low score roi
2310                 if ((HI_U32)s32ClassScore > u32ConfThresh) {
2311                     pstBbox[u32BboxNum].f32Xmin = (HI_FLOAT)(f32StartX - f32Width * 0.5f);
2312                     pstBbox[u32BboxNum].f32Ymin = (HI_FLOAT)(f32StartY - f32Height * 0.5f);
2313                     pstBbox[u32BboxNum].f32Xmax = (HI_FLOAT)(f32StartX + f32Width * 0.5f);
2314                     pstBbox[u32BboxNum].f32Ymax = (HI_FLOAT)(f32StartY + f32Height * 0.5f);
2315                     pstBbox[u32BboxNum].s32ClsScore = s32ClassScore;
2316                     pstBbox[u32BboxNum].u32Mask = 0;
2317                     pstBbox[u32BboxNum].u32ClassIdx = (HI_S32)(u32MaxValueIndex + 1);
2318                     u32BboxNum++;
2319                 }
2320             }
2321         }
2322     }
2323 
2324     // quick sort
2325     if (u32BboxNum >= 1) {
2326         (void)SVP_NNIE_Yolo_NonRecursiveArgQuickSort((HI_S32 *)pstBbox, 0, u32BboxNum - 1,
2327             sizeof(SAMPLE_SVP_NNIE_YOLOV3_BBOX_S) / sizeof(HI_U32), 4, (SAMPLE_SVP_NNIE_STACK_S *)ps32AssistBuf);
2328     }
2329     // Yolov3 and Yolov2 have the same Nms operation
2330     (void)SVP_NNIE_Yolov2_NonMaxSuppression(pstBbox, u32BboxNum, u32NmsThresh, u32BboxNum);
2331 
2332     // Get result
2333     for (i = 1; i < u32ClassNum + 1; i++) {
2334         u32ClassRoiNum = 0;
2335         for (j = 0; j < u32BboxNum; j++) {
2336             if ((pstBbox[j].u32Mask == 0) && (i == pstBbox[j].u32ClassIdx) && (u32ClassRoiNum < u32MaxRoiNum)) {
2337                 *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Xmin * u32SrcWidth), 0);
2338                 *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Ymin * u32SrcHeight), 0);
2339                 *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Xmax * u32SrcWidth), (HI_S32)u32SrcWidth);
2340                 *(ps32DstRoi++) =
2341                     SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Ymax * u32SrcHeight), (HI_S32)u32SrcHeight);
2342                 *(ps32DstScore++) = pstBbox[j].s32ClsScore;
2343                 u32ClassRoiNum++;
2344             }
2345         }
2346         *(ps32ClassRoiNum + i) = u32ClassRoiNum;
2347     }
2348 
2349     return HI_SUCCESS;
2350 }
2351 
SAMPLE_SVP_NNIE_Cnn_GetTopN(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_CNN_SOFTWARE_PARAM_S * pstSoftwareParam)2352 HI_S32 SAMPLE_SVP_NNIE_Cnn_GetTopN(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
2353     SAMPLE_SVP_NNIE_CNN_SOFTWARE_PARAM_S *pstSoftwareParam)
2354 {
2355     HI_S32 s32Ret = HI_SUCCESS;
2356     CHECK_NULL_PTR(pstNnieParam);
2357     CHECK_NULL_PTR(pstSoftwareParam);
2358     s32Ret = SVP_NNIE_Cnn_GetTopN(
2359         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[0].astDst[0].u64VirAddr),
2360         pstNnieParam->astSegData[0].astDst[0].u32Stride, pstNnieParam->astSegData[0].astDst[0].unShape.stWhc.u32Width,
2361         pstNnieParam->astSegData[0].astDst[0].u32Num, pstSoftwareParam->u32TopN,
2362         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stAssistBuf.u64VirAddr),
2363         pstSoftwareParam->stGetTopN.u32Stride,
2364         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stGetTopN.u64VirAddr));
2365     SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
2366         "Error,SVP_NNIE_Cnn_GetTopN failed!\n");
2367     return s32Ret;
2368 }
2369 
2370 /*
2371  * Prototype :   SAMPLE_SVP_NNIE_RpnTmpBufSize
2372  * Description : this function is used to get RPN func's assist buffer size
2373  * Input :     HI_U32 u32NumRatioAnchors     [IN]  ratio anchor num
2374  * HI_U32 u32NumScaleAnchors     [IN]  scale anchor num
2375  * HI_U32 u32ConvHeight          [IN]  convolution height
2376  * HI_U32 u32ConvWidth           [IN]  convolution width
2377  */
SAMPLE_SVP_NNIE_RpnTmpBufSize(HI_U32 u32NumRatioAnchors,HI_U32 u32NumScaleAnchors,HI_U32 u32ConvHeight,HI_U32 u32ConvWidth)2378 HI_U32 SAMPLE_SVP_NNIE_RpnTmpBufSize(HI_U32 u32NumRatioAnchors, HI_U32 u32NumScaleAnchors, HI_U32 u32ConvHeight,
2379     HI_U32 u32ConvWidth)
2380 {
2381     HI_U64 u64AnchorsNum, u64BboxDeltaSize, u64AnchorsSize, u64ProposalSize, u64RatioAnchorsSize, u64ScaleAnchorsSize;
2382     HI_U64 u64ScoreSize, u64StackSize;
2383     HI_U64 u64TotalSize = 0;
2384 
2385     SAMPLE_SVP_CHECK_EXPR_RET((HI_U64)u32NumRatioAnchors * u32NumScaleAnchors > SAMPLE_SVP_NNIE_MAX_MEM, 0,
2386         SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,u32NumRatioAnchors * u32NumScaleAnchors should be less than %u!\n",
2387         SAMPLE_SVP_NNIE_MAX_MEM);
2388     SAMPLE_SVP_CHECK_EXPR_RET((HI_U64)u32ConvHeight * u32ConvWidth > SAMPLE_SVP_NNIE_MAX_MEM, 0,
2389         SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,u32ConvHeight*u32ConvWidth should be less than %u!\n",
2390         SAMPLE_SVP_NNIE_MAX_MEM);
2391     u64AnchorsNum = (HI_U64)u32NumRatioAnchors * u32NumScaleAnchors * u32ConvHeight * u32ConvWidth;
2392     SAMPLE_SVP_CHECK_EXPR_RET(u64AnchorsNum > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2393         "Error,u64AnchorsNum should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2394     u64AnchorsSize = sizeof(HI_U32) * SAMPLE_SVP_NNIE_COORDI_NUM * u64AnchorsNum;
2395     SAMPLE_SVP_CHECK_EXPR_RET(u64AnchorsSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2396         "Error,u64AnchorsSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2397 
2398     u64BboxDeltaSize = u64AnchorsSize;
2399     u64ProposalSize = sizeof(HI_U32) * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * u64AnchorsNum;
2400     SAMPLE_SVP_CHECK_EXPR_RET(u64ProposalSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2401         "Error,u64ProposalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2402 
2403     u64RatioAnchorsSize = sizeof(HI_FLOAT) * u32NumRatioAnchors * SAMPLE_SVP_NNIE_COORDI_NUM;
2404     SAMPLE_SVP_CHECK_EXPR_RET(u64RatioAnchorsSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2405         "Error,u64RatioAnchorsSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2406     u64ScaleAnchorsSize = sizeof(HI_FLOAT) * u32NumRatioAnchors * u32NumScaleAnchors * SAMPLE_SVP_NNIE_COORDI_NUM;
2407     SAMPLE_SVP_CHECK_EXPR_RET(u64ScaleAnchorsSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2408         "Error,u64ScaleAnchorsSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2409     u64ScoreSize = sizeof(HI_FLOAT) * u64AnchorsNum * 2;
2410     SAMPLE_SVP_CHECK_EXPR_RET(u64ScoreSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2411         "Error,u64ScoreSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2412     u64StackSize = sizeof(SAMPLE_SVP_NNIE_STACK_S) * u64AnchorsNum;
2413     SAMPLE_SVP_CHECK_EXPR_RET(u64StackSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2414         "Error,u64StackSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2415     u64TotalSize = u64AnchorsSize + u64BboxDeltaSize + u64ProposalSize + u64RatioAnchorsSize + u64ScaleAnchorsSize +
2416         u64ScoreSize + u64StackSize;
2417     SAMPLE_SVP_CHECK_EXPR_RET(u64TotalSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2418         "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2419     return (HI_U32)u64TotalSize;
2420 }
2421 
SAMPLE_SVP_NNIE_FasterRcnn_Rpn(SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S * pstSoftwareParam)2422 HI_S32 SAMPLE_SVP_NNIE_FasterRcnn_Rpn(SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S *pstSoftwareParam)
2423 {
2424     HI_S32 s32Ret = HI_SUCCESS;
2425     CHECK_NULL_PTR(pstSoftwareParam);
2426     s32Ret = SVP_NNIE_Rpn(pstSoftwareParam->aps32Conv, pstSoftwareParam->u32NumRatioAnchors,
2427         pstSoftwareParam->u32NumScaleAnchors, pstSoftwareParam->au32Scales, pstSoftwareParam->au32Ratios,
2428         pstSoftwareParam->u32OriImHeight, pstSoftwareParam->u32OriImWidth, pstSoftwareParam->au32ConvHeight,
2429         pstSoftwareParam->au32ConvWidth, pstSoftwareParam->au32ConvChannel, pstSoftwareParam->u32ConvStride,
2430         pstSoftwareParam->u32MaxRoiNum, pstSoftwareParam->u32MinSize, pstSoftwareParam->u32SpatialScale,
2431         pstSoftwareParam->u32NmsThresh, pstSoftwareParam->u32FilterThresh, pstSoftwareParam->u32NumBeforeNms,
2432         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32, pstSoftwareParam->stRpnTmpBuf.u64VirAddr),
2433         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stRpnBbox.u64VirAddr),
2434         pstSoftwareParam->stRpnBbox.u32Stride, &pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height);
2435     SAMPLE_COMM_SVP_FlushCache(pstSoftwareParam->stRpnBbox.u64PhyAddr,
2436         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID, pstSoftwareParam->stRpnBbox.u64VirAddr),
2437         pstSoftwareParam->stRpnBbox.u32Num * pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Chn *
2438         pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height * pstSoftwareParam->stRpnBbox.u32Stride);
2439     SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,SVP_NNIE_Rpn failed!\n");
2440     return s32Ret;
2441 }
2442 
SAMPLE_SVP_NNIE_Pvanet_Rpn(SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S * pstSoftwareParam)2443 HI_S32 SAMPLE_SVP_NNIE_Pvanet_Rpn(SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S *pstSoftwareParam)
2444 {
2445     HI_S32 s32Ret = HI_SUCCESS;
2446 
2447     CHECK_NULL_PTR(pstSoftwareParam);
2448     s32Ret = SVP_NNIE_Rpn(pstSoftwareParam->aps32Conv, pstSoftwareParam->u32NumRatioAnchors,
2449         pstSoftwareParam->u32NumScaleAnchors, pstSoftwareParam->au32Scales, pstSoftwareParam->au32Ratios,
2450         pstSoftwareParam->u32OriImHeight, pstSoftwareParam->u32OriImWidth, pstSoftwareParam->au32ConvHeight,
2451         pstSoftwareParam->au32ConvWidth, pstSoftwareParam->au32ConvChannel, pstSoftwareParam->u32ConvStride,
2452         pstSoftwareParam->u32MaxRoiNum, pstSoftwareParam->u32MinSize, pstSoftwareParam->u32SpatialScale,
2453         pstSoftwareParam->u32NmsThresh, pstSoftwareParam->u32FilterThresh, pstSoftwareParam->u32NumBeforeNms,
2454         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32, pstSoftwareParam->stRpnTmpBuf.u64VirAddr),
2455         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stRpnBbox.u64VirAddr),
2456         pstSoftwareParam->stRpnBbox.u32Stride, &pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height);
2457     SAMPLE_COMM_SVP_FlushCache(pstSoftwareParam->stRpnBbox.u64PhyAddr,
2458         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID, pstSoftwareParam->stRpnBbox.u64VirAddr),
2459         pstSoftwareParam->stRpnBbox.u32Num * pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Chn *
2460         pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height * pstSoftwareParam->stRpnBbox.u32Stride);
2461     SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,SVP_NNIE_Rpn failed!\n");
2462     return s32Ret;
2463 }
2464 
/*
 * Prototype :   SAMPLE_SVP_NNIE_FasterRcnn_GetResultTmpBufSize
 * Description : add up the byte sizes of the score, proposal and stack areas for the given
 *               roi and class counts
 * Input :     HI_U32 u32MaxRoiNum     [IN]  max roi num
 *             HI_U32 u32ClassNum      [IN]  class num
 * Return :    total size in bytes; every limit check passes 0 as its return value when a
 *             partial or total size exceeds SAMPLE_SVP_NNIE_MAX_MEM
 */
HI_U32 SAMPLE_SVP_NNIE_FasterRcnn_GetResultTmpBufSize(HI_U32 u32MaxRoiNum, HI_U32 u32ClassNum)
{
    /* All arithmetic is done in 64 bits so the limit checks see the unwrapped values. */
    const HI_U64 u64RoiNum = (HI_U64)u32MaxRoiNum;
    const HI_U64 u64ScoreElems = u64RoiNum * u32ClassNum;
    HI_U64 u64ScoreBytes;
    HI_U64 u64ProposalBytes;
    HI_U64 u64StackBytes;
    HI_U64 u64SumBytes;

    SAMPLE_SVP_CHECK_EXPR_RET(u64ScoreElems > SAMPLE_SVP_NNIE_MAX_MEM, 0,
        SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,u32MaxRoiNum * u32ClassNum should be less than %u!\n",
        SAMPLE_SVP_NNIE_MAX_MEM);

    /* one HI_FLOAT per (roi, class) pair */
    u64ScoreBytes = sizeof(HI_FLOAT) * u64ScoreElems;
    SAMPLE_SVP_CHECK_EXPR_RET(u64ScoreBytes > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64ScoreSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    /* SAMPLE_SVP_NNIE_PROPOSAL_WIDTH HI_U32 words per roi */
    u64ProposalBytes = sizeof(HI_U32) * u64RoiNum * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH;
    SAMPLE_SVP_CHECK_EXPR_RET(u64ProposalBytes > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64ProposalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    /* one SAMPLE_SVP_NNIE_STACK_S per roi */
    u64StackBytes = sizeof(SAMPLE_SVP_NNIE_STACK_S) * u64RoiNum;
    SAMPLE_SVP_CHECK_EXPR_RET(u64StackBytes > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64StackSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    u64SumBytes = u64ScoreBytes + u64ProposalBytes + u64StackBytes;
    SAMPLE_SVP_CHECK_EXPR_RET(u64SumBytes > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
    return (HI_U32)u64SumBytes;
}
2489 
/*
 * Prototype :   SAMPLE_SVP_NNIE_Pvanet_GetResultTmpBufSize
 * Description : add up the byte sizes of the score, proposal and stack areas for the given
 *               roi and class counts
 * Input :     HI_U32 u32MaxRoiNum     [IN]  max roi num
 *             HI_U32 u32ClassNum      [IN]  class num
 * Return :    total size in bytes; every limit check passes 0 as its return value when a
 *             partial or total size exceeds SAMPLE_SVP_NNIE_MAX_MEM
 */
HI_U32 SAMPLE_SVP_NNIE_Pvanet_GetResultTmpBufSize(HI_U32 u32MaxRoiNum, HI_U32 u32ClassNum)
{
    /* All arithmetic is done in 64 bits so the limit checks see the unwrapped values. */
    const HI_U64 u64RoiNum = (HI_U64)u32MaxRoiNum;
    const HI_U64 u64ScoreElems = u64RoiNum * u32ClassNum;
    HI_U64 u64ScoreBytes;
    HI_U64 u64ProposalBytes;
    HI_U64 u64StackBytes;
    HI_U64 u64SumBytes;

    SAMPLE_SVP_CHECK_EXPR_RET(u64ScoreElems > SAMPLE_SVP_NNIE_MAX_MEM, 0,
        SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,u32MaxRoiNum * u32ClassNum should be less than %u!\n",
        SAMPLE_SVP_NNIE_MAX_MEM);

    /* one HI_FLOAT per (roi, class) pair */
    u64ScoreBytes = sizeof(HI_FLOAT) * u64ScoreElems;
    SAMPLE_SVP_CHECK_EXPR_RET(u64ScoreBytes > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64ScoreSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    /* SAMPLE_SVP_NNIE_PROPOSAL_WIDTH HI_U32 words per roi */
    u64ProposalBytes = sizeof(HI_U32) * u64RoiNum * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH;
    SAMPLE_SVP_CHECK_EXPR_RET(u64ProposalBytes > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64ProposalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    /* one SAMPLE_SVP_NNIE_STACK_S per roi */
    u64StackBytes = sizeof(SAMPLE_SVP_NNIE_STACK_S) * u64RoiNum;
    SAMPLE_SVP_CHECK_EXPR_RET(u64StackBytes > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64StackSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    u64SumBytes = u64ScoreBytes + u64ProposalBytes + u64StackBytes;
    SAMPLE_SVP_CHECK_EXPR_RET(u64SumBytes > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
    return (HI_U32)u64SumBytes;
}
2514 
SAMPLE_SVP_NNIE_FasterRcnn_GetResult(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S * pstSoftwareParam)2515 HI_S32 SAMPLE_SVP_NNIE_FasterRcnn_GetResult(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
2516     SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S *pstSoftwareParam)
2517 {
2518     HI_S32 s32Ret = HI_SUCCESS;
2519     HI_U32 i = 0;
2520     HI_U32 u32Offset;
2521     HI_S32 *ps32Proposal = HI_NULL;
2522 
2523     CHECK_NULL_PTR(pstNnieParam);
2524     CHECK_NULL_PTR(pstSoftwareParam);
2525     SAMPLE_SVP_CHECK_EXPR_RET(pstSoftwareParam->stRpnBbox.u64VirAddr == 0, HI_INVALID_VALUE, SAMPLE_SVP_ERR_LEVEL_ERROR,
2526         "Error,pstSoftwareParam->stRpnBbox.u64VirAddr can't be 0!\n");
2527     u32Offset = pstSoftwareParam->stRpnBbox.u32Stride / sizeof(HI_S32);
2528     ps32Proposal = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stRpnBbox.u64VirAddr);
2529     for (i = 0; i < pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height; i++) {
2530         *(ps32Proposal + u32Offset * i) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2531         *(ps32Proposal + u32Offset * i + 1) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2532         *(ps32Proposal + u32Offset * i + 2) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2533         *(ps32Proposal + u32Offset * i + 3) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2534     }
2535     s32Ret = SVP_NNIE_FasterRcnn_GetResult(
2536         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[1].astDst[0].u64VirAddr),
2537         pstNnieParam->astSegData[1].astDst[0].u32Stride,
2538         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[1].astDst[1].u64VirAddr),
2539         pstNnieParam->astSegData[1].astDst[1].u32Stride,
2540         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stRpnBbox.u64VirAddr),
2541         pstSoftwareParam->stRpnBbox.u32Stride, pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height,
2542         pstSoftwareParam->au32ConfThresh, pstSoftwareParam->u32ValidNmsThresh, pstSoftwareParam->u32MaxRoiNum,
2543         pstSoftwareParam->u32ClassNum, pstSoftwareParam->u32OriImWidth, pstSoftwareParam->u32OriImHeight,
2544         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32, pstSoftwareParam->stGetResultTmpBuf.u64VirAddr),
2545         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstScore.u64VirAddr),
2546         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstRoi.u64VirAddr),
2547         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stClassRoiNum.u64VirAddr));
2548 
2549     return s32Ret;
2550 }
2551 
SAMPLE_SVP_NNIE_Pvanet_GetResult(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S * pstSoftwareParam)2552 HI_S32 SAMPLE_SVP_NNIE_Pvanet_GetResult(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
2553     SAMPLE_SVP_NNIE_FASTERRCNN_SOFTWARE_PARAM_S *pstSoftwareParam)
2554 {
2555     HI_S32 s32Ret = HI_SUCCESS;
2556     HI_U32 i;
2557     HI_U32 u32Offset;
2558     HI_S32 *ps32Proposal = HI_NULL;
2559 
2560     CHECK_NULL_PTR(pstNnieParam);
2561     CHECK_NULL_PTR(pstSoftwareParam);
2562     SAMPLE_SVP_CHECK_EXPR_RET(pstSoftwareParam->stRpnBbox.u64VirAddr == 0, HI_INVALID_VALUE, SAMPLE_SVP_ERR_LEVEL_ERROR,
2563         "Error,pstSoftwareParam->stRpnBbox.u64VirAddr can't be 0!\n");
2564     u32Offset = pstSoftwareParam->stRpnBbox.u32Stride / sizeof(HI_S32);
2565     ps32Proposal = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stRpnBbox.u64VirAddr);
2566     for (i = 0; i < pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height; i++) {
2567         *(ps32Proposal + u32Offset * i) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2568         *(ps32Proposal + u32Offset * i + 1) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2569         *(ps32Proposal + u32Offset * i + 2) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2570         *(ps32Proposal + u32Offset * i + 3) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2571     }
2572     s32Ret = SVP_NNIE_Pvanet_GetResult(
2573         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[1].astDst[0].u64VirAddr),
2574         pstNnieParam->astSegData[1].astDst[0].u32Stride,
2575         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[1].astDst[1].u64VirAddr),
2576         pstNnieParam->astSegData[1].astDst[1].u32Stride,
2577         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stRpnBbox.u64VirAddr),
2578         pstSoftwareParam->stRpnBbox.u32Stride, pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height,
2579         pstSoftwareParam->au32ConfThresh, pstSoftwareParam->u32ValidNmsThresh, pstSoftwareParam->u32MaxRoiNum,
2580         pstSoftwareParam->u32ClassNum, pstSoftwareParam->u32OriImWidth, pstSoftwareParam->u32OriImHeight,
2581         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32, pstSoftwareParam->stGetResultTmpBuf.u64VirAddr),
2582         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstScore.u64VirAddr),
2583         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstRoi.u64VirAddr),
2584         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stClassRoiNum.u64VirAddr));
2585 
2586     return s32Ret;
2587 }
2588 
/*
 * Prototype :   SAMPLE_SVP_NNIE_Rfcn_GetResultTmpBuf
 * Description : add up the byte sizes of the score, proposal, bbox and stack areas for the
 *               given roi and class counts
 * Input :     HI_U32 u32MaxRoiNum     [IN]  max roi num
 *             HI_U32 u32ClassNum      [IN]  class num
 * Return :    total size in bytes; every limit check passes 0 as its return value when a
 *             partial or total size exceeds SAMPLE_SVP_NNIE_MAX_MEM
 */
HI_U32 SAMPLE_SVP_NNIE_Rfcn_GetResultTmpBuf(HI_U32 u32MaxRoiNum, HI_U32 u32ClassNum)
{
    /* All arithmetic is done in 64 bits so the limit checks see the unwrapped values. */
    const HI_U64 u64RoiNum = (HI_U64)u32MaxRoiNum;
    const HI_U64 u64ScoreElems = u64RoiNum * u32ClassNum;
    HI_U64 u64ScoreBytes;
    HI_U64 u64ProposalBytes;
    HI_U64 u64BboxBytes;
    HI_U64 u64StackBytes;
    HI_U64 u64SumBytes;

    SAMPLE_SVP_CHECK_EXPR_RET(u64ScoreElems > SAMPLE_SVP_NNIE_MAX_MEM, 0,
        SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,u32MaxRoiNum * u32ClassNum should be less than %u!\n",
        SAMPLE_SVP_NNIE_MAX_MEM);

    /* one HI_FLOAT per (roi, class) pair */
    u64ScoreBytes = sizeof(HI_FLOAT) * u64ScoreElems;
    SAMPLE_SVP_CHECK_EXPR_RET(u64ScoreBytes > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64ScoreSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    /* SAMPLE_SVP_NNIE_PROPOSAL_WIDTH HI_U32 words per roi */
    u64ProposalBytes = sizeof(HI_U32) * u64RoiNum * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH;
    SAMPLE_SVP_CHECK_EXPR_RET(u64ProposalBytes > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64ProposalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    /* SAMPLE_SVP_NNIE_COORDI_NUM HI_U32 words per roi */
    u64BboxBytes = sizeof(HI_U32) * u64RoiNum * SAMPLE_SVP_NNIE_COORDI_NUM;
    SAMPLE_SVP_CHECK_EXPR_RET(u64BboxBytes > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64BboxSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    /* one SAMPLE_SVP_NNIE_STACK_S per roi */
    u64StackBytes = sizeof(SAMPLE_SVP_NNIE_STACK_S) * u64RoiNum;
    SAMPLE_SVP_CHECK_EXPR_RET(u64StackBytes > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64StackSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    u64SumBytes = u64ScoreBytes + u64ProposalBytes + u64BboxBytes + u64StackBytes;
    SAMPLE_SVP_CHECK_EXPR_RET(u64SumBytes > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
    return (HI_U32)u64SumBytes;
}
2617 
SAMPLE_SVP_NNIE_Rfcn_Rpn(SAMPLE_SVP_NNIE_RFCN_SOFTWARE_PARAM_S * pstSoftwareParam)2618 HI_S32 SAMPLE_SVP_NNIE_Rfcn_Rpn(SAMPLE_SVP_NNIE_RFCN_SOFTWARE_PARAM_S *pstSoftwareParam)
2619 {
2620     HI_S32 s32Ret = HI_SUCCESS;
2621     CHECK_NULL_PTR(pstSoftwareParam);
2622     s32Ret = SVP_NNIE_Rpn(pstSoftwareParam->aps32Conv, pstSoftwareParam->u32NumRatioAnchors,
2623         pstSoftwareParam->u32NumScaleAnchors, pstSoftwareParam->au32Scales, pstSoftwareParam->au32Ratios,
2624         pstSoftwareParam->u32OriImHeight, pstSoftwareParam->u32OriImWidth, pstSoftwareParam->au32ConvHeight,
2625         pstSoftwareParam->au32ConvWidth, pstSoftwareParam->au32ConvChannel, pstSoftwareParam->u32ConvStride,
2626         pstSoftwareParam->u32MaxRoiNum, pstSoftwareParam->u32MinSize, pstSoftwareParam->u32SpatialScale,
2627         pstSoftwareParam->u32NmsThresh, pstSoftwareParam->u32FilterThresh, pstSoftwareParam->u32NumBeforeNms,
2628         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32, pstSoftwareParam->stRpnTmpBuf.u64VirAddr),
2629         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stRpnBbox.u64VirAddr),
2630         pstSoftwareParam->stRpnBbox.u32Stride, &pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height);
2631     SAMPLE_COMM_SVP_FlushCache(pstSoftwareParam->stRpnBbox.u64PhyAddr,
2632         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID, pstSoftwareParam->stRpnBbox.u64VirAddr),
2633         pstSoftwareParam->stRpnBbox.u32Num * pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Chn *
2634         pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height * pstSoftwareParam->stRpnBbox.u32Stride);
2635     SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,SVP_NNIE_Rpn failed!\n");
2636     return s32Ret;
2637 }
2638 
SAMPLE_SVP_NNIE_Rfcn_GetResult(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_RFCN_SOFTWARE_PARAM_S * pstSoftwareParam)2639 HI_S32 SAMPLE_SVP_NNIE_Rfcn_GetResult(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
2640     SAMPLE_SVP_NNIE_RFCN_SOFTWARE_PARAM_S *pstSoftwareParam)
2641 {
2642     HI_S32 s32Ret = HI_SUCCESS;
2643     HI_U32 i = 0;
2644     HI_U32 u32Offset;
2645     HI_S32 *ps32Proposal = HI_NULL;
2646 
2647     CHECK_NULL_PTR(pstNnieParam);
2648     CHECK_NULL_PTR(pstSoftwareParam);
2649     u32Offset = pstSoftwareParam->stRpnBbox.u32Stride / sizeof(HI_S32);
2650     ps32Proposal = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stRpnBbox.u64VirAddr);
2651     SAMPLE_SVP_CHECK_EXPR_RET(pstSoftwareParam->stRpnBbox.u64VirAddr == 0, HI_INVALID_VALUE, SAMPLE_SVP_ERR_LEVEL_ERROR,
2652         "Error,pstSoftwareParam->stRpnBbox.u64VirAddr can't be 0!\n");
2653     for (i = 0; i < pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height; i++) {
2654         *(ps32Proposal + u32Offset * i) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2655         *(ps32Proposal + u32Offset * i + 1) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2656         *(ps32Proposal + u32Offset * i + 2) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2657         *(ps32Proposal + u32Offset * i + 3) /= SAMPLE_SVP_NNIE_QUANT_BASE;
2658     }
2659     s32Ret = SVP_NNIE_Rfcn_GetResult(
2660         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[1].astDst[0].u64VirAddr),
2661         pstNnieParam->astSegData[1].astDst[0].u32Stride,
2662         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[2].astDst[0].u64VirAddr),
2663         pstNnieParam->astSegData[2].astDst[0].u32Stride,
2664         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stRpnBbox.u64VirAddr),
2665         pstSoftwareParam->stRpnBbox.u32Stride, pstSoftwareParam->stRpnBbox.unShape.stWhc.u32Height,
2666         pstSoftwareParam->au32ConfThresh, pstSoftwareParam->u32MaxRoiNum, pstSoftwareParam->u32ClassNum,
2667         pstSoftwareParam->u32OriImWidth, pstSoftwareParam->u32OriImHeight, pstSoftwareParam->u32ValidNmsThresh,
2668         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32, pstSoftwareParam->stGetResultTmpBuf.u64VirAddr),
2669         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstScore.u64VirAddr),
2670         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstRoi.u64VirAddr),
2671         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stClassRoiNum.u64VirAddr));
2672     SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
2673         "Error,SVP_NNIE_Rfcn_GetResult failed!\n");
2674     return s32Ret;
2675 }
2676 
SAMPLE_SVP_NNIE_Ssd_GetResultTmpBuf(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_SSD_SOFTWARE_PARAM_S * pstSoftwareParam)2677 HI_U32 SAMPLE_SVP_NNIE_Ssd_GetResultTmpBuf(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
2678     SAMPLE_SVP_NNIE_SSD_SOFTWARE_PARAM_S *pstSoftwareParam)
2679 {
2680     HI_U64 u64PriorBoxSize = 0;
2681     HI_U64 u64SoftMaxSize = 0;
2682     HI_U64 u64DetectionSize = 0;
2683     HI_U64 u64TotalSize = 0;
2684     HI_U64 u64PriorNum = 0;
2685     HI_U64 u64Tmp;
2686     HI_U32 i;
2687 
2688     CHECK_NULL_PTR(pstNnieParam);
2689     CHECK_NULL_PTR(pstSoftwareParam);
2690     /* priorbox size */
2691     for (i = 0; i < pstNnieParam->pstModel->astSeg[0].u16DstNum / 2; i++) {
2692         u64Tmp = (HI_U64)pstSoftwareParam->au32PriorBoxHeight[i] * pstSoftwareParam->au32PriorBoxWidth[i];
2693         SAMPLE_SVP_CHECK_EXPR_RET(u64Tmp > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2694             "Error,u64Tmp should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2695 
2696         u64Tmp *= SAMPLE_SVP_NNIE_COORDI_NUM * 2 * sizeof(HI_U32);
2697         SAMPLE_SVP_CHECK_EXPR_RET(u64Tmp > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2698             "Error,u64Tmp should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2699 
2700         u64Tmp *= ((HI_U64)pstSoftwareParam->u32MaxSizeNum + pstSoftwareParam->u32MinSizeNum +
2701             (HI_U64)pstSoftwareParam->au32InputAspectRatioNum[i] * 2 * pstSoftwareParam->u32MinSizeNum);
2702         SAMPLE_SVP_CHECK_EXPR_RET(u64Tmp > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2703             "Error,u64Tmp should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2704 
2705         u64PriorBoxSize += u64Tmp;
2706         SAMPLE_SVP_CHECK_EXPR_RET(u64PriorBoxSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2707             "Error,u64Tmp should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2708     }
2709     pstSoftwareParam->stPriorBoxTmpBuf.u32Size = (HI_U32)u64PriorBoxSize;
2710     u64TotalSize += u64PriorBoxSize;
2711 
2712     /* softmax size */
2713     for (i = 0; i < pstSoftwareParam->u32ConcatNum; i++) {
2714         u64Tmp = (HI_U64)pstSoftwareParam->au32SoftMaxInChn[i] * sizeof(HI_U32);
2715         SAMPLE_SVP_CHECK_EXPR_RET(u64Tmp > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2716             "Error,u64Tmp should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2717 
2718         u64SoftMaxSize += u64Tmp;
2719         SAMPLE_SVP_CHECK_EXPR_RET(u64SoftMaxSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2720             "Error,u64SoftMaxSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2721     }
2722     pstSoftwareParam->stSoftMaxTmpBuf.u32Size = (HI_U32)u64SoftMaxSize;
2723     u64TotalSize += u64SoftMaxSize;
2724     SAMPLE_SVP_CHECK_EXPR_RET(u64TotalSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2725         "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2726 
2727     /* detection size */
2728     for (i = 0; i < pstSoftwareParam->u32ConcatNum; i++) {
2729         u64PriorNum += pstSoftwareParam->au32DetectInputChn[i] / SAMPLE_SVP_NNIE_COORDI_NUM;
2730         SAMPLE_SVP_CHECK_EXPR_RET(u64PriorNum > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2731             "Error,u64PriorNum should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2732     }
2733     u64DetectionSize += u64PriorNum * SAMPLE_SVP_NNIE_COORDI_NUM * sizeof(HI_U32);
2734     SAMPLE_SVP_CHECK_EXPR_RET(u64DetectionSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2735         "Error,u64DetectionSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2736 
2737     u64DetectionSize += u64PriorNum * SAMPLE_SVP_NNIE_PROPOSAL_WIDTH * sizeof(HI_U32) * 2;
2738     SAMPLE_SVP_CHECK_EXPR_RET(u64DetectionSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2739         "Error,u64DetectionSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2740 
2741     u64DetectionSize += u64PriorNum * 2 * sizeof(HI_U32);
2742     SAMPLE_SVP_CHECK_EXPR_RET(u64DetectionSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2743         "Error,u64DetectionSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2744     pstSoftwareParam->stGetResultTmpBuf.u32Size = (HI_U32)u64DetectionSize;
2745 
2746     u64TotalSize += u64DetectionSize;
2747     SAMPLE_SVP_CHECK_EXPR_RET(u64TotalSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2748         "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2749     return (HI_U32)u64TotalSize;
2750 }
2751 
SAMPLE_SVP_NNIE_Ssd_GetResult(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_SSD_SOFTWARE_PARAM_S * pstSoftwareParam)2752 HI_S32 SAMPLE_SVP_NNIE_Ssd_GetResult(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
2753     SAMPLE_SVP_NNIE_SSD_SOFTWARE_PARAM_S *pstSoftwareParam)
2754 {
2755     HI_S32 *aps32PermuteResult[SAMPLE_SVP_NNIE_SSD_REPORT_NODE_NUM];
2756     HI_S32 *aps32PriorboxOutputData[SAMPLE_SVP_NNIE_SSD_PRIORBOX_NUM];
2757     HI_S32 *aps32SoftMaxInputData[SAMPLE_SVP_NNIE_SSD_SOFTMAX_NUM];
2758     HI_S32 *aps32DetectionLocData[SAMPLE_SVP_NNIE_SSD_SOFTMAX_NUM];
2759     HI_S32 *ps32SoftMaxOutputData = NULL;
2760     HI_S32 *ps32DetectionOutTmpBuf = NULL;
2761     HI_U32 au32SoftMaxWidth[SAMPLE_SVP_NNIE_SSD_SOFTMAX_NUM];
2762     HI_U32 u32Size = 0;
2763     HI_S32 s32Ret = HI_SUCCESS;
2764     HI_U32 i = 0;
2765 
2766     CHECK_NULL_PTR(pstNnieParam);
2767     CHECK_NULL_PTR(pstSoftwareParam);
2768     /* get permut result */
2769     for (i = 0; i < SAMPLE_SVP_NNIE_SSD_REPORT_NODE_NUM; i++) {
2770         aps32PermuteResult[i] =
2771             SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[0].astDst[i].u64VirAddr);
2772     }
2773 
2774     /* priorbox */
2775     aps32PriorboxOutputData[0] =
2776         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stPriorBoxTmpBuf.u64VirAddr);
2777     for (i = 1; i < SAMPLE_SVP_NNIE_SSD_PRIORBOX_NUM; i++) {
2778         u32Size = pstSoftwareParam->au32PriorBoxHeight[i - 1] * pstSoftwareParam->au32PriorBoxWidth[i - 1] *
2779             SAMPLE_SVP_NNIE_COORDI_NUM * 2 *
2780             (pstSoftwareParam->u32MaxSizeNum + pstSoftwareParam->u32MinSizeNum +
2781             pstSoftwareParam->au32InputAspectRatioNum[i - 1] * 2 * pstSoftwareParam->u32MinSizeNum);
2782         aps32PriorboxOutputData[i] = aps32PriorboxOutputData[i - 1] + u32Size;
2783     }
2784 
2785     for (i = 0; i < SAMPLE_SVP_NNIE_SSD_PRIORBOX_NUM; i++) {
2786         s32Ret = SVP_NNIE_Ssd_PriorBoxForward(pstSoftwareParam->au32PriorBoxWidth[i],
2787             pstSoftwareParam->au32PriorBoxHeight[i], pstSoftwareParam->u32OriImWidth, pstSoftwareParam->u32OriImHeight,
2788             pstSoftwareParam->af32PriorBoxMinSize[i], pstSoftwareParam->u32MinSizeNum,
2789             pstSoftwareParam->af32PriorBoxMaxSize[i], pstSoftwareParam->u32MaxSizeNum, pstSoftwareParam->bFlip,
2790             pstSoftwareParam->bClip, pstSoftwareParam->au32InputAspectRatioNum[i],
2791             pstSoftwareParam->af32PriorBoxAspectRatio[i], pstSoftwareParam->af32PriorBoxStepWidth[i],
2792             pstSoftwareParam->af32PriorBoxStepHeight[i], pstSoftwareParam->f32Offset, pstSoftwareParam->as32PriorBoxVar,
2793             aps32PriorboxOutputData[i]);
2794         SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
2795             "Error,SVP_NNIE_Ssd_PriorBoxForward failed!\n");
2796     }
2797 
2798     /* softmax */
2799     ps32SoftMaxOutputData = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stSoftMaxTmpBuf.u64VirAddr);
2800     for (i = 0; i < SAMPLE_SVP_NNIE_SSD_SOFTMAX_NUM; i++) {
2801         aps32SoftMaxInputData[i] = aps32PermuteResult[i * 2 + 1];
2802         au32SoftMaxWidth[i] = pstSoftwareParam->au32ConvChannel[i * 2 + 1];
2803     }
2804 
2805     (void)SVP_NNIE_Ssd_SoftmaxForward(pstSoftwareParam->u32SoftMaxInHeight, pstSoftwareParam->au32SoftMaxInChn,
2806         pstSoftwareParam->u32ConcatNum, pstSoftwareParam->au32ConvStride, au32SoftMaxWidth, aps32SoftMaxInputData,
2807         ps32SoftMaxOutputData);
2808 
2809     /* detection */
2810     ps32DetectionOutTmpBuf = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stGetResultTmpBuf.u64VirAddr);
2811     for (i = 0; i < SAMPLE_SVP_NNIE_SSD_PRIORBOX_NUM; i++) {
2812         aps32DetectionLocData[i] = aps32PermuteResult[i * 2];
2813     }
2814 
2815     (void)SVP_NNIE_Ssd_DetectionOutForward(pstSoftwareParam->u32ConcatNum, pstSoftwareParam->u32ConfThresh,
2816         pstSoftwareParam->u32ClassNum, pstSoftwareParam->u32TopK, pstSoftwareParam->u32KeepTopK,
2817         pstSoftwareParam->u32NmsThresh, pstSoftwareParam->au32DetectInputChn, aps32DetectionLocData,
2818         aps32PriorboxOutputData, ps32SoftMaxOutputData, ps32DetectionOutTmpBuf,
2819         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstScore.u64VirAddr),
2820         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstRoi.u64VirAddr),
2821         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stClassRoiNum.u64VirAddr));
2822 
2823     return s32Ret;
2824 }
2825 
SAMPLE_SVP_NNIE_Yolov1_GetResultTmpBuf(SAMPLE_SVP_NNIE_YOLOV1_SOFTWARE_PARAM_S * pstSoftwareParam)2826 HI_U32 SAMPLE_SVP_NNIE_Yolov1_GetResultTmpBuf(SAMPLE_SVP_NNIE_YOLOV1_SOFTWARE_PARAM_S *pstSoftwareParam)
2827 {
2828     HI_U64 u64TotalGridNum, u64TotalBboxNum, u64TransSize, u64Probsize, u64ScoreSize, u64StackSize, u64TotalSize;
2829     HI_U32 u32ClassNum;
2830     HI_U32 u32EachGridBboxNum;
2831     HI_U64 u64EachVecSize;
2832 
2833     CHECK_NULL_PTR(pstSoftwareParam);
2834     u32ClassNum = pstSoftwareParam->u32ClassNum;
2835     u32EachGridBboxNum = pstSoftwareParam->u32BboxNumEachGrid;
2836     u64TotalGridNum = (HI_U64)pstSoftwareParam->u32GridNumHeight * pstSoftwareParam->u32GridNumWidth;
2837     SAMPLE_SVP_CHECK_EXPR_RET(u64TotalGridNum > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2838         "Error,u64TotalGridNum should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2839 
2840     u64TotalBboxNum = (HI_U64)u64TotalGridNum * u32EachGridBboxNum;
2841     SAMPLE_SVP_CHECK_EXPR_RET(u64TotalBboxNum > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2842         "Error,u64TotalBboxNum should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2843 
2844     u64EachVecSize = (u32ClassNum + (HI_U64)u32EachGridBboxNum * (SAMPLE_SVP_NNIE_COORDI_NUM + 1)) * sizeof(HI_U32);
2845     SAMPLE_SVP_CHECK_EXPR_RET(u64EachVecSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2846         "Error,u64EachVecSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2847 
2848     u64TransSize = u64EachVecSize * u64TotalGridNum;
2849     SAMPLE_SVP_CHECK_EXPR_RET(u64TransSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2850         "Error,u64TransSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2851 
2852     u64Probsize = u32ClassNum * u64TotalBboxNum * sizeof(HI_U32);
2853     SAMPLE_SVP_CHECK_EXPR_RET(u64Probsize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2854         "Error,u64Probsize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2855 
2856     u64ScoreSize = u64TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_YOLOV1_SCORE_S);
2857     SAMPLE_SVP_CHECK_EXPR_RET(u64ScoreSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2858         "Error,u64ScoreSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2859 
2860     u64StackSize = u64TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_STACK_S);
2861     SAMPLE_SVP_CHECK_EXPR_RET(u64StackSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2862         "Error,u64StackSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2863 
2864     u64TotalSize = u64TransSize + u64Probsize + u64ScoreSize + u64StackSize;
2865     SAMPLE_SVP_CHECK_EXPR_RET(u64TotalSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2866         "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2867     return (HI_U64)u64TotalSize;
2868 }
2869 
SAMPLE_SVP_NNIE_Yolov1_GetResult(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_YOLOV1_SOFTWARE_PARAM_S * pstSoftwareParam)2870 HI_S32 SAMPLE_SVP_NNIE_Yolov1_GetResult(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
2871     SAMPLE_SVP_NNIE_YOLOV1_SOFTWARE_PARAM_S *pstSoftwareParam)
2872 {
2873     HI_FLOAT *pf32ClassProb = NULL;
2874     HI_FLOAT *pf32Confidence = NULL;
2875     HI_FLOAT *pf32Bbox = NULL;
2876     HI_S32 *ps32Score = NULL;
2877     HI_U32 *pu32AssistBuf = NULL;
2878     HI_U32 u32Offset = 0;
2879     HI_U32 u32Index = 0;
2880     HI_U32 u32GridNum;
2881     HI_U32 i, j, k;
2882     HI_U8 *pu8Tmp = NULL;
2883     HI_FLOAT f32Score = 0.0f;
2884 
2885     CHECK_NULL_PTR(pstNnieParam);
2886     CHECK_NULL_PTR(pstSoftwareParam);
2887     u32GridNum = pstSoftwareParam->u32GridNumHeight * pstSoftwareParam->u32GridNumWidth;
2888     pu8Tmp = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U8, pstSoftwareParam->stGetResultTmpBuf.u64VirAddr);
2889     u32Offset = u32GridNum * (pstSoftwareParam->u32BboxNumEachGrid * SAMPLE_SVP_NNIE_BBOX_AND_CONFIDENCE +
2890         pstSoftwareParam->u32ClassNum);
2891     pf32ClassProb = (HI_FLOAT *)pu8Tmp;
2892     pf32Confidence = pf32ClassProb + u32GridNum * pstSoftwareParam->u32ClassNum;
2893     pf32Bbox = pf32Confidence + u32GridNum * pstSoftwareParam->u32BboxNumEachGrid;
2894 
2895     ps32Score = (HI_S32 *)(pf32ClassProb + u32Offset);
2896     pu32AssistBuf =
2897         (HI_U32 *)(ps32Score + u32GridNum * pstSoftwareParam->u32BboxNumEachGrid * pstSoftwareParam->u32ClassNum);
2898 
2899     for (i = 0; i < u32Offset; i++) {
2900         ((HI_FLOAT *)pu8Tmp)[i] =
2901             (SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[0].astDst[0].u64VirAddr))[i] /
2902             ((HI_FLOAT)SAMPLE_SVP_NNIE_QUANT_BASE);
2903     }
2904     for (i = 0; i < u32GridNum; i++) {
2905         for (j = 0; j < pstSoftwareParam->u32BboxNumEachGrid; j++) {
2906             for (k = 0; k < pstSoftwareParam->u32ClassNum; k++) {
2907                 u32Offset = k * u32GridNum * pstSoftwareParam->u32BboxNumEachGrid;
2908                 f32Score = *(pf32ClassProb + i * pstSoftwareParam->u32ClassNum + k) *
2909                     *(pf32Confidence + i * pstSoftwareParam->u32BboxNumEachGrid + j);
2910                 *(ps32Score + u32Offset + u32Index) = (HI_S32)(f32Score * SAMPLE_SVP_NNIE_QUANT_BASE);
2911             }
2912             u32Index++;
2913         }
2914     }
2915 
2916     for (i = 0; i < u32GridNum; i++) {
2917         for (j = 0; j < pstSoftwareParam->u32BboxNumEachGrid; j++) {
2918             pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2919                 SAMPLE_SVP_NNIE_X_MIN_OFFSET] =
2920                 (pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2921                 SAMPLE_SVP_NNIE_X_MIN_OFFSET] +
2922                 i % pstSoftwareParam->u32GridNumWidth) /
2923                 pstSoftwareParam->u32GridNumWidth * pstSoftwareParam->u32OriImWidth;
2924             pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2925                 SAMPLE_SVP_NNIE_Y_MIN_OFFSET] =
2926                 (pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2927                 SAMPLE_SVP_NNIE_Y_MIN_OFFSET] +
2928                 i / pstSoftwareParam->u32GridNumWidth) /
2929                 pstSoftwareParam->u32GridNumHeight * pstSoftwareParam->u32OriImHeight;
2930             pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2931                 SAMPLE_SVP_NNIE_X_MAX_OFFSET] =
2932                 pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2933                 SAMPLE_SVP_NNIE_X_MAX_OFFSET] *
2934                 pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2935                 SAMPLE_SVP_NNIE_X_MAX_OFFSET] *
2936                 pstSoftwareParam->u32OriImWidth;
2937             pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2938                 SAMPLE_SVP_NNIE_Y_MAX_OFFSET] =
2939                 pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2940                 SAMPLE_SVP_NNIE_Y_MAX_OFFSET] *
2941                 pf32Bbox[(i * pstSoftwareParam->u32BboxNumEachGrid + j) * SAMPLE_SVP_NNIE_COORDI_NUM + \
2942                 SAMPLE_SVP_NNIE_Y_MAX_OFFSET] *
2943                 pstSoftwareParam->u32OriImHeight;
2944         }
2945     }
2946 
2947     (void)SVP_NNIE_Yolov1_Detection(ps32Score, pf32Bbox, pstSoftwareParam->u32ClassNum,
2948         u32GridNum * pstSoftwareParam->u32BboxNumEachGrid, pstSoftwareParam->u32ConfThresh,
2949         pstSoftwareParam->u32NmsThresh, pstSoftwareParam->u32OriImWidth, pstSoftwareParam->u32OriImHeight,
2950         pu32AssistBuf, SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstScore.u64VirAddr),
2951         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstRoi.u64VirAddr),
2952         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stClassRoiNum.u64VirAddr));
2953     return HI_SUCCESS;
2954 }
2955 
SAMPLE_SVP_NNIE_Yolov2_GetResultTmpBuf(SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S * pstSoftwareParam)2956 HI_U32 SAMPLE_SVP_NNIE_Yolov2_GetResultTmpBuf(SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S *pstSoftwareParam)
2957 {
2958     HI_U64 u64TotalGridNum, u64ParaLength, u64TotalBboxNum, u64TransSize, u64BboxBufSize, u64BboxTmpBufSize;
2959     HI_U64 u64StackSize, u64TotalSize;
2960 
2961     CHECK_NULL_PTR(pstSoftwareParam);
2962     u64TotalGridNum = (HI_U64)pstSoftwareParam->u32GridNumHeight * pstSoftwareParam->u32GridNumWidth;
2963     SAMPLE_SVP_CHECK_EXPR_RET(u64TotalGridNum > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2964         "Error,u64TotalGridNum should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2965 
2966     u64ParaLength =
2967         pstSoftwareParam->u32BboxNumEachGrid * (SAMPLE_SVP_NNIE_COORDI_NUM + 1 + (HI_U64)pstSoftwareParam->u32ClassNum);
2968     SAMPLE_SVP_CHECK_EXPR_RET(u64ParaLength > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2969         "Error,u64ParaLength should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2970 
2971     u64TotalBboxNum = u64TotalGridNum * pstSoftwareParam->u32BboxNumEachGrid;
2972     SAMPLE_SVP_CHECK_EXPR_RET(u64TotalBboxNum > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2973         "Error,u64TotalBboxNum should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2974 
2975     u64TransSize = u64TotalGridNum * u64ParaLength * sizeof(HI_U32);
2976     SAMPLE_SVP_CHECK_EXPR_RET(u64TransSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2977         "Error,u64TransSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2978 
2979     u64StackSize = u64TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_STACK_S);
2980     SAMPLE_SVP_CHECK_EXPR_RET(u64StackSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2981         "Error,u64StackSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2982 
2983     u64BboxBufSize = u64TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S);
2984     SAMPLE_SVP_CHECK_EXPR_RET(u64BboxBufSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2985         "Error,u64BboxBufSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2986 
2987     u64BboxTmpBufSize = u64TotalGridNum * u64ParaLength * sizeof(HI_FLOAT);
2988     SAMPLE_SVP_CHECK_EXPR_RET(u64BboxTmpBufSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2989         "Error,u64BboxTmpBufSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2990 
2991     u64TotalSize = u64TransSize + u64StackSize + u64BboxBufSize + u64BboxTmpBufSize;
2992     SAMPLE_SVP_CHECK_EXPR_RET(u64TotalSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
2993         "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
2994     return (HI_U32)u64TotalSize;
2995 }
2996 
SAMPLE_SVP_NNIE_Yolov2_GetResult(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S * pstSoftwareParam)2997 HI_S32 SAMPLE_SVP_NNIE_Yolov2_GetResult(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
2998     SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S *pstSoftwareParam)
2999 {
3000     CHECK_NULL_PTR(pstNnieParam);
3001     CHECK_NULL_PTR(pstSoftwareParam);
3002     return SVP_NNIE_Yolov2_GetResult(
3003         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstNnieParam->astSegData[0].astDst[0].u64VirAddr),
3004         pstSoftwareParam->u32GridNumWidth, pstSoftwareParam->u32GridNumHeight, pstSoftwareParam->u32BboxNumEachGrid,
3005         pstSoftwareParam->u32ClassNum, pstSoftwareParam->u32OriImWidth, pstSoftwareParam->u32OriImHeight,
3006         pstSoftwareParam->u32MaxRoiNum, pstSoftwareParam->u32NmsThresh, pstSoftwareParam->u32ConfThresh,
3007         pstSoftwareParam->af32Bias,
3008         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32, pstSoftwareParam->stGetResultTmpBuf.u64VirAddr),
3009         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstScore.u64VirAddr),
3010         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstRoi.u64VirAddr),
3011         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stClassRoiNum.u64VirAddr));
3012 }
3013 
SAMPLE_SVP_NNIE_Yolov3_GetResultTmpBuf(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S * pstSoftwareParam)3014 HI_U32 SAMPLE_SVP_NNIE_Yolov3_GetResultTmpBuf(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
3015     SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S *pstSoftwareParam)
3016 {
3017     HI_U64 u64TotalSize, u64AssistStackSize, u64TotalBboxSize, u64DstBlobSize, u64Tmp;
3018     HI_U64 u64TotalBboxNum = 0;
3019     HI_U64 u64MaxBlobSize = 0;
3020     HI_U32 i;
3021 
3022     CHECK_NULL_PTR(pstNnieParam);
3023     CHECK_NULL_PTR(pstSoftwareParam);
3024     for (i = 0; i < pstNnieParam->pstModel->astSeg[0].u16DstNum; i++) {
3025         u64DstBlobSize = pstNnieParam->pstModel->astSeg[0].astDstNode[i].unShape.stWhc.u32Width * sizeof(HI_U32);
3026         SAMPLE_SVP_CHECK_EXPR_RET(u64DstBlobSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
3027             "Error,u64DstBlobSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
3028 
3029         u64DstBlobSize *= pstNnieParam->pstModel->astSeg[0].astDstNode[i].unShape.stWhc.u32Height;
3030         SAMPLE_SVP_CHECK_EXPR_RET(u64DstBlobSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
3031             "Error,u64DstBlobSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
3032 
3033         u64DstBlobSize *= pstNnieParam->pstModel->astSeg[0].astDstNode[i].unShape.stWhc.u32Chn;
3034         SAMPLE_SVP_CHECK_EXPR_RET(u64DstBlobSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
3035             "Error,u64DstBlobSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
3036 
3037         if (u64MaxBlobSize < u64DstBlobSize) {
3038             u64MaxBlobSize = u64DstBlobSize;
3039         }
3040 
3041         u64Tmp = (HI_U64)pstSoftwareParam->au32GridNumWidth[i] * pstSoftwareParam->au32GridNumHeight[i];
3042         SAMPLE_SVP_CHECK_EXPR_RET(u64Tmp > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
3043             "Error, %u-th au32GridNumWidth * au32GridNumHeight should be less than %u!\n", i, SAMPLE_SVP_NNIE_MAX_MEM);
3044         u64Tmp *= pstSoftwareParam->u32BboxNumEachGrid;
3045         SAMPLE_SVP_CHECK_EXPR_RET(u64Tmp > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
3046             "Error,u64Tmp should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
3047 
3048         u64TotalBboxNum += u64Tmp;
3049         SAMPLE_SVP_CHECK_EXPR_RET(u64TotalBboxNum > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
3050             "Error,u64TotalBboxNum should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
3051     }
3052     u64AssistStackSize = u64TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_STACK_S);
3053     SAMPLE_SVP_CHECK_EXPR_RET(u64AssistStackSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
3054         "Error,u64TotalBboxSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
3055 
3056     u64TotalBboxSize = u64TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_YOLOV3_BBOX_S);
3057     SAMPLE_SVP_CHECK_EXPR_RET(u64TotalBboxSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
3058         "Error,u64TotalBboxSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
3059 
3060     u64TotalSize = (u64MaxBlobSize + u64AssistStackSize + u64TotalBboxSize);
3061     SAMPLE_SVP_CHECK_EXPR_RET(u64TotalSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
3062         "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
3063 
3064     return (HI_U32)u64TotalSize;
3065 }
3066 
SAMPLE_SVP_NNIE_Yolov3_GetResult(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S * pstSoftwareParam)3067 HI_S32 SAMPLE_SVP_NNIE_Yolov3_GetResult(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
3068     SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S *pstSoftwareParam)
3069 {
3070     HI_U32 i = 0;
3071     HI_U64 au64InputBlobAddr[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM] = {0};
3072     HI_U32 au32Stride[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM] = {0};
3073 
3074     CHECK_NULL_PTR(pstNnieParam);
3075     CHECK_NULL_PTR(pstSoftwareParam);
3076     for (i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++) {
3077         au64InputBlobAddr[i] = pstNnieParam->astSegData[0].astDst[i].u64VirAddr;
3078         au32Stride[i] = pstNnieParam->astSegData[0].astDst[i].u32Stride;
3079     }
3080     return SVP_NNIE_Yolov3_GetResult(au64InputBlobAddr, pstSoftwareParam->au32GridNumWidth,
3081         pstSoftwareParam->au32GridNumHeight, au32Stride, pstSoftwareParam->u32BboxNumEachGrid,
3082         pstSoftwareParam->u32ClassNum, pstSoftwareParam->u32OriImWidth, pstSoftwareParam->u32OriImHeight,
3083         pstSoftwareParam->u32MaxRoiNum, pstSoftwareParam->u32NmsThresh, pstSoftwareParam->u32ConfThresh,
3084         pstSoftwareParam->af32Bias,
3085         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stGetResultTmpBuf.u64VirAddr),
3086         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstScore.u64VirAddr),
3087         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstRoi.u64VirAddr),
3088         SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stClassRoiNum.u64VirAddr));
3089 }
3090 
3091 #ifdef __cplusplus
3092 }
3093 #endif
3094