1 /*
2 * Copyright (c) 2022 HiSilicon (Shanghai) Technologies CO., LIMITED.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "sample_comm_nnie.h"
16 #include "mpi_sys.h"
17 #include "sample_comm_svp.h"
18
SAMPLE_COMM_SVP_NNIE_ParamDeinit(SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam)19 HI_S32 SAMPLE_COMM_SVP_NNIE_ParamDeinit(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam)
20 {
21 SAMPLE_SVP_CHECK_EXPR_RET(pstNnieParam == NULL, HI_INVALID_VALUE, SAMPLE_SVP_ERR_LEVEL_ERROR,
22 "Error, pstNnieParam can't be NULL!\n");
23
24 if ((pstNnieParam->stTaskBuf.u64PhyAddr != 0) && (pstNnieParam->stTaskBuf.u64VirAddr != 0)) {
25 SAMPLE_SVP_MMZ_FREE(pstNnieParam->stTaskBuf.u64PhyAddr, pstNnieParam->stTaskBuf.u64VirAddr);
26 pstNnieParam->stTaskBuf.u64PhyAddr = 0;
27 pstNnieParam->stTaskBuf.u64VirAddr = 0;
28 }
29
30 if ((pstNnieParam->stStepBuf.u64PhyAddr != 0) && (pstNnieParam->stStepBuf.u64VirAddr != 0)) {
31 SAMPLE_SVP_MMZ_FREE(pstNnieParam->stStepBuf.u64PhyAddr, pstNnieParam->stStepBuf.u64VirAddr);
32 pstNnieParam->stStepBuf.u64PhyAddr = 0;
33 pstNnieParam->stStepBuf.u64VirAddr = 0;
34 }
35 return HI_SUCCESS;
36 }
37
SAMPLE_SVP_NNIE_FillForwardInfo(SAMPLE_SVP_NNIE_CFG_S * pstNnieCfg,SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam)38 static HI_S32 SAMPLE_SVP_NNIE_FillForwardInfo(SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg, SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam)
39 {
40 HI_U32 i, j;
41 HI_U32 u32Num;
42 HI_U32 u32Offset = 0;
43
44 for (i = 0; i < pstNnieParam->pstModel->u32NetSegNum; i++) {
45 /* fill forwardCtrl info */
46 if (pstNnieParam->pstModel->astSeg[i].enNetType == SVP_NNIE_NET_TYPE_ROI) {
47 pstNnieParam->astForwardWithBboxCtrl[i].enNnieId = pstNnieCfg->aenNnieCoreId[i];
48 pstNnieParam->astForwardWithBboxCtrl[i].u32SrcNum = pstNnieParam->pstModel->astSeg[i].u16SrcNum;
49 pstNnieParam->astForwardWithBboxCtrl[i].u32DstNum = pstNnieParam->pstModel->astSeg[i].u16DstNum;
50 pstNnieParam->astForwardWithBboxCtrl[i].u32ProposalNum = 1;
51 pstNnieParam->astForwardWithBboxCtrl[i].u32NetSegId = i;
52 pstNnieParam->astForwardWithBboxCtrl[i].stTmpBuf = pstNnieParam->stTmpBuf;
53 pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u64PhyAddr =
54 pstNnieParam->stTaskBuf.u64PhyAddr + u32Offset;
55 pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u64VirAddr =
56 pstNnieParam->stTaskBuf.u64VirAddr + u32Offset;
57 pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u32Size = pstNnieParam->au32TaskBufSize[i];
58 } else if ((pstNnieParam->pstModel->astSeg[i].enNetType == SVP_NNIE_NET_TYPE_CNN) ||
59 (pstNnieParam->pstModel->astSeg[i].enNetType == SVP_NNIE_NET_TYPE_RECURRENT)) {
60 pstNnieParam->astForwardCtrl[i].enNnieId = pstNnieCfg->aenNnieCoreId[i];
61 pstNnieParam->astForwardCtrl[i].u32SrcNum = pstNnieParam->pstModel->astSeg[i].u16SrcNum;
62 pstNnieParam->astForwardCtrl[i].u32DstNum = pstNnieParam->pstModel->astSeg[i].u16DstNum;
63 pstNnieParam->astForwardCtrl[i].u32NetSegId = i;
64 pstNnieParam->astForwardCtrl[i].stTmpBuf = pstNnieParam->stTmpBuf;
65 pstNnieParam->astForwardCtrl[i].stTskBuf.u64PhyAddr = pstNnieParam->stTaskBuf.u64PhyAddr + u32Offset;
66 pstNnieParam->astForwardCtrl[i].stTskBuf.u64VirAddr = pstNnieParam->stTaskBuf.u64VirAddr + u32Offset;
67 pstNnieParam->astForwardCtrl[i].stTskBuf.u32Size = pstNnieParam->au32TaskBufSize[i];
68 }
69 u32Offset += pstNnieParam->au32TaskBufSize[i];
70
71 /* fill src blob info */
72 for (j = 0; j < pstNnieParam->pstModel->astSeg[i].u16SrcNum; j++) {
73 /* Recurrent blob */
74 if (pstNnieParam->pstModel->astSeg[i].astSrcNode[j].enType == SVP_BLOB_TYPE_SEQ_S32) {
75 pstNnieParam->astSegData[i].astSrc[j].enType = pstNnieParam->pstModel->astSeg[i].astSrcNode[j].enType;
76 pstNnieParam->astSegData[i].astSrc[j].unShape.stSeq.u32Dim =
77 pstNnieParam->pstModel->astSeg[i].astSrcNode[j].unShape.u32Dim;
78 pstNnieParam->astSegData[i].astSrc[j].u32Num = pstNnieCfg->u32MaxInputNum;
79 pstNnieParam->astSegData[i].astSrc[j].unShape.stSeq.u64VirAddrStep =
80 pstNnieCfg->au64StepVirAddr[i * SAMPLE_SVP_NNIE_EACH_SEG_STEP_ADDR_NUM];
81 } else {
82 pstNnieParam->astSegData[i].astSrc[j].enType = pstNnieParam->pstModel->astSeg[i].astSrcNode[j].enType;
83 pstNnieParam->astSegData[i].astSrc[j].unShape.stWhc.u32Chn =
84 pstNnieParam->pstModel->astSeg[i].astSrcNode[j].unShape.stWhc.u32Chn;
85 pstNnieParam->astSegData[i].astSrc[j].unShape.stWhc.u32Height =
86 pstNnieParam->pstModel->astSeg[i].astSrcNode[j].unShape.stWhc.u32Height;
87 pstNnieParam->astSegData[i].astSrc[j].unShape.stWhc.u32Width =
88 pstNnieParam->pstModel->astSeg[i].astSrcNode[j].unShape.stWhc.u32Width;
89 pstNnieParam->astSegData[i].astSrc[j].u32Num = pstNnieCfg->u32MaxInputNum;
90 }
91 }
92
93 /* fill dst blob info */
94 if (pstNnieParam->pstModel->astSeg[i].enNetType == SVP_NNIE_NET_TYPE_ROI) {
95 SAMPLE_SVP_CHECK_EXPR_RET((HI_U64)pstNnieCfg->u32MaxRoiNum * pstNnieCfg->u32MaxInputNum >
96 SAMPLE_SVP_NNIE_MAX_MEM,
97 HI_ERR_SVP_NNIE_ILLEGAL_PARAM, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,u32Num should be less than %u!\n",
98 SAMPLE_SVP_NNIE_MAX_MEM);
99 u32Num = pstNnieCfg->u32MaxRoiNum * pstNnieCfg->u32MaxInputNum;
100 } else {
101 u32Num = pstNnieCfg->u32MaxInputNum;
102 }
103
104 for (j = 0; j < pstNnieParam->pstModel->astSeg[i].u16DstNum; j++) {
105 if (pstNnieParam->pstModel->astSeg[i].astDstNode[j].enType == SVP_BLOB_TYPE_SEQ_S32) {
106 pstNnieParam->astSegData[i].astDst[j].enType = pstNnieParam->pstModel->astSeg[i].astDstNode[j].enType;
107 pstNnieParam->astSegData[i].astDst[j].unShape.stSeq.u32Dim =
108 pstNnieParam->pstModel->astSeg[i].astDstNode[j].unShape.u32Dim;
109 pstNnieParam->astSegData[i].astDst[j].u32Num = u32Num;
110 pstNnieParam->astSegData[i].astDst[j].unShape.stSeq.u64VirAddrStep =
111 pstNnieCfg->au64StepVirAddr[i * SAMPLE_SVP_NNIE_EACH_SEG_STEP_ADDR_NUM + 1];
112 } else {
113 pstNnieParam->astSegData[i].astDst[j].enType = pstNnieParam->pstModel->astSeg[i].astDstNode[j].enType;
114 pstNnieParam->astSegData[i].astDst[j].unShape.stWhc.u32Chn =
115 pstNnieParam->pstModel->astSeg[i].astDstNode[j].unShape.stWhc.u32Chn;
116 pstNnieParam->astSegData[i].astDst[j].unShape.stWhc.u32Height =
117 pstNnieParam->pstModel->astSeg[i].astDstNode[j].unShape.stWhc.u32Height;
118 pstNnieParam->astSegData[i].astDst[j].unShape.stWhc.u32Width =
119 pstNnieParam->pstModel->astSeg[i].astDstNode[j].unShape.stWhc.u32Width;
120 pstNnieParam->astSegData[i].astDst[j].u32Num = u32Num;
121 }
122 }
123 }
124 return HI_SUCCESS;
125 }
126
SAMPLE_SVP_NNIE_GetBlobMemSize(SVP_NNIE_NODE_S astNnieNode[],HI_U32 u32NodeNum,HI_U32 u32TotalStep,SVP_BLOB_S astBlob[],HI_U32 u32Align,HI_U32 * pu32TotalSize,HI_U32 au32BlobSize[])127 static hi_s32 SAMPLE_SVP_NNIE_GetBlobMemSize(SVP_NNIE_NODE_S astNnieNode[], HI_U32 u32NodeNum, HI_U32 u32TotalStep,
128 SVP_BLOB_S astBlob[], HI_U32 u32Align, HI_U32 *pu32TotalSize, HI_U32 au32BlobSize[])
129 {
130 HI_U32 i;
131 HI_U64 u64Size;
132 HI_U64 u64TotalSize = *pu32TotalSize;
133 HI_U64 u64Stride;
134
135 for (i = 0; i < u32NodeNum; i++) {
136 if (SVP_BLOB_TYPE_S32 == astNnieNode[i].enType || SVP_BLOB_TYPE_VEC_S32 == astNnieNode[i].enType ||
137 SVP_BLOB_TYPE_SEQ_S32 == astNnieNode[i].enType) {
138 u64Size = sizeof(HI_U32);
139 } else {
140 u64Size = sizeof(HI_U8);
141 }
142 if (SVP_BLOB_TYPE_SEQ_S32 == astNnieNode[i].enType) {
143 if (SAMPLE_SVP_NNIE_ALIGN_16 == u32Align) {
144 u64Stride = SAMPLE_SVP_NNIE_ALIGN16(astNnieNode[i].unShape.u32Dim * u64Size);
145 } else {
146 u64Stride = SAMPLE_SVP_NNIE_ALIGN32(astNnieNode[i].unShape.u32Dim * u64Size);
147 }
148 SAMPLE_SVP_CHECK_EXPR_RET(u64Stride > SAMPLE_SVP_NNIE_MAX_MEM, HI_ERR_SVP_NNIE_ILLEGAL_PARAM,
149 SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,u64Stride should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
150 u64Size = u32TotalStep * u64Stride;
151 SAMPLE_SVP_CHECK_EXPR_RET(u64Size > SAMPLE_SVP_NNIE_MAX_MEM, HI_ERR_SVP_NNIE_ILLEGAL_PARAM,
152 SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,u64Size should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
153 au32BlobSize[i] = u64Size;
154 } else {
155 if (SAMPLE_SVP_NNIE_ALIGN_16 == u32Align) {
156 u64Stride = SAMPLE_SVP_NNIE_ALIGN16(astNnieNode[i].unShape.stWhc.u32Width * u64Size);
157 } else {
158 u64Stride = SAMPLE_SVP_NNIE_ALIGN32(astNnieNode[i].unShape.stWhc.u32Width * u64Size);
159 }
160 SAMPLE_SVP_CHECK_EXPR_RET(u64Stride > SAMPLE_SVP_NNIE_MAX_MEM, HI_ERR_SVP_NNIE_ILLEGAL_PARAM,
161 SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,u64Stride should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
162 u64Size = astBlob[i].u32Num * u64Stride;
163 SAMPLE_SVP_CHECK_EXPR_RET(u64Size > SAMPLE_SVP_NNIE_MAX_MEM, HI_ERR_SVP_NNIE_ILLEGAL_PARAM,
164 SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,u64Size should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
165 u64Size *= astNnieNode[i].unShape.stWhc.u32Height;
166 SAMPLE_SVP_CHECK_EXPR_RET(u64Size > SAMPLE_SVP_NNIE_MAX_MEM, HI_ERR_SVP_NNIE_ILLEGAL_PARAM,
167 SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,u64Size should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
168 u64Size *= astNnieNode[i].unShape.stWhc.u32Chn;
169 SAMPLE_SVP_CHECK_EXPR_RET(u64Size > SAMPLE_SVP_NNIE_MAX_MEM, HI_ERR_SVP_NNIE_ILLEGAL_PARAM,
170 SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,u64Size should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
171 au32BlobSize[i] = (HI_U32)u64Size;
172 }
173 u64TotalSize += au32BlobSize[i];
174 SAMPLE_SVP_CHECK_EXPR_RET(u64TotalSize > SAMPLE_SVP_NNIE_MAX_MEM, HI_ERR_SVP_NNIE_ILLEGAL_PARAM,
175 SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
176 *pu32TotalSize = (HI_U32)u64TotalSize;
177 astBlob[i].u32Stride = (HI_U32)u64Stride;
178 }
179 return HI_SUCCESS;
180 }
181
SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize(SAMPLE_SVP_NNIE_CFG_S * pstNnieCfg,SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam,HI_U32 * pu32TotalTaskBufSize,HI_U32 * pu32TmpBufSize,SAMPLE_SVP_NNIE_BLOB_SIZE_S astBlobSize[],HI_U32 * pu32TotalSize,HI_U32 u32BlobSizeNUm)182 static HI_S32 SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize(SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg,
183 SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, HI_U32 *pu32TotalTaskBufSize, HI_U32 *pu32TmpBufSize,
184 SAMPLE_SVP_NNIE_BLOB_SIZE_S astBlobSize[], HI_U32 *pu32TotalSize, HI_U32 u32BlobSizeNUm)
185 {
186 HI_S32 s32Ret = HI_SUCCESS;
187 HI_U32 i, j;
188 HI_U64 u64TotalStep = 0;
189
190 /* Get each seg's task buf size */
191 s32Ret = HI_MPI_SVP_NNIE_GetTskBufSize(pstNnieCfg->u32MaxInputNum, pstNnieCfg->u32MaxRoiNum, pstNnieParam->pstModel,
192 pstNnieParam->au32TaskBufSize, pstNnieParam->pstModel->u32NetSegNum);
193 SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
194 "Error, HI_MPI_SVP_NNIE_GetTaskSize failed!\n");
195
196 /* Get total task buf size */
197 *pu32TotalTaskBufSize = 0;
198 for (i = 0; i < pstNnieParam->pstModel->u32NetSegNum; i++) {
199 SAMPLE_SVP_CHECK_EXPR_RET((HI_U64)(*pu32TotalTaskBufSize) + pstNnieParam->au32TaskBufSize[i] >
200 SAMPLE_SVP_NNIE_MAX_MEM,
201 s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, total task buf size can't be greater than %u!\n",
202 SAMPLE_SVP_NNIE_MAX_MEM);
203 *pu32TotalTaskBufSize += pstNnieParam->au32TaskBufSize[i];
204 }
205
206 /* Get tmp buf size */
207 *pu32TmpBufSize = pstNnieParam->pstModel->u32TmpBufSize;
208 SAMPLE_SVP_CHECK_EXPR_RET((HI_U64)*pu32TotalTaskBufSize + *pu32TmpBufSize > SAMPLE_SVP_NNIE_MAX_MEM, s32Ret,
209 SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, task_buf + tmpbuf can't be greater than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
210 *pu32TotalSize += *pu32TotalTaskBufSize + *pu32TmpBufSize;
211
212 /* calculate Blob mem size */
213 for (i = 0; i < pstNnieParam->pstModel->u32NetSegNum && i < u32BlobSizeNUm; i++) {
214 if (pstNnieParam->pstModel->astSeg[i].enNetType == SVP_NNIE_NET_TYPE_RECURRENT) {
215 for (j = 0; j < pstNnieParam->astSegData[i].astSrc[0].u32Num; j++) {
216 u64TotalStep += *(SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32,
217 pstNnieParam->astSegData[i].astSrc[0].unShape.stSeq.u64VirAddrStep) + j);
218 SAMPLE_SVP_CHECK_EXPR_RET(u64TotalStep > SAMPLE_SVP_NNIE_MAX_MEM, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
219 "Error, u64TotalStep can't be greater than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
220 }
221 }
222 /* the first seg's Src Blob mem size, other seg's src blobs from the output blobs of
223 those segs before it or from software output results */
224 if (i == 0) {
225 s32Ret = SAMPLE_SVP_NNIE_GetBlobMemSize(&(pstNnieParam->pstModel->astSeg[i].astSrcNode[0]),
226 pstNnieParam->pstModel->astSeg[i].u16SrcNum, (HI_U32)u64TotalStep,
227 &(pstNnieParam->astSegData[i].astSrc[0]), SAMPLE_SVP_NNIE_ALIGN_16, pu32TotalSize,
228 &(astBlobSize[i].au32SrcSize[0]));
229 SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
230 "Error, SAMPLE_SVP_NNIE_GetBlobMemSize failed!\n");
231 }
232
233 /* Get each seg's Dst Blob mem size */
234 s32Ret = SAMPLE_SVP_NNIE_GetBlobMemSize(&(pstNnieParam->pstModel->astSeg[i].astDstNode[0]),
235 pstNnieParam->pstModel->astSeg[i].u16DstNum, (HI_U32)u64TotalStep, &(pstNnieParam->astSegData[i].astDst[0]),
236 SAMPLE_SVP_NNIE_ALIGN_16, pu32TotalSize, &(astBlobSize[i].au32DstSize[0]));
237 SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
238 "Error, SAMPLE_SVP_NNIE_GetBlobMemSize failed!\n");
239 }
240 return s32Ret;
241 }
242
SAMPLE_SVP_NNIE_ParamInit(SAMPLE_SVP_NNIE_CFG_S * pstNnieCfg,SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam)243 static HI_S32 SAMPLE_SVP_NNIE_ParamInit(SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg, SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam)
244 {
245 HI_U32 i = 0, j = 0;
246 HI_U32 u32TotalSize = 0;
247 HI_U32 u32TotalTaskBufSize = 0;
248 HI_U32 u32TmpBufSize = 0;
249 HI_S32 s32Ret = HI_SUCCESS;
250 HI_U32 u32Offset = 0;
251 HI_U64 u64PhyAddr = 0;
252 HI_U8 *pu8VirAddr = NULL;
253 SAMPLE_SVP_NNIE_BLOB_SIZE_S astBlobSize[SVP_NNIE_MAX_NET_SEG_NUM] = {0};
254
255 /* fill forward info */
256 s32Ret = SAMPLE_SVP_NNIE_FillForwardInfo(pstNnieCfg, pstNnieParam);
257 SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
258 "Error,SAMPLE_SVP_NNIE_FillForwardCtrl failed!\n");
259
260 /* Get taskInfo and Blob mem size */
261 s32Ret = SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize(pstNnieCfg, pstNnieParam, &u32TotalTaskBufSize, &u32TmpBufSize,
262 astBlobSize, &u32TotalSize, SVP_NNIE_MAX_NET_SEG_NUM);
263 SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
264 "Error,SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize failed!\n");
265
266 /* Malloc mem */
267 s32Ret = SAMPLE_COMM_SVP_MallocCached("SAMPLE_NNIE_TASK", NULL, (HI_U64 *)&u64PhyAddr, (void **)&pu8VirAddr,
268 u32TotalSize);
269 SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
270 "Error,Malloc memory failed!\n");
271 (HI_VOID)memset_s(pu8VirAddr, u32TotalSize, 0, u32TotalSize);
272 SAMPLE_COMM_SVP_FlushCache(u64PhyAddr, (void *)pu8VirAddr, u32TotalSize);
273
274 /* fill taskinfo mem addr */
275 pstNnieParam->stTaskBuf.u32Size = u32TotalTaskBufSize;
276 pstNnieParam->stTaskBuf.u64PhyAddr = u64PhyAddr;
277 pstNnieParam->stTaskBuf.u64VirAddr = (HI_U64)(HI_UINTPTR_T)pu8VirAddr;
278
279 /* fill Tmp mem addr */
280 pstNnieParam->stTmpBuf.u32Size = u32TmpBufSize;
281 pstNnieParam->stTmpBuf.u64PhyAddr = u64PhyAddr + u32TotalTaskBufSize;
282 pstNnieParam->stTmpBuf.u64VirAddr = (HI_U64)(HI_UINTPTR_T)pu8VirAddr + u32TotalTaskBufSize;
283
284 /* fill forward ctrl addr */
285 for (i = 0; i < pstNnieParam->pstModel->u32NetSegNum; i++) {
286 if (pstNnieParam->pstModel->astSeg[i].enNetType == SVP_NNIE_NET_TYPE_ROI) {
287 pstNnieParam->astForwardWithBboxCtrl[i].stTmpBuf = pstNnieParam->stTmpBuf;
288 pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u64PhyAddr =
289 pstNnieParam->stTaskBuf.u64PhyAddr + u32Offset;
290 pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u64VirAddr =
291 pstNnieParam->stTaskBuf.u64VirAddr + u32Offset;
292 pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u32Size = pstNnieParam->au32TaskBufSize[i];
293 } else if ((pstNnieParam->pstModel->astSeg[i].enNetType == SVP_NNIE_NET_TYPE_CNN) ||
294 (pstNnieParam->pstModel->astSeg[i].enNetType == SVP_NNIE_NET_TYPE_RECURRENT)) {
295 pstNnieParam->astForwardCtrl[i].stTmpBuf = pstNnieParam->stTmpBuf;
296 pstNnieParam->astForwardCtrl[i].stTskBuf.u64PhyAddr = pstNnieParam->stTaskBuf.u64PhyAddr + u32Offset;
297 pstNnieParam->astForwardCtrl[i].stTskBuf.u64VirAddr = pstNnieParam->stTaskBuf.u64VirAddr + u32Offset;
298 pstNnieParam->astForwardCtrl[i].stTskBuf.u32Size = pstNnieParam->au32TaskBufSize[i];
299 }
300 u32Offset += pstNnieParam->au32TaskBufSize[i];
301 }
302
303 /* fill each blob's mem addr */
304 u64PhyAddr = u64PhyAddr + u32TotalTaskBufSize + u32TmpBufSize;
305 pu8VirAddr = pu8VirAddr + u32TotalTaskBufSize + u32TmpBufSize;
306 for (i = 0; i < pstNnieParam->pstModel->u32NetSegNum; i++) {
307 /* first seg has src blobs, other seg's src blobs from the output blobs of
308 those segs before it or from software output results */
309 if (i == 0) {
310 for (j = 0; j < pstNnieParam->pstModel->astSeg[i].u16SrcNum; j++) {
311 if (j != 0) {
312 u64PhyAddr += astBlobSize[i].au32SrcSize[j - 1];
313 pu8VirAddr += astBlobSize[i].au32SrcSize[j - 1];
314 }
315 pstNnieParam->astSegData[i].astSrc[j].u64PhyAddr = u64PhyAddr;
316 pstNnieParam->astSegData[i].astSrc[j].u64VirAddr = (HI_U64)(HI_UINTPTR_T)pu8VirAddr;
317 }
318 u64PhyAddr += astBlobSize[i].au32SrcSize[j - 1];
319 pu8VirAddr += astBlobSize[i].au32SrcSize[j - 1];
320 }
321
322 /* fill the mem addrs of each seg's output blobs */
323 for (j = 0; j < pstNnieParam->pstModel->astSeg[i].u16DstNum; j++) {
324 if (j != 0) {
325 u64PhyAddr += astBlobSize[i].au32DstSize[j - 1];
326 pu8VirAddr += astBlobSize[i].au32DstSize[j - 1];
327 }
328 pstNnieParam->astSegData[i].astDst[j].u64PhyAddr = u64PhyAddr;
329 pstNnieParam->astSegData[i].astDst[j].u64VirAddr = SAMPLE_SVP_NNIE_CONVERT_PTR_TO_ADDR(HI_U64, pu8VirAddr);
330 }
331 u64PhyAddr += astBlobSize[i].au32DstSize[j - 1];
332 pu8VirAddr += astBlobSize[i].au32DstSize[j - 1];
333 }
334 return s32Ret;
335 }
336
SAMPLE_COMM_SVP_NNIE_ParamInit(SAMPLE_SVP_NNIE_CFG_S * pstNnieCfg,SAMPLE_SVP_NNIE_PARAM_S * pstNnieParam)337 HI_S32 SAMPLE_COMM_SVP_NNIE_ParamInit(SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg, SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam)
338 {
339 HI_S32 s32Ret;
340
341 SAMPLE_SVP_CHECK_EXPR_RET((pstNnieCfg == NULL || pstNnieParam == NULL), HI_ERR_SVP_NNIE_ILLEGAL_PARAM,
342 SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,pstNnieCfg and pstNnieParam can't be NULL!\n");
343 SAMPLE_SVP_CHECK_EXPR_RET((pstNnieParam->pstModel == NULL), HI_ERR_SVP_NNIE_ILLEGAL_PARAM,
344 SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,pstNnieParam->pstModel can't be NULL!\n");
345
346 s32Ret = SAMPLE_SVP_NNIE_ParamInit(pstNnieCfg, pstNnieParam);
347 SAMPLE_SVP_CHECK_EXPR_GOTO(s32Ret != HI_SUCCESS, FAIL, SAMPLE_SVP_ERR_LEVEL_ERROR,
348 "Error, SAMPLE_SVP_NNIE_ParamInit failed!\n");
349
350 return s32Ret;
351 FAIL:
352 s32Ret = SAMPLE_COMM_SVP_NNIE_ParamDeinit(pstNnieParam);
353 SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
354 "Error, SAMPLE_COMM_SVP_NNIE_ParamDeinit failed!\n");
355 return HI_FAILURE;
356 }
357
SAMPLE_COMM_SVP_NNIE_UnloadModel(SAMPLE_SVP_NNIE_MODEL_S * pstNnieModel)358 HI_S32 SAMPLE_COMM_SVP_NNIE_UnloadModel(SAMPLE_SVP_NNIE_MODEL_S *pstNnieModel)
359 {
360 SAMPLE_SVP_CHECK_EXPR_RET(pstNnieModel == HI_NULL, HI_ERR_SVP_NNIE_NULL_PTR, SAMPLE_SVP_ERR_LEVEL_ERROR,
361 "Error, pstNnieModel is NULL!\n");
362
363 if ((pstNnieModel->stModelBuf.u64PhyAddr != 0) && (pstNnieModel->stModelBuf.u64VirAddr != 0)) {
364 SAMPLE_SVP_MMZ_FREE(pstNnieModel->stModelBuf.u64PhyAddr, pstNnieModel->stModelBuf.u64VirAddr);
365 pstNnieModel->stModelBuf.u64PhyAddr = 0;
366 pstNnieModel->stModelBuf.u64VirAddr = 0;
367 }
368 return HI_SUCCESS;
369 }
370
SAMPLE_COMM_SVP_NNIE_LoadModel(const HI_CHAR * pszModelFile,SAMPLE_SVP_NNIE_MODEL_S * pstNnieModel)371 HI_S32 SAMPLE_COMM_SVP_NNIE_LoadModel(const HI_CHAR *pszModelFile, SAMPLE_SVP_NNIE_MODEL_S *pstNnieModel)
372 {
373 HI_S32 s32Ret = HI_INVALID_VALUE;
374 HI_U64 u64PhyAddr = 0;
375 HI_U8 *pu8VirAddr = NULL;
376 HI_SL slFileSize = 0;
377 HI_ULONG ulSize;
378 HI_CHAR path[PATH_MAX] = {0};
379
380 /* Get model file size */
381 SAMPLE_SVP_CHECK_EXPR_RET(pszModelFile == HI_NULL, HI_ERR_SVP_NNIE_NULL_PTR, SAMPLE_SVP_ERR_LEVEL_ERROR,
382 "Error, pszModelFile is NULL!\n");
383 SAMPLE_SVP_CHECK_EXPR_RET(pstNnieModel == HI_NULL, HI_ERR_SVP_NNIE_NULL_PTR, SAMPLE_SVP_ERR_LEVEL_ERROR,
384 "Error, pstNnieModel is NULL!\n");
385 SAMPLE_SVP_CHECK_EXPR_RET((strlen(pszModelFile) > PATH_MAX) || (realpath(pszModelFile, path) == HI_NULL),
386 HI_ERR_SVP_NNIE_ILLEGAL_PARAM, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, file_name is invalid!\n");
387 FILE *fp = fopen(path, "rb");
388 SAMPLE_SVP_CHECK_EXPR_RET(fp == NULL, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, open model file failed!\n");
389 s32Ret = fseek(fp, 0L, SEEK_END);
390 SAMPLE_SVP_CHECK_EXPR_GOTO(s32Ret == -1, FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, fseek failed!\n");
391 slFileSize = ftell(fp);
392 SAMPLE_SVP_CHECK_EXPR_GOTO(slFileSize <= 0, FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, ftell failed!\n");
393 s32Ret = fseek(fp, 0L, SEEK_SET);
394 SAMPLE_SVP_CHECK_EXPR_GOTO(s32Ret == -1, FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, fseek failed!\n");
395
396 /* malloc model file mem */
397 s32Ret =
398 SAMPLE_COMM_SVP_MallocMem("SAMPLE_NNIE_MODEL", NULL, (HI_U64 *)&u64PhyAddr, (void **)&pu8VirAddr, slFileSize);
399 SAMPLE_SVP_CHECK_EXPR_GOTO(s32Ret != HI_SUCCESS, FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
400 "Error(%#x),Malloc memory failed!\n", s32Ret);
401
402 pstNnieModel->stModelBuf.u32Size = (HI_U32)slFileSize;
403 pstNnieModel->stModelBuf.u64PhyAddr = u64PhyAddr;
404 pstNnieModel->stModelBuf.u64VirAddr = (HI_U64)(HI_UINTPTR_T)pu8VirAddr;
405
406 ulSize = fread(pu8VirAddr, slFileSize, 1, fp);
407 SAMPLE_SVP_CHECK_EXPR_GOTO(ulSize != 1, FAIL_1, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,read model file failed!\n");
408
409 /* load model */
410 s32Ret = HI_MPI_SVP_NNIE_LoadModel(&pstNnieModel->stModelBuf, &pstNnieModel->stModel);
411 SAMPLE_SVP_CHECK_EXPR_GOTO(s32Ret != HI_SUCCESS, FAIL_1, SAMPLE_SVP_ERR_LEVEL_ERROR,
412 "Error,HI_MPI_SVP_NNIE_LoadModel failed!\n");
413
414 (HI_VOID)fclose(fp);
415 return HI_SUCCESS;
416
417 FAIL_1:
418 SAMPLE_SVP_MMZ_FREE(pstNnieModel->stModelBuf.u64PhyAddr, pstNnieModel->stModelBuf.u64VirAddr);
419 pstNnieModel->stModelBuf.u32Size = 0;
420 FAIL_0:
421 if (fp != NULL) {
422 (HI_VOID)fclose(fp);
423 }
424 return HI_FAILURE;
425 }
426
SAMPLE_COMM_SVP_NNIE_FillRect(VIDEO_FRAME_INFO_S * pstFrmInfo,SAMPLE_SVP_NNIE_RECT_ARRAY_S * pstRect,HI_U32 u32Color)427 HI_S32 SAMPLE_COMM_SVP_NNIE_FillRect(VIDEO_FRAME_INFO_S *pstFrmInfo, SAMPLE_SVP_NNIE_RECT_ARRAY_S *pstRect,
428 HI_U32 u32Color)
429 {
430 VGS_HANDLE VgsHandle = -1;
431 HI_S32 s32Ret = HI_SUCCESS;
432 HI_U32 i, j;
433 VGS_TASK_ATTR_S stVgsTask;
434 VGS_ADD_COVER_S stVgsAddCover;
435 static HI_U32 u32Frm = 0;
436
437 SAMPLE_SVP_CHECK_EXPR_RET(pstFrmInfo == HI_NULL, HI_ERR_SVP_NNIE_NULL_PTR, SAMPLE_SVP_ERR_LEVEL_ERROR,
438 "Error, pstFrmInfo is NULL!\n");
439 SAMPLE_SVP_CHECK_EXPR_RET(pstRect == HI_NULL, HI_ERR_SVP_NNIE_NULL_PTR, SAMPLE_SVP_ERR_LEVEL_ERROR,
440 "Error, pstRect is NULL!\n");
441 u32Frm++;
442 if (pstRect->u32TotalNum == 0) {
443 return s32Ret;
444 }
445 s32Ret = HI_MPI_VGS_BeginJob(&VgsHandle);
446 if (s32Ret != HI_SUCCESS) {
447 SAMPLE_PRT("Vgs begin job fail,Error(%#x)\n", s32Ret);
448 return s32Ret;
449 }
450
451 (HI_VOID)memcpy_s(&stVgsTask.stImgIn, sizeof(VIDEO_FRAME_INFO_S), pstFrmInfo, sizeof(VIDEO_FRAME_INFO_S));
452 (HI_VOID)memcpy_s(&stVgsTask.stImgOut, sizeof(VIDEO_FRAME_INFO_S), pstFrmInfo, sizeof(VIDEO_FRAME_INFO_S));
453
454 stVgsAddCover.enCoverType = COVER_QUAD_RANGLE;
455 stVgsAddCover.u32Color = u32Color;
456 stVgsAddCover.stQuadRangle.bSolid = HI_FALSE;
457 stVgsAddCover.stQuadRangle.u32Thick = 2; /* thick value: 2 */
458 for (i = 0; i < pstRect->u32ClsNum; i++) {
459 for (j = 0; j < pstRect->au32RoiNum[i]; j++) {
460 (HI_VOID)memcpy_s(stVgsAddCover.stQuadRangle.stPoint, sizeof(pstRect->astRect[i][j].astPoint),
461 pstRect->astRect[i][j].astPoint, sizeof(pstRect->astRect[i][j].astPoint));
462 s32Ret = HI_MPI_VGS_AddCoverTask(VgsHandle, &stVgsTask, &stVgsAddCover);
463 if (s32Ret != HI_SUCCESS) {
464 SAMPLE_PRT("HI_MPI_VGS_AddCoverTask fail,Error(%#x)\n", s32Ret);
465 HI_MPI_VGS_CancelJob(VgsHandle);
466 return s32Ret;
467 }
468 }
469 }
470
471 s32Ret = HI_MPI_VGS_EndJob(VgsHandle);
472 if (s32Ret != HI_SUCCESS) {
473 SAMPLE_PRT("HI_MPI_VGS_EndJob fail,Error(%#x)\n", s32Ret);
474 HI_MPI_VGS_CancelJob(VgsHandle);
475 return s32Ret;
476 }
477
478 return s32Ret;
479 }
480