1 /*
2 * Copyright (c) 2022 HiSilicon (Shanghai) Technologies CO., LIMITED.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 /*
17 * 该文件提供了基于yolov2的手部检测以及基于resnet18的手势识别,属于两个wk串行推理。
18 * 该文件提供了手部检测和手势识别的模型加载、模型卸载、模型推理以及AI flag业务处理的API接口。
19 * 若一帧图像中出现多个手,我们通过算法将最大手作为目标手送分类网进行推理,
20 * 并将目标手标记为绿色,其他手标记为红色。
21 *
22 * This file provides hand detection based on yolov2 and gesture recognition based on resnet18,
23 * which belongs to two wk serial inferences. This file provides API interfaces for model loading,
24 * model unloading, model reasoning, and AI flag business processing for hand detection
25 * and gesture recognition. If there are multiple hands in one frame of image,
26 * we use the algorithm to use the largest hand as the target hand for inference,
27 * and mark the target hand as green and the other hands as red.
28 */
29
30 #include <stdlib.h>
31 #include <string.h>
32 #include <stdio.h>
33 #include <errno.h>
34
35 #include "sample_comm_nnie.h"
36 #include "sample_media_ai.h"
37 #include "ai_infer_process.h"
38 #include "yolov2_hand_detect.h"
39 #include "vgs_img.h"
40 #include "ive_img.h"
41 #include "misc_util.h"
42 #include "hisignalling.h"
43
44 #ifdef __cplusplus
45 #if __cplusplus
46 extern "C" {
47 #endif
48 #endif /* End of #ifdef __cplusplus */
49
#define HAND_FRM_WIDTH 640 // Detection-net frame width; source size used by RectBoxTran (see Cal)
#define HAND_FRM_HEIGHT 384 // Detection-net frame height; source size used by RectBoxTran
#define DETECT_OBJ_MAX 32 // Max hands handled per frame (capacity of the box arrays below)
#define RET_NUM_MAX 4 // Max classification results kept from CnnCalImg
#define DRAW_RETC_THICK 2 // Draw the width of the line
#define WIDTH_LIMIT 32 // Minimum cropped-hand width accepted by the classification net
#define HEIGHT_LIMIT 32 // Minimum cropped-hand height accepted by the classification net
#define IMAGE_WIDTH 224 // The resolution of the model IMAGE sent to the classification is 224*224
#define IMAGE_HEIGHT 224
#define MODEL_FILE_GESTURE "/userdata/models/hand_classify/hand_gesture.wk" // darknet framework wk model

// File-scope state shared across per-frame calls; single-threaded use assumed
// (no locking anywhere in this file) — NOTE(review): confirm the pipeline never
// calls Yolo2HandDetectResnetClassifyCal from two threads.
static int biggestBoxIndex; // Index of the largest (target) hand box, -1 if none
static IVE_IMAGE_S img; // Current source frame converted to an IVE image
static DetectObjInfo objs[DETECT_OBJ_MAX] = {0}; // Raw detection-net output objects
static RectBox boxs[DETECT_OBJ_MAX] = {0}; // Boxes translated to dst-frame coordinates
static RectBox objBoxs[DETECT_OBJ_MAX] = {0}; // Scratch: target (green) box for drawing
static RectBox remainingBoxs[DETECT_OBJ_MAX] = {0}; // Scratch: non-target (red) boxes
static RectBox cnnBoxs[DETECT_OBJ_MAX] = {0}; // Store the results of the classification network
static RecogNumInfo numInfo[RET_NUM_MAX] = {0}; // Classification-net output buffer
static IVE_IMAGE_S imgIn; // Cropped hand image (classification-net input)
static IVE_IMAGE_S imgDst; // Resized 224x224 image fed to the classification net
static VIDEO_FRAME_INFO_S frmIn; // Cropped image wrapped as a video frame for resize
static VIDEO_FRAME_INFO_S frmDst; // Resize output frame
int uartFd = 0; // UART fd used to report gestures (opened in ...Load)
74
75 /*
76 * 加载手部检测和手势分类模型
77 * Load hand detect and classify model
78 */
Yolo2HandDetectResnetClassifyLoad(uintptr_t * model)79 HI_S32 Yolo2HandDetectResnetClassifyLoad(uintptr_t* model)
80 {
81 SAMPLE_SVP_NNIE_CFG_S *self = NULL;
82 HI_S32 ret;
83
84 ret = CnnCreate(&self, MODEL_FILE_GESTURE);
85 *model = ret < 0 ? 0 : (uintptr_t)self;
86 HandDetectInit(); // Initialize the hand detection model
87 SAMPLE_PRT("Load hand detect claasify model success\n");
88 /*
89 * Uart串口初始化
90 * Uart open init
91 */
92 uartFd = UartOpenInit();
93 if (uartFd < 0) {
94 printf("uart1 open failed\r\n");
95 } else {
96 printf("uart1 open successed\r\n");
97 }
98 return ret;
99 }
100
101 /*
102 * 卸载手部检测和手势分类模型
103 * Unload hand detect and classify model
104 */
Yolo2HandDetectResnetClassifyUnload(uintptr_t model)105 HI_S32 Yolo2HandDetectResnetClassifyUnload(uintptr_t model)
106 {
107 CnnDestroy((SAMPLE_SVP_NNIE_CFG_S*)model);
108 HandDetectExit(); // Uninitialize the hand detection model
109 SAMPLE_PRT("Unload hand detect claasify model success\n");
110
111 return 0;
112 }
113
114 /*
115 * 获得最大的手
116 * Get the maximum hand
117 */
GetBiggestHandIndex(RectBox boxs[],int detectNum)118 static HI_S32 GetBiggestHandIndex(RectBox boxs[], int detectNum)
119 {
120 HI_S32 handIndex = 0;
121 HI_S32 biggestBoxIndex = handIndex;
122 HI_S32 biggestBoxWidth = boxs[handIndex].xmax - boxs[handIndex].xmin + 1;
123 HI_S32 biggestBoxHeight = boxs[handIndex].ymax - boxs[handIndex].ymin + 1;
124 HI_S32 biggestBoxArea = biggestBoxWidth * biggestBoxHeight;
125
126 for (handIndex = 1; handIndex < detectNum; handIndex++) {
127 HI_S32 boxWidth = boxs[handIndex].xmax - boxs[handIndex].xmin + 1;
128 HI_S32 boxHeight = boxs[handIndex].ymax - boxs[handIndex].ymin + 1;
129 HI_S32 boxArea = boxWidth * boxHeight;
130 if (biggestBoxArea < boxArea) {
131 biggestBoxArea = boxArea;
132 biggestBoxIndex = handIndex;
133 }
134 biggestBoxWidth = boxs[biggestBoxIndex].xmax - boxs[biggestBoxIndex].xmin + 1;
135 biggestBoxHeight = boxs[biggestBoxIndex].ymax - boxs[biggestBoxIndex].ymin + 1;
136 }
137
138 if ((biggestBoxWidth == 1) || (biggestBoxHeight == 1) || (detectNum == 0)) {
139 biggestBoxIndex = -1;
140 }
141
142 return biggestBoxIndex;
143 }
144
145 /*
146 * 手势识别信息
147 * Hand gesture recognition info
148 */
HandDetectFlag(const RecogNumInfo resBuf)149 static void HandDetectFlag(const RecogNumInfo resBuf)
150 {
151 HI_CHAR *gestureName = NULL;
152 switch (resBuf.num) {
153 case 0u:
154 gestureName = "gesture fist";
155 UartSendRead(uartFd, FistGesture); // 拳头手势
156 SAMPLE_PRT("----gesture name----:%s\n", gestureName);
157 break;
158 case 1u:
159 gestureName = "gesture indexUp";
160 UartSendRead(uartFd, ForefingerGesture); // 食指手势
161 SAMPLE_PRT("----gesture name----:%s\n", gestureName);
162 break;
163 case 2u:
164 gestureName = "gesture OK";
165 UartSendRead(uartFd, OkGesture); // OK手势
166 SAMPLE_PRT("----gesture name----:%s\n", gestureName);
167 break;
168 case 3u:
169 gestureName = "gesture palm";
170 UartSendRead(uartFd, PalmGesture); // 手掌手势
171 SAMPLE_PRT("----gesture name----:%s\n", gestureName);
172 break;
173 case 4u:
174 gestureName = "gesture yes";
175 UartSendRead(uartFd, YesGesture); // yes手势
176 SAMPLE_PRT("----gesture name----:%s\n", gestureName);
177 break;
178 case 5u:
179 gestureName = "gesture pinchOpen";
180 UartSendRead(uartFd, ForefingerAndThumbGesture); // 食指 + 大拇指
181 SAMPLE_PRT("----gesture name----:%s\n", gestureName);
182 break;
183 case 6u:
184 gestureName = "gesture phoneCall";
185 UartSendRead(uartFd, LittleFingerAndThumbGesture); // 大拇指 + 小拇指
186 SAMPLE_PRT("----gesture name----:%s\n", gestureName);
187 break;
188 default:
189 gestureName = "gesture others";
190 UartSendRead(uartFd, InvalidGesture); // 无效值
191 SAMPLE_PRT("----gesture name----:%s\n", gestureName);
192 break;
193 }
194 SAMPLE_PRT("hand gesture success\n");
195 }
196
197 /*
198 * 手部检测和手势分类推理
199 * Hand detect and classify calculation
200 */
Yolo2HandDetectResnetClassifyCal(uintptr_t model,VIDEO_FRAME_INFO_S * srcFrm,VIDEO_FRAME_INFO_S * dstFrm)201 HI_S32 Yolo2HandDetectResnetClassifyCal(uintptr_t model, VIDEO_FRAME_INFO_S *srcFrm, VIDEO_FRAME_INFO_S *dstFrm)
202 {
203 SAMPLE_SVP_NNIE_CFG_S *self = (SAMPLE_SVP_NNIE_CFG_S*)model;
204 HI_S32 resLen = 0;
205 int objNum;
206 int ret;
207 int num = 0;
208
209 ret = FrmToOrigImg((VIDEO_FRAME_INFO_S*)srcFrm, &img);
210 SAMPLE_CHECK_EXPR_RET(ret != HI_SUCCESS, ret, "hand detect for YUV Frm to Img FAIL, ret=%#x\n", ret);
211
212 objNum = HandDetectCal(&img, objs); // Send IMG to the detection net for reasoning
213 for (int i = 0; i < objNum; i++) {
214 cnnBoxs[i] = objs[i].box;
215 RectBox *box = &objs[i].box;
216 RectBoxTran(box, HAND_FRM_WIDTH, HAND_FRM_HEIGHT,
217 dstFrm->stVFrame.u32Width, dstFrm->stVFrame.u32Height);
218 SAMPLE_PRT("yolo2_out: {%d, %d, %d, %d}\n", box->xmin, box->ymin, box->xmax, box->ymax);
219 boxs[i] = *box;
220 }
221 biggestBoxIndex = GetBiggestHandIndex(boxs, objNum);
222 SAMPLE_PRT("biggestBoxIndex:%d, objNum:%d\n", biggestBoxIndex, objNum);
223
224 /*
225 * 当检测到对象时,在DSTFRM中绘制一个矩形
226 * When an object is detected, a rectangle is drawn in the DSTFRM
227 */
228 if (biggestBoxIndex >= 0) {
229 objBoxs[0] = boxs[biggestBoxIndex];
230 MppFrmDrawRects(dstFrm, objBoxs, 1, RGB888_GREEN, DRAW_RETC_THICK); // Target hand objnum is equal to 1
231
232 for (int j = 0; (j < objNum) && (objNum > 1); j++) {
233 if (j != biggestBoxIndex) {
234 remainingBoxs[num++] = boxs[j];
235 /*
236 * 其他手objnum等于objnum -1
237 * Others hand objnum is equal to objnum -1
238 */
239 MppFrmDrawRects(dstFrm, remainingBoxs, objNum - 1, RGB888_RED, DRAW_RETC_THICK);
240 }
241 }
242
243 /*
244 * 裁剪出来的图像通过预处理送分类网进行推理
245 * The cropped image is preprocessed and sent to the classification network for inference
246 */
247 ret = ImgYuvCrop(&img, &imgIn, &cnnBoxs[biggestBoxIndex]);
248 SAMPLE_CHECK_EXPR_RET(ret < 0, ret, "ImgYuvCrop FAIL, ret=%#x\n", ret);
249
250 if ((imgIn.u32Width >= WIDTH_LIMIT) && (imgIn.u32Height >= HEIGHT_LIMIT)) {
251 COMPRESS_MODE_E enCompressMode = srcFrm->stVFrame.enCompressMode;
252 ret = OrigImgToFrm(&imgIn, &frmIn);
253 frmIn.stVFrame.enCompressMode = enCompressMode;
254 SAMPLE_PRT("crop u32Width = %d, img.u32Height = %d\n", imgIn.u32Width, imgIn.u32Height);
255 ret = MppFrmResize(&frmIn, &frmDst, IMAGE_WIDTH, IMAGE_HEIGHT);
256 ret = FrmToOrigImg(&frmDst, &imgDst);
257 ret = CnnCalImg(self, &imgDst, numInfo, sizeof(numInfo) / sizeof((numInfo)[0]), &resLen);
258 SAMPLE_CHECK_EXPR_RET(ret < 0, ret, "CnnCalImg FAIL, ret=%#x\n", ret);
259 HI_ASSERT(resLen <= sizeof(numInfo) / sizeof(numInfo[0]));
260 HandDetectFlag(numInfo[0]);
261 MppFrmDestroy(&frmDst);
262 }
263 IveImgDestroy(&imgIn);
264 }
265
266 return ret;
267 }
268
269 #ifdef __cplusplus
270 #if __cplusplus
271 }
272 #endif
273 #endif /* End of #ifdef __cplusplus */
274