• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2022 HiSilicon (Shanghai) Technologies CO., LIMITED.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
/*
 * 该文件提供了基于yolov2的手部检测以及基于resnet18的手势识别,属于两个wk串行推理。
 * 该文件提供了手部检测和手势识别的模型加载、模型卸载、模型推理以及AI flag业务处理的API接口。
 * 若一帧图像中出现多个手,我们通过算法将最大手作为目标手送分类网进行推理,
 * 并将目标手标记为绿色,其他手标记为红色。
 *
 * This file provides hand detection based on yolov2 and gesture recognition based on resnet18,
 * run as two wk models in series. It provides the API for model loading, model unloading,
 * model inference, and AI flag business processing for hand detection and gesture
 * recognition. If several hands appear in one frame, the largest hand is selected as the
 * target hand and sent to the classification network for inference; the target hand is
 * marked in green and the other hands in red.
 */
29 
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>

#include "sample_comm_nnie.h"
#include "sample_media_ai.h"
#include "ai_infer_process.h"
#include "yolov2_hand_detect.h"
#include "vgs_img.h"
#include "ive_img.h"
#include "misc_util.h"
#include "hisignalling.h"
43 
44 #ifdef __cplusplus
45 #if __cplusplus
46 extern "C" {
47 #endif
48 #endif /* End of #ifdef __cplusplus */
49 
/* Frame size the detection boxes are expressed in before being mapped onto the output frame */
#define HAND_FRM_WIDTH     640
#define HAND_FRM_HEIGHT    384
#define DETECT_OBJ_MAX     32   // Capacity of the per-frame detection/box buffers
#define RET_NUM_MAX        4    // Capacity of the classification result buffer
#define DRAW_RETC_THICK    2    // Draw the width of the line
/* A cropped hand smaller than this in either dimension is not sent to the classifier */
#define WIDTH_LIMIT        32
#define HEIGHT_LIMIT       32
#define IMAGE_WIDTH        224  // The resolution of the model IMAGE sent to the classification is 224*224
#define IMAGE_HEIGHT       224
#define MODEL_FILE_GESTURE    "/userdata/models/hand_classify/hand_gesture.wk" // darknet framework wk model
60 
/*
 * File-level working state shared by the functions below.
 * These buffers are overwritten on every call to Yolo2HandDetectResnetClassifyCal().
 */
static int biggestBoxIndex; // Index of the target (largest) hand in boxs, or -1 if none
static IVE_IMAGE_S img; // Source frame converted by FrmToOrigImg()
static DetectObjInfo objs[DETECT_OBJ_MAX] = {0}; // Output buffer filled by HandDetectCal()
static RectBox boxs[DETECT_OBJ_MAX] = {0}; // Detection boxes after RectBoxTran() to the output frame size
static RectBox objBoxs[DETECT_OBJ_MAX] = {0}; // Element 0 holds the target hand box to draw
static RectBox remainingBoxs[DETECT_OBJ_MAX] = {0}; // Boxes of all non-target hands
static RectBox cnnBoxs[DETECT_OBJ_MAX] = {0}; // Untransformed detection boxes, used to crop the classifier input
static RecogNumInfo numInfo[RET_NUM_MAX] = {0}; // Classification results written by CnnCalImg()
static IVE_IMAGE_S imgIn; // Cropped hand region produced by ImgYuvCrop()
static IVE_IMAGE_S imgDst; // Resized crop handed to the classifier
static VIDEO_FRAME_INFO_S frmIn; // Frame form of imgIn, input of MppFrmResize()
static VIDEO_FRAME_INFO_S frmDst; // Output of MppFrmResize(), released with MppFrmDestroy()
int uartFd = 0; // Value returned by UartOpenInit(); passed to UartSendRead()
74 
75 /*
76  * 加载手部检测和手势分类模型
77  * Load hand detect and classify model
78  */
Yolo2HandDetectResnetClassifyLoad(uintptr_t * model)79 HI_S32 Yolo2HandDetectResnetClassifyLoad(uintptr_t* model)
80 {
81     SAMPLE_SVP_NNIE_CFG_S *self = NULL;
82     HI_S32 ret;
83 
84     ret = CnnCreate(&self, MODEL_FILE_GESTURE);
85     *model = ret < 0 ? 0 : (uintptr_t)self;
86     HandDetectInit(); // Initialize the hand detection model
87     SAMPLE_PRT("Load hand detect claasify model success\n");
88     /*
89      * Uart串口初始化
90      * Uart open init
91      */
92     uartFd = UartOpenInit();
93     if (uartFd < 0) {
94         printf("uart1 open failed\r\n");
95     } else {
96         printf("uart1 open successed\r\n");
97     }
98     return ret;
99 }
100 
101 /*
102  * 卸载手部检测和手势分类模型
103  * Unload hand detect and classify model
104  */
Yolo2HandDetectResnetClassifyUnload(uintptr_t model)105 HI_S32 Yolo2HandDetectResnetClassifyUnload(uintptr_t model)
106 {
107     CnnDestroy((SAMPLE_SVP_NNIE_CFG_S*)model);
108     HandDetectExit(); // Uninitialize the hand detection model
109     SAMPLE_PRT("Unload hand detect claasify model success\n");
110 
111     return 0;
112 }
113 
114 /*
115  * 获得最大的手
116  * Get the maximum hand
117  */
GetBiggestHandIndex(RectBox boxs[],int detectNum)118 static HI_S32 GetBiggestHandIndex(RectBox boxs[], int detectNum)
119 {
120     HI_S32 handIndex = 0;
121     HI_S32 biggestBoxIndex = handIndex;
122     HI_S32 biggestBoxWidth = boxs[handIndex].xmax - boxs[handIndex].xmin + 1;
123     HI_S32 biggestBoxHeight = boxs[handIndex].ymax - boxs[handIndex].ymin + 1;
124     HI_S32 biggestBoxArea = biggestBoxWidth * biggestBoxHeight;
125 
126     for (handIndex = 1; handIndex < detectNum; handIndex++) {
127         HI_S32 boxWidth = boxs[handIndex].xmax - boxs[handIndex].xmin + 1;
128         HI_S32 boxHeight = boxs[handIndex].ymax - boxs[handIndex].ymin + 1;
129         HI_S32 boxArea = boxWidth * boxHeight;
130         if (biggestBoxArea < boxArea) {
131             biggestBoxArea = boxArea;
132             biggestBoxIndex = handIndex;
133         }
134         biggestBoxWidth = boxs[biggestBoxIndex].xmax - boxs[biggestBoxIndex].xmin + 1;
135         biggestBoxHeight = boxs[biggestBoxIndex].ymax - boxs[biggestBoxIndex].ymin + 1;
136     }
137 
138     if ((biggestBoxWidth == 1) || (biggestBoxHeight == 1) || (detectNum == 0)) {
139         biggestBoxIndex = -1;
140     }
141 
142     return biggestBoxIndex;
143 }
144 
145 /*
146  * 手势识别信息
147  * Hand gesture recognition info
148  */
HandDetectFlag(const RecogNumInfo resBuf)149 static void HandDetectFlag(const RecogNumInfo resBuf)
150 {
151     HI_CHAR *gestureName = NULL;
152     switch (resBuf.num) {
153         case 0u:
154             gestureName = "gesture fist";
155             UartSendRead(uartFd, FistGesture); // 拳头手势
156             SAMPLE_PRT("----gesture name----:%s\n", gestureName);
157             break;
158         case 1u:
159             gestureName = "gesture indexUp";
160             UartSendRead(uartFd, ForefingerGesture); // 食指手势
161             SAMPLE_PRT("----gesture name----:%s\n", gestureName);
162             break;
163         case 2u:
164             gestureName = "gesture OK";
165             UartSendRead(uartFd, OkGesture); // OK手势
166             SAMPLE_PRT("----gesture name----:%s\n", gestureName);
167             break;
168         case 3u:
169             gestureName = "gesture palm";
170             UartSendRead(uartFd, PalmGesture); // 手掌手势
171             SAMPLE_PRT("----gesture name----:%s\n", gestureName);
172             break;
173         case 4u:
174             gestureName = "gesture yes";
175             UartSendRead(uartFd, YesGesture); // yes手势
176             SAMPLE_PRT("----gesture name----:%s\n", gestureName);
177             break;
178         case 5u:
179             gestureName = "gesture pinchOpen";
180             UartSendRead(uartFd, ForefingerAndThumbGesture); // 食指 + 大拇指
181             SAMPLE_PRT("----gesture name----:%s\n", gestureName);
182             break;
183         case 6u:
184             gestureName = "gesture phoneCall";
185             UartSendRead(uartFd, LittleFingerAndThumbGesture); // 大拇指 + 小拇指
186             SAMPLE_PRT("----gesture name----:%s\n", gestureName);
187             break;
188         default:
189             gestureName = "gesture others";
190             UartSendRead(uartFd, InvalidGesture); // 无效值
191             SAMPLE_PRT("----gesture name----:%s\n", gestureName);
192             break;
193     }
194     SAMPLE_PRT("hand gesture success\n");
195 }
196 
197 /*
198  * 手部检测和手势分类推理
199  * Hand detect and classify calculation
200  */
Yolo2HandDetectResnetClassifyCal(uintptr_t model,VIDEO_FRAME_INFO_S * srcFrm,VIDEO_FRAME_INFO_S * dstFrm)201 HI_S32 Yolo2HandDetectResnetClassifyCal(uintptr_t model, VIDEO_FRAME_INFO_S *srcFrm, VIDEO_FRAME_INFO_S *dstFrm)
202 {
203     SAMPLE_SVP_NNIE_CFG_S *self = (SAMPLE_SVP_NNIE_CFG_S*)model;
204     HI_S32 resLen = 0;
205     int objNum;
206     int ret;
207     int num = 0;
208 
209     ret = FrmToOrigImg((VIDEO_FRAME_INFO_S*)srcFrm, &img);
210     SAMPLE_CHECK_EXPR_RET(ret != HI_SUCCESS, ret, "hand detect for YUV Frm to Img FAIL, ret=%#x\n", ret);
211 
212     objNum = HandDetectCal(&img, objs); // Send IMG to the detection net for reasoning
213     for (int i = 0; i < objNum; i++) {
214         cnnBoxs[i] = objs[i].box;
215         RectBox *box = &objs[i].box;
216         RectBoxTran(box, HAND_FRM_WIDTH, HAND_FRM_HEIGHT,
217             dstFrm->stVFrame.u32Width, dstFrm->stVFrame.u32Height);
218         SAMPLE_PRT("yolo2_out: {%d, %d, %d, %d}\n", box->xmin, box->ymin, box->xmax, box->ymax);
219         boxs[i] = *box;
220     }
221     biggestBoxIndex = GetBiggestHandIndex(boxs, objNum);
222     SAMPLE_PRT("biggestBoxIndex:%d, objNum:%d\n", biggestBoxIndex, objNum);
223 
224     /*
225      * 当检测到对象时,在DSTFRM中绘制一个矩形
226      * When an object is detected, a rectangle is drawn in the DSTFRM
227      */
228     if (biggestBoxIndex >= 0) {
229         objBoxs[0] = boxs[biggestBoxIndex];
230         MppFrmDrawRects(dstFrm, objBoxs, 1, RGB888_GREEN, DRAW_RETC_THICK); // Target hand objnum is equal to 1
231 
232         for (int j = 0; (j < objNum) && (objNum > 1); j++) {
233             if (j != biggestBoxIndex) {
234                 remainingBoxs[num++] = boxs[j];
235                 /*
236                  * 其他手objnum等于objnum -1
237                  * Others hand objnum is equal to objnum -1
238                  */
239                 MppFrmDrawRects(dstFrm, remainingBoxs, objNum - 1, RGB888_RED, DRAW_RETC_THICK);
240             }
241         }
242 
243         /*
244          * 裁剪出来的图像通过预处理送分类网进行推理
245          * The cropped image is preprocessed and sent to the classification network for inference
246          */
247         ret = ImgYuvCrop(&img, &imgIn, &cnnBoxs[biggestBoxIndex]);
248         SAMPLE_CHECK_EXPR_RET(ret < 0, ret, "ImgYuvCrop FAIL, ret=%#x\n", ret);
249 
250         if ((imgIn.u32Width >= WIDTH_LIMIT) && (imgIn.u32Height >= HEIGHT_LIMIT)) {
251             COMPRESS_MODE_E enCompressMode = srcFrm->stVFrame.enCompressMode;
252             ret = OrigImgToFrm(&imgIn, &frmIn);
253             frmIn.stVFrame.enCompressMode = enCompressMode;
254             SAMPLE_PRT("crop u32Width = %d, img.u32Height = %d\n", imgIn.u32Width, imgIn.u32Height);
255             ret = MppFrmResize(&frmIn, &frmDst, IMAGE_WIDTH, IMAGE_HEIGHT);
256             ret = FrmToOrigImg(&frmDst, &imgDst);
257             ret = CnnCalImg(self,  &imgDst, numInfo, sizeof(numInfo) / sizeof((numInfo)[0]), &resLen);
258             SAMPLE_CHECK_EXPR_RET(ret < 0, ret, "CnnCalImg FAIL, ret=%#x\n", ret);
259             HI_ASSERT(resLen <= sizeof(numInfo) / sizeof(numInfo[0]));
260             HandDetectFlag(numInfo[0]);
261             MppFrmDestroy(&frmDst);
262         }
263         IveImgDestroy(&imgIn);
264     }
265 
266     return ret;
267 }
268 
269 #ifdef __cplusplus
270 #if __cplusplus
271 }
272 #endif
273 #endif /* End of #ifdef __cplusplus */
274