1 /*
2 * Copyright (c) 2020
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "dnn_io_proc.h"
22 #include "libavutil/imgutils.h"
23 #include "libswscale/swscale.h"
24 #include "libavutil/avassert.h"
25 #include "libavutil/detection_bbox.h"
26
/**
 * Write a float DNN output buffer into the first plane of a frame.
 *
 * AV_PIX_FMT_GRAYF32 frames receive a plain plane copy. Every other supported
 * format is produced by a same-size GRAYF32 -> GRAY8 swscale conversion; for
 * RGB24/BGR24 the conversion is set up as a gray plane three times as wide
 * as the frame.
 *
 * The DNN buffer is read with a row stride equal to the row's byte width
 * (no padding between rows).
 *
 * @param frame   destination frame; its format, width, height, data and
 *                linesize are used
 * @param output  source DNN data; must have dt == DNN_FLOAT
 * @param log_ctx context passed to the logging helpers
 * @return 0 on success, AVERROR(EINVAL) if the line size cannot be computed
 *         or the scale context cannot be created, AVERROR(ENOSYS) for a
 *         non-float data type or an unsupported pixel format
 */
int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
{
    struct SwsContext *sws_ctx;
    int plane_width;
    int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);

    if (bytewidth < 0)
        return AVERROR(EINVAL);

    if (output->dt != DNN_FLOAT) {
        avpriv_report_missing_feature(log_ctx, "data type rather than DNN_FLOAT");
        return AVERROR(ENOSYS);
    }

    /* Pick the width of the gray plane handed to swscale, or finish early
     * for the cases that need no conversion context. */
    switch (frame->format) {
    case AV_PIX_FMT_GRAYF32:
        av_image_copy_plane(frame->data[0], frame->linesize[0],
                            output->data, bytewidth,
                            bytewidth, frame->height);
        return 0;
    case AV_PIX_FMT_RGB24:
    case AV_PIX_FMT_BGR24:
        /* three interleaved samples per pixel, converted as one wide plane */
        plane_width = frame->width * 3;
        break;
    case AV_PIX_FMT_YUV420P:
    case AV_PIX_FMT_YUV422P:
    case AV_PIX_FMT_YUV444P:
    case AV_PIX_FMT_YUV410P:
    case AV_PIX_FMT_YUV411P:
    case AV_PIX_FMT_GRAY8:
    case AV_PIX_FMT_NV12:
        plane_width = frame->width;
        break;
    default:
        avpriv_report_missing_feature(log_ctx, "%s", av_get_pix_fmt_name(frame->format));
        return AVERROR(ENOSYS);
    }

    sws_ctx = sws_getContext(plane_width, frame->height, AV_PIX_FMT_GRAYF32,
                             plane_width, frame->height, AV_PIX_FMT_GRAY8,
                             0, NULL, NULL, NULL);
    if (!sws_ctx) {
        av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
               "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
               av_get_pix_fmt_name(AV_PIX_FMT_GRAYF32), plane_width, frame->height,
               av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), plane_width, frame->height);
        return AVERROR(EINVAL);
    }

    sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},
              (const int[4]){plane_width * sizeof(float), 0, 0, 0}, 0, frame->height,
              (uint8_t * const*)frame->data, frame->linesize);
    sws_freeContext(sws_ctx);

    return 0;
}
99
/**
 * Fill a float DNN input buffer from the first plane of a frame.
 *
 * AV_PIX_FMT_GRAYF32 frames are copied plane-to-buffer. Every other supported
 * format goes through a same-size GRAY8 -> GRAYF32 swscale conversion; for
 * RGB24/BGR24 the conversion is set up as a gray plane three times as wide
 * as the frame.
 *
 * The DNN buffer is written with a row stride equal to the row's byte width
 * (no padding between rows).
 *
 * @param frame   source frame; its format, width, height, data and linesize
 *                are used
 * @param input   destination DNN data; must have dt == DNN_FLOAT
 * @param log_ctx context passed to the logging helpers
 * @return 0 on success, AVERROR(EINVAL) if the line size cannot be computed
 *         or the scale context cannot be created, AVERROR(ENOSYS) for a
 *         non-float data type or an unsupported pixel format
 */
int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
{
    struct SwsContext *sws_ctx;
    int plane_width;
    int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);

    if (bytewidth < 0)
        return AVERROR(EINVAL);

    if (input->dt != DNN_FLOAT) {
        avpriv_report_missing_feature(log_ctx, "data type rather than DNN_FLOAT");
        return AVERROR(ENOSYS);
    }

    /* Pick the width of the gray plane handed to swscale, or finish early
     * for the cases that need no conversion context. */
    switch (frame->format) {
    case AV_PIX_FMT_GRAYF32:
        av_image_copy_plane(input->data, bytewidth,
                            frame->data[0], frame->linesize[0],
                            bytewidth, frame->height);
        return 0;
    case AV_PIX_FMT_RGB24:
    case AV_PIX_FMT_BGR24:
        /* three interleaved samples per pixel, converted as one wide plane */
        plane_width = frame->width * 3;
        break;
    case AV_PIX_FMT_YUV420P:
    case AV_PIX_FMT_YUV422P:
    case AV_PIX_FMT_YUV444P:
    case AV_PIX_FMT_YUV410P:
    case AV_PIX_FMT_YUV411P:
    case AV_PIX_FMT_GRAY8:
    case AV_PIX_FMT_NV12:
        plane_width = frame->width;
        break;
    default:
        avpriv_report_missing_feature(log_ctx, "%s", av_get_pix_fmt_name(frame->format));
        return AVERROR(ENOSYS);
    }

    sws_ctx = sws_getContext(plane_width, frame->height, AV_PIX_FMT_GRAY8,
                             plane_width, frame->height, AV_PIX_FMT_GRAYF32,
                             0, NULL, NULL, NULL);
    if (!sws_ctx) {
        av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
               "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
               av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), plane_width, frame->height,
               av_get_pix_fmt_name(AV_PIX_FMT_GRAYF32), plane_width, frame->height);
        return AVERROR(EINVAL);
    }

    sws_scale(sws_ctx, (const uint8_t **)frame->data,
              frame->linesize, 0, frame->height,
              (uint8_t * const [4]){input->data, 0, 0, 0},
              (const int [4]){plane_width * sizeof(float), 0, 0, 0});
    sws_freeContext(sws_ctx);

    return 0;
}
174
/**
 * Translate the layout of a DNN buffer into a packed RGB pixel format.
 *
 * Only DNN_UINT8 data is handled: DCO_BGR maps to AV_PIX_FMT_BGR24 and
 * DCO_RGB maps to AV_PIX_FMT_RGB24. Any other data type or channel order
 * triggers av_assert0(); AV_PIX_FMT_BGR24 is the fallback value written
 * after each assertion.
 *
 * @param data DNN buffer description; its dt and order fields are read
 * @return the matching AVPixelFormat
 */
static enum AVPixelFormat get_pixel_format(DNNData *data)
{
    if (data->dt != DNN_UINT8) {
        av_assert0(!"unsupported data type.\n");
        return AV_PIX_FMT_BGR24;
    }

    if (data->order == DCO_BGR)
        return AV_PIX_FMT_BGR24;
    if (data->order == DCO_RGB)
        return AV_PIX_FMT_RGB24;

    av_assert0(!"unsupported data pixel format.\n");
    return AV_PIX_FMT_BGR24;
}
192
/**
 * Crop one detection bounding box out of a frame and scale/convert it into
 * a DNN input buffer.
 *
 * The bounding box is taken from the frame's AV_FRAME_DATA_DETECTION_BBOXES
 * side data (asserted to be present). The cropped region, in the frame's
 * pixel format, is scaled with SWS_FAST_BILINEAR to input->width x
 * input->height in the pixel format derived from input (see
 * get_pixel_format()) and written to input->data.
 *
 * NOTE(review): the box coordinates are used as-is; nothing here checks that
 * the box lies inside the frame - confirm the producer guarantees that.
 *
 * @param frame      source frame carrying the detection side data
 * @param input      destination DNN buffer (width, height, data, dt, order
 *                   are used)
 * @param bbox_index index of the bounding box inside the side data
 * @param log_ctx    context passed to av_log()
 * @return 0 on success, AVERROR(EINVAL) if the scale context cannot be
 *         created or the frame format has no descriptor, or the negative
 *         error code from av_image_fill_linesizes()
 */
int ff_frame_to_dnn_classify(AVFrame *frame, DNNData *input, uint32_t bbox_index, void *log_ctx)
{
    const AVPixFmtDescriptor *desc;
    int offsetx[4], offsety[4];
    int pixsteps[4];
    /* Zero-initialised so planes the frame does not have are passed to
     * swscale as NULL instead of indeterminate pointers. */
    uint8_t *bbox_data[4] = { NULL };
    struct SwsContext *sws_ctx;
    int linesizes[4];
    int ret = 0;
    enum AVPixelFormat fmt;
    int left, top, width, height;
    const AVDetectionBBoxHeader *header;
    const AVDetectionBBox *bbox;
    AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);
    av_assert0(sd);

    header = (const AVDetectionBBoxHeader *)sd->data;
    bbox = av_get_detection_bbox(header, bbox_index);

    left = bbox->x;
    width = bbox->w;
    top = bbox->y;
    height = bbox->h;

    fmt = get_pixel_format(input);
    sws_ctx = sws_getContext(width, height, frame->format,
                             input->width, input->height, fmt,
                             SWS_FAST_BILINEAR, NULL, NULL, NULL);
    if (!sws_ctx) {
        av_log(log_ctx, AV_LOG_ERROR, "Failed to create scale context for the conversion "
               "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
               av_get_pix_fmt_name(frame->format), width, height,
               av_get_pix_fmt_name(fmt), input->width, input->height);
        return AVERROR(EINVAL);
    }

    ret = av_image_fill_linesizes(linesizes, fmt, input->width);
    if (ret < 0) {
        av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes\n");
        sws_freeContext(sws_ctx);
        return ret;
    }

    desc = av_pix_fmt_desc_get(frame->format);
    if (!desc) {
        sws_freeContext(sws_ctx);
        return AVERROR(EINVAL);
    }

    /* Per-plane crop origin in pixels: planes 1 and 2 use the chroma
     * subsampling of the frame format, planes 0 and 3 are full resolution. */
    offsetx[1] = offsetx[2] = AV_CEIL_RSHIFT(left, desc->log2_chroma_w);
    offsetx[0] = offsetx[3] = left;

    offsety[1] = offsety[2] = AV_CEIL_RSHIFT(top, desc->log2_chroma_h);
    offsety[0] = offsety[3] = top;

    /* The horizontal origin must be converted from pixels to bytes; the
     * step differs from 1 for interleaved or packed planes (for example the
     * UV plane of NV12). */
    av_image_fill_max_pixsteps(pixsteps, NULL, desc);

    /* All local per-plane arrays have 4 entries, so never walk further even
     * if the frame exposes more data pointers. */
    for (int k = 0; k < 4 && frame->data[k]; k++)
        bbox_data[k] = frame->data[k] + offsety[k] * frame->linesize[k]
                                      + offsetx[k] * pixsteps[k];

    sws_scale(sws_ctx, (const uint8_t *const *)bbox_data, frame->linesize,
              0, height,
              (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes);

    sws_freeContext(sws_ctx);

    return 0;
}
253
/**
 * Scale/convert a whole frame into a DNN input buffer.
 *
 * The frame is scaled with SWS_FAST_BILINEAR from its own size and pixel
 * format to input->width x input->height in the pixel format derived from
 * input (see get_pixel_format()) and written to input->data.
 *
 * @param frame   source frame
 * @param input   destination DNN buffer (width, height, data, dt, order are
 *                used)
 * @param log_ctx context passed to av_log()
 * @return 0 on success, AVERROR(EINVAL) if the scale context cannot be
 *         created, or the negative error code from av_image_fill_linesizes()
 */
int ff_frame_to_dnn_detect(AVFrame *frame, DNNData *input, void *log_ctx)
{
    struct SwsContext *sws_ctx;
    int linesizes[4];
    int ret = 0;
    enum AVPixelFormat fmt = get_pixel_format(input);

    sws_ctx = sws_getContext(frame->width, frame->height, frame->format,
                             input->width, input->height, fmt,
                             SWS_FAST_BILINEAR, NULL, NULL, NULL);
    if (!sws_ctx) {
        av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
               "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
               av_get_pix_fmt_name(frame->format), frame->width, frame->height,
               av_get_pix_fmt_name(fmt), input->width, input->height);
        return AVERROR(EINVAL);
    }

    ret = av_image_fill_linesizes(linesizes, fmt, input->width);
    if (ret < 0) {
        av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes\n");
        sws_freeContext(sws_ctx);
        return ret;
    }

    sws_scale(sws_ctx, (const uint8_t *const *)frame->data, frame->linesize, 0, frame->height,
              (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes);

    sws_freeContext(sws_ctx);

    /* Report plain success rather than forwarding whatever non-negative
     * value av_image_fill_linesizes() produced. */
    return 0;
}
284