/*
 * Copyright (c) 2019 Guo Yejun
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * implementing a generic image processing filter using deep learning networks.
 */

#include "libavformat/avio.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/avassert.h"
#include "libavutil/imgutils.h"
#include "filters.h"
#include "dnn_filter_common.h"
#include "formats.h"
#include "internal.h"
#include "libswscale/swscale.h"
#include "libavutil/time.h"
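/* Filter state: the shared DNN execution context, plus an optional swscale
 * context used to rescale the chroma planes when the model changes the
 * frame size. */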
typedef struct DnnProcessingContext {
    const AVClass *class;
    DnnContext dnnctx;
    struct SwsContext *sws_uv_scale;
    int sws_uv_height;
} DnnProcessingContext;

#define OFFSET(x) offsetof(DnnProcessingContext, dnnctx.x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
static const AVOption dnn_processing_options[] = {
    { "dnn_backend", "DNN backend",                OFFSET(backend_type),     AV_OPT_TYPE_INT,       { .i64 = 0 },    INT_MIN, INT_MAX, FLAGS, "backend" },
    { "native",      "native backend flag",        0,                        AV_OPT_TYPE_CONST,     { .i64 = 0 },    0, 0, FLAGS, "backend" },
#if (CONFIG_LIBTENSORFLOW == 1)
    { "tensorflow",  "tensorflow backend flag",    0,                        AV_OPT_TYPE_CONST,     { .i64 = 1 },    0, 0, FLAGS, "backend" },
#endif
#if (CONFIG_LIBOPENVINO == 1)
    { "openvino",    "openvino backend flag",      0,                        AV_OPT_TYPE_CONST,     { .i64 = 2 },    0, 0, FLAGS, "backend" },
#endif
    DNN_COMMON_OPTIONS
    { NULL }
};

AVFILTER_DEFINE_CLASS(dnn_processing);
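/* Initialize the DNN context in whole-frame processing mode
 * (DFT_PROCESS_FRAME); the backend and model come from the filter
 * options. */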
static av_cold int init(AVFilterContext *context)
{
    DnnProcessingContext *ctx = context->priv;
    return ff_dnn_init(&ctx->dnnctx, DFT_PROCESS_FRAME, context);
}
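/* Formats the filter accepts. For the RGB formats the model sees all three
 * channels; for the gray/YUV formats it processes a single plane (the luma
 * plane for YUV), see check_modelinput_inlink() below. */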
static const enum AVPixelFormat pix_fmts[] = {
    AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,
    AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAYF32,
    AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
    AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
    AV_PIX_FMT_NV12,
    AV_PIX_FMT_NONE
};

#define LOG_FORMAT_CHANNEL_MISMATCH()                       \
    av_log(ctx, AV_LOG_ERROR,                               \
           "the frame's format %s does not match "          \
           "the model input channel %d\n",                  \
           av_get_pix_fmt_name(fmt),                        \
           model_input->channels);
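/* Verify that the input link matches what the model expects: frame
 * dimensions (unless the model accepts any, signalled by -1), float data
 * type, and a channel count consistent with the pixel format. */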
static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLink *inlink)
{
    AVFilterContext *ctx   = inlink->dst;
    enum AVPixelFormat fmt = inlink->format;

    // the design is to add an explicit scale filter before this filter
    if (model_input->height != -1 && model_input->height != inlink->h) {
        av_log(ctx, AV_LOG_ERROR, "the model requires frame height %d but got %d\n",
                                   model_input->height, inlink->h);
        return AVERROR(EIO);
    }
    if (model_input->width != -1 && model_input->width != inlink->w) {
        av_log(ctx, AV_LOG_ERROR, "the model requires frame width %d but got %d\n",
                                   model_input->width, inlink->w);
        return AVERROR(EIO);
    }
    if (model_input->dt != DNN_FLOAT) {
        avpriv_report_missing_feature(ctx, "data types other than DNN_FLOAT");
        return AVERROR(EIO);
    }

    switch (fmt) {
    case AV_PIX_FMT_RGB24:
    case AV_PIX_FMT_BGR24:
        if (model_input->channels != 3) {
            LOG_FORMAT_CHANNEL_MISMATCH();
            return AVERROR(EIO);
        }
        return 0;
    case AV_PIX_FMT_GRAYF32:
    case AV_PIX_FMT_YUV420P:
    case AV_PIX_FMT_YUV422P:
    case AV_PIX_FMT_YUV444P:
    case AV_PIX_FMT_YUV410P:
    case AV_PIX_FMT_YUV411P:
    case AV_PIX_FMT_NV12:
        if (model_input->channels != 1) {
            LOG_FORMAT_CHANNEL_MISMATCH();
            return AVERROR(EIO);
        }
        return 0;
    default:
        avpriv_report_missing_feature(ctx, "%s", av_get_pix_fmt_name(fmt));
        return AVERROR(EIO);
    }

    return 0;
}
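/* Query the model for its input description and validate it against the
 * properties of the input link. */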
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *context     = inlink->dst;
    DnnProcessingContext *ctx = context->priv;
    int result;
    DNNData model_input;
    int check;

    result = ff_dnn_get_input(&ctx->dnnctx, &model_input);
    if (result != 0) {
        av_log(ctx, AV_LOG_ERROR, "could not get input from the model\n");
        return result;
    }

    check = check_modelinput_inlink(&model_input, inlink);
    if (check != 0) {
        return check;
    }

    return 0;
}
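/* True for 3-component non-RGB (i.e. YUV-family) formats; note that this
 * includes the semi-planar AV_PIX_FMT_NV12. */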
static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
    av_assert0(desc);
    return !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components == 3;
}
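/* If the model rescales a YUV frame, create a swscale context to rescale
 * the chroma planes, which the model itself does not process. NV12's
 * interleaved UV plane is scaled as a 2-channel YA8 surface, planar
 * chroma as GRAY8. */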
static int prepare_uv_scale(AVFilterLink *outlink)
{
    AVFilterContext *context = outlink->src;
    DnnProcessingContext *ctx = context->priv;
    AVFilterLink *inlink = context->inputs[0];
    enum AVPixelFormat fmt = inlink->format;

    if (isPlanarYUV(fmt)) {
        if (inlink->w != outlink->w || inlink->h != outlink->h) {
            if (fmt == AV_PIX_FMT_NV12) {
                ctx->sws_uv_scale = sws_getContext(inlink->w >> 1, inlink->h >> 1, AV_PIX_FMT_YA8,
                                                   outlink->w >> 1, outlink->h >> 1, AV_PIX_FMT_YA8,
                                                   SWS_BICUBIC, NULL, NULL, NULL);
                ctx->sws_uv_height = inlink->h >> 1;
            } else {
                const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
                int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
                int sws_src_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
                int sws_dst_h = AV_CEIL_RSHIFT(outlink->h, desc->log2_chroma_h);
                int sws_dst_w = AV_CEIL_RSHIFT(outlink->w, desc->log2_chroma_w);
                ctx->sws_uv_scale = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8,
                                                   sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8,
                                                   SWS_BICUBIC, NULL, NULL, NULL);
                ctx->sws_uv_height = sws_src_h;
            }
        }
    }

    return 0;
}
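/* Determine the output frame size by asking the backend (a trial run of
 * the model), then set up chroma scaling in case the size changed. */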
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *context = outlink->src;
    DnnProcessingContext *ctx = context->priv;
    int result;
    AVFilterLink *inlink = context->inputs[0];

    // do a trial run in case the dnn model resizes the frame
    result = ff_dnn_get_output(&ctx->dnnctx, inlink->w, inlink->h, &outlink->w, &outlink->h);
    if (result != 0) {
        av_log(ctx, AV_LOG_ERROR, "could not get output from the model\n");
        return result;
    }

    prepare_uv_scale(outlink);

    return 0;
}
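/* The model only produces the luma plane for YUV input: copy the chroma
 * planes through unchanged when the frame size is unchanged, otherwise
 * rescale them with the context set up in prepare_uv_scale(). */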
static int copy_uv_planes(DnnProcessingContext *ctx, AVFrame *out, const AVFrame *in)
{
    const AVPixFmtDescriptor *desc;
    int uv_height;

    if (!ctx->sws_uv_scale) {
        av_assert0(in->height == out->height && in->width == out->width);
        desc = av_pix_fmt_desc_get(in->format);
        uv_height = AV_CEIL_RSHIFT(in->height, desc->log2_chroma_h);
        for (int i = 1; i < 3; ++i) {
            int bytewidth = av_image_get_linesize(in->format, in->width, i);
            if (bytewidth < 0) {
                return AVERROR(EINVAL);
            }
            av_image_copy_plane(out->data[i], out->linesize[i],
                                in->data[i], in->linesize[i],
                                bytewidth, uv_height);
        }
    } else if (in->format == AV_PIX_FMT_NV12) {
        sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 1), in->linesize + 1,
                  0, ctx->sws_uv_height, out->data + 1, out->linesize + 1);
    } else {
        sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 1), in->linesize + 1,
                  0, ctx->sws_uv_height, out->data + 1, out->linesize + 1);
        sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 2), in->linesize + 2,
                  0, ctx->sws_uv_height, out->data + 2, out->linesize + 2);
    }

    return 0;
}
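/* Called on EOF: flush the backend, then poll for and forward any
 * remaining async results before the filter shuts down. */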
static int flush_frame(AVFilterLink *outlink, int64_t pts, int64_t *out_pts)
{
    DnnProcessingContext *ctx = outlink->src->priv;
    int ret;
    DNNAsyncStatusType async_state;

    ret = ff_dnn_flush(&ctx->dnnctx);
    if (ret != 0) {
        return -1;
    }

    do {
        AVFrame *in_frame = NULL;
        AVFrame *out_frame = NULL;
        async_state = ff_dnn_get_result(&ctx->dnnctx, &in_frame, &out_frame);
        if (out_frame) {
            if (isPlanarYUV(in_frame->format))
                copy_uv_planes(ctx, out_frame, in_frame);
            av_frame_free(&in_frame);
            // record the pts before ff_filter_frame(), which takes
            // ownership of out_frame
            if (out_pts)
                *out_pts = out_frame->pts + pts;
            ret = ff_filter_frame(outlink, out_frame);
            if (ret < 0)
                return ret;
        }
        av_usleep(5000);
    } while (async_state >= DAST_NOT_READY);

    return 0;
}
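/* Main scheduling callback: feed every queued input frame to the model,
 * forward every finished output frame, then handle EOF and frame-wanted
 * signalling on the links. */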
static int activate(AVFilterContext *filter_ctx)
{
    AVFilterLink *inlink = filter_ctx->inputs[0];
    AVFilterLink *outlink = filter_ctx->outputs[0];
    DnnProcessingContext *ctx = filter_ctx->priv;
    AVFrame *in = NULL, *out = NULL;
    int64_t pts;
    int ret, status;
    int got_frame = 0;
    int async_state;

    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

    do {
        // drain all input frames
        ret = ff_inlink_consume_frame(inlink, &in);
        if (ret < 0)
            return ret;
        if (ret > 0) {
            out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
            if (!out) {
                av_frame_free(&in);
                return AVERROR(ENOMEM);
            }
            av_frame_copy_props(out, in);
            if (ff_dnn_execute_model(&ctx->dnnctx, in, out) != 0) {
                return AVERROR(EIO);
            }
        }
    } while (ret > 0);

    // drain all processed frames
    do {
        AVFrame *in_frame = NULL;
        AVFrame *out_frame = NULL;
        async_state = ff_dnn_get_result(&ctx->dnnctx, &in_frame, &out_frame);
        if (out_frame) {
            if (isPlanarYUV(in_frame->format))
                copy_uv_planes(ctx, out_frame, in_frame);
            av_frame_free(&in_frame);
            ret = ff_filter_frame(outlink, out_frame);
            if (ret < 0)
                return ret;
            got_frame = 1;
        }
    } while (async_state == DAST_SUCCESS);

    // if we got a frame, schedule the next filter
    if (got_frame)
        return 0;

    if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
        if (status == AVERROR_EOF) {
            int64_t out_pts = pts;
            ret = flush_frame(outlink, pts, &out_pts);
            ff_outlink_set_status(outlink, status, out_pts);
            return ret;
        }
    }

    FF_FILTER_FORWARD_WANTED(outlink, inlink);

    return 0;
}
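/* Release the chroma swscale context and the DNN backend resources. */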
static av_cold void uninit(AVFilterContext *ctx)
{
    DnnProcessingContext *context = ctx->priv;

    sws_freeContext(context->sws_uv_scale);
    ff_dnn_uninit(&context->dnnctx);
}

static const AVFilterPad dnn_processing_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = config_input,
    },
};

static const AVFilterPad dnn_processing_outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_VIDEO,
        .config_props  = config_output,
    },
};

const AVFilter ff_vf_dnn_processing = {
    .name          = "dnn_processing",
    .description   = NULL_IF_CONFIG_SMALL("Apply DNN processing filter to the input."),
    .priv_size     = sizeof(DnnProcessingContext),
    .init          = init,
    .uninit        = uninit,
    FILTER_INPUTS(dnn_processing_inputs),
    FILTER_OUTPUTS(dnn_processing_outputs),
    FILTER_PIXFMTS_ARRAY(pix_fmts),
    .priv_class    = &dnn_processing_class,
    .activate      = activate,
};