/*
 * Copyright (c) 2021 Xuewei Meng
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/imgutils.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "filters.h"
#include "formats.h"
#include "framesync.h"
#include "internal.h"
#include "video.h"

enum FilterModes {
    BASIC,
    FAST,
    NB_MODES,
};

enum GuidanceModes {
    OFF,
    ON,
    NB_GUIDANCE_MODES,
};

typedef struct GuidedContext {
    const AVClass *class;
    FFFrameSync fs;

    int radius;
    float eps;
    int mode;
    int sub;
    int guidance;
    int planes;

    int width;
    int height;

    int nb_planes;
    int depth;
    int planewidth[4];
    int planeheight[4];

    float *I;
    float *II;
    float *P;
    float *IP;
    float *meanI;
    float *meanII;
    float *meanP;
    float *meanIP;

    float *A;
    float *B;
    float *meanA;
    float *meanB;

    int (*box_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
} GuidedContext;

#define OFFSET(x) offsetof(GuidedContext, x)
#define TFLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM

static const AVOption guided_options[] = {
    { "radius",   "set the box radius",                               OFFSET(radius),   AV_OPT_TYPE_INT,   {.i64 = 3    },     1,                    20, TFLAGS },
    { "eps",      "set the regularization parameter (with square)",   OFFSET(eps),      AV_OPT_TYPE_FLOAT, {.dbl = 0.01 },   0.0,                     1, TFLAGS },
    { "mode",     "set filtering mode (0: basic mode; 1: fast mode)", OFFSET(mode),     AV_OPT_TYPE_INT,   {.i64 = BASIC}, BASIC,          NB_MODES - 1, TFLAGS, "mode" },
    { "basic",    "basic guided filter",                              0,                AV_OPT_TYPE_CONST, {.i64 = BASIC},     0,                     0, TFLAGS, "mode" },
    { "fast",     "fast guided filter",                               0,                AV_OPT_TYPE_CONST, {.i64 = FAST },     0,                     0, TFLAGS, "mode" },
    { "sub",      "subsampling ratio for fast mode",                  OFFSET(sub),      AV_OPT_TYPE_INT,   {.i64 = 4    },     2,                    64, TFLAGS },
    { "guidance", "set guidance mode (0: off mode; 1: on mode)",      OFFSET(guidance), AV_OPT_TYPE_INT,   {.i64 = OFF  },   OFF, NB_GUIDANCE_MODES - 1,  FLAGS, "guidance" },
    { "off",      "only one input is enabled",                        0,                AV_OPT_TYPE_CONST, {.i64 = OFF  },     0,                     0,  FLAGS, "guidance" },
    { "on",       "two inputs are required",                          0,                AV_OPT_TYPE_CONST, {.i64 = ON   },     0,                     0,  FLAGS, "guidance" },
    { "planes",   "set planes to filter",                             OFFSET(planes),   AV_OPT_TYPE_INT,   {.i64 = 1    },     0,                   0xF, TFLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(guided);
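
/*
 * Illustrative usage of the options above (file names are placeholders):
 *   self-guided:   ffmpeg -i in.mp4 -vf guided=radius=3:eps=0.01 out.mp4
 *   cross-guided:  ffmpeg -i in.mp4 -i guide.mp4 \
 *                         -filter_complex "[0:v][1:v]guided=guidance=on" out.mp4
 */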

typedef struct ThreadData {
    int width;
    int height;
    float *src;
    float *dst;
    int srcStride;
    int dstStride;
} ThreadData;

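/*
 * Plain box blur: each output sample is the average of the
 * (2*radius+1) x (2*radius+1) window around it, with coordinates clamped
 * at the image borders. Each slice job handles a horizontal band of rows.
 */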
static int box_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    GuidedContext *s = ctx->priv;
    ThreadData *t = arg;

    const int width  = t->width;
    const int height = t->height;
    const int src_stride = t->srcStride;
    const int dst_stride = t->dstStride;
    const int slice_start = (height * jobnr) / nb_jobs;
    const int slice_end   = (height * (jobnr + 1)) / nb_jobs;
    const int radius = s->radius;
    const float *src = t->src;
    float *dst = t->dst;

    int w;
    int numPix;
    w = (radius << 1) + 1;
    numPix = w * w;
    for (int i = slice_start;i < slice_end;i++) {
        for (int j = 0;j < width;j++) {
            float temp = 0.0;
            for (int row = -radius;row <= radius;row++) {
                for (int col = -radius;col <= radius;col++) {
                    int x = i + row;
                    int y = j + col;
                    x = (x < 0) ? 0 : (x >= height ? height - 1 : x);
                    y = (y < 0) ? 0 : (y >= width ? width - 1 : y);
                    temp += src[x * src_stride + y];
                }
            }
            dst[i * dst_stride + j] = temp / numPix;
        }
    }
    return 0;
}

static const enum AVPixelFormat pix_fmts[] = {
    AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P,
    AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
    AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P,
    AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
    AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
    AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
    AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
    AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12,
    AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14,
    AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
    AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
    AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
    AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
    AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
    AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
    AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
    AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
    AV_PIX_FMT_NONE
};

static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    GuidedContext *s = ctx->priv;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);

    if (s->mode == BASIC) {
        s->sub = 1;
    } else if (s->mode == FAST) {
        if (s->radius >= s->sub)
            s->radius = s->radius / s->sub;
        else
            s->radius = 1;
    }

    s->depth = desc->comp[0].depth;
    s->width = ctx->inputs[0]->w;
    s->height = ctx->inputs[0]->h;

    s->planewidth[1]  = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
    s->planewidth[0]  = s->planewidth[3] = inlink->w;
    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
    s->planeheight[0] = s->planeheight[3] = inlink->h;

    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
    s->box_slice = box_slice;
    return 0;
}

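/*
 * Per-plane guided filtering as implemented below (cf. He et al., guided
 * image filtering): guide I and input P are (optionally) subsampled by
 * 'sub', box-filtered means of I, I*I, P and I*P give per-pixel
 *   a = cov(I,P) / (var(I) + eps),  b = mean(P) - a * mean(I),
 * and the full-resolution output is mean(a) * I + mean(b).
 */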
#define GUIDED(type, name)                                                              \
static int guided_##name(AVFilterContext *ctx, GuidedContext *s,                        \
                          const uint8_t *ssrc, const uint8_t *ssrcRef,                  \
                          uint8_t *ddst, int radius, float eps, int width, int height,  \
                          int src_stride, int src_ref_stride, int dst_stride,           \
                          float maxval)                                                 \
{                                                                                       \
    int ret = 0;                                                                        \
    type *dst = (type *)ddst;                                                           \
    const type *src = (const type *)ssrc;                                               \
    const type *srcRef = (const type *)ssrcRef;                                         \
                                                                                        \
    int sub = s->sub;                                                                   \
    int h = (height % sub) == 0 ? height / sub : height / sub + 1;                      \
    int w = (width % sub) == 0 ? width / sub : width / sub + 1;                         \
                                                                                        \
    ThreadData t;                                                                       \
    const int nb_threads = ff_filter_get_nb_threads(ctx);                               \
    float *I = s->I;                                                                    \
    float *II = s->II;                                                                  \
    float *P = s->P;                                                                    \
    float *IP = s->IP;                                                                  \
    float *meanI = s->meanI;                                                            \
    float *meanII = s->meanII;                                                          \
    float *meanP = s->meanP;                                                            \
    float *meanIP = s->meanIP;                                                          \
    float *A = s->A;                                                                    \
    float *B = s->B;                                                                    \
    float *meanA = s->meanA;                                                            \
    float *meanB = s->meanB;                                                            \
                                                                                        \
    for (int i = 0;i < h;i++) {                                                         \
        for (int j = 0;j < w;j++) {                                                     \
            int x = i * w + j;                                                          \
            I[x]  = src[(i * src_stride + j) * sub] / maxval;                           \
            II[x] = I[x] * I[x];                                                        \
            P[x]  = srcRef[(i * src_ref_stride + j) * sub] / maxval;                    \
            IP[x] = I[x] * P[x];                                                        \
        }                                                                               \
    }                                                                                   \
                                                                                        \
    t.width  = w;                                                                       \
    t.height = h;                                                                       \
    t.srcStride = w;                                                                    \
    t.dstStride = w;                                                                    \
    t.src = I;                                                                          \
    t.dst = meanI;                                                                      \
    ff_filter_execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));               \
    t.src = II;                                                                         \
    t.dst = meanII;                                                                     \
    ff_filter_execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));               \
    t.src = P;                                                                          \
    t.dst = meanP;                                                                      \
    ff_filter_execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));               \
    t.src = IP;                                                                         \
    t.dst = meanIP;                                                                     \
    ff_filter_execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));               \
                                                                                        \
    for (int i = 0;i < h;i++) {                                                         \
        for (int j = 0;j < w;j++) {                                                     \
            int x = i * w + j;                                                          \
            float varI = meanII[x] - (meanI[x] * meanI[x]);                             \
            float covIP = meanIP[x] - (meanI[x] * meanP[x]);                            \
            A[x] = covIP / (varI + eps);                                                \
            B[x] = meanP[x] - A[x] * meanI[x];                                          \
        }                                                                               \
    }                                                                                   \
                                                                                        \
    t.src = A;                                                                          \
    t.dst = meanA;                                                                      \
    ff_filter_execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));               \
    t.src = B;                                                                          \
    t.dst = meanB;                                                                      \
    ff_filter_execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));               \
                                                                                        \
    for (int i = 0;i < height;i++) {                                                    \
        for (int j = 0;j < width;j++) {                                                 \
            int x = i / sub * w + j / sub;                                              \
            dst[i * dst_stride + j] = meanA[x] * src[i * src_stride + j] +              \
                                      meanB[x] * maxval;                                \
        }                                                                               \
    }                                                                                   \
                                                                                        \
    return ret;                                                                         \
}

GUIDED(uint8_t, byte)
GUIDED(uint16_t, word)

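/*
 * Allocate the output frame and run the guided filter on each selected
 * plane; planes not selected by the 'planes' bitmask are copied through
 * untouched. 'ref' is either the guidance frame or the input itself.
 */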
static int filter_frame(AVFilterContext *ctx, AVFrame **out, AVFrame *in, AVFrame *ref)
{
    GuidedContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
    if (!*out)
        return AVERROR(ENOMEM);
    av_frame_copy_props(*out, in);

    for (int plane = 0; plane < s->nb_planes; plane++) {
        if (!(s->planes & (1 << plane))) {
            av_image_copy_plane((*out)->data[plane], (*out)->linesize[plane],
                                in->data[plane], in->linesize[plane],
                                s->planewidth[plane] * ((s->depth + 7) / 8), s->planeheight[plane]);
            continue;
        }
        if (s->depth <= 8)
            guided_byte(ctx, s, in->data[plane], ref->data[plane], (*out)->data[plane], s->radius, s->eps,
                        s->planewidth[plane], s->planeheight[plane],
                        in->linesize[plane], ref->linesize[plane], (*out)->linesize[plane], (1 << s->depth) - 1.f);
        else
            guided_word(ctx, s, in->data[plane], ref->data[plane], (*out)->data[plane], s->radius, s->eps,
                        s->planewidth[plane], s->planeheight[plane],
                        in->linesize[plane] / 2, ref->linesize[plane] / 2, (*out)->linesize[plane] / 2, (1 << s->depth) - 1.f);
    }

    return 0;
}

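/* Framesync callback for guidance=on: consumes a synchronized pair of
 * main and guidance frames and emits the filtered result. */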
static int process_frame(FFFrameSync *fs)
{
    AVFilterContext *ctx = fs->parent;
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *out_frame = NULL, *main_frame = NULL, *ref_frame = NULL;
    int ret;
    ret = ff_framesync_dualinput_get(fs, &main_frame, &ref_frame);
    if (ret < 0)
        return ret;

    if (ctx->is_disabled)
        return ff_filter_frame(outlink, main_frame);

    ret = filter_frame(ctx, &out_frame, main_frame, ref_frame);
    if (ret < 0)
        return ret;
    av_frame_free(&main_frame);

    return ff_filter_frame(outlink, out_frame);
}

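/*
 * Validate that the two inputs match in size (guidance=on), copy the
 * output link properties from the main input, allocate the w*h float
 * work buffers, and configure framesync when a guidance input is used.
 */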
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    GuidedContext *s = ctx->priv;
    AVFilterLink *mainlink = ctx->inputs[0];
    FFFrameSyncIn *in;
    int w, h, ret;

    if (s->guidance == ON) {
        if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
            ctx->inputs[0]->h != ctx->inputs[1]->h) {
            av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be the same.\n");
            return AVERROR(EINVAL);
        }
    }

    outlink->w = w = mainlink->w;
    outlink->h = h = mainlink->h;
    outlink->time_base = mainlink->time_base;
    outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
    outlink->frame_rate = mainlink->frame_rate;

    s->I      = av_calloc(w * h, sizeof(*s->I));
    s->II     = av_calloc(w * h, sizeof(*s->II));
    s->P      = av_calloc(w * h, sizeof(*s->P));
    s->IP     = av_calloc(w * h, sizeof(*s->IP));
    s->meanI  = av_calloc(w * h, sizeof(*s->meanI));
    s->meanII = av_calloc(w * h, sizeof(*s->meanII));
    s->meanP  = av_calloc(w * h, sizeof(*s->meanP));
    s->meanIP = av_calloc(w * h, sizeof(*s->meanIP));

    s->A      = av_calloc(w * h, sizeof(*s->A));
    s->B      = av_calloc(w * h, sizeof(*s->B));
    s->meanA  = av_calloc(w * h, sizeof(*s->meanA));
    s->meanB  = av_calloc(w * h, sizeof(*s->meanB));

    if (!s->I || !s->II || !s->P || !s->IP || !s->meanI || !s->meanII || !s->meanP ||
        !s->meanIP || !s->A || !s->B || !s->meanA || !s->meanB)
        return AVERROR(ENOMEM);

    if (s->guidance == OFF)
        return 0;

    if ((ret = ff_framesync_init(&s->fs, ctx, 2)) < 0)
        return ret;

    outlink->time_base = s->fs.time_base;

    in = s->fs.in;
    in[0].time_base = mainlink->time_base;
    in[1].time_base = ctx->inputs[1]->time_base;
    in[0].sync   = 2;
    in[0].before = EXT_INFINITY;
    in[0].after  = EXT_INFINITY;
    in[1].sync   = 1;
    in[1].before = EXT_INFINITY;
    in[1].after  = EXT_INFINITY;
    s->fs.opaque   = s;
    s->fs.on_event = process_frame;

    return ff_framesync_configure(&s->fs);
}

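/*
 * With guidance=on all scheduling is delegated to framesync; otherwise
 * each input frame is filtered using itself as the guide (self-guided).
 */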
static int activate(AVFilterContext *ctx)
{
    GuidedContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    AVFilterLink *inlink = ctx->inputs[0];
    AVFrame *frame = NULL;
    AVFrame *out = NULL;
    int ret, status;
    int64_t pts;
    if (s->guidance)
        return ff_framesync_activate(&s->fs);

    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

    if ((ret = ff_inlink_consume_frame(inlink, &frame)) > 0) {
        if (ctx->is_disabled)
            return ff_filter_frame(outlink, frame);

        ret = filter_frame(ctx, &out, frame, frame);
        av_frame_free(&frame);
        if (ret < 0)
            return ret;
        ret = ff_filter_frame(outlink, out);
    }
    if (ret < 0)
        return ret;
    if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
        ff_outlink_set_status(outlink, status, pts);
        return 0;
    }
    if (ff_outlink_frame_wanted(outlink))
        ff_inlink_request_frame(inlink);
    return 0;
}

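/* Input pads are appended at init time: always the main "source" pad,
 * plus a second "guidance" pad when guidance=on. */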
static av_cold int init(AVFilterContext *ctx)
{
    GuidedContext *s = ctx->priv;
    AVFilterPad pad = { 0 };
    int ret;

    pad.type         = AVMEDIA_TYPE_VIDEO;
    pad.name         = "source";
    pad.config_props = config_input;

    if ((ret = ff_append_inpad(ctx, &pad)) < 0)
        return ret;

    if (s->guidance == ON) {
        pad.type         = AVMEDIA_TYPE_VIDEO;
        pad.name         = "guidance";
        pad.config_props = NULL;

        if ((ret = ff_append_inpad(ctx, &pad)) < 0)
            return ret;
    }

    return 0;
}

static av_cold void uninit(AVFilterContext *ctx)
{
    GuidedContext *s = ctx->priv;
    if (s->guidance == ON)
        ff_framesync_uninit(&s->fs);

    av_freep(&s->I);
    av_freep(&s->II);
    av_freep(&s->P);
    av_freep(&s->IP);
    av_freep(&s->meanI);
    av_freep(&s->meanII);
    av_freep(&s->meanP);
    av_freep(&s->meanIP);
    av_freep(&s->A);
    av_freep(&s->B);
    av_freep(&s->meanA);
    av_freep(&s->meanB);
}

static const AVFilterPad guided_outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_VIDEO,
        .config_props  = config_output,
    },
};

const AVFilter ff_vf_guided = {
    .name            = "guided",
    .description     = NULL_IF_CONFIG_SMALL("Apply Guided filter."),
    .init            = init,
    .uninit          = uninit,
    .priv_size       = sizeof(GuidedContext),
    .priv_class      = &guided_class,
    .activate        = activate,
    .inputs          = NULL,
    FILTER_OUTPUTS(guided_outputs),
    FILTER_PIXFMTS_ARRAY(pix_fmts),
    .flags           = AVFILTER_FLAG_DYNAMIC_INPUTS | AVFILTER_FLAG_SLICE_THREADS |
                       AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
    .process_command = ff_filter_process_command,
};