1 /*
2 * Copyright (c) 2021 Xuewei Meng
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/imgutils.h"
22 #include "libavutil/opt.h"
23 #include "libavutil/pixdesc.h"
24 #include "avfilter.h"
25 #include "filters.h"
26 #include "formats.h"
27 #include "framesync.h"
28 #include "internal.h"
29 #include "video.h"
30
/* Values accepted by the "mode" option. */
enum FilterModes {
    BASIC = 0,  /* "basic": basic guided filter */
    FAST  = 1,  /* "fast": fast guided filter */
    NB_MODES    /* number of modes; not a selectable value */
};
36
/* Values accepted by the "guidance" option. */
enum GuidanceModes {
    OFF = 0,            /* "off": only one input is enabled */
    ON  = 1,            /* "on": two inputs are required */
    NB_GUIDANCE_MODES   /* number of guidance modes; not a selectable value */
};
42
43 typedef struct GuidedContext {
44 const AVClass *class;
45 FFFrameSync fs;
46
47 int radius;
48 float eps;
49 int mode;
50 int sub;
51 int guidance;
52 int planes;
53
54 int width;
55 int height;
56
57 int nb_planes;
58 int depth;
59 int planewidth[4];
60 int planeheight[4];
61
62 float *I;
63 float *II;
64 float *P;
65 float *IP;
66 float *meanI;
67 float *meanII;
68 float *meanP;
69 float *meanIP;
70
71 float *A;
72 float *B;
73 float *meanA;
74 float *meanB;
75
76 int (*box_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
77 } GuidedContext;
78
/* Byte offset of the GuidedContext field that backs an option. */
#define OFFSET(x) offsetof(GuidedContext, x)
/* Option flags without AV_OPT_FLAG_RUNTIME_PARAM. */
#define FLAGS  (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
/* Option flags including AV_OPT_FLAG_RUNTIME_PARAM. */
#define TFLAGS (FLAGS | AV_OPT_FLAG_RUNTIME_PARAM)
82
83 static const AVOption guided_options[] = {
84 { "radius", "set the box radius", OFFSET(radius), AV_OPT_TYPE_INT, {.i64 = 3 }, 1, 20, TFLAGS },
85 { "eps", "set the regularization parameter (with square)", OFFSET(eps), AV_OPT_TYPE_FLOAT, {.dbl = 0.01 }, 0.0, 1, TFLAGS },
86 { "mode", "set filtering mode (0: basic mode; 1: fast mode)", OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = BASIC}, BASIC, NB_MODES - 1, TFLAGS, "mode" },
87 { "basic", "basic guided filter", 0, AV_OPT_TYPE_CONST, {.i64 = BASIC}, 0, 0, TFLAGS, "mode" },
88 { "fast", "fast guided filter", 0, AV_OPT_TYPE_CONST, {.i64 = FAST }, 0, 0, TFLAGS, "mode" },
89 { "sub", "subsampling ratio for fast mode", OFFSET(sub), AV_OPT_TYPE_INT, {.i64 = 4 }, 2, 64, TFLAGS },
90 { "guidance", "set guidance mode (0: off mode; 1: on mode)", OFFSET(guidance), AV_OPT_TYPE_INT, {.i64 = OFF }, OFF, NB_GUIDANCE_MODES - 1, FLAGS, "guidance" },
91 { "off", "only one input is enabled", 0, AV_OPT_TYPE_CONST, {.i64 = OFF }, 0, 0, FLAGS, "guidance" },
92 { "on", "two inputs are required", 0, AV_OPT_TYPE_CONST, {.i64 = ON }, 0, 0, FLAGS, "guidance" },
93 { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, 0xF, TFLAGS },
94 { NULL }
95 };
96
97 AVFILTER_DEFINE_CLASS(guided);
98
/* Job description passed to the box_slice worker through ff_filter_execute(). */
typedef struct ThreadData {
    int width;      /* number of samples per row */
    int height;     /* number of rows */
    float *src;     /* plane to be averaged */
    float *dst;     /* plane receiving the averages */
    int srcStride;  /* distance between rows of src, in floats */
    int dstStride;  /* distance between rows of dst, in floats */
} ThreadData;
107
/**
 * Slice worker computing the box mean of a float plane.
 *
 * Every output sample is the average of the (2 * radius + 1)^2 input samples
 * centred on it, with radius taken from the filter context. Coordinates that
 * fall outside the plane are clamped to the nearest edge sample.
 *
 * @param ctx     filter context; ctx->priv is the GuidedContext
 * @param arg     ThreadData describing the source and destination planes
 * @param jobnr   index of this job
 * @param nb_jobs total number of jobs; the rows are split evenly between them
 * @return always 0
 */
static int box_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    GuidedContext *s = ctx->priv;
    ThreadData *td = arg;

    const float *in      = td->src;
    float *out           = td->dst;
    const int cols       = td->width;
    const int rows       = td->height;
    const int in_stride  = td->srcStride;
    const int out_stride = td->dstStride;

    const int r    = s->radius;
    const int side = 2 * r + 1;
    const int area = side * side;

    /* Rows [first_row, last_row) belong to this job. */
    const int first_row = (rows * jobnr) / nb_jobs;
    const int last_row  = (rows * (jobnr + 1)) / nb_jobs;

    for (int y = first_row; y < last_row; y++) {
        float *out_line = out + y * out_stride;

        for (int x = 0; x < cols; x++) {
            float sum = 0.0;

            for (int dy = -r; dy <= r; dy++) {
                const float *in_line;
                int sy = y + dy;

                if (sy < 0)
                    sy = 0;
                else if (sy >= rows)
                    sy = rows - 1;
                in_line = in + sy * in_stride;

                for (int dx = -r; dx <= r; dx++) {
                    int sx = x + dx;

                    if (sx < 0)
                        sx = 0;
                    else if (sx >= cols)
                        sx = cols - 1;
                    sum += in_line[sx];
                }
            }
            out_line[x] = sum / area;
        }
    }

    return 0;
}
144
145 static const enum AVPixelFormat pix_fmts[] = {
146 AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P,
147 AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
148 AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P,
149 AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
150 AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
151 AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
152 AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
153 AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12,
154 AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14,
155 AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
156 AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
157 AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
158 AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
159 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
160 AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
161 AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
162 AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
163 AV_PIX_FMT_NONE
164 };
165
/**
 * Configure the filter from the properties of the main input link.
 *
 * Records bit depth, frame size, per-plane dimensions and plane count, selects
 * the box filter worker, and adapts the sub/radius options to the chosen mode.
 *
 * @param inlink the input link being configured
 * @return always 0
 */
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    GuidedContext *s = ctx->priv;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
    const int chroma_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
    const int chroma_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);

    switch (s->mode) {
    case BASIC:
        /* Basic mode works at full resolution. */
        s->sub = 1;
        break;
    case FAST:
        /*
         * The box filter runs on the subsampled planes, so shrink the radius
         * by the same ratio, but never below 1.
         * NOTE(review): this overwrites the user-supplied radius in place;
         * confirm that repeated configuration cannot shrink it further.
         */
        s->radius = s->radius >= s->sub ? s->radius / s->sub : 1;
        break;
    default:
        break;
    }

    s->box_slice = box_slice;
    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
    s->depth     = desc->comp[0].depth;
    s->width     = ctx->inputs[0]->w;
    s->height    = ctx->inputs[0]->h;

    /* Planes 0 and 3 are full size, planes 1 and 2 use the chroma size. */
    s->planewidth[0]  = s->planewidth[3]  = inlink->w;
    s->planewidth[1]  = s->planewidth[2]  = chroma_w;
    s->planeheight[0] = s->planeheight[3] = inlink->h;
    s->planeheight[1] = s->planeheight[2] = chroma_h;

    return 0;
}
195
196 #define GUIDED(type, name) \
197 static int guided_##name(AVFilterContext *ctx, GuidedContext *s, \
198 const uint8_t *ssrc, const uint8_t *ssrcRef, \
199 uint8_t *ddst, int radius, float eps, int width, int height, \
200 int src_stride, int src_ref_stride, int dst_stride, \
201 float maxval) \
202 { \
203 int ret = 0; \
204 type *dst = (type *)ddst; \
205 const type *src = (const type *)ssrc; \
206 const type *srcRef = (const type *)ssrcRef; \
207 \
208 int sub = s->sub; \
209 int h = (height % sub) == 0 ? height / sub : height / sub + 1; \
210 int w = (width % sub) == 0 ? width / sub : width / sub + 1; \
211 \
212 ThreadData t; \
213 const int nb_threads = ff_filter_get_nb_threads(ctx); \
214 float *I = s->I; \
215 float *II = s->II; \
216 float *P = s->P; \
217 float *IP = s->IP; \
218 float *meanI = s->meanI; \
219 float *meanII = s->meanII; \
220 float *meanP = s->meanP; \
221 float *meanIP = s->meanIP; \
222 float *A = s->A; \
223 float *B = s->B; \
224 float *meanA = s->meanA; \
225 float *meanB = s->meanB; \
226 \
227 for (int i = 0;i < h;i++) { \
228 for (int j = 0;j < w;j++) { \
229 int x = i * w + j; \
230 I[x] = src[(i * src_stride + j) * sub] / maxval; \
231 II[x] = I[x] * I[x]; \
232 P[x] = srcRef[(i * src_ref_stride + j) * sub] / maxval; \
233 IP[x] = I[x] * P[x]; \
234 } \
235 } \
236 \
237 t.width = w; \
238 t.height = h; \
239 t.srcStride = w; \
240 t.dstStride = w; \
241 t.src = I; \
242 t.dst = meanI; \
243 ff_filter_execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads)); \
244 t.src = II; \
245 t.dst = meanII; \
246 ff_filter_execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads)); \
247 t.src = P; \
248 t.dst = meanP; \
249 ff_filter_execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads)); \
250 t.src = IP; \
251 t.dst = meanIP; \
252 ff_filter_execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads)); \
253 \
254 for (int i = 0;i < h;i++) { \
255 for (int j = 0;j < w;j++) { \
256 int x = i * w + j; \
257 float varI = meanII[x] - (meanI[x] * meanI[x]); \
258 float covIP = meanIP[x] - (meanI[x] * meanP[x]); \
259 A[x] = covIP / (varI + eps); \
260 B[x] = meanP[x] - A[x] * meanI[x]; \
261 } \
262 } \
263 \
264 t.src = A; \
265 t.dst = meanA; \
266 ff_filter_execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads)); \
267 t.src = B; \
268 t.dst = meanB; \
269 ff_filter_execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads)); \
270 \
271 for (int i = 0;i < height;i++) { \
272 for (int j = 0;j < width;j++) { \
273 int x = i / sub * w + j / sub; \
274 dst[i * dst_stride + j] = meanA[x] * src[i * src_stride + j] + \
275 meanB[x] * maxval; \
276 } \
277 } \
278 \
279 return ret; \
280 }
281
GUIDED(uint8_t,byte)282 GUIDED(uint8_t, byte)
283 GUIDED(uint16_t, word)
284
285 static int filter_frame(AVFilterContext *ctx, AVFrame **out, AVFrame *in, AVFrame *ref)
286 {
287 GuidedContext *s = ctx->priv;
288 AVFilterLink *outlink = ctx->outputs[0];
289 *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
290 if (!*out)
291 return AVERROR(ENOMEM);
292 av_frame_copy_props(*out, in);
293
294 for (int plane = 0; plane < s->nb_planes; plane++) {
295 if (!(s->planes & (1 << plane))) {
296 av_image_copy_plane((*out)->data[plane], (*out)->linesize[plane],
297 in->data[plane], in->linesize[plane],
298 s->planewidth[plane] * ((s->depth + 7) / 8), s->planeheight[plane]);
299 continue;
300 }
301 if (s->depth <= 8)
302 guided_byte(ctx, s, in->data[plane], ref->data[plane], (*out)->data[plane], s->radius, s->eps,
303 s->planewidth[plane], s->planeheight[plane],
304 in->linesize[plane], ref->linesize[plane], (*out)->linesize[plane], (1 << s->depth) - 1.f);
305 else
306 guided_word(ctx, s, in->data[plane], ref->data[plane], (*out)->data[plane], s->radius, s->eps,
307 s->planewidth[plane], s->planeheight[plane],
308 in->linesize[plane] / 2, ref->linesize[plane] / 2, (*out)->linesize[plane] / 2, (1 << s->depth) - 1.f);
309 }
310
311 return 0;
312 }
313
/**
 * Frame synchroniser event callback used when guidance is ON.
 *
 * Fetches the current main/reference frame pair, filters the main frame with
 * the reference as guidance and sends the result downstream. When the filter
 * is disabled on the timeline, the main frame is forwarded unmodified.
 *
 * The main frame is released on every path that does not forward it,
 * including a filter_frame() failure. The reference frame is never freed
 * here; presumably it stays owned by the frame synchroniser.
 *
 * @param fs frame synchroniser whose parent is this filter
 * @return 0 or a positive value on success, a negative AVERROR code on failure
 */
static int process_frame(FFFrameSync *fs)
{
    AVFilterContext *ctx = fs->parent;
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *out_frame = NULL, *main_frame = NULL, *ref_frame = NULL;
    int ret;

    ret = ff_framesync_dualinput_get(fs, &main_frame, &ref_frame);
    if (ret < 0)
        return ret;

    /*
     * Without a reference frame there is nothing to guide the filter, so pass
     * the main frame through instead of dereferencing a NULL pointer.
     */
    if (ctx->is_disabled || !ref_frame)
        return ff_filter_frame(outlink, main_frame);

    ret = filter_frame(ctx, &out_frame, main_frame, ref_frame);
    /* The input is no longer needed whether or not filtering succeeded. */
    av_frame_free(&main_frame);
    if (ret < 0)
        return ret;

    return ff_filter_frame(outlink, out_frame);
}
334
/**
 * Configure the output link and allocate the work buffers.
 *
 * The output inherits size, time base, sample aspect ratio and frame rate
 * from the main input. Each of the twelve float work planes gets one element
 * per sample of the full-size frame. With guidance ON, both inputs must have
 * the same dimensions, and a two-input frame synchroniser is set up with
 * process_frame() as its event callback.
 *
 * @param outlink the output link being configured
 * @return 0 on success (or the result of ff_framesync_configure() when
 *         guidance is ON), a negative AVERROR code on failure
 */
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    GuidedContext *s = ctx->priv;
    AVFilterLink *mainlink = ctx->inputs[0];
    float **const buffers[] = {
        &s->I,     &s->II,     &s->P,     &s->IP,
        &s->meanI, &s->meanII, &s->meanP, &s->meanIP,
        &s->A,     &s->B,      &s->meanA, &s->meanB,
    };
    FFFrameSyncIn *in;
    size_t nb_samples;
    int ret;

    if (s->guidance == ON) {
        if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
            ctx->inputs[0]->h != ctx->inputs[1]->h) {
            av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
            return AVERROR(EINVAL);
        }
    }

    outlink->w = mainlink->w;
    outlink->h = mainlink->h;
    outlink->time_base = mainlink->time_base;
    outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
    outlink->frame_rate = mainlink->frame_rate;

    /* Widen before multiplying so the element count cannot overflow int. */
    nb_samples = (size_t)mainlink->w * mainlink->h;

    for (size_t i = 0; i < sizeof(buffers) / sizeof(buffers[0]); i++) {
        /* Drop a buffer left over from an earlier configuration, if any. */
        av_freep(buffers[i]);
        *buffers[i] = av_calloc(nb_samples, sizeof(**buffers[i]));
        /* Buffers allocated so far are released by uninit(). */
        if (!*buffers[i])
            return AVERROR(ENOMEM);
    }

    if (s->guidance == OFF)
        return 0;

    if ((ret = ff_framesync_init(&s->fs, ctx, 2)) < 0)
        return ret;

    /*
     * NOTE(review): fs.time_base is read before ff_framesync_configure() has
     * run; confirm it already holds a meaningful value at this point.
     */
    outlink->time_base = s->fs.time_base;

    in = s->fs.in;
    in[0].time_base = mainlink->time_base;
    in[1].time_base = ctx->inputs[1]->time_base;
    in[0].sync = 2;
    in[0].before = EXT_INFINITY;
    in[0].after = EXT_INFINITY;
    in[1].sync = 1;
    in[1].before = EXT_INFINITY;
    in[1].after = EXT_INFINITY;
    s->fs.opaque = s;
    s->fs.on_event = process_frame;

    return ff_framesync_configure(&s->fs);
}
397
/**
 * Activation callback of the filter.
 *
 * With guidance enabled, all scheduling is delegated to the frame
 * synchroniser. Otherwise the single input is handled here: a queued frame is
 * filtered using itself as reference (or forwarded as is when the filter is
 * disabled), input status changes are propagated to the output, and a new
 * frame is requested when the output wants one.
 *
 * @param ctx filter context
 * @return 0 or a positive value on success, a negative AVERROR code on failure
 */
static int activate(AVFilterContext *ctx)
{
    GuidedContext *s = ctx->priv;
    AVFilterLink *inlink = ctx->inputs[0];
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *in = NULL, *filtered = NULL;
    int64_t pts;
    int status, ret;

    if (s->guidance)
        return ff_framesync_activate(&s->fs);

    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

    ret = ff_inlink_consume_frame(inlink, &in);
    if (ret < 0)
        return ret;

    if (ret > 0) {
        if (ctx->is_disabled)
            return ff_filter_frame(outlink, in);

        /* Single-input operation: the frame serves as its own reference. */
        ret = filter_frame(ctx, &filtered, in, in);
        av_frame_free(&in);
        if (ret < 0)
            return ret;

        ret = ff_filter_frame(outlink, filtered);
        if (ret < 0)
            return ret;
    }

    if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
        ff_outlink_set_status(outlink, status, pts);
        return 0;
    }

    if (ff_outlink_frame_wanted(outlink))
        ff_inlink_request_frame(inlink);

    return 0;
}
432
init(AVFilterContext * ctx)433 static av_cold int init(AVFilterContext *ctx)
434 {
435 GuidedContext *s = ctx->priv;
436 AVFilterPad pad = { 0 };
437 int ret;
438
439 pad.type = AVMEDIA_TYPE_VIDEO;
440 pad.name = "source";
441 pad.config_props = config_input;
442
443 if ((ret = ff_append_inpad(ctx, &pad)) < 0)
444 return ret;
445
446 if (s->guidance == ON) {
447 pad.type = AVMEDIA_TYPE_VIDEO;
448 pad.name = "guidance";
449 pad.config_props = NULL;
450
451 if ((ret = ff_append_inpad(ctx, &pad)) < 0)
452 return ret;
453 }
454
455 return 0;
456 }
457
uninit(AVFilterContext * ctx)458 static av_cold void uninit(AVFilterContext *ctx)
459 {
460 GuidedContext *s = ctx->priv;
461 if (s->guidance == ON)
462 ff_framesync_uninit(&s->fs);
463
464 av_freep(&s->I);
465 av_freep(&s->II);
466 av_freep(&s->P);
467 av_freep(&s->IP);
468 av_freep(&s->meanI);
469 av_freep(&s->meanII);
470 av_freep(&s->meanP);
471 av_freep(&s->meanIP);
472 av_freep(&s->A);
473 av_freep(&s->B);
474 av_freep(&s->meanA);
475 av_freep(&s->meanB);
476
477 return;
478 }
479
480 static const AVFilterPad guided_outputs[] = {
481 {
482 .name = "default",
483 .type = AVMEDIA_TYPE_VIDEO,
484 .config_props = config_output,
485 },
486 };
487
488 const AVFilter ff_vf_guided = {
489 .name = "guided",
490 .description = NULL_IF_CONFIG_SMALL("Apply Guided filter."),
491 .init = init,
492 .uninit = uninit,
493 .priv_size = sizeof(GuidedContext),
494 .priv_class = &guided_class,
495 .activate = activate,
496 .inputs = NULL,
497 FILTER_OUTPUTS(guided_outputs),
498 FILTER_PIXFMTS_ARRAY(pix_fmts),
499 .flags = AVFILTER_FLAG_DYNAMIC_INPUTS | AVFILTER_FLAG_SLICE_THREADS |
500 AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
501 .process_command = ff_filter_process_command,
502 };
503