• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
3  * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * Calculate VMAF Motion score.
25  */
26 
27 #include "libavutil/opt.h"
28 #include "libavutil/pixdesc.h"
29 #include "avfilter.h"
30 #include "formats.h"
31 #include "internal.h"
32 #include "vmaf_motion.h"
33 
/** Number of fractional bits used when the filter taps are converted to integers. */
#define BIT_SHIFT 15

/**
 * Symmetric 5-tap blur kernel in floating point (the taps sum to ~1.0).
 * ff_vmafmotion_init() multiplies each tap by (1 << BIT_SHIFT) and rounds
 * it to obtain the integer taps used by the convolution routines.
 */
static const float FILTER_5[5] = {
    0.054488685, 0.244201342, 0.402619947, 0.244201342, 0.054488685,
};
43 
44 typedef struct VMAFMotionContext {
45     const AVClass *class;
46     VMAFMotionData data;
47     FILE *stats_file;
48     char *stats_file_str;
49 } VMAFMotionContext;
50 
51 #define OFFSET(x) offsetof(VMAFMotionContext, x)
52 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
53 
54 static const AVOption vmafmotion_options[] = {
55     {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
56     { NULL }
57 };
58 
59 AVFILTER_DEFINE_CLASS(vmafmotion);
60 
/**
 * Sum of absolute differences between two planes of 16-bit samples.
 *
 * @param img1          first plane
 * @param img2          second plane
 * @param w             number of samples compared per row
 * @param h             number of rows compared
 * @param _img1_stride  distance between rows of img1, in bytes
 * @param _img2_stride  distance between rows of img2, in bytes
 * @return the accumulated |img1 - img2| over the w x h area
 */
static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w,
                          int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
{
    /* Strides arrive in bytes; convert them to element counts. */
    const ptrdiff_t step1 = _img1_stride / sizeof(*img1);
    const ptrdiff_t step2 = _img2_stride / sizeof(*img2);
    const uint16_t *row1 = img1;
    const uint16_t *row2 = img2;
    uint64_t total = 0;
    int rows_left, x;

    for (rows_left = h; rows_left > 0; rows_left--) {
        for (x = 0; x < w; x++) {
            /* Both samples are promoted to int, so the difference is signed. */
            int diff = row1[x] - row2[x];
            total += abs(diff);
        }
        row1 += step1;
        row2 += step2;
    }

    return total;
}
79 
convolution_x(const uint16_t * filter,int filt_w,const uint16_t * src,uint16_t * dst,int w,int h,ptrdiff_t _src_stride,ptrdiff_t _dst_stride)80 static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src,
81                           uint16_t *dst, int w, int h, ptrdiff_t _src_stride,
82                           ptrdiff_t _dst_stride)
83 {
84     ptrdiff_t src_stride = _src_stride / sizeof(*src);
85     ptrdiff_t dst_stride = _dst_stride / sizeof(*dst);
86     int radius = filt_w / 2;
87     int borders_left = radius;
88     int borders_right = w - (filt_w - radius);
89     int i, j, k;
90     int sum = 0;
91 
92     for (i = 0; i < h; i++) {
93         for (j = 0; j < borders_left; j++) {
94             sum = 0;
95             for (k = 0; k < filt_w; k++) {
96                 int j_tap = FFABS(j - radius + k);
97                 if (j_tap >= w) {
98                     j_tap = w - (j_tap - w + 1);
99                 }
100                 sum += filter[k] * src[i * src_stride + j_tap];
101             }
102             dst[i * dst_stride + j] = sum >> BIT_SHIFT;
103         }
104 
105         for (j = borders_left; j < borders_right; j++) {
106             int sum = 0;
107             for (k = 0; k < filt_w; k++) {
108                 sum += filter[k] * src[i * src_stride + j - radius + k];
109             }
110             dst[i * dst_stride + j] = sum >> BIT_SHIFT;
111         }
112 
113         for (j = borders_right; j < w; j++) {
114             sum = 0;
115             for (k = 0; k < filt_w; k++) {
116                 int j_tap = FFABS(j - radius + k);
117                 if (j_tap >= w) {
118                     j_tap = w - (j_tap - w + 1);
119                 }
120                 sum += filter[k] * src[i * src_stride + j_tap];
121             }
122             dst[i * dst_stride + j] = sum >> BIT_SHIFT;
123         }
124     }
125 }
126 
/**
 * Template for the vertical pass of the blur.
 *
 * Expands to convolution_y_<bits>bit(), which reads input samples of the
 * given type, convolves every column with the integer filter taps and
 * stores the result, shifted right by `bits`, as 16-bit samples in dst.
 *
 * Taps that fall outside the plane are folded back into it: a negative
 * row r is read from |r|, and a row r >= h is read from 2 * h - r - 1.
 * Both strides are given in bytes.
 */
#define conv_y_fn(type, bits) \
static void convolution_y_##bits##bit(const uint16_t *filter, int filt_w, \
                                      const uint8_t *_src, uint16_t *dst, \
                                      int w, int h, ptrdiff_t _src_stride, \
                                      ptrdiff_t _dst_stride) \
{ \
    const type *in = (const type *) _src; \
    const ptrdiff_t src_step = _src_stride / sizeof(*in); \
    const ptrdiff_t dst_step = _dst_stride / sizeof(*dst); \
    const int half = filt_w / 2; \
    const int inner_start = half; \
    const int inner_end = h - (filt_w - half); \
    int y, x, t; \
    \
    /* Top edge: taps may reach above row 0. */ \
    for (y = 0; y < inner_start; y++) { \
        uint16_t *out = dst + y * dst_step; \
        for (x = 0; x < w; x++) { \
            int acc = 0; \
            for (t = 0; t < filt_w; t++) { \
                int row = FFABS(y - half + t); \
                if (row >= h) \
                    row = 2 * h - row - 1; \
                acc += filter[t] * in[row * src_step + x]; \
            } \
            out[x] = acc >> bits; \
        } \
    } \
    /* Interior: every tap lies inside the plane. */ \
    for (y = inner_start; y < inner_end; y++) { \
        uint16_t *out = dst + y * dst_step; \
        for (x = 0; x < w; x++) { \
            int acc = 0; \
            for (t = 0; t < filt_w; t++) \
                acc += filter[t] * in[(y - half + t) * src_step + x]; \
            out[x] = acc >> bits; \
        } \
    } \
    /* Bottom edge: taps may reach below row h - 1. */ \
    for (y = inner_end; y < h; y++) { \
        uint16_t *out = dst + y * dst_step; \
        for (x = 0; x < w; x++) { \
            int acc = 0; \
            for (t = 0; t < filt_w; t++) { \
                int row = FFABS(y - half + t); \
                if (row >= h) \
                    row = 2 * h - row - 1; \
                acc += filter[t] * in[row * src_step + x]; \
            } \
            out[x] = acc >> bits; \
        } \
    } \
}

/* 8-bit input stored in bytes, 10-bit input stored in 16-bit words. */
conv_y_fn(uint8_t, 8)
conv_y_fn(uint16_t, 10)
181 
/**
 * Fill the DSP function table with the C implementations.
 *
 * @param dsp  table to populate
 * @param bpp  sample depth; 10 selects the 10-bit vertical pass, any other
 *             value selects the 8-bit one
 */
static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp)
{
    dsp->sad           = image_sad;
    dsp->convolution_x = convolution_x;

    if (bpp == 10)
        dsp->convolution_y = convolution_y_10bit;
    else
        dsp->convolution_y = convolution_y_8bit;
}
187 
/**
 * Blur the luma plane of a frame and score it against the previous one.
 *
 * The frame's plane 0 is filtered vertically into s->temp_data and then
 * horizontally into s->blur_data[0]. For every frame but the first, the
 * score is the SAD between the current and the previous blurred plane,
 * divided by the number of pixels and by 1 << (BIT_SHIFT - 8). The two
 * blur buffers are then swapped, and the frame counter and running score
 * sum are updated.
 *
 * @param s    motion state set up by ff_vmafmotion_init()
 * @param ref  frame to analyse
 * @return the motion score of this frame (0.0 for the first frame)
 */
double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref)
{
    double score;

    s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data,
                             s->width, s->height, ref->linesize[0], s->stride);
    s->vmafdsp.convolution_x(s->filter, 5, s->temp_data, s->blur_data[0],
                             s->width, s->height, s->stride, s->stride);

    if (!s->nb_frames) {
        score = 0.0;
    } else {
        uint64_t sad = s->vmafdsp.sad(s->blur_data[1], s->blur_data[0],
                                      s->width, s->height, s->stride, s->stride);
        // the output score is always normalized to 8 bits
        // The divisor is built in double: width * height << 7 overflows int
        // once the frame holds 2^24 pixels or more (e.g. 4096x4096).
        const double norm = (double) s->width * s->height *
                            (1 << (BIT_SHIFT - 8));
        score = sad / norm;
    }

    FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]);
    s->nb_frames++;
    s->motion_sum += score;

    return score;
}
212 
/**
 * Store a number in a dictionary, formatted with two decimals.
 *
 * @param metadata  dictionary to update
 * @param key       entry name
 * @param d         value to store
 */
static void set_meta(AVDictionary **metadata, const char *key, float d)
{
    char text[128];

    snprintf(text, sizeof text, "%0.2f", d);
    av_dict_set(metadata, key, text, 0);
}
219 
/**
 * Score one frame, attach the score to its metadata and, when a stats
 * stream is open, append a line with the frame counter and the score.
 */
static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref)
{
    VMAFMotionContext *s = ctx->priv;
    const double score = ff_vmafmotion_process(&s->data, ref);

    set_meta(&ref->metadata, "lavfi.vmafmotion.score", score);

    if (!s->stats_file)
        return;

    /* nb_frames was already incremented by ff_vmafmotion_process(). */
    fprintf(s->stats_file,
            "n:%"PRId64" motion:%0.2lf\n", s->data.nb_frames, score);
}
232 
233 
/**
 * Prepare the motion state for frames of the given size and format.
 *
 * Records the dimensions, allocates the two blur buffers and the
 * temporary buffer (16-bit samples, rows padded to a multiple of 32
 * bytes), converts FILTER_5 to integer taps and selects the DSP
 * functions matching the depth of the first component.
 *
 * @param s    state to initialise
 * @param w    frame width, at least 3
 * @param h    frame height, at least 3
 * @param fmt  pixel format of the frames
 * @return 0 on success, AVERROR(EINVAL) for an unknown format or too
 *         small dimensions, AVERROR(ENOMEM) if a buffer cannot be
 *         allocated (all three buffer pointers are NULL in that case)
 */
int ff_vmafmotion_init(VMAFMotionData *s,
                       int w, int h, enum AVPixelFormat fmt)
{
    size_t data_sz;
    int i;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);

    /* An unknown format has no descriptor; reject it before using desc. */
    if (!desc || w < 3 || h < 3)
        return AVERROR(EINVAL);

    s->width = w;
    s->height = h;
    s->stride = FFALIGN(w * sizeof(uint16_t), 32);

    data_sz = (size_t) s->stride * h;

    /* Assign all three pointers first so that the failure path below
     * leaves the state fully defined, whichever allocation failed. */
    s->blur_data[0] = av_malloc(data_sz);
    s->blur_data[1] = av_malloc(data_sz);
    s->temp_data    = av_malloc(data_sz);
    if (!s->blur_data[0] || !s->blur_data[1] || !s->temp_data) {
        av_freep(&s->blur_data[0]);
        av_freep(&s->blur_data[1]);
        av_freep(&s->temp_data);
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < 5; i++) {
        s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT));
    }

    vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth);

    return 0;
}
263 
/**
 * Build the list of pixel formats the filter accepts.
 *
 * A format is accepted when it is not RGB, hwaccel, bitstream or
 * paletted; is planar or has a single component; has a first component
 * of depth 8 or 10; and, unless that depth is 8, has native endianness.
 */
static int query_formats(AVFilterContext *ctx)
{
    AVFilterFormats *fmts_list = NULL;
    const AVPixFmtDescriptor *desc;
    int format, ret;

    for (format = 0; (desc = av_pix_fmt_desc_get(format)); format++) {
        const int depth = desc->comp[0].depth;

        if (desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_HWACCEL |
                           AV_PIX_FMT_FLAG_BITSTREAM | AV_PIX_FMT_FLAG_PAL))
            continue;
        if (!(desc->flags & AV_PIX_FMT_FLAG_PLANAR) && desc->nb_components != 1)
            continue;
        if (depth != 8 && depth != 10)
            continue;
        /* Multi-byte samples must be stored in the host byte order. */
        if (depth != 8 && !(desc->flags & AV_PIX_FMT_FLAG_BE) != !HAVE_BIGENDIAN)
            continue;

        ret = ff_add_format(&fmts_list, format);
        if (ret < 0)
            return ret;
    }

    return ff_set_common_formats(ctx, fmts_list);
}
281 
/**
 * Input configuration callback: initialise the motion state from the
 * dimensions and pixel format of the filter's first input.
 */
static int config_input_ref(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    VMAFMotionContext *s = ctx->priv;
    const AVFilterLink *ref_link = ctx->inputs[0];

    return ff_vmafmotion_init(&s->data, ref_link->w, ref_link->h,
                              ref_link->format);
}
290 
/**
 * Release the buffers of the motion state and report the average score.
 *
 * The buffer pointers are reset to NULL after being freed, so calling
 * this again on the same state does not free them a second time.
 *
 * @param s  motion state
 * @return the mean of all per-frame scores, or 0.0 if no frame was processed
 */
double ff_vmafmotion_uninit(VMAFMotionData *s)
{
    av_freep(&s->blur_data[0]);
    av_freep(&s->blur_data[1]);
    av_freep(&s->temp_data);

    return s->nb_frames > 0 ? s->motion_sum / s->nb_frames : 0.0;
}
299 
/**
 * Per-frame callback: score the frame, then forward it to the first output.
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
{
    AVFilterContext *ctx = inlink->dst;
    AVFilterLink *outlink = ctx->outputs[0];

    do_vmafmotion(ctx, ref);

    return ff_filter_frame(outlink, ref);
}
306 
init(AVFilterContext * ctx)307 static av_cold int init(AVFilterContext *ctx)
308 {
309     VMAFMotionContext *s = ctx->priv;
310 
311     if (s->stats_file_str) {
312         if (!strcmp(s->stats_file_str, "-")) {
313             s->stats_file = stdout;
314         } else {
315             s->stats_file = fopen(s->stats_file_str, "w");
316             if (!s->stats_file) {
317                 int err = AVERROR(errno);
318                 char buf[128];
319                 av_strerror(err, buf, sizeof(buf));
320                 av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
321                        s->stats_file_str, buf);
322                 return err;
323             }
324         }
325     }
326 
327     return 0;
328 }
329 
uninit(AVFilterContext * ctx)330 static av_cold void uninit(AVFilterContext *ctx)
331 {
332     VMAFMotionContext *s = ctx->priv;
333     double avg_motion = ff_vmafmotion_uninit(&s->data);
334 
335     if (s->data.nb_frames > 0) {
336         av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion);
337     }
338 
339     if (s->stats_file && s->stats_file != stdout)
340         fclose(s->stats_file);
341 }
342 
343 static const AVFilterPad vmafmotion_inputs[] = {
344     {
345         .name         = "reference",
346         .type         = AVMEDIA_TYPE_VIDEO,
347         .filter_frame = filter_frame,
348         .config_props = config_input_ref,
349     },
350     { NULL }
351 };
352 
353 static const AVFilterPad vmafmotion_outputs[] = {
354     {
355         .name          = "default",
356         .type          = AVMEDIA_TYPE_VIDEO,
357     },
358     { NULL }
359 };
360 
361 AVFilter ff_vf_vmafmotion = {
362     .name          = "vmafmotion",
363     .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion score."),
364     .init          = init,
365     .uninit        = uninit,
366     .query_formats = query_formats,
367     .priv_size     = sizeof(VMAFMotionContext),
368     .priv_class    = &vmafmotion_class,
369     .inputs        = vmafmotion_inputs,
370     .outputs       = vmafmotion_outputs,
371 };
372