• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Boris Baracaldo
3  * Copyright (c) 2022 Thilo Borgmann
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * Calculate Spatial Info (SI) and Temporal Info (TI) scores
25  */
26 
27 #include <math.h>
28 
29 #include "libavutil/imgutils.h"
30 #include "libavutil/internal.h"
31 #include "libavutil/opt.h"
32 
33 #include "avfilter.h"
34 #include "formats.h"
35 #include "internal.h"
36 #include "video.h"
37 
/* 3x3 Sobel kernel for the horizontal gradient, stored row by row. */
static const int X_FILTER[9] = {
     1,  0, -1,
     2,  0, -2,
     1,  0, -1,
};
43 
/* 3x3 Sobel kernel for the vertical gradient, stored row by row. */
static const int Y_FILTER[9] = {
     1,  2,  1,
     0,  0,  0,
    -1, -2, -1,
};
49 
50 typedef struct SiTiContext {
51     const AVClass *class;
52     int pixel_depth;
53     int width, height;
54     uint64_t nb_frames;
55     uint8_t *prev_frame;
56     float max_si;
57     float max_ti;
58     float min_si;
59     float min_ti;
60     float sum_si;
61     float sum_ti;
62     float *gradient_matrix;
63     float *motion_matrix;
64     int full_range;
65     int print_summary;
66 } SiTiContext;
67 
68 static const enum AVPixelFormat pix_fmts[] = {
69     AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
70     AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
71     AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10,
72     AV_PIX_FMT_NONE
73 };
74 
init(AVFilterContext * ctx)75 static av_cold int init(AVFilterContext *ctx)
76 {
77     // User options but no input data
78     SiTiContext *s = ctx->priv;
79     s->max_si = 0;
80     s->max_ti = 0;
81     return 0;
82 }
83 
uninit(AVFilterContext * ctx)84 static av_cold void uninit(AVFilterContext *ctx)
85 {
86     SiTiContext *s = ctx->priv;
87 
88     if (s->print_summary) {
89         float avg_si = s->sum_si / s->nb_frames;
90         float avg_ti = s->sum_ti / s->nb_frames;
91         av_log(ctx, AV_LOG_INFO,
92                "SITI Summary:\nTotal frames: %"PRId64"\n\n"
93                "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n"
94                "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n",
95                s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti
96         );
97     }
98 
99     av_freep(&s->prev_frame);
100     av_freep(&s->gradient_matrix);
101     av_freep(&s->motion_matrix);
102 }
103 
/**
 * Input link configuration callback.
 *
 * Called once the input properties are available. Records the frame
 * geometry and sample size, and (re)allocates the three work buffers:
 * the previous-frame copy, the Sobel gradient matrix and the motion matrix.
 *
 * @return 0 on success, AVERROR(EINVAL) if the pixel format has no
 *         descriptor, AVERROR(ENOMEM) if an allocation fails
 */
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    SiTiContext *s = ctx->priv;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
    int max_pixsteps[4];
    size_t pixel_sz;
    size_t nb_pixels;
    size_t nb_gradients;

    if (!desc)
        return AVERROR(EINVAL);
    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);

    // free previous buffers in case they are allocated already
    av_freep(&s->prev_frame);
    av_freep(&s->gradient_matrix);
    av_freep(&s->motion_matrix);

    s->pixel_depth = max_pixsteps[0];
    s->width = inlink->w;
    s->height = inlink->h;
    pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);

    nb_pixels = (size_t)s->width * s->height;
    // The gradient matrix drops a one pixel border. Frames without any
    // interior pixel get an empty matrix rather than a negative dimension
    // wrapping around to a huge unsigned size.
    nb_gradients = s->width > 2 && s->height > 2
                 ? (size_t)(s->width - 2) * (s->height - 2)
                 : 0;

    // av_malloc_array() fails cleanly if count * element size overflows.
    s->prev_frame      = av_malloc_array(nb_pixels, pixel_sz);
    s->gradient_matrix = av_malloc_array(nb_gradients, sizeof(*s->gradient_matrix));
    s->motion_matrix   = av_malloc_array(nb_pixels, sizeof(*s->motion_matrix));

    // Partially allocated buffers are released by uninit().
    if (!s->prev_frame || !s->gradient_matrix || !s->motion_matrix)
        return AVERROR(ENOMEM);

    return 0;
}
143 
144 // Determine whether the video is in full or limited range. If not defined, assume limited.
is_full_range(AVFrame * frame)145 static int is_full_range(AVFrame* frame)
146 {
147     // If color range not specified, fallback to pixel format
148     if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB)
149         return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
150     return frame->color_range == AVCOL_RANGE_JPEG;
151 }
152 
/**
 * Map one limited-range luma sample to full range.
 *
 * For 8 bits, limited range goes from 16 to 235; for 10 bits the range is
 * multiplied by 4. Samples outside the limited range are clamped to it
 * before scaling, and the scaling truncates towards zero.
 *
 * @param factor 1 for 8-bit samples, 4 for 10-bit samples (must not be 0)
 * @param y      limited-range sample
 * @return the sample rescaled to 0 .. 256 * factor - 1
 */
static uint16_t convert_full_range(int factor, uint16_t y)
{
    const int shift       = 16 * factor;
    const int limit_upper = 235 * factor - shift;
    const int full_upper  = 256 * factor - 1;
    int limit_y           = (int)y - shift;

    // Plain integer clamp; no need to round-trip through float min/max.
    if (limit_y < 0)
        limit_y = 0;
    if (limit_y > limit_upper)
        limit_y = limit_upper;

    return full_upper * limit_y / limit_upper;
}
168 
169 // Applies sobel convolution
convolve_sobel(SiTiContext * s,const uint8_t * src,float * dst,int linesize)170 static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize)
171 {
172     double x_conv_sum;
173     double y_conv_sum;
174     float gradient;
175     int ki;
176     int kj;
177     int index;
178     uint16_t data;
179     int filter_width = 3;
180     int filter_size = filter_width * filter_width;
181     int stride = linesize / s->pixel_depth;
182     // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
183     int factor = s->pixel_depth == 1 ? 1 : 4;
184 
185     // Dst matrix is smaller than src since we ignore edges that can't be convolved
186     #define CONVOLVE(bps)                                           \
187     {                                                               \
188         uint##bps##_t *vsrc = (uint##bps##_t*)src;                  \
189         for (int j = 1; j < s->height - 1; j++) {                   \
190             for (int i = 1; i < s->width - 1; i++) {                \
191                 x_conv_sum = 0.0;                                   \
192                 y_conv_sum = 0.0;                                   \
193                 for (int k = 0; k < filter_size; k++) {             \
194                     ki = k % filter_width - 1;                      \
195                     kj = floor(k / filter_width) - 1;               \
196                     index = (j + kj) * stride + (i + ki);           \
197                     data = s->full_range ? vsrc[index] : convert_full_range(factor, vsrc[index]); \
198                     x_conv_sum += data * X_FILTER[k];               \
199                     y_conv_sum += data * Y_FILTER[k];               \
200                 }                                                   \
201                 gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \
202                 dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \
203             }                                                       \
204         }                                                           \
205     }
206 
207     if (s->pixel_depth == 2) {
208         CONVOLVE(16);
209     } else {
210         CONVOLVE(8);
211     }
212 }
213 
214 // Calculate pixel difference between current and previous frame, and update previous
calculate_motion(SiTiContext * s,const uint8_t * curr,float * motion_matrix,int linesize)215 static void calculate_motion(SiTiContext *s, const uint8_t *curr,
216                              float *motion_matrix, int linesize)
217 {
218     int stride = linesize / s->pixel_depth;
219     float motion;
220     int curr_index;
221     int prev_index;
222     uint16_t curr_data;
223     // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
224     int factor = s->pixel_depth == 1 ? 1 : 4;
225 
226     // Previous frame is already converted to full range
227     #define CALCULATE(bps)                                           \
228     {                                                                \
229         uint##bps##_t *vsrc = (uint##bps##_t*)curr;                  \
230         uint##bps##_t *vdst = (uint##bps##_t*)s->prev_frame;         \
231         for (int j = 0; j < s->height; j++) {                        \
232             for (int i = 0; i < s->width; i++) {                     \
233                 motion = 0;                                          \
234                 curr_index = j * stride + i;                         \
235                 prev_index = j * s->width + i;                       \
236                 curr_data = s->full_range ? vsrc[curr_index] : convert_full_range(factor, vsrc[curr_index]); \
237                 if (s->nb_frames > 1)                                \
238                     motion = curr_data - vdst[prev_index];           \
239                 vdst[prev_index] = curr_data;                        \
240                 motion_matrix[j * s->width + i] = motion;            \
241             }                                                        \
242         }                                                            \
243     }
244 
245     if (s->pixel_depth == 2) {
246         CALCULATE(16);
247     } else {
248         CALCULATE(8);
249     }
250 }
251 
std_deviation(float * img_metrics,int width,int height)252 static float std_deviation(float *img_metrics, int width, int height)
253 {
254     int size = height * width;
255     double mean = 0.0;
256     double sqr_diff = 0;
257 
258     for (int j = 0; j < height; j++)
259         for (int i = 0; i < width; i++)
260             mean += img_metrics[j * width + i];
261 
262     mean /= size;
263 
264     for (int j = 0; j < height; j++) {
265         for (int i = 0; i < width; i++) {
266             float mean_diff = img_metrics[j * width + i] - mean;
267             sqr_diff += (mean_diff * mean_diff);
268         }
269     }
270     sqr_diff = sqr_diff / size;
271     return sqrt(sqr_diff);
272 }
273 
/**
 * Store a float in a metadata dictionary, formatted with two decimals.
 *
 * @param metadata dictionary to update
 * @param key      entry name
 * @param d        value to store
 */
static void set_meta(AVDictionary **metadata, const char *key, float d)
{
    char text[128];

    snprintf(text, sizeof(text), "%0.2f", d);
    av_dict_set(metadata, key, text, 0);
}
280 
/**
 * Per-frame callback: compute SI and TI for the frame, update the running
 * statistics, attach both scores as frame metadata and pass the frame on.
 *
 * SI is the standard deviation of the Sobel gradient matrix, TI the
 * standard deviation of the difference to the previous frame.
 *
 * @return the result of ff_filter_frame() on the first output
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
{
    AVFilterContext *ctx = inlink->dst;
    SiTiContext *s = ctx->priv;
    const uint8_t *plane = frame->data[0];
    const int plane_linesize = frame->linesize[0];
    float si, ti;

    s->nb_frames++;
    s->full_range = is_full_range(frame);

    // Spatial information
    convolve_sobel(s, plane, s->gradient_matrix, plane_linesize);
    si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);

    // Temporal information
    calculate_motion(s, plane, s->motion_matrix, plane_linesize);
    ti = std_deviation(s->motion_matrix, s->width, s->height);

    // Running statistics; the first frame seeds the minima
    if (s->nb_frames == 1) {
        s->min_si = si;
        s->min_ti = ti;
    } else {
        s->min_si = fminf(si, s->min_si);
        s->min_ti = fminf(ti, s->min_ti);
    }
    s->max_si  = fmaxf(si, s->max_si);
    s->max_ti  = fmaxf(ti, s->max_ti);
    s->sum_si += si;
    s->sum_ti += ti;

    // Export both scores through the frame metadata
    set_meta(&frame->metadata, "lavfi.siti.si", si);
    set_meta(&frame->metadata, "lavfi.siti.ti", ti);

    return ff_filter_frame(ctx->outputs[0], frame);
}
311 
312 #define OFFSET(x) offsetof(SiTiContext, x)
313 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
314 
315 static const AVOption siti_options[] = {
316     { "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
317     { NULL }
318 };
319 
320 AVFILTER_DEFINE_CLASS(siti);
321 
322 static const AVFilterPad avfilter_vf_siti_inputs[] = {
323     {
324         .name         = "default",
325         .type         = AVMEDIA_TYPE_VIDEO,
326         .config_props = config_input,
327         .filter_frame = filter_frame,
328     },
329 };
330 
331 static const AVFilterPad avfilter_vf_siti_outputs[] = {
332     {
333         .name = "default",
334         .type = AVMEDIA_TYPE_VIDEO
335     },
336 };
337 
338 const AVFilter ff_vf_siti = {
339     .name          = "siti",
340     .description   = NULL_IF_CONFIG_SMALL("Calculate spatial information (SI) and temporal information (TI)."),
341     .priv_size     = sizeof(SiTiContext),
342     .priv_class    = &siti_class,
343     .init          = init,
344     .uninit        = uninit,
345     .flags         = AVFILTER_FLAG_METADATA_ONLY,
346     FILTER_PIXFMTS_ARRAY(pix_fmts),
347     FILTER_INPUTS(avfilter_vf_siti_inputs),
348     FILTER_OUTPUTS(avfilter_vf_siti_outputs),
349 };
350