1 /*
2 * Copyright (c) 2021 Paul B Mahol
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <float.h>
22 #include <math.h>
23
24 #include "libavutil/opt.h"
25 #include "libavutil/tx.h"
26 #include "audio.h"
27 #include "avfilter.h"
28 #include "filters.h"
29 #include "internal.h"
30 #include "window_func.h"
31
32 typedef struct ChannelSpectralStats {
33 float mean;
34 float variance;
35 float centroid;
36 float spread;
37 float skewness;
38 float kurtosis;
39 float entropy;
40 float flatness;
41 float crest;
42 float flux;
43 float slope;
44 float decrease;
45 float rolloff;
46 } ChannelSpectralStats;
47
48 typedef struct AudioSpectralStatsContext {
49 const AVClass *class;
50 int win_size;
51 int win_func;
52 float overlap;
53 int nb_channels;
54 int hop_size;
55 ChannelSpectralStats *stats;
56 float *window_func_lut;
57 av_tx_fn tx_fn;
58 AVTXContext **fft;
59 AVComplexFloat **fft_in;
60 AVComplexFloat **fft_out;
61 float **prev_magnitude;
62 float **magnitude;
63 AVFrame *window;
64 } AudioSpectralStatsContext;
65
66 #define OFFSET(x) offsetof(AudioSpectralStatsContext, x)
67 #define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
68
69 static const AVOption aspectralstats_options[] = {
70 { "win_size", "set the window size", OFFSET(win_size), AV_OPT_TYPE_INT, {.i64=2048}, 32, 65536, A },
71 WIN_FUNC_OPTION("win_func", OFFSET(win_func), A, WFUNC_HANNING),
72 { "overlap", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_FLOAT, {.dbl=0.5}, 0, 1, A },
73 { NULL }
74 };
75
76 AVFILTER_DEFINE_CLASS(aspectralstats);
77
config_output(AVFilterLink * outlink)78 static int config_output(AVFilterLink *outlink)
79 {
80 AudioSpectralStatsContext *s = outlink->src->priv;
81 float overlap, scale;
82 int ret;
83
84 s->nb_channels = outlink->ch_layout.nb_channels;
85 s->window_func_lut = av_realloc_f(s->window_func_lut, s->win_size,
86 sizeof(*s->window_func_lut));
87 if (!s->window_func_lut)
88 return AVERROR(ENOMEM);
89 generate_window_func(s->window_func_lut, s->win_size, s->win_func, &overlap);
90 if (s->overlap == 1.f)
91 s->overlap = overlap;
92
93 s->hop_size = s->win_size * (1.f - s->overlap);
94 if (s->hop_size <= 0)
95 return AVERROR(EINVAL);
96
97 s->stats = av_calloc(s->nb_channels, sizeof(*s->stats));
98 if (!s->stats)
99 return AVERROR(ENOMEM);
100
101 s->fft = av_calloc(s->nb_channels, sizeof(*s->fft));
102 if (!s->fft)
103 return AVERROR(ENOMEM);
104
105 s->magnitude = av_calloc(s->nb_channels, sizeof(*s->magnitude));
106 if (!s->magnitude)
107 return AVERROR(ENOMEM);
108
109 s->prev_magnitude = av_calloc(s->nb_channels, sizeof(*s->prev_magnitude));
110 if (!s->prev_magnitude)
111 return AVERROR(ENOMEM);
112
113 s->fft_in = av_calloc(s->nb_channels, sizeof(*s->fft_in));
114 if (!s->fft_in)
115 return AVERROR(ENOMEM);
116
117 s->fft_out = av_calloc(s->nb_channels, sizeof(*s->fft_out));
118 if (!s->fft_out)
119 return AVERROR(ENOMEM);
120
121 for (int ch = 0; ch < s->nb_channels; ch++) {
122 ret = av_tx_init(&s->fft[ch], &s->tx_fn, AV_TX_FLOAT_FFT, 0, s->win_size, &scale, 0);
123 if (ret < 0)
124 return ret;
125
126 s->fft_in[ch] = av_calloc(s->win_size, sizeof(**s->fft_in));
127 if (!s->fft_in[ch])
128 return AVERROR(ENOMEM);
129
130 s->fft_out[ch] = av_calloc(s->win_size, sizeof(**s->fft_out));
131 if (!s->fft_out[ch])
132 return AVERROR(ENOMEM);
133
134 s->magnitude[ch] = av_calloc(s->win_size, sizeof(**s->magnitude));
135 if (!s->magnitude[ch])
136 return AVERROR(ENOMEM);
137
138 s->prev_magnitude[ch] = av_calloc(s->win_size, sizeof(**s->prev_magnitude));
139 if (!s->prev_magnitude[ch])
140 return AVERROR(ENOMEM);
141 }
142
143 s->window = ff_get_audio_buffer(outlink, s->win_size);
144 if (!s->window)
145 return AVERROR(ENOMEM);
146
147 return 0;
148 }
149
set_meta(AVDictionary ** metadata,int chan,const char * key,const char * fmt,float val)150 static void set_meta(AVDictionary **metadata, int chan, const char *key,
151 const char *fmt, float val)
152 {
153 uint8_t value[128];
154 uint8_t key2[128];
155
156 snprintf(value, sizeof(value), fmt, val);
157 if (chan)
158 snprintf(key2, sizeof(key2), "lavfi.aspectralstats.%d.%s", chan, key);
159 else
160 snprintf(key2, sizeof(key2), "lavfi.aspectralstats.%s", key);
161 av_dict_set(metadata, key2, value, 0);
162 }
163
set_metadata(AudioSpectralStatsContext * s,AVDictionary ** metadata)164 static void set_metadata(AudioSpectralStatsContext *s, AVDictionary **metadata)
165 {
166 for (int ch = 0; ch < s->nb_channels; ch++) {
167 ChannelSpectralStats *stats = &s->stats[ch];
168
169 set_meta(metadata, ch + 1, "mean", "%g", stats->mean);
170 set_meta(metadata, ch + 1, "variance", "%g", stats->variance);
171 set_meta(metadata, ch + 1, "centroid", "%g", stats->centroid);
172 set_meta(metadata, ch + 1, "spread", "%g", stats->spread);
173 set_meta(metadata, ch + 1, "skewness", "%g", stats->skewness);
174 set_meta(metadata, ch + 1, "kurtosis", "%g", stats->kurtosis);
175 set_meta(metadata, ch + 1, "entropy", "%g", stats->entropy);
176 set_meta(metadata, ch + 1, "flatness", "%g", stats->flatness);
177 set_meta(metadata, ch + 1, "crest", "%g", stats->crest);
178 set_meta(metadata, ch + 1, "flux", "%g", stats->flux);
179 set_meta(metadata, ch + 1, "slope", "%g", stats->slope);
180 set_meta(metadata, ch + 1, "decrease", "%g", stats->decrease);
181 set_meta(metadata, ch + 1, "rolloff", "%g", stats->rolloff);
182 }
183 }
184
spectral_mean(const float * const spectral,int size,int max_freq)185 static float spectral_mean(const float *const spectral, int size, int max_freq)
186 {
187 float sum = 0.f;
188
189 for (int n = 0; n < size; n++)
190 sum += spectral[n];
191
192 return sum / size;
193 }
194
sqrf(float a)195 static float sqrf(float a)
196 {
197 return a * a;
198 }
199
spectral_variance(const float * const spectral,int size,int max_freq,float mean)200 static float spectral_variance(const float *const spectral, int size, int max_freq, float mean)
201 {
202 float sum = 0.f;
203
204 for (int n = 0; n < size; n++)
205 sum += sqrf(spectral[n] - mean);
206
207 return sum / size;
208 }
209
spectral_centroid(const float * const spectral,int size,int max_freq)210 static float spectral_centroid(const float *const spectral, int size, int max_freq)
211 {
212 const float scale = max_freq / (float)size;
213 float num = 0.f, den = 0.f;
214
215 for (int n = 0; n < size; n++) {
216 num += spectral[n] * n * scale;
217 den += spectral[n];
218 }
219
220 if (den <= FLT_EPSILON)
221 return 1.f;
222 return num / den;
223 }
224
spectral_spread(const float * const spectral,int size,int max_freq,float centroid)225 static float spectral_spread(const float *const spectral, int size, int max_freq, float centroid)
226 {
227 const float scale = max_freq / (float)size;
228 float num = 0.f, den = 0.f;
229
230 for (int n = 0; n < size; n++) {
231 num += spectral[n] * sqrf(n * scale - centroid);
232 den += spectral[n];
233 }
234
235 if (den <= FLT_EPSILON)
236 return 1.f;
237 return sqrtf(num / den);
238 }
239
cbrf(float a)240 static float cbrf(float a)
241 {
242 return a * a * a;
243 }
244
spectral_skewness(const float * const spectral,int size,int max_freq,float centroid,float spread)245 static float spectral_skewness(const float *const spectral, int size, int max_freq, float centroid, float spread)
246 {
247 const float scale = max_freq / (float)size;
248 float num = 0.f, den = 0.f;
249
250 for (int n = 0; n < size; n++) {
251 num += spectral[n] * cbrf(n * scale - centroid);
252 den += spectral[n];
253 }
254
255 den *= cbrf(spread);
256 if (den <= FLT_EPSILON)
257 return 1.f;
258 return num / den;
259 }
260
spectral_kurtosis(const float * const spectral,int size,int max_freq,float centroid,float spread)261 static float spectral_kurtosis(const float *const spectral, int size, int max_freq, float centroid, float spread)
262 {
263 const float scale = max_freq / (float)size;
264 float num = 0.f, den = 0.f;
265
266 for (int n = 0; n < size; n++) {
267 num += spectral[n] * sqrf(sqrf(n * scale - centroid));
268 den += spectral[n];
269 }
270
271 den *= sqrf(sqrf(spread));
272 if (den <= FLT_EPSILON)
273 return 1.f;
274 return num / den;
275 }
276
spectral_entropy(const float * const spectral,int size,int max_freq)277 static float spectral_entropy(const float *const spectral, int size, int max_freq)
278 {
279 float num = 0.f, den = 0.f;
280
281 for (int n = 0; n < size; n++) {
282 num += spectral[n] * logf(spectral[n] + FLT_EPSILON);
283 }
284
285 den = logf(size);
286 if (den <= FLT_EPSILON)
287 return 1.f;
288 return -num / den;
289 }
290
spectral_flatness(const float * const spectral,int size,int max_freq)291 static float spectral_flatness(const float *const spectral, int size, int max_freq)
292 {
293 float num = 0.f, den = 0.f;
294
295 for (int n = 0; n < size; n++) {
296 float v = FLT_EPSILON + spectral[n];
297 num += logf(v);
298 den += v;
299 }
300
301 num /= size;
302 den /= size;
303 num = expf(num);
304 if (den <= FLT_EPSILON)
305 return 0.f;
306 return num / den;
307 }
308
spectral_crest(const float * const spectral,int size,int max_freq)309 static float spectral_crest(const float *const spectral, int size, int max_freq)
310 {
311 float max = 0.f, mean = 0.f;
312
313 for (int n = 0; n < size; n++) {
314 max = fmaxf(max, spectral[n]);
315 mean += spectral[n];
316 }
317
318 mean /= size;
319 if (mean <= FLT_EPSILON)
320 return 0.f;
321 return max / mean;
322 }
323
spectral_flux(const float * const spectral,const float * const prev_spectral,int size,int max_freq)324 static float spectral_flux(const float *const spectral, const float *const prev_spectral,
325 int size, int max_freq)
326 {
327 float sum = 0.f;
328
329 for (int n = 0; n < size; n++)
330 sum += sqrf(spectral[n] - prev_spectral[n]);
331
332 return sqrtf(sum);
333 }
334
spectral_slope(const float * const spectral,int size,int max_freq)335 static float spectral_slope(const float *const spectral, int size, int max_freq)
336 {
337 const float mean_freq = size * 0.5f;
338 float mean_spectral = 0.f, num = 0.f, den = 0.f;
339
340 for (int n = 0; n < size; n++)
341 mean_spectral += spectral[n];
342 mean_spectral /= size;
343
344 for (int n = 0; n < size; n++) {
345 num += ((n - mean_freq) / mean_freq) * (spectral[n] - mean_spectral);
346 den += sqrf((n - mean_freq) / mean_freq);
347 }
348
349 if (fabsf(den) <= FLT_EPSILON)
350 return 0.f;
351 return num / den;
352 }
353
spectral_decrease(const float * const spectral,int size,int max_freq)354 static float spectral_decrease(const float *const spectral, int size, int max_freq)
355 {
356 float num = 0.f, den = 0.f;
357
358 for (int n = 1; n < size; n++) {
359 num += (spectral[n] - spectral[0]) / n;
360 den += spectral[n];
361 }
362
363 if (den <= FLT_EPSILON)
364 return 0.f;
365 return num / den;
366 }
367
spectral_rolloff(const float * const spectral,int size,int max_freq)368 static float spectral_rolloff(const float *const spectral, int size, int max_freq)
369 {
370 const float scale = max_freq / (float)size;
371 float norm = 0.f, sum = 0.f;
372 int idx = 0.f;
373
374 for (int n = 0; n < size; n++)
375 norm += spectral[n];
376 norm *= 0.85f;
377
378 for (int n = 0; n < size; n++) {
379 sum += spectral[n];
380 if (sum >= norm) {
381 idx = n;
382 break;
383 }
384 }
385
386 return idx * scale;
387 }
388
filter_channel(AVFilterContext * ctx,void * arg,int jobnr,int nb_jobs)389 static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
390 {
391 AudioSpectralStatsContext *s = ctx->priv;
392 const float *window_func_lut = s->window_func_lut;
393 AVFrame *in = arg;
394 const int channels = s->nb_channels;
395 const int start = (channels * jobnr) / nb_jobs;
396 const int end = (channels * (jobnr+1)) / nb_jobs;
397 const int offset = s->win_size - s->hop_size;
398
399 for (int ch = start; ch < end; ch++) {
400 float *window = (float *)s->window->extended_data[ch];
401 ChannelSpectralStats *stats = &s->stats[ch];
402 AVComplexFloat *fft_out = s->fft_out[ch];
403 AVComplexFloat *fft_in = s->fft_in[ch];
404 float *magnitude = s->magnitude[ch];
405 float *prev_magnitude = s->prev_magnitude[ch];
406 const float scale = 1.f / s->win_size;
407
408 memmove(window, &window[s->hop_size], offset * sizeof(float));
409 memcpy(&window[offset], in->extended_data[ch], in->nb_samples * sizeof(float));
410 memset(&window[offset + in->nb_samples], 0, (s->hop_size - in->nb_samples) * sizeof(float));
411
412 for (int n = 0; n < s->win_size; n++) {
413 fft_in[n].re = window[n] * window_func_lut[n];
414 fft_in[n].im = 0;
415 }
416
417 s->tx_fn(s->fft[ch], fft_out, fft_in, sizeof(float));
418
419 for (int n = 0; n < s->win_size / 2; n++) {
420 fft_out[n].re *= scale;
421 fft_out[n].im *= scale;
422 }
423
424 for (int n = 0; n < s->win_size / 2; n++)
425 magnitude[n] = hypotf(fft_out[n].re, fft_out[n].im);
426
427 stats->mean = spectral_mean(magnitude, s->win_size / 2, in->sample_rate / 2);
428 stats->variance = spectral_variance(magnitude, s->win_size / 2, in->sample_rate / 2, stats->mean);
429 stats->centroid = spectral_centroid(magnitude, s->win_size / 2, in->sample_rate / 2);
430 stats->spread = spectral_spread(magnitude, s->win_size / 2, in->sample_rate / 2, stats->centroid);
431 stats->skewness = spectral_skewness(magnitude, s->win_size / 2, in->sample_rate / 2, stats->centroid, stats->spread);
432 stats->kurtosis = spectral_kurtosis(magnitude, s->win_size / 2, in->sample_rate / 2, stats->centroid, stats->spread);
433 stats->entropy = spectral_entropy(magnitude, s->win_size / 2, in->sample_rate / 2);
434 stats->flatness = spectral_flatness(magnitude, s->win_size / 2, in->sample_rate / 2);
435 stats->crest = spectral_crest(magnitude, s->win_size / 2, in->sample_rate / 2);
436 stats->flux = spectral_flux(magnitude, prev_magnitude, s->win_size / 2, in->sample_rate / 2);
437 stats->slope = spectral_slope(magnitude, s->win_size / 2, in->sample_rate / 2);
438 stats->decrease = spectral_decrease(magnitude, s->win_size / 2, in->sample_rate / 2);
439 stats->rolloff = spectral_rolloff(magnitude, s->win_size / 2, in->sample_rate / 2);
440
441 memcpy(prev_magnitude, magnitude, s->win_size * sizeof(float));
442 }
443
444 return 0;
445 }
446
filter_frame(AVFilterLink * inlink,AVFrame * in)447 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
448 {
449 AVFilterContext *ctx = inlink->dst;
450 AVFilterLink *outlink = ctx->outputs[0];
451 AudioSpectralStatsContext *s = ctx->priv;
452 AVDictionary **metadata;
453 AVFrame *out;
454 int ret;
455
456 if (av_frame_is_writable(in)) {
457 out = in;
458 } else {
459 out = ff_get_audio_buffer(outlink, in->nb_samples);
460 if (!out) {
461 av_frame_free(&in);
462 return AVERROR(ENOMEM);
463 }
464 ret = av_frame_copy_props(out, in);
465 if (ret < 0)
466 goto fail;
467 ret = av_frame_copy(out, in);
468 if (ret < 0)
469 goto fail;
470 }
471
472 metadata = &out->metadata;
473 ff_filter_execute(ctx, filter_channel, in, NULL,
474 FFMIN(inlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx)));
475
476 set_metadata(s, metadata);
477
478 if (out != in)
479 av_frame_free(&in);
480 return ff_filter_frame(outlink, out);
481 fail:
482 av_frame_free(&in);
483 av_frame_free(&out);
484 return ret;
485 }
486
activate(AVFilterContext * ctx)487 static int activate(AVFilterContext *ctx)
488 {
489 AudioSpectralStatsContext *s = ctx->priv;
490 AVFilterLink *outlink = ctx->outputs[0];
491 AVFilterLink *inlink = ctx->inputs[0];
492 AVFrame *in;
493 int ret;
494
495 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
496
497 ret = ff_inlink_consume_samples(inlink, s->hop_size, s->hop_size, &in);
498 if (ret < 0)
499 return ret;
500 if (ret > 0)
501 ret = filter_frame(inlink, in);
502 if (ret < 0)
503 return ret;
504
505 if (ff_inlink_queued_samples(inlink) >= s->hop_size) {
506 ff_filter_set_ready(ctx, 10);
507 return 0;
508 }
509
510 FF_FILTER_FORWARD_STATUS(inlink, outlink);
511 FF_FILTER_FORWARD_WANTED(outlink, inlink);
512
513 return FFERROR_NOT_READY;
514 }
515
uninit(AVFilterContext * ctx)516 static av_cold void uninit(AVFilterContext *ctx)
517 {
518 AudioSpectralStatsContext *s = ctx->priv;
519
520 for (int ch = 0; ch < s->nb_channels; ch++) {
521 if (s->fft)
522 av_tx_uninit(&s->fft[ch]);
523 if (s->fft_in)
524 av_freep(&s->fft_in[ch]);
525 if (s->fft_out)
526 av_freep(&s->fft_out[ch]);
527 if (s->magnitude)
528 av_freep(&s->magnitude[ch]);
529 if (s->prev_magnitude)
530 av_freep(&s->prev_magnitude[ch]);
531 }
532
533 av_freep(&s->fft);
534 av_freep(&s->magnitude);
535 av_freep(&s->prev_magnitude);
536 av_freep(&s->fft_in);
537 av_freep(&s->fft_out);
538 av_freep(&s->stats);
539
540 av_freep(&s->window_func_lut);
541 av_frame_free(&s->window);
542 }
543
544 static const AVFilterPad aspectralstats_inputs[] = {
545 {
546 .name = "default",
547 .type = AVMEDIA_TYPE_AUDIO,
548 },
549 };
550
551 static const AVFilterPad aspectralstats_outputs[] = {
552 {
553 .name = "default",
554 .type = AVMEDIA_TYPE_AUDIO,
555 .config_props = config_output,
556 },
557 };
558
559 const AVFilter ff_af_aspectralstats = {
560 .name = "aspectralstats",
561 .description = NULL_IF_CONFIG_SMALL("Show frequency domain statistics about audio frames."),
562 .priv_size = sizeof(AudioSpectralStatsContext),
563 .priv_class = &aspectralstats_class,
564 .uninit = uninit,
565 .activate = activate,
566 FILTER_INPUTS(aspectralstats_inputs),
567 FILTER_OUTPUTS(aspectralstats_outputs),
568 FILTER_SINGLE_SAMPLEFMT(AV_SAMPLE_FMT_FLTP),
569 .flags = AVFILTER_FLAG_SLICE_THREADS,
570 };
571