• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Paul B Mahol
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <float.h>
22 #include <math.h>
23 
24 #include "libavutil/opt.h"
25 #include "libavutil/tx.h"
26 #include "audio.h"
27 #include "avfilter.h"
28 #include "filters.h"
29 #include "internal.h"
30 #include "window_func.h"
31 
32 typedef struct ChannelSpectralStats {
33     float mean;
34     float variance;
35     float centroid;
36     float spread;
37     float skewness;
38     float kurtosis;
39     float entropy;
40     float flatness;
41     float crest;
42     float flux;
43     float slope;
44     float decrease;
45     float rolloff;
46 } ChannelSpectralStats;
47 
48 typedef struct AudioSpectralStatsContext {
49     const AVClass *class;
50     int win_size;
51     int win_func;
52     float overlap;
53     int nb_channels;
54     int hop_size;
55     ChannelSpectralStats *stats;
56     float *window_func_lut;
57     av_tx_fn tx_fn;
58     AVTXContext **fft;
59     AVComplexFloat **fft_in;
60     AVComplexFloat **fft_out;
61     float **prev_magnitude;
62     float **magnitude;
63     AVFrame *window;
64 } AudioSpectralStatsContext;
65 
66 #define OFFSET(x) offsetof(AudioSpectralStatsContext, x)
67 #define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
68 
69 static const AVOption aspectralstats_options[] = {
70     { "win_size", "set the window size", OFFSET(win_size), AV_OPT_TYPE_INT, {.i64=2048}, 32, 65536, A },
71     WIN_FUNC_OPTION("win_func", OFFSET(win_func), A, WFUNC_HANNING),
72     { "overlap", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_FLOAT, {.dbl=0.5}, 0,  1, A },
73     { NULL }
74 };
75 
76 AVFILTER_DEFINE_CLASS(aspectralstats);
77 
config_output(AVFilterLink * outlink)78 static int config_output(AVFilterLink *outlink)
79 {
80     AudioSpectralStatsContext *s = outlink->src->priv;
81     float overlap, scale;
82     int ret;
83 
84     s->nb_channels = outlink->ch_layout.nb_channels;
85     s->window_func_lut = av_realloc_f(s->window_func_lut, s->win_size,
86                                       sizeof(*s->window_func_lut));
87     if (!s->window_func_lut)
88         return AVERROR(ENOMEM);
89     generate_window_func(s->window_func_lut, s->win_size, s->win_func, &overlap);
90     if (s->overlap == 1.f)
91         s->overlap = overlap;
92 
93     s->hop_size = s->win_size * (1.f - s->overlap);
94     if (s->hop_size <= 0)
95         return AVERROR(EINVAL);
96 
97     s->stats = av_calloc(s->nb_channels, sizeof(*s->stats));
98     if (!s->stats)
99         return AVERROR(ENOMEM);
100 
101     s->fft = av_calloc(s->nb_channels, sizeof(*s->fft));
102     if (!s->fft)
103         return AVERROR(ENOMEM);
104 
105     s->magnitude = av_calloc(s->nb_channels, sizeof(*s->magnitude));
106     if (!s->magnitude)
107         return AVERROR(ENOMEM);
108 
109     s->prev_magnitude = av_calloc(s->nb_channels, sizeof(*s->prev_magnitude));
110     if (!s->prev_magnitude)
111         return AVERROR(ENOMEM);
112 
113     s->fft_in = av_calloc(s->nb_channels, sizeof(*s->fft_in));
114     if (!s->fft_in)
115         return AVERROR(ENOMEM);
116 
117     s->fft_out = av_calloc(s->nb_channels, sizeof(*s->fft_out));
118     if (!s->fft_out)
119         return AVERROR(ENOMEM);
120 
121     for (int ch = 0; ch < s->nb_channels; ch++) {
122         ret = av_tx_init(&s->fft[ch], &s->tx_fn, AV_TX_FLOAT_FFT, 0, s->win_size, &scale, 0);
123         if (ret < 0)
124             return ret;
125 
126         s->fft_in[ch] = av_calloc(s->win_size, sizeof(**s->fft_in));
127         if (!s->fft_in[ch])
128             return AVERROR(ENOMEM);
129 
130         s->fft_out[ch] = av_calloc(s->win_size, sizeof(**s->fft_out));
131         if (!s->fft_out[ch])
132             return AVERROR(ENOMEM);
133 
134         s->magnitude[ch] = av_calloc(s->win_size, sizeof(**s->magnitude));
135         if (!s->magnitude[ch])
136             return AVERROR(ENOMEM);
137 
138         s->prev_magnitude[ch] = av_calloc(s->win_size, sizeof(**s->prev_magnitude));
139         if (!s->prev_magnitude[ch])
140             return AVERROR(ENOMEM);
141     }
142 
143     s->window = ff_get_audio_buffer(outlink, s->win_size);
144     if (!s->window)
145         return AVERROR(ENOMEM);
146 
147     return 0;
148 }
149 
set_meta(AVDictionary ** metadata,int chan,const char * key,const char * fmt,float val)150 static void set_meta(AVDictionary **metadata, int chan, const char *key,
151                      const char *fmt, float val)
152 {
153     uint8_t value[128];
154     uint8_t key2[128];
155 
156     snprintf(value, sizeof(value), fmt, val);
157     if (chan)
158         snprintf(key2, sizeof(key2), "lavfi.aspectralstats.%d.%s", chan, key);
159     else
160         snprintf(key2, sizeof(key2), "lavfi.aspectralstats.%s", key);
161     av_dict_set(metadata, key2, value, 0);
162 }
163 
set_metadata(AudioSpectralStatsContext * s,AVDictionary ** metadata)164 static void set_metadata(AudioSpectralStatsContext *s, AVDictionary **metadata)
165 {
166     for (int ch = 0; ch < s->nb_channels; ch++) {
167         ChannelSpectralStats *stats = &s->stats[ch];
168 
169         set_meta(metadata, ch + 1, "mean",     "%g", stats->mean);
170         set_meta(metadata, ch + 1, "variance", "%g", stats->variance);
171         set_meta(metadata, ch + 1, "centroid", "%g", stats->centroid);
172         set_meta(metadata, ch + 1, "spread",   "%g", stats->spread);
173         set_meta(metadata, ch + 1, "skewness", "%g", stats->skewness);
174         set_meta(metadata, ch + 1, "kurtosis", "%g", stats->kurtosis);
175         set_meta(metadata, ch + 1, "entropy",  "%g", stats->entropy);
176         set_meta(metadata, ch + 1, "flatness", "%g", stats->flatness);
177         set_meta(metadata, ch + 1, "crest",    "%g", stats->crest);
178         set_meta(metadata, ch + 1, "flux",     "%g", stats->flux);
179         set_meta(metadata, ch + 1, "slope",    "%g", stats->slope);
180         set_meta(metadata, ch + 1, "decrease", "%g", stats->decrease);
181         set_meta(metadata, ch + 1, "rolloff",  "%g", stats->rolloff);
182     }
183 }
184 
spectral_mean(const float * const spectral,int size,int max_freq)185 static float spectral_mean(const float *const spectral, int size, int max_freq)
186 {
187     float sum = 0.f;
188 
189     for (int n = 0; n < size; n++)
190         sum += spectral[n];
191 
192     return sum / size;
193 }
194 
sqrf(float a)195 static float sqrf(float a)
196 {
197     return a * a;
198 }
199 
spectral_variance(const float * const spectral,int size,int max_freq,float mean)200 static float spectral_variance(const float *const spectral, int size, int max_freq, float mean)
201 {
202     float sum = 0.f;
203 
204     for (int n = 0; n < size; n++)
205         sum += sqrf(spectral[n] - mean);
206 
207     return sum / size;
208 }
209 
spectral_centroid(const float * const spectral,int size,int max_freq)210 static float spectral_centroid(const float *const spectral, int size, int max_freq)
211 {
212     const float scale = max_freq / (float)size;
213     float num = 0.f, den = 0.f;
214 
215     for (int n = 0; n < size; n++) {
216         num += spectral[n] * n * scale;
217         den += spectral[n];
218     }
219 
220     if (den <= FLT_EPSILON)
221         return 1.f;
222     return num / den;
223 }
224 
spectral_spread(const float * const spectral,int size,int max_freq,float centroid)225 static float spectral_spread(const float *const spectral, int size, int max_freq, float centroid)
226 {
227     const float scale = max_freq / (float)size;
228     float num = 0.f, den = 0.f;
229 
230     for (int n = 0; n < size; n++) {
231         num += spectral[n] * sqrf(n * scale - centroid);
232         den += spectral[n];
233     }
234 
235     if (den <= FLT_EPSILON)
236         return 1.f;
237     return sqrtf(num / den);
238 }
239 
cbrf(float a)240 static float cbrf(float a)
241 {
242     return a * a * a;
243 }
244 
spectral_skewness(const float * const spectral,int size,int max_freq,float centroid,float spread)245 static float spectral_skewness(const float *const spectral, int size, int max_freq, float centroid, float spread)
246 {
247     const float scale = max_freq / (float)size;
248     float num = 0.f, den = 0.f;
249 
250     for (int n = 0; n < size; n++) {
251         num += spectral[n] * cbrf(n * scale - centroid);
252         den += spectral[n];
253     }
254 
255     den *= cbrf(spread);
256     if (den <= FLT_EPSILON)
257         return 1.f;
258     return num / den;
259 }
260 
spectral_kurtosis(const float * const spectral,int size,int max_freq,float centroid,float spread)261 static float spectral_kurtosis(const float *const spectral, int size, int max_freq, float centroid, float spread)
262 {
263     const float scale = max_freq / (float)size;
264     float num = 0.f, den = 0.f;
265 
266     for (int n = 0; n < size; n++) {
267         num += spectral[n] * sqrf(sqrf(n * scale - centroid));
268         den += spectral[n];
269     }
270 
271     den *= sqrf(sqrf(spread));
272     if (den <= FLT_EPSILON)
273         return 1.f;
274     return num / den;
275 }
276 
spectral_entropy(const float * const spectral,int size,int max_freq)277 static float spectral_entropy(const float *const spectral, int size, int max_freq)
278 {
279     float num = 0.f, den = 0.f;
280 
281     for (int n = 0; n < size; n++) {
282         num += spectral[n] * logf(spectral[n] + FLT_EPSILON);
283     }
284 
285     den = logf(size);
286     if (den <= FLT_EPSILON)
287         return 1.f;
288     return -num / den;
289 }
290 
spectral_flatness(const float * const spectral,int size,int max_freq)291 static float spectral_flatness(const float *const spectral, int size, int max_freq)
292 {
293     float num = 0.f, den = 0.f;
294 
295     for (int n = 0; n < size; n++) {
296         float v = FLT_EPSILON + spectral[n];
297         num += logf(v);
298         den += v;
299     }
300 
301     num /= size;
302     den /= size;
303     num = expf(num);
304     if (den <= FLT_EPSILON)
305         return 0.f;
306     return num / den;
307 }
308 
spectral_crest(const float * const spectral,int size,int max_freq)309 static float spectral_crest(const float *const spectral, int size, int max_freq)
310 {
311     float max = 0.f, mean = 0.f;
312 
313     for (int n = 0; n < size; n++) {
314         max = fmaxf(max, spectral[n]);
315         mean += spectral[n];
316     }
317 
318     mean /= size;
319     if (mean <= FLT_EPSILON)
320         return 0.f;
321     return max / mean;
322 }
323 
spectral_flux(const float * const spectral,const float * const prev_spectral,int size,int max_freq)324 static float spectral_flux(const float *const spectral, const float *const prev_spectral,
325                            int size, int max_freq)
326 {
327     float sum = 0.f;
328 
329     for (int n = 0; n < size; n++)
330         sum += sqrf(spectral[n] - prev_spectral[n]);
331 
332     return sqrtf(sum);
333 }
334 
spectral_slope(const float * const spectral,int size,int max_freq)335 static float spectral_slope(const float *const spectral, int size, int max_freq)
336 {
337     const float mean_freq = size * 0.5f;
338     float mean_spectral = 0.f, num = 0.f, den = 0.f;
339 
340     for (int n = 0; n < size; n++)
341         mean_spectral += spectral[n];
342     mean_spectral /= size;
343 
344     for (int n = 0; n < size; n++) {
345         num += ((n - mean_freq) / mean_freq) * (spectral[n] - mean_spectral);
346         den += sqrf((n - mean_freq) / mean_freq);
347     }
348 
349     if (fabsf(den) <= FLT_EPSILON)
350         return 0.f;
351     return num / den;
352 }
353 
spectral_decrease(const float * const spectral,int size,int max_freq)354 static float spectral_decrease(const float *const spectral, int size, int max_freq)
355 {
356     float num = 0.f, den = 0.f;
357 
358     for (int n = 1; n < size; n++) {
359         num += (spectral[n] - spectral[0]) / n;
360         den += spectral[n];
361     }
362 
363     if (den <= FLT_EPSILON)
364         return 0.f;
365     return num / den;
366 }
367 
spectral_rolloff(const float * const spectral,int size,int max_freq)368 static float spectral_rolloff(const float *const spectral, int size, int max_freq)
369 {
370     const float scale = max_freq / (float)size;
371     float norm = 0.f, sum = 0.f;
372     int idx = 0.f;
373 
374     for (int n = 0; n < size; n++)
375         norm += spectral[n];
376     norm *= 0.85f;
377 
378     for (int n = 0; n < size; n++) {
379         sum += spectral[n];
380         if (sum >= norm) {
381             idx = n;
382             break;
383         }
384     }
385 
386     return idx * scale;
387 }
388 
filter_channel(AVFilterContext * ctx,void * arg,int jobnr,int nb_jobs)389 static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
390 {
391     AudioSpectralStatsContext *s = ctx->priv;
392     const float *window_func_lut = s->window_func_lut;
393     AVFrame *in = arg;
394     const int channels = s->nb_channels;
395     const int start = (channels * jobnr) / nb_jobs;
396     const int end = (channels * (jobnr+1)) / nb_jobs;
397     const int offset = s->win_size - s->hop_size;
398 
399     for (int ch = start; ch < end; ch++) {
400         float *window = (float *)s->window->extended_data[ch];
401         ChannelSpectralStats *stats = &s->stats[ch];
402         AVComplexFloat *fft_out = s->fft_out[ch];
403         AVComplexFloat *fft_in = s->fft_in[ch];
404         float *magnitude = s->magnitude[ch];
405         float *prev_magnitude = s->prev_magnitude[ch];
406         const float scale = 1.f / s->win_size;
407 
408         memmove(window, &window[s->hop_size], offset * sizeof(float));
409         memcpy(&window[offset], in->extended_data[ch], in->nb_samples * sizeof(float));
410         memset(&window[offset + in->nb_samples], 0, (s->hop_size - in->nb_samples) * sizeof(float));
411 
412         for (int n = 0; n < s->win_size; n++) {
413             fft_in[n].re = window[n] * window_func_lut[n];
414             fft_in[n].im = 0;
415         }
416 
417         s->tx_fn(s->fft[ch], fft_out, fft_in, sizeof(float));
418 
419         for (int n = 0; n < s->win_size / 2; n++) {
420             fft_out[n].re *= scale;
421             fft_out[n].im *= scale;
422         }
423 
424         for (int n = 0; n < s->win_size / 2; n++)
425             magnitude[n] = hypotf(fft_out[n].re, fft_out[n].im);
426 
427         stats->mean     = spectral_mean(magnitude, s->win_size / 2, in->sample_rate / 2);
428         stats->variance = spectral_variance(magnitude, s->win_size / 2, in->sample_rate / 2, stats->mean);
429         stats->centroid = spectral_centroid(magnitude, s->win_size / 2, in->sample_rate / 2);
430         stats->spread   = spectral_spread(magnitude, s->win_size / 2, in->sample_rate / 2, stats->centroid);
431         stats->skewness = spectral_skewness(magnitude, s->win_size / 2, in->sample_rate / 2, stats->centroid, stats->spread);
432         stats->kurtosis = spectral_kurtosis(magnitude, s->win_size / 2, in->sample_rate / 2, stats->centroid, stats->spread);
433         stats->entropy  = spectral_entropy(magnitude, s->win_size / 2, in->sample_rate / 2);
434         stats->flatness = spectral_flatness(magnitude, s->win_size / 2, in->sample_rate / 2);
435         stats->crest    = spectral_crest(magnitude, s->win_size / 2, in->sample_rate / 2);
436         stats->flux     = spectral_flux(magnitude, prev_magnitude, s->win_size / 2, in->sample_rate / 2);
437         stats->slope    = spectral_slope(magnitude, s->win_size / 2, in->sample_rate / 2);
438         stats->decrease = spectral_decrease(magnitude, s->win_size / 2, in->sample_rate / 2);
439         stats->rolloff  = spectral_rolloff(magnitude, s->win_size / 2, in->sample_rate / 2);
440 
441         memcpy(prev_magnitude, magnitude, s->win_size * sizeof(float));
442     }
443 
444     return 0;
445 }
446 
filter_frame(AVFilterLink * inlink,AVFrame * in)447 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
448 {
449     AVFilterContext *ctx = inlink->dst;
450     AVFilterLink *outlink = ctx->outputs[0];
451     AudioSpectralStatsContext *s = ctx->priv;
452     AVDictionary **metadata;
453     AVFrame *out;
454     int ret;
455 
456     if (av_frame_is_writable(in)) {
457         out = in;
458     } else {
459         out = ff_get_audio_buffer(outlink, in->nb_samples);
460         if (!out) {
461             av_frame_free(&in);
462             return AVERROR(ENOMEM);
463         }
464         ret = av_frame_copy_props(out, in);
465         if (ret < 0)
466             goto fail;
467         ret = av_frame_copy(out, in);
468         if (ret < 0)
469             goto fail;
470     }
471 
472     metadata = &out->metadata;
473     ff_filter_execute(ctx, filter_channel, in, NULL,
474                       FFMIN(inlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx)));
475 
476     set_metadata(s, metadata);
477 
478     if (out != in)
479         av_frame_free(&in);
480     return ff_filter_frame(outlink, out);
481 fail:
482     av_frame_free(&in);
483     av_frame_free(&out);
484     return ret;
485 }
486 
activate(AVFilterContext * ctx)487 static int activate(AVFilterContext *ctx)
488 {
489     AudioSpectralStatsContext *s = ctx->priv;
490     AVFilterLink *outlink = ctx->outputs[0];
491     AVFilterLink *inlink = ctx->inputs[0];
492     AVFrame *in;
493     int ret;
494 
495     FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
496 
497     ret = ff_inlink_consume_samples(inlink, s->hop_size, s->hop_size, &in);
498     if (ret < 0)
499         return ret;
500     if (ret > 0)
501         ret = filter_frame(inlink, in);
502     if (ret < 0)
503         return ret;
504 
505     if (ff_inlink_queued_samples(inlink) >= s->hop_size) {
506         ff_filter_set_ready(ctx, 10);
507         return 0;
508     }
509 
510     FF_FILTER_FORWARD_STATUS(inlink, outlink);
511     FF_FILTER_FORWARD_WANTED(outlink, inlink);
512 
513     return FFERROR_NOT_READY;
514 }
515 
uninit(AVFilterContext * ctx)516 static av_cold void uninit(AVFilterContext *ctx)
517 {
518     AudioSpectralStatsContext *s = ctx->priv;
519 
520     for (int ch = 0; ch < s->nb_channels; ch++) {
521         if (s->fft)
522             av_tx_uninit(&s->fft[ch]);
523         if (s->fft_in)
524             av_freep(&s->fft_in[ch]);
525         if (s->fft_out)
526             av_freep(&s->fft_out[ch]);
527         if (s->magnitude)
528             av_freep(&s->magnitude[ch]);
529         if (s->prev_magnitude)
530             av_freep(&s->prev_magnitude[ch]);
531     }
532 
533     av_freep(&s->fft);
534     av_freep(&s->magnitude);
535     av_freep(&s->prev_magnitude);
536     av_freep(&s->fft_in);
537     av_freep(&s->fft_out);
538     av_freep(&s->stats);
539 
540     av_freep(&s->window_func_lut);
541     av_frame_free(&s->window);
542 }
543 
544 static const AVFilterPad aspectralstats_inputs[] = {
545     {
546         .name         = "default",
547         .type         = AVMEDIA_TYPE_AUDIO,
548     },
549 };
550 
551 static const AVFilterPad aspectralstats_outputs[] = {
552     {
553         .name         = "default",
554         .type         = AVMEDIA_TYPE_AUDIO,
555         .config_props = config_output,
556     },
557 };
558 
559 const AVFilter ff_af_aspectralstats = {
560     .name          = "aspectralstats",
561     .description   = NULL_IF_CONFIG_SMALL("Show frequency domain statistics about audio frames."),
562     .priv_size     = sizeof(AudioSpectralStatsContext),
563     .priv_class    = &aspectralstats_class,
564     .uninit        = uninit,
565     .activate      = activate,
566     FILTER_INPUTS(aspectralstats_inputs),
567     FILTER_OUTPUTS(aspectralstats_outputs),
568     FILTER_SINGLE_SAMPLEFMT(AV_SAMPLE_FMT_FLTP),
569     .flags         = AVFILTER_FLAG_SLICE_THREADS,
570 };
571