• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2018 The FFmpeg Project
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <float.h>
22 
23 #include "libavutil/avstring.h"
24 #include "libavutil/channel_layout.h"
25 #include "libavutil/opt.h"
26 #include "libavutil/tx.h"
27 #include "avfilter.h"
28 #include "audio.h"
29 #include "formats.h"
30 #include "filters.h"
31 
32 #define C       (M_LN10 * 0.1)
33 #define SOLVE_SIZE (5)
34 #define NB_PROFILE_BANDS (15)
35 
36 enum SampleNoiseModes {
37     SAMPLE_NONE,
38     SAMPLE_START,
39     SAMPLE_STOP,
40     NB_SAMPLEMODES
41 };
42 
43 enum OutModes {
44     IN_MODE,
45     OUT_MODE,
46     NOISE_MODE,
47     NB_MODES
48 };
49 
50 enum NoiseLinkType {
51     NONE_LINK,
52     MIN_LINK,
53     MAX_LINK,
54     AVERAGE_LINK,
55     NB_LINK
56 };
57 
58 enum NoiseType {
59     WHITE_NOISE,
60     VINYL_NOISE,
61     SHELLAC_NOISE,
62     CUSTOM_NOISE,
63     NB_NOISE
64 };
65 
66 typedef struct DeNoiseChannel {
67     double      band_noise[NB_PROFILE_BANDS];
68     double      noise_band_auto_var[NB_PROFILE_BANDS];
69     double      noise_band_sample[NB_PROFILE_BANDS];
70     double     *amt;
71     double     *band_amt;
72     double     *band_excit;
73     double     *gain;
74     double     *smoothed_gain;
75     double     *prior;
76     double     *prior_band_excit;
77     double     *clean_data;
78     double     *noisy_data;
79     double     *out_samples;
80     double     *spread_function;
81     double     *abs_var;
82     double     *rel_var;
83     double     *min_abs_var;
84     float      *fft_in;
85     AVComplexFloat *fft_out;
86     AVTXContext *fft, *ifft;
87     av_tx_fn   tx_fn, itx_fn;
88 
89     double      noise_band_norm[NB_PROFILE_BANDS];
90     double      noise_band_avr[NB_PROFILE_BANDS];
91     double      noise_band_avi[NB_PROFILE_BANDS];
92     double      noise_band_var[NB_PROFILE_BANDS];
93 
94     double      noise_reduction;
95     double      last_noise_reduction;
96     double      noise_floor;
97     double      last_noise_floor;
98     double      residual_floor;
99     double      last_residual_floor;
100     double      max_gain;
101     double      max_var;
102     double      gain_scale;
103 } DeNoiseChannel;
104 
105 typedef struct AudioFFTDeNoiseContext {
106     const AVClass *class;
107 
108     float   noise_reduction;
109     float   noise_floor;
110     int     noise_type;
111     char   *band_noise_str;
112     float   residual_floor;
113     int     track_noise;
114     int     track_residual;
115     int     output_mode;
116     int     noise_floor_link;
117     float   ratio;
118     int     gain_smooth;
119     float   band_multiplier;
120     float   floor_offset;
121 
122     int     channels;
123     int     sample_noise;
124     int     sample_noise_blocks;
125     int     sample_noise_mode;
126     float   sample_rate;
127     int     buffer_length;
128     int     fft_length;
129     int     fft_length2;
130     int     bin_count;
131     int     window_length;
132     int     sample_advance;
133     int     number_of_bands;
134 
135     int     band_centre[NB_PROFILE_BANDS];
136 
137     int    *bin2band;
138     double *window;
139     double *band_alpha;
140     double *band_beta;
141 
142     DeNoiseChannel *dnch;
143 
144     AVFrame *winframe;
145 
146     double  window_weight;
147     double  floor;
148     double  sample_floor;
149 
150     int     noise_band_edge[NB_PROFILE_BANDS + 2];
151     int     noise_band_count;
152     double  matrix_a[SOLVE_SIZE * SOLVE_SIZE];
153     double  vector_b[SOLVE_SIZE];
154     double  matrix_b[SOLVE_SIZE * NB_PROFILE_BANDS];
155     double  matrix_c[SOLVE_SIZE * NB_PROFILE_BANDS];
156 } AudioFFTDeNoiseContext;
157 
158 #define OFFSET(x) offsetof(AudioFFTDeNoiseContext, x)
159 #define AF  AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
160 #define AFR AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
161 
162 static const AVOption afftdn_options[] = {
163     { "noise_reduction", "set the noise reduction",OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT,{.dbl = 12},   .01, 97, AFR },
164     { "nr", "set the noise reduction",    OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT,  {.dbl = 12},          .01, 97, AFR },
165     { "noise_floor", "set the noise floor",OFFSET(noise_floor),    AV_OPT_TYPE_FLOAT,  {.dbl =-50},          -80,-20, AFR },
166     { "nf", "set the noise floor",        OFFSET(noise_floor),     AV_OPT_TYPE_FLOAT,  {.dbl =-50},          -80,-20, AFR },
167     { "noise_type", "set the noise type", OFFSET(noise_type),      AV_OPT_TYPE_INT,    {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, "type" },
168     { "nt", "set the noise type",         OFFSET(noise_type),      AV_OPT_TYPE_INT,    {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, "type" },
169     {  "white", "white noise",            0,                       AV_OPT_TYPE_CONST,  {.i64 = WHITE_NOISE},   0,  0, AF, "type" },
170     {  "w", "white noise",                0,                       AV_OPT_TYPE_CONST,  {.i64 = WHITE_NOISE},   0,  0, AF, "type" },
171     {  "vinyl", "vinyl noise",            0,                       AV_OPT_TYPE_CONST,  {.i64 = VINYL_NOISE},   0,  0, AF, "type" },
172     {  "v", "vinyl noise",                0,                       AV_OPT_TYPE_CONST,  {.i64 = VINYL_NOISE},   0,  0, AF, "type" },
173     {  "shellac", "shellac noise",        0,                       AV_OPT_TYPE_CONST,  {.i64 = SHELLAC_NOISE}, 0,  0, AF, "type" },
174     {  "s", "shellac noise",              0,                       AV_OPT_TYPE_CONST,  {.i64 = SHELLAC_NOISE}, 0,  0, AF, "type" },
175     {  "custom", "custom noise",          0,                       AV_OPT_TYPE_CONST,  {.i64 = CUSTOM_NOISE},  0,  0, AF, "type" },
176     {  "c", "custom noise",               0,                       AV_OPT_TYPE_CONST,  {.i64 = CUSTOM_NOISE},  0,  0, AF, "type" },
177     { "band_noise", "set the custom bands noise", OFFSET(band_noise_str),  AV_OPT_TYPE_STRING, {.str = 0},     0,  0, AF },
178     { "bn", "set the custom bands noise", OFFSET(band_noise_str),  AV_OPT_TYPE_STRING, {.str = 0},             0,  0, AF },
179     { "residual_floor", "set the residual floor",OFFSET(residual_floor),  AV_OPT_TYPE_FLOAT, {.dbl =-38},    -80,-20, AFR },
180     { "rf", "set the residual floor",     OFFSET(residual_floor),  AV_OPT_TYPE_FLOAT,  {.dbl =-38},          -80,-20, AFR },
181     { "track_noise", "track noise",       OFFSET(track_noise),     AV_OPT_TYPE_BOOL,   {.i64 =  0},            0,  1, AFR },
182     { "tn", "track noise",                OFFSET(track_noise),     AV_OPT_TYPE_BOOL,   {.i64 =  0},            0,  1, AFR },
183     { "track_residual", "track residual", OFFSET(track_residual),  AV_OPT_TYPE_BOOL,   {.i64 =  0},            0,  1, AFR },
184     { "tr", "track residual",             OFFSET(track_residual),  AV_OPT_TYPE_BOOL,   {.i64 =  0},            0,  1, AFR },
185     { "output_mode", "set output mode",   OFFSET(output_mode),     AV_OPT_TYPE_INT,    {.i64 = OUT_MODE},      0,  NB_MODES-1, AFR, "mode" },
186     { "om", "set output mode",            OFFSET(output_mode),     AV_OPT_TYPE_INT,    {.i64 = OUT_MODE},      0,  NB_MODES-1, AFR, "mode" },
187     {  "input", "input",                  0,                       AV_OPT_TYPE_CONST,  {.i64 = IN_MODE},       0,  0, AFR, "mode" },
188     {  "i", "input",                      0,                       AV_OPT_TYPE_CONST,  {.i64 = IN_MODE},       0,  0, AFR, "mode" },
189     {  "output", "output",                0,                       AV_OPT_TYPE_CONST,  {.i64 = OUT_MODE},      0,  0, AFR, "mode" },
190     {  "o", "output",                     0,                       AV_OPT_TYPE_CONST,  {.i64 = OUT_MODE},      0,  0, AFR, "mode" },
191     {  "noise", "noise",                  0,                       AV_OPT_TYPE_CONST,  {.i64 = NOISE_MODE},    0,  0, AFR, "mode" },
192     {  "n", "noise",                      0,                       AV_OPT_TYPE_CONST,  {.i64 = NOISE_MODE},    0,  0, AFR, "mode" },
193     { "adaptivity", "set adaptivity factor",OFFSET(ratio),         AV_OPT_TYPE_FLOAT,  {.dbl = 0.5},           0,  1, AFR },
194     { "ad",         "set adaptivity factor",OFFSET(ratio),         AV_OPT_TYPE_FLOAT,  {.dbl = 0.5},           0,  1, AFR },
195     { "floor_offset", "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0},  -2,  2, AFR },
196     { "fo",           "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0},  -2,  2, AFR },
197     { "noise_link", "set the noise floor link",OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK},     0,  NB_LINK-1, AFR, "link" },
198     { "nl", "set the noise floor link",        OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK},     0,  NB_LINK-1, AFR, "link" },
199     {  "none",    "none",                 0,                       AV_OPT_TYPE_CONST,  {.i64 = NONE_LINK},     0,  0, AFR, "link" },
200     {  "min",     "min",                  0,                       AV_OPT_TYPE_CONST,  {.i64 = MIN_LINK},      0,  0, AFR, "link" },
201     {  "max",     "max",                  0,                       AV_OPT_TYPE_CONST,  {.i64 = MAX_LINK},      0,  0, AFR, "link" },
202     {  "average", "average",              0,                       AV_OPT_TYPE_CONST,  {.i64 = AVERAGE_LINK},  0,  0, AFR, "link" },
203     { "band_multiplier", "set band multiplier",OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25},       0.2,5, AF  },
204     { "bm",       "set band multiplier",       OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25},       0.2,5, AF  },
205     { "sample_noise", "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE},  0,  NB_SAMPLEMODES-1, AFR, "sample" },
206     { "sn",           "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE},  0,  NB_SAMPLEMODES-1, AFR, "sample" },
207     {  "none",    "none",                 0,                       AV_OPT_TYPE_CONST,  {.i64 = SAMPLE_NONE},   0,  0, AFR, "sample" },
208     {  "start",   "start",                0,                       AV_OPT_TYPE_CONST,  {.i64 = SAMPLE_START},  0,  0, AFR, "sample" },
209     {  "begin",   "start",                0,                       AV_OPT_TYPE_CONST,  {.i64 = SAMPLE_START},  0,  0, AFR, "sample" },
210     {  "stop",    "stop",                 0,                       AV_OPT_TYPE_CONST,  {.i64 = SAMPLE_STOP},   0,  0, AFR, "sample" },
211     {  "end",     "stop",                 0,                       AV_OPT_TYPE_CONST,  {.i64 = SAMPLE_STOP},   0,  0, AFR, "sample" },
212     { "gain_smooth", "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT,    {.i64 = 0},             0, 50, AFR },
213     { "gs",          "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT,    {.i64 = 0},             0, 50, AFR },
214     { NULL }
215 };
216 
217 AVFILTER_DEFINE_CLASS(afftdn);
218 
get_band_noise(AudioFFTDeNoiseContext * s,int band,double a,double b,double c)219 static double get_band_noise(AudioFFTDeNoiseContext *s,
220                              int band, double a,
221                              double b, double c)
222 {
223     double d1, d2, d3;
224 
225     d1 = a / s->band_centre[band];
226     d1 = 10.0 * log(1.0 + d1 * d1) / M_LN10;
227     d2 = b / s->band_centre[band];
228     d2 = 10.0 * log(1.0 + d2 * d2) / M_LN10;
229     d3 = s->band_centre[band] / c;
230     d3 = 10.0 * log(1.0 + d3 * d3) / M_LN10;
231 
232     return -d1 + d2 - d3;
233 }
234 
factor(double * array,int size)235 static void factor(double *array, int size)
236 {
237     for (int i = 0; i < size - 1; i++) {
238         for (int j = i + 1; j < size; j++) {
239             double d = array[j + i * size] / array[i + i * size];
240 
241             array[j + i * size] = d;
242             for (int k = i + 1; k < size; k++) {
243                 array[j + k * size] -= d * array[i + k * size];
244             }
245         }
246     }
247 }
248 
solve(double * matrix,double * vector,int size)249 static void solve(double *matrix, double *vector, int size)
250 {
251     for (int i = 0; i < size - 1; i++) {
252         for (int j = i + 1; j < size; j++) {
253             double d = matrix[j + i * size];
254             vector[j] -= d * vector[i];
255         }
256     }
257 
258     vector[size - 1] /= matrix[size * size - 1];
259 
260     for (int i = size - 2; i >= 0; i--) {
261         double d = vector[i];
262         for (int j = i + 1; j < size; j++)
263             d -= matrix[i + j * size] * vector[j];
264         vector[i] = d / matrix[i + i * size];
265     }
266 }
267 
process_get_band_noise(AudioFFTDeNoiseContext * s,DeNoiseChannel * dnch,int band)268 static double process_get_band_noise(AudioFFTDeNoiseContext *s,
269                                      DeNoiseChannel *dnch,
270                                      int band)
271 {
272     double product, sum, f;
273     int i = 0;
274 
275     if (band < NB_PROFILE_BANDS)
276         return dnch->band_noise[band];
277 
278     for (int j = 0; j < SOLVE_SIZE; j++) {
279         sum = 0.0;
280         for (int k = 0; k < NB_PROFILE_BANDS; k++)
281             sum += s->matrix_b[i++] * dnch->band_noise[k];
282         s->vector_b[j] = sum;
283     }
284 
285     solve(s->matrix_a, s->vector_b, SOLVE_SIZE);
286     f = (0.5 * s->sample_rate) / s->band_centre[NB_PROFILE_BANDS-1];
287     f = 15.0 + log(f / 1.5) / log(1.5);
288     sum = 0.0;
289     product = 1.0;
290     for (int j = 0; j < SOLVE_SIZE; j++) {
291         sum += product * s->vector_b[j];
292         product *= f;
293     }
294 
295     return sum;
296 }
297 
limit_gain(double a,double b)298 static double limit_gain(double a, double b)
299 {
300     if (a > 1.0)
301         return (b * a - 1.0) / (b + a - 2.0);
302     if (a < 1.0)
303         return (b * a - 2.0 * a + 1.0) / (b - a);
304     return 1.0;
305 }
306 
spectral_flatness(AudioFFTDeNoiseContext * s,const double * const spectral,double floor,int len,double * rnum,double * rden)307 static void spectral_flatness(AudioFFTDeNoiseContext *s, const double *const spectral,
308                               double floor, int len, double *rnum, double *rden)
309 {
310     double num = 0., den = 0.;
311     int size = 0;
312 
313     for (int n = 0; n < len; n++) {
314         const double v = spectral[n];
315         if (v > floor) {
316             num += log(v);
317             den += v;
318             size++;
319         }
320     }
321 
322     size = FFMAX(size, 1);
323 
324     num /= size;
325     den /= size;
326 
327     num = exp(num);
328 
329     *rnum = num;
330     *rden = den;
331 }
332 
333 static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var);
334 
floor_offset(const double * S,int size,double mean)335 static double floor_offset(const double *S, int size, double mean)
336 {
337     double offset = 0.0;
338 
339     for (int n = 0; n < size; n++) {
340         const double p = S[n] - mean;
341 
342         offset = fmax(offset, fabs(p));
343     }
344 
345     return offset / mean;
346 }
347 
process_frame(AVFilterContext * ctx,AudioFFTDeNoiseContext * s,DeNoiseChannel * dnch,AVComplexFloat * fft_data,double * prior,double * prior_band_excit,int track_noise)348 static void process_frame(AVFilterContext *ctx,
349                           AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch,
350                           AVComplexFloat *fft_data,
351                           double *prior, double *prior_band_excit, int track_noise)
352 {
353     AVFilterLink *outlink = ctx->outputs[0];
354     const double *abs_var = dnch->abs_var;
355     const double ratio = outlink->frame_count_out ? s->ratio : 1.0;
356     const double rratio = 1. - ratio;
357     const int *bin2band = s->bin2band;
358     double *noisy_data = dnch->noisy_data;
359     double *band_excit = dnch->band_excit;
360     double *band_amt = dnch->band_amt;
361     double *smoothed_gain = dnch->smoothed_gain;
362     double *gain = dnch->gain;
363 
364     for (int i = 0; i < s->bin_count; i++) {
365         double sqr_new_gain, new_gain, power, mag, mag_abs_var, new_mag_abs_var;
366 
367         noisy_data[i] = mag = hypot(fft_data[i].re, fft_data[i].im);
368         power = mag * mag;
369         mag_abs_var = power / abs_var[i];
370         new_mag_abs_var = ratio * prior[i] + rratio * fmax(mag_abs_var - 1.0, 0.0);
371         new_gain = new_mag_abs_var / (1.0 + new_mag_abs_var);
372         sqr_new_gain = new_gain * new_gain;
373         prior[i] = mag_abs_var * sqr_new_gain;
374         dnch->clean_data[i] = power * sqr_new_gain;
375         gain[i] = new_gain;
376     }
377 
378     if (track_noise) {
379         double flatness, num, den;
380 
381         spectral_flatness(s, noisy_data, s->floor, s->bin_count, &num, &den);
382 
383         flatness = num / den;
384         if (flatness > 0.8) {
385             const double offset = s->floor_offset * floor_offset(noisy_data, s->bin_count, den);
386             const double new_floor = av_clipd(10.0 * log10(den) - 100.0 + offset, -90., -20.);
387 
388             dnch->noise_floor = 0.1 * new_floor + dnch->noise_floor * 0.9;
389             set_parameters(s, dnch, 1, 1);
390         }
391     }
392 
393     for (int i = 0; i < s->number_of_bands; i++) {
394         band_excit[i] = 0.0;
395         band_amt[i] = 0.0;
396     }
397 
398     for (int i = 0; i < s->bin_count; i++)
399         band_excit[bin2band[i]] += dnch->clean_data[i];
400 
401     for (int i = 0; i < s->number_of_bands; i++) {
402         band_excit[i] = fmax(band_excit[i],
403                              s->band_alpha[i] * band_excit[i] +
404                              s->band_beta[i] * prior_band_excit[i]);
405         prior_band_excit[i] = band_excit[i];
406     }
407 
408     for (int j = 0, i = 0; j < s->number_of_bands; j++) {
409         for (int k = 0; k < s->number_of_bands; k++) {
410             band_amt[j] += dnch->spread_function[i++] * band_excit[k];
411         }
412     }
413 
414     for (int i = 0; i < s->bin_count; i++)
415         dnch->amt[i] = band_amt[bin2band[i]];
416 
417     for (int i = 0; i < s->bin_count; i++) {
418         if (dnch->amt[i] > abs_var[i]) {
419             gain[i] = 1.0;
420         } else if (dnch->amt[i] > dnch->min_abs_var[i]) {
421             const double limit = sqrt(abs_var[i] / dnch->amt[i]);
422 
423             gain[i] = limit_gain(gain[i], limit);
424         } else {
425             gain[i] = limit_gain(gain[i], dnch->max_gain);
426         }
427     }
428 
429     memcpy(smoothed_gain, gain, s->bin_count * sizeof(*smoothed_gain));
430     if (s->gain_smooth > 0) {
431         const int r = s->gain_smooth;
432 
433         for (int i = r; i < s->bin_count - r; i++) {
434             const double gc = gain[i];
435             double num = 0., den = 0.;
436 
437             for (int j = -r; j <= r; j++) {
438                 const double g = gain[i + j];
439                 const double d = 1. - fabs(g - gc);
440 
441                 num += g * d;
442                 den += d;
443             }
444 
445             smoothed_gain[i] = num / den;
446         }
447     }
448 
449     for (int i = 0; i < s->bin_count; i++) {
450         const double new_gain = smoothed_gain[i];
451 
452         fft_data[i].re *= new_gain;
453         fft_data[i].im *= new_gain;
454     }
455 }
456 
freq2bark(double x)457 static double freq2bark(double x)
458 {
459     double d = x / 7500.0;
460 
461     return 13.0 * atan(7.6E-4 * x) + 3.5 * atan(d * d);
462 }
463 
get_band_centre(AudioFFTDeNoiseContext * s,int band)464 static int get_band_centre(AudioFFTDeNoiseContext *s, int band)
465 {
466     if (band == -1)
467         return lrint(s->band_centre[0] / 1.5);
468 
469     return s->band_centre[band];
470 }
471 
get_band_edge(AudioFFTDeNoiseContext * s,int band)472 static int get_band_edge(AudioFFTDeNoiseContext *s, int band)
473 {
474     int i;
475 
476     if (band == NB_PROFILE_BANDS) {
477         i = lrint(s->band_centre[NB_PROFILE_BANDS - 1] * 1.224745);
478     } else {
479         i = lrint(s->band_centre[band] / 1.224745);
480     }
481 
482     return FFMIN(i, s->sample_rate / 2);
483 }
484 
set_band_parameters(AudioFFTDeNoiseContext * s,DeNoiseChannel * dnch)485 static void set_band_parameters(AudioFFTDeNoiseContext *s,
486                                 DeNoiseChannel *dnch)
487 {
488     double band_noise, d2, d3, d4, d5;
489     int i = 0, j = 0, k = 0;
490 
491     d5 = 0.0;
492     band_noise = process_get_band_noise(s, dnch, 0);
493     for (int m = j; m < s->bin_count; m++) {
494         if (m == j) {
495             i = j;
496             d5 = band_noise;
497             if (k >= NB_PROFILE_BANDS) {
498                 j = s->bin_count;
499             } else {
500                 j = s->fft_length * get_band_centre(s, k) / s->sample_rate;
501             }
502             d2 = j - i;
503             band_noise = process_get_band_noise(s, dnch, k);
504             k++;
505         }
506         d3 = (j - m) / d2;
507         d4 = (m - i) / d2;
508         dnch->rel_var[m] = exp((d5 * d3 + band_noise * d4) * C);
509     }
510 
511     for (i = 0; i < NB_PROFILE_BANDS; i++)
512         dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);
513 }
514 
read_custom_noise(AudioFFTDeNoiseContext * s,int ch)515 static void read_custom_noise(AudioFFTDeNoiseContext *s, int ch)
516 {
517     DeNoiseChannel *dnch = &s->dnch[ch];
518     char *custom_noise_str, *p, *arg, *saveptr = NULL;
519     double band_noise[NB_PROFILE_BANDS] = { 0.f };
520     int ret;
521 
522     if (!s->band_noise_str)
523         return;
524 
525     custom_noise_str = p = av_strdup(s->band_noise_str);
526     if (!p)
527         return;
528 
529     for (int i = 0; i < NB_PROFILE_BANDS; i++) {
530         float noise;
531 
532         if (!(arg = av_strtok(p, "| ", &saveptr)))
533             break;
534 
535         p = NULL;
536 
537         ret = av_sscanf(arg, "%f", &noise);
538         if (ret != 1) {
539             av_log(s, AV_LOG_ERROR, "Custom band noise must be float.\n");
540             break;
541         }
542 
543         band_noise[i] = av_clipd(noise, -24., 24.);
544     }
545 
546     av_free(custom_noise_str);
547     memcpy(dnch->band_noise, band_noise, sizeof(band_noise));
548 }
549 
set_parameters(AudioFFTDeNoiseContext * s,DeNoiseChannel * dnch,int update_var,int update_auto_var)550 static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var)
551 {
552     if (dnch->last_noise_floor != dnch->noise_floor)
553         dnch->last_noise_floor = dnch->noise_floor;
554 
555     if (s->track_residual)
556         dnch->last_noise_floor = fmax(dnch->last_noise_floor, dnch->residual_floor);
557 
558     dnch->max_var = s->floor * exp((100.0 + dnch->last_noise_floor) * C);
559     if (update_auto_var) {
560         for (int i = 0; i < NB_PROFILE_BANDS; i++)
561             dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);
562     }
563 
564     if (s->track_residual) {
565         if (update_var || dnch->last_residual_floor != dnch->residual_floor) {
566             update_var = 1;
567             dnch->last_residual_floor = dnch->residual_floor;
568             dnch->last_noise_reduction = fmax(dnch->last_noise_floor - dnch->last_residual_floor + 100., 0);
569             dnch->max_gain = exp(dnch->last_noise_reduction * (0.5 * C));
570         }
571     } else if (update_var || dnch->noise_reduction != dnch->last_noise_reduction) {
572         update_var = 1;
573         dnch->last_noise_reduction = dnch->noise_reduction;
574         dnch->last_residual_floor = av_clipd(dnch->last_noise_floor - dnch->last_noise_reduction, -80, -20);
575         dnch->max_gain = exp(dnch->last_noise_reduction * (0.5 * C));
576     }
577 
578     dnch->gain_scale = 1.0 / (dnch->max_gain * dnch->max_gain);
579 
580     if (update_var) {
581         set_band_parameters(s, dnch);
582 
583         for (int i = 0; i < s->bin_count; i++) {
584             dnch->abs_var[i] = fmax(dnch->max_var * dnch->rel_var[i], 1.0);
585             dnch->min_abs_var[i] = dnch->gain_scale * dnch->abs_var[i];
586         }
587     }
588 }
589 
reduce_mean(double * band_noise)590 static void reduce_mean(double *band_noise)
591 {
592     double mean = 0.f;
593 
594     for (int i = 0; i < NB_PROFILE_BANDS; i++)
595         mean += band_noise[i];
596     mean /= NB_PROFILE_BANDS;
597 
598     for (int i = 0; i < NB_PROFILE_BANDS; i++)
599         band_noise[i] -= mean;
600 }
601 
config_input(AVFilterLink * inlink)602 static int config_input(AVFilterLink *inlink)
603 {
604     AVFilterContext *ctx = inlink->dst;
605     AudioFFTDeNoiseContext *s = ctx->priv;
606     double wscale, sar, sum, sdiv;
607     int i, j, k, m, n, ret;
608 
609     s->dnch = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->dnch));
610     if (!s->dnch)
611         return AVERROR(ENOMEM);
612 
613     s->channels = inlink->ch_layout.nb_channels;
614     s->sample_rate = inlink->sample_rate;
615     s->sample_advance = s->sample_rate / 80;
616     s->window_length = 3 * s->sample_advance;
617     s->fft_length2 = 1 << (32 - ff_clz(s->window_length));
618     s->fft_length = s->fft_length2;
619     s->buffer_length = s->fft_length * 2;
620     s->bin_count = s->fft_length2 / 2 + 1;
621 
622     s->band_centre[0] = 80;
623     for (i = 1; i < NB_PROFILE_BANDS; i++) {
624         s->band_centre[i] = lrint(1.5 * s->band_centre[i - 1] + 5.0);
625         if (s->band_centre[i] < 1000) {
626             s->band_centre[i] = 10 * (s->band_centre[i] / 10);
627         } else if (s->band_centre[i] < 5000) {
628             s->band_centre[i] = 50 * ((s->band_centre[i] + 20) / 50);
629         } else if (s->band_centre[i] < 15000) {
630             s->band_centre[i] = 100 * ((s->band_centre[i] + 45) / 100);
631         } else {
632             s->band_centre[i] = 1000 * ((s->band_centre[i] + 495) / 1000);
633         }
634     }
635 
636     for (j = 0; j < SOLVE_SIZE; j++) {
637         for (k = 0; k < SOLVE_SIZE; k++) {
638             s->matrix_a[j + k * SOLVE_SIZE] = 0.0;
639             for (m = 0; m < NB_PROFILE_BANDS; m++)
640                 s->matrix_a[j + k * SOLVE_SIZE] += pow(m, j + k);
641         }
642     }
643 
644     factor(s->matrix_a, SOLVE_SIZE);
645 
646     i = 0;
647     for (j = 0; j < SOLVE_SIZE; j++)
648         for (k = 0; k < NB_PROFILE_BANDS; k++)
649             s->matrix_b[i++] = pow(k, j);
650 
651     i = 0;
652     for (j = 0; j < NB_PROFILE_BANDS; j++)
653         for (k = 0; k < SOLVE_SIZE; k++)
654             s->matrix_c[i++] = pow(j, k);
655 
656     s->window = av_calloc(s->window_length, sizeof(*s->window));
657     s->bin2band = av_calloc(s->bin_count, sizeof(*s->bin2band));
658     if (!s->window || !s->bin2band)
659         return AVERROR(ENOMEM);
660 
661     sdiv = s->band_multiplier;
662     for (i = 0; i < s->bin_count; i++)
663         s->bin2band[i] = lrint(sdiv * freq2bark((0.5 * i * s->sample_rate) / s->fft_length2));
664 
665     s->number_of_bands = s->bin2band[s->bin_count - 1] + 1;
666 
667     s->band_alpha = av_calloc(s->number_of_bands, sizeof(*s->band_alpha));
668     s->band_beta = av_calloc(s->number_of_bands, sizeof(*s->band_beta));
669     if (!s->band_alpha || !s->band_beta)
670         return AVERROR(ENOMEM);
671 
672     for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
673         DeNoiseChannel *dnch = &s->dnch[ch];
674         float scale = 1.f;
675 
676         switch (s->noise_type) {
677         case WHITE_NOISE:
678             for (i = 0; i < NB_PROFILE_BANDS; i++)
679                 dnch->band_noise[i] = 0.;
680             break;
681         case VINYL_NOISE:
682             for (i = 0; i < NB_PROFILE_BANDS; i++)
683                 dnch->band_noise[i] = get_band_noise(s, i, 50.0, 500.5, 2125.0);
684             break;
685         case SHELLAC_NOISE:
686             for (i = 0; i < NB_PROFILE_BANDS; i++)
687                 dnch->band_noise[i] = get_band_noise(s, i, 1.0, 500.0, 1.0E10);
688             break;
689         case CUSTOM_NOISE:
690             read_custom_noise(s, ch);
691             break;
692         default:
693             return AVERROR_BUG;
694         }
695 
696         reduce_mean(dnch->band_noise);
697 
698         dnch->amt = av_calloc(s->bin_count, sizeof(*dnch->amt));
699         dnch->band_amt = av_calloc(s->number_of_bands, sizeof(*dnch->band_amt));
700         dnch->band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->band_excit));
701         dnch->gain = av_calloc(s->bin_count, sizeof(*dnch->gain));
702         dnch->smoothed_gain = av_calloc(s->bin_count, sizeof(*dnch->smoothed_gain));
703         dnch->prior = av_calloc(s->bin_count, sizeof(*dnch->prior));
704         dnch->prior_band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->prior_band_excit));
705         dnch->clean_data = av_calloc(s->bin_count, sizeof(*dnch->clean_data));
706         dnch->noisy_data = av_calloc(s->bin_count, sizeof(*dnch->noisy_data));
707         dnch->out_samples = av_calloc(s->buffer_length, sizeof(*dnch->out_samples));
708         dnch->abs_var = av_calloc(s->bin_count, sizeof(*dnch->abs_var));
709         dnch->rel_var = av_calloc(s->bin_count, sizeof(*dnch->rel_var));
710         dnch->min_abs_var = av_calloc(s->bin_count, sizeof(*dnch->min_abs_var));
711         dnch->fft_in = av_calloc(s->fft_length2, sizeof(*dnch->fft_in));
712         dnch->fft_out = av_calloc(s->fft_length2 + 1, sizeof(*dnch->fft_out));
713         ret = av_tx_init(&dnch->fft, &dnch->tx_fn, AV_TX_FLOAT_RDFT, 0, s->fft_length2, &scale, 0);
714         if (ret < 0)
715             return ret;
716         ret = av_tx_init(&dnch->ifft, &dnch->itx_fn, AV_TX_FLOAT_RDFT, 1, s->fft_length2, &scale, 0);
717         if (ret < 0)
718             return ret;
719         dnch->spread_function = av_calloc(s->number_of_bands * s->number_of_bands,
720                                           sizeof(*dnch->spread_function));
721 
722         if (!dnch->amt ||
723             !dnch->band_amt ||
724             !dnch->band_excit ||
725             !dnch->gain ||
726             !dnch->smoothed_gain ||
727             !dnch->prior ||
728             !dnch->prior_band_excit ||
729             !dnch->clean_data ||
730             !dnch->noisy_data ||
731             !dnch->out_samples ||
732             !dnch->fft_in ||
733             !dnch->fft_out ||
734             !dnch->abs_var ||
735             !dnch->rel_var ||
736             !dnch->min_abs_var ||
737             !dnch->spread_function ||
738             !dnch->fft ||
739             !dnch->ifft)
740             return AVERROR(ENOMEM);
741     }
742 
743     for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
744         DeNoiseChannel *dnch = &s->dnch[ch];
745         double *prior_band_excit = dnch->prior_band_excit;
746         double min, max;
747         double p1, p2;
748 
749         p1 = pow(0.1, 2.5 / sdiv);
750         p2 = pow(0.1, 1.0 / sdiv);
751         j = 0;
752         for (m = 0; m < s->number_of_bands; m++) {
753             for (n = 0; n < s->number_of_bands; n++) {
754                 if (n < m) {
755                     dnch->spread_function[j++] = pow(p2, m - n);
756                 } else if (n > m) {
757                     dnch->spread_function[j++] = pow(p1, n - m);
758                 } else {
759                     dnch->spread_function[j++] = 1.0;
760                 }
761             }
762         }
763 
764         for (m = 0; m < s->number_of_bands; m++) {
765             dnch->band_excit[m] = 0.0;
766             prior_band_excit[m] = 0.0;
767         }
768 
769         for (m = 0; m < s->bin_count; m++)
770             dnch->band_excit[s->bin2band[m]] += 1.0;
771 
772         j = 0;
773         for (m = 0; m < s->number_of_bands; m++) {
774             for (n = 0; n < s->number_of_bands; n++)
775                 prior_band_excit[m] += dnch->spread_function[j++] * dnch->band_excit[n];
776         }
777 
778         min = pow(0.1, 2.5);
779         max = pow(0.1, 1.0);
780         for (int i = 0; i < s->number_of_bands; i++) {
781             if (i < lrint(12.0 * sdiv)) {
782                 dnch->band_excit[i] = pow(0.1, 1.45 + 0.1 * i / sdiv);
783             } else {
784                 dnch->band_excit[i] = pow(0.1, 2.5 - 0.2 * (i / sdiv - 14.0));
785             }
786             dnch->band_excit[i] = av_clipd(dnch->band_excit[i], min, max);
787         }
788 
789         for (int i = 0; i < s->buffer_length; i++)
790             dnch->out_samples[i] = 0;
791 
792         j = 0;
793         for (int i = 0; i < s->number_of_bands; i++)
794             for (int k = 0; k < s->number_of_bands; k++)
795                 dnch->spread_function[j++] *= dnch->band_excit[i] / prior_band_excit[i];
796     }
797 
798     j = 0;
799     sar = s->sample_advance / s->sample_rate;
800     for (int i = 0; i < s->bin_count; i++) {
801         if ((i == s->fft_length2) || (s->bin2band[i] > j)) {
802             double d6 = (i - 1) * s->sample_rate / s->fft_length;
803             double d7 = fmin(0.008 + 2.2 / d6, 0.03);
804             s->band_alpha[j] = exp(-sar / d7);
805             s->band_beta[j] = 1.0 - s->band_alpha[j];
806             j = s->bin2band[i];
807         }
808     }
809 
810     s->winframe = ff_get_audio_buffer(inlink, s->window_length);
811     if (!s->winframe)
812         return AVERROR(ENOMEM);
813 
814     wscale = sqrt(8.0 / (9.0 * s->fft_length));
815     sum = 0.0;
816     for (int i = 0; i < s->window_length; i++) {
817         double d10 = sin(i * M_PI / s->window_length);
818         d10 *= wscale * d10;
819         s->window[i] = d10;
820         sum += d10 * d10;
821     }
822 
823     s->window_weight = 0.5 * sum;
824     s->floor = (1LL << 48) * exp(-23.025558369790467) * s->window_weight;
825     s->sample_floor = s->floor * exp(4.144600506562284);
826 
827     for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
828         DeNoiseChannel *dnch = &s->dnch[ch];
829 
830         dnch->noise_reduction = s->noise_reduction;
831         dnch->noise_floor     = s->noise_floor;
832         dnch->residual_floor  = s->residual_floor;
833 
834         set_parameters(s, dnch, 1, 1);
835     }
836 
837     s->noise_band_edge[0] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, 0) / s->sample_rate);
838     i = 0;
839     for (int j = 1; j < NB_PROFILE_BANDS + 1; j++) {
840         s->noise_band_edge[j] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, j) / s->sample_rate);
841         if (s->noise_band_edge[j] > lrint(1.1 * s->noise_band_edge[j - 1]))
842             i++;
843         s->noise_band_edge[NB_PROFILE_BANDS + 1] = i;
844     }
845     s->noise_band_count = s->noise_band_edge[NB_PROFILE_BANDS + 1];
846 
847     return 0;
848 }
849 
init_sample_noise(DeNoiseChannel * dnch)850 static void init_sample_noise(DeNoiseChannel *dnch)
851 {
852     for (int i = 0; i < NB_PROFILE_BANDS; i++) {
853         dnch->noise_band_norm[i] = 0.0;
854         dnch->noise_band_avr[i] = 0.0;
855         dnch->noise_band_avi[i] = 0.0;
856         dnch->noise_band_var[i] = 0.0;
857     }
858 }
859 
sample_noise_block(AudioFFTDeNoiseContext * s,DeNoiseChannel * dnch,AVFrame * in,int ch)860 static void sample_noise_block(AudioFFTDeNoiseContext *s,
861                                DeNoiseChannel *dnch,
862                                AVFrame *in, int ch)
863 {
864     float *src = (float *)in->extended_data[ch];
865     double mag2, var = 0.0, avr = 0.0, avi = 0.0;
866     int edge, j, k, n, edgemax;
867 
868     for (int i = 0; i < s->window_length; i++)
869         dnch->fft_in[i] = s->window[i] * src[i] * (1LL << 23);
870 
871     for (int i = s->window_length; i < s->fft_length2; i++)
872         dnch->fft_in[i] = 0.0;
873 
874     dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, sizeof(float));
875 
876     edge = s->noise_band_edge[0];
877     j = edge;
878     k = 0;
879     n = j;
880     edgemax = fmin(s->fft_length2, s->noise_band_edge[NB_PROFILE_BANDS]);
881     for (int i = j; i <= edgemax; i++) {
882         if ((i == j) && (i < edgemax)) {
883             if (j > edge) {
884                 dnch->noise_band_norm[k - 1] += j - edge;
885                 dnch->noise_band_avr[k - 1] += avr;
886                 dnch->noise_band_avi[k - 1] += avi;
887                 dnch->noise_band_var[k - 1] += var;
888             }
889             k++;
890             edge = j;
891             j = s->noise_band_edge[k];
892             if (k == NB_PROFILE_BANDS) {
893                 j++;
894             }
895             var = 0.0;
896             avr = 0.0;
897             avi = 0.0;
898         }
899         avr += dnch->fft_out[n].re;
900         avi += dnch->fft_out[n].im;
901         mag2 = dnch->fft_out[n].re * dnch->fft_out[n].re +
902                dnch->fft_out[n].im * dnch->fft_out[n].im;
903 
904         mag2 = fmax(mag2, s->sample_floor);
905 
906         var += mag2;
907         n++;
908     }
909 
910     dnch->noise_band_norm[k - 1] += j - edge;
911     dnch->noise_band_avr[k - 1] += avr;
912     dnch->noise_band_avi[k - 1] += avi;
913     dnch->noise_band_var[k - 1] += var;
914 }
915 
finish_sample_noise(AudioFFTDeNoiseContext * s,DeNoiseChannel * dnch,double * sample_noise)916 static void finish_sample_noise(AudioFFTDeNoiseContext *s,
917                                 DeNoiseChannel *dnch,
918                                 double *sample_noise)
919 {
920     for (int i = 0; i < s->noise_band_count; i++) {
921         dnch->noise_band_avr[i] /= dnch->noise_band_norm[i];
922         dnch->noise_band_avi[i] /= dnch->noise_band_norm[i];
923         dnch->noise_band_var[i] /= dnch->noise_band_norm[i];
924         dnch->noise_band_var[i] -= dnch->noise_band_avr[i] * dnch->noise_band_avr[i] +
925                                    dnch->noise_band_avi[i] * dnch->noise_band_avi[i];
926         dnch->noise_band_auto_var[i] = dnch->noise_band_var[i];
927         sample_noise[i] = 10.0 * log10(dnch->noise_band_var[i] / s->floor) - 100.0;
928     }
929     if (s->noise_band_count < NB_PROFILE_BANDS) {
930         for (int i = s->noise_band_count; i < NB_PROFILE_BANDS; i++)
931             sample_noise[i] = sample_noise[i - 1];
932     }
933 }
934 
set_noise_profile(AudioFFTDeNoiseContext * s,DeNoiseChannel * dnch,double * sample_noise)935 static void set_noise_profile(AudioFFTDeNoiseContext *s,
936                               DeNoiseChannel *dnch,
937                               double *sample_noise)
938 {
939     double new_band_noise[NB_PROFILE_BANDS];
940     double temp[NB_PROFILE_BANDS];
941     double sum = 0.0;
942 
943     for (int m = 0; m < NB_PROFILE_BANDS; m++)
944         temp[m] = sample_noise[m];
945 
946     for (int m = 0, i = 0; m < SOLVE_SIZE; m++) {
947         sum = 0.0;
948         for (int n = 0; n < NB_PROFILE_BANDS; n++)
949             sum += s->matrix_b[i++] * temp[n];
950         s->vector_b[m] = sum;
951     }
952     solve(s->matrix_a, s->vector_b, SOLVE_SIZE);
953     for (int m = 0, i = 0; m < NB_PROFILE_BANDS; m++) {
954         sum = 0.0;
955         for (int n = 0; n < SOLVE_SIZE; n++)
956             sum += s->matrix_c[i++] * s->vector_b[n];
957         temp[m] = sum;
958     }
959 
960     reduce_mean(temp);
961 
962     av_log(s, AV_LOG_INFO, "bn=");
963     for (int m = 0; m < NB_PROFILE_BANDS; m++) {
964         new_band_noise[m] = temp[m];
965         new_band_noise[m] = av_clipd(new_band_noise[m], -24.0, 24.0);
966         av_log(s, AV_LOG_INFO, "%f ", new_band_noise[m]);
967     }
968     av_log(s, AV_LOG_INFO, "\n");
969     memcpy(dnch->band_noise, new_band_noise, sizeof(new_band_noise));
970 }
971 
filter_channel(AVFilterContext * ctx,void * arg,int jobnr,int nb_jobs)972 static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
973 {
974     AudioFFTDeNoiseContext *s = ctx->priv;
975     AVFrame *in = arg;
976     const int start = (in->ch_layout.nb_channels * jobnr) / nb_jobs;
977     const int end = (in->ch_layout.nb_channels * (jobnr+1)) / nb_jobs;
978     const int window_length = s->window_length;
979     const double *window = s->window;
980 
981     for (int ch = start; ch < end; ch++) {
982         DeNoiseChannel *dnch = &s->dnch[ch];
983         const float *src = (const float *)in->extended_data[ch];
984         double *dst = dnch->out_samples;
985         float *fft_in = dnch->fft_in;
986 
987         for (int m = 0; m < window_length; m++)
988             fft_in[m] = window[m] * src[m] * (1LL << 23);
989 
990         for (int m = window_length; m < s->fft_length2; m++)
991             fft_in[m] = 0;
992 
993         dnch->tx_fn(dnch->fft, dnch->fft_out, fft_in, sizeof(float));
994 
995         process_frame(ctx, s, dnch, dnch->fft_out,
996                       dnch->prior,
997                       dnch->prior_band_excit,
998                       s->track_noise);
999 
1000         dnch->itx_fn(dnch->ifft, fft_in, dnch->fft_out, sizeof(float));
1001 
1002         for (int m = 0; m < window_length; m++)
1003             dst[m] += s->window[m] * fft_in[m] / (1LL << 23);
1004     }
1005 
1006     return 0;
1007 }
1008 
output_frame(AVFilterLink * inlink,AVFrame * in)1009 static int output_frame(AVFilterLink *inlink, AVFrame *in)
1010 {
1011     AVFilterContext *ctx = inlink->dst;
1012     AVFilterLink *outlink = ctx->outputs[0];
1013     AudioFFTDeNoiseContext *s = ctx->priv;
1014     const int output_mode = ctx->is_disabled ? IN_MODE : s->output_mode;
1015     const int offset = s->window_length - s->sample_advance;
1016     AVFrame *out;
1017 
1018     for (int ch = 0; ch < s->channels; ch++) {
1019         float *src = (float *)s->winframe->extended_data[ch];
1020 
1021         memmove(src, &src[s->sample_advance], offset * sizeof(float));
1022         memcpy(&src[offset], in->extended_data[ch], in->nb_samples * sizeof(float));
1023         memset(&src[offset + in->nb_samples], 0, (s->sample_advance - in->nb_samples) * sizeof(float));
1024     }
1025 
1026     if (s->track_noise) {
1027         double average = 0.0, min = DBL_MAX, max = -DBL_MAX;
1028 
1029         for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1030             DeNoiseChannel *dnch = &s->dnch[ch];
1031 
1032             average += dnch->noise_floor;
1033             max = fmax(max, dnch->noise_floor);
1034             min = fmin(min, dnch->noise_floor);
1035         }
1036 
1037         average /= inlink->ch_layout.nb_channels;
1038 
1039         for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1040             DeNoiseChannel *dnch = &s->dnch[ch];
1041 
1042             switch (s->noise_floor_link) {
1043             case MIN_LINK:     dnch->noise_floor = min;     break;
1044             case MAX_LINK:     dnch->noise_floor = max;     break;
1045             case AVERAGE_LINK: dnch->noise_floor = average; break;
1046             case NONE_LINK:
1047             default:
1048                 break;
1049             }
1050 
1051             if (dnch->noise_floor != dnch->last_noise_floor)
1052                 set_parameters(s, dnch, 1, 0);
1053         }
1054     }
1055 
1056     if (s->sample_noise_mode == SAMPLE_START) {
1057         for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1058             DeNoiseChannel *dnch = &s->dnch[ch];
1059 
1060             init_sample_noise(dnch);
1061         }
1062         s->sample_noise_mode = SAMPLE_NONE;
1063         s->sample_noise = 1;
1064         s->sample_noise_blocks = 0;
1065     }
1066 
1067     if (s->sample_noise) {
1068         for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1069             DeNoiseChannel *dnch = &s->dnch[ch];
1070 
1071             sample_noise_block(s, dnch, s->winframe, ch);
1072         }
1073         s->sample_noise_blocks++;
1074     }
1075 
1076     if (s->sample_noise_mode == SAMPLE_STOP) {
1077         for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1078             DeNoiseChannel *dnch = &s->dnch[ch];
1079             double sample_noise[NB_PROFILE_BANDS];
1080 
1081             if (s->sample_noise_blocks <= 0)
1082                 break;
1083             finish_sample_noise(s, dnch, sample_noise);
1084             set_noise_profile(s, dnch, sample_noise);
1085             set_parameters(s, dnch, 1, 1);
1086         }
1087         s->sample_noise = 0;
1088         s->sample_noise_blocks = 0;
1089         s->sample_noise_mode = SAMPLE_NONE;
1090     }
1091 
1092     ff_filter_execute(ctx, filter_channel, s->winframe, NULL,
1093                       FFMIN(outlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx)));
1094 
1095     if (av_frame_is_writable(in)) {
1096         out = in;
1097     } else {
1098         out = ff_get_audio_buffer(outlink, in->nb_samples);
1099         if (!out) {
1100             av_frame_free(&in);
1101             return AVERROR(ENOMEM);
1102         }
1103 
1104         out->pts = in->pts;
1105     }
1106 
1107     for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1108         DeNoiseChannel *dnch = &s->dnch[ch];
1109         double *src = dnch->out_samples;
1110         const float *orig = (const float *)s->winframe->extended_data[ch];
1111         float *dst = (float *)out->extended_data[ch];
1112 
1113         switch (output_mode) {
1114         case IN_MODE:
1115             for (int m = 0; m < out->nb_samples; m++)
1116                 dst[m] = orig[m];
1117             break;
1118         case OUT_MODE:
1119             for (int m = 0; m < out->nb_samples; m++)
1120                 dst[m] = src[m];
1121             break;
1122         case NOISE_MODE:
1123             for (int m = 0; m < out->nb_samples; m++)
1124                 dst[m] = orig[m] - src[m];
1125             break;
1126         default:
1127             if (in != out)
1128                 av_frame_free(&in);
1129             av_frame_free(&out);
1130             return AVERROR_BUG;
1131         }
1132         memmove(src, src + s->sample_advance, (s->window_length - s->sample_advance) * sizeof(*src));
1133         memset(src + (s->window_length - s->sample_advance), 0, s->sample_advance * sizeof(*src));
1134     }
1135 
1136     if (out != in)
1137         av_frame_free(&in);
1138     return ff_filter_frame(outlink, out);
1139 }
1140 
activate(AVFilterContext * ctx)1141 static int activate(AVFilterContext *ctx)
1142 {
1143     AVFilterLink *inlink = ctx->inputs[0];
1144     AVFilterLink *outlink = ctx->outputs[0];
1145     AudioFFTDeNoiseContext *s = ctx->priv;
1146     AVFrame *in = NULL;
1147     int ret;
1148 
1149     FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
1150 
1151     ret = ff_inlink_consume_samples(inlink, s->sample_advance, s->sample_advance, &in);
1152     if (ret < 0)
1153         return ret;
1154     if (ret > 0)
1155         return output_frame(inlink, in);
1156 
1157     if (ff_inlink_queued_samples(inlink) >= s->sample_advance) {
1158         ff_filter_set_ready(ctx, 10);
1159         return 0;
1160     }
1161 
1162     FF_FILTER_FORWARD_STATUS(inlink, outlink);
1163     FF_FILTER_FORWARD_WANTED(outlink, inlink);
1164 
1165     return FFERROR_NOT_READY;
1166 }
1167 
uninit(AVFilterContext * ctx)1168 static av_cold void uninit(AVFilterContext *ctx)
1169 {
1170     AudioFFTDeNoiseContext *s = ctx->priv;
1171 
1172     av_freep(&s->window);
1173     av_freep(&s->bin2band);
1174     av_freep(&s->band_alpha);
1175     av_freep(&s->band_beta);
1176     av_frame_free(&s->winframe);
1177 
1178     if (s->dnch) {
1179         for (int ch = 0; ch < s->channels; ch++) {
1180             DeNoiseChannel *dnch = &s->dnch[ch];
1181             av_freep(&dnch->amt);
1182             av_freep(&dnch->band_amt);
1183             av_freep(&dnch->band_excit);
1184             av_freep(&dnch->gain);
1185             av_freep(&dnch->smoothed_gain);
1186             av_freep(&dnch->prior);
1187             av_freep(&dnch->prior_band_excit);
1188             av_freep(&dnch->clean_data);
1189             av_freep(&dnch->noisy_data);
1190             av_freep(&dnch->out_samples);
1191             av_freep(&dnch->spread_function);
1192             av_freep(&dnch->abs_var);
1193             av_freep(&dnch->rel_var);
1194             av_freep(&dnch->min_abs_var);
1195             av_freep(&dnch->fft_in);
1196             av_freep(&dnch->fft_out);
1197             av_tx_uninit(&dnch->fft);
1198             av_tx_uninit(&dnch->ifft);
1199         }
1200         av_freep(&s->dnch);
1201     }
1202 }
1203 
process_command(AVFilterContext * ctx,const char * cmd,const char * args,char * res,int res_len,int flags)1204 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
1205                            char *res, int res_len, int flags)
1206 {
1207     AudioFFTDeNoiseContext *s = ctx->priv;
1208     int ret = 0;
1209 
1210     ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
1211     if (ret < 0)
1212         return ret;
1213 
1214     if (!strcmp(cmd, "sample_noise") || !strcmp(cmd, "sn"))
1215         return 0;
1216 
1217     for (int ch = 0; ch < s->channels; ch++) {
1218         DeNoiseChannel *dnch = &s->dnch[ch];
1219 
1220         dnch->noise_reduction = s->noise_reduction;
1221         dnch->noise_floor     = s->noise_floor;
1222         dnch->residual_floor  = s->residual_floor;
1223 
1224         set_parameters(s, dnch, 1, 1);
1225     }
1226 
1227     return 0;
1228 }
1229 
1230 static const AVFilterPad inputs[] = {
1231     {
1232         .name         = "default",
1233         .type         = AVMEDIA_TYPE_AUDIO,
1234         .config_props = config_input,
1235     },
1236 };
1237 
1238 static const AVFilterPad outputs[] = {
1239     {
1240         .name = "default",
1241         .type = AVMEDIA_TYPE_AUDIO,
1242     },
1243 };
1244 
1245 const AVFilter ff_af_afftdn = {
1246     .name            = "afftdn",
1247     .description     = NULL_IF_CONFIG_SMALL("Denoise audio samples using FFT."),
1248     .priv_size       = sizeof(AudioFFTDeNoiseContext),
1249     .priv_class      = &afftdn_class,
1250     .activate        = activate,
1251     .uninit          = uninit,
1252     FILTER_INPUTS(inputs),
1253     FILTER_OUTPUTS(outputs),
1254     FILTER_SINGLE_SAMPLEFMT(AV_SAMPLE_FMT_FLTP),
1255     .process_command = process_command,
1256     .flags           = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
1257                        AVFILTER_FLAG_SLICE_THREADS,
1258 };
1259