1 /*
2 * Copyright (c) 2018 The FFmpeg Project
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <float.h>
22
23 #include "libavutil/avstring.h"
24 #include "libavutil/channel_layout.h"
25 #include "libavutil/opt.h"
26 #include "libavutil/tx.h"
27 #include "avfilter.h"
28 #include "audio.h"
29 #include "formats.h"
30 #include "filters.h"
31
32 #define C (M_LN10 * 0.1)
33 #define SOLVE_SIZE (5)
34 #define NB_PROFILE_BANDS (15)
35
36 enum SampleNoiseModes {
37 SAMPLE_NONE,
38 SAMPLE_START,
39 SAMPLE_STOP,
40 NB_SAMPLEMODES
41 };
42
43 enum OutModes {
44 IN_MODE,
45 OUT_MODE,
46 NOISE_MODE,
47 NB_MODES
48 };
49
50 enum NoiseLinkType {
51 NONE_LINK,
52 MIN_LINK,
53 MAX_LINK,
54 AVERAGE_LINK,
55 NB_LINK
56 };
57
58 enum NoiseType {
59 WHITE_NOISE,
60 VINYL_NOISE,
61 SHELLAC_NOISE,
62 CUSTOM_NOISE,
63 NB_NOISE
64 };
65
66 typedef struct DeNoiseChannel {
67 double band_noise[NB_PROFILE_BANDS];
68 double noise_band_auto_var[NB_PROFILE_BANDS];
69 double noise_band_sample[NB_PROFILE_BANDS];
70 double *amt;
71 double *band_amt;
72 double *band_excit;
73 double *gain;
74 double *smoothed_gain;
75 double *prior;
76 double *prior_band_excit;
77 double *clean_data;
78 double *noisy_data;
79 double *out_samples;
80 double *spread_function;
81 double *abs_var;
82 double *rel_var;
83 double *min_abs_var;
84 float *fft_in;
85 AVComplexFloat *fft_out;
86 AVTXContext *fft, *ifft;
87 av_tx_fn tx_fn, itx_fn;
88
89 double noise_band_norm[NB_PROFILE_BANDS];
90 double noise_band_avr[NB_PROFILE_BANDS];
91 double noise_band_avi[NB_PROFILE_BANDS];
92 double noise_band_var[NB_PROFILE_BANDS];
93
94 double noise_reduction;
95 double last_noise_reduction;
96 double noise_floor;
97 double last_noise_floor;
98 double residual_floor;
99 double last_residual_floor;
100 double max_gain;
101 double max_var;
102 double gain_scale;
103 } DeNoiseChannel;
104
105 typedef struct AudioFFTDeNoiseContext {
106 const AVClass *class;
107
108 float noise_reduction;
109 float noise_floor;
110 int noise_type;
111 char *band_noise_str;
112 float residual_floor;
113 int track_noise;
114 int track_residual;
115 int output_mode;
116 int noise_floor_link;
117 float ratio;
118 int gain_smooth;
119 float band_multiplier;
120 float floor_offset;
121
122 int channels;
123 int sample_noise;
124 int sample_noise_blocks;
125 int sample_noise_mode;
126 float sample_rate;
127 int buffer_length;
128 int fft_length;
129 int fft_length2;
130 int bin_count;
131 int window_length;
132 int sample_advance;
133 int number_of_bands;
134
135 int band_centre[NB_PROFILE_BANDS];
136
137 int *bin2band;
138 double *window;
139 double *band_alpha;
140 double *band_beta;
141
142 DeNoiseChannel *dnch;
143
144 AVFrame *winframe;
145
146 double window_weight;
147 double floor;
148 double sample_floor;
149
150 int noise_band_edge[NB_PROFILE_BANDS + 2];
151 int noise_band_count;
152 double matrix_a[SOLVE_SIZE * SOLVE_SIZE];
153 double vector_b[SOLVE_SIZE];
154 double matrix_b[SOLVE_SIZE * NB_PROFILE_BANDS];
155 double matrix_c[SOLVE_SIZE * NB_PROFILE_BANDS];
156 } AudioFFTDeNoiseContext;
157
158 #define OFFSET(x) offsetof(AudioFFTDeNoiseContext, x)
159 #define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
160 #define AFR AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
161
162 static const AVOption afftdn_options[] = {
163 { "noise_reduction", "set the noise reduction",OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT,{.dbl = 12}, .01, 97, AFR },
164 { "nr", "set the noise reduction", OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT, {.dbl = 12}, .01, 97, AFR },
165 { "noise_floor", "set the noise floor",OFFSET(noise_floor), AV_OPT_TYPE_FLOAT, {.dbl =-50}, -80,-20, AFR },
166 { "nf", "set the noise floor", OFFSET(noise_floor), AV_OPT_TYPE_FLOAT, {.dbl =-50}, -80,-20, AFR },
167 { "noise_type", "set the noise type", OFFSET(noise_type), AV_OPT_TYPE_INT, {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, "type" },
168 { "nt", "set the noise type", OFFSET(noise_type), AV_OPT_TYPE_INT, {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, "type" },
169 { "white", "white noise", 0, AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE}, 0, 0, AF, "type" },
170 { "w", "white noise", 0, AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE}, 0, 0, AF, "type" },
171 { "vinyl", "vinyl noise", 0, AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE}, 0, 0, AF, "type" },
172 { "v", "vinyl noise", 0, AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE}, 0, 0, AF, "type" },
173 { "shellac", "shellac noise", 0, AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0, 0, AF, "type" },
174 { "s", "shellac noise", 0, AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0, 0, AF, "type" },
175 { "custom", "custom noise", 0, AV_OPT_TYPE_CONST, {.i64 = CUSTOM_NOISE}, 0, 0, AF, "type" },
176 { "c", "custom noise", 0, AV_OPT_TYPE_CONST, {.i64 = CUSTOM_NOISE}, 0, 0, AF, "type" },
177 { "band_noise", "set the custom bands noise", OFFSET(band_noise_str), AV_OPT_TYPE_STRING, {.str = 0}, 0, 0, AF },
178 { "bn", "set the custom bands noise", OFFSET(band_noise_str), AV_OPT_TYPE_STRING, {.str = 0}, 0, 0, AF },
179 { "residual_floor", "set the residual floor",OFFSET(residual_floor), AV_OPT_TYPE_FLOAT, {.dbl =-38}, -80,-20, AFR },
180 { "rf", "set the residual floor", OFFSET(residual_floor), AV_OPT_TYPE_FLOAT, {.dbl =-38}, -80,-20, AFR },
181 { "track_noise", "track noise", OFFSET(track_noise), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
182 { "tn", "track noise", OFFSET(track_noise), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
183 { "track_residual", "track residual", OFFSET(track_residual), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
184 { "tr", "track residual", OFFSET(track_residual), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
185 { "output_mode", "set output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, AFR, "mode" },
186 { "om", "set output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, AFR, "mode" },
187 { "input", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, "mode" },
188 { "i", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, "mode" },
189 { "output", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, "mode" },
190 { "o", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, "mode" },
191 { "noise", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE}, 0, 0, AFR, "mode" },
192 { "n", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE}, 0, 0, AFR, "mode" },
193 { "adaptivity", "set adaptivity factor",OFFSET(ratio), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1, AFR },
194 { "ad", "set adaptivity factor",OFFSET(ratio), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1, AFR },
195 { "floor_offset", "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, -2, 2, AFR },
196 { "fo", "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, -2, 2, AFR },
197 { "noise_link", "set the noise floor link",OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK}, 0, NB_LINK-1, AFR, "link" },
198 { "nl", "set the noise floor link", OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK}, 0, NB_LINK-1, AFR, "link" },
199 { "none", "none", 0, AV_OPT_TYPE_CONST, {.i64 = NONE_LINK}, 0, 0, AFR, "link" },
200 { "min", "min", 0, AV_OPT_TYPE_CONST, {.i64 = MIN_LINK}, 0, 0, AFR, "link" },
201 { "max", "max", 0, AV_OPT_TYPE_CONST, {.i64 = MAX_LINK}, 0, 0, AFR, "link" },
202 { "average", "average", 0, AV_OPT_TYPE_CONST, {.i64 = AVERAGE_LINK}, 0, 0, AFR, "link" },
203 { "band_multiplier", "set band multiplier",OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25}, 0.2,5, AF },
204 { "bm", "set band multiplier", OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25}, 0.2,5, AF },
205 { "sample_noise", "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE}, 0, NB_SAMPLEMODES-1, AFR, "sample" },
206 { "sn", "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE}, 0, NB_SAMPLEMODES-1, AFR, "sample" },
207 { "none", "none", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_NONE}, 0, 0, AFR, "sample" },
208 { "start", "start", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_START}, 0, 0, AFR, "sample" },
209 { "begin", "start", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_START}, 0, 0, AFR, "sample" },
210 { "stop", "stop", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_STOP}, 0, 0, AFR, "sample" },
211 { "end", "stop", 0, AV_OPT_TYPE_CONST, {.i64 = SAMPLE_STOP}, 0, 0, AFR, "sample" },
212 { "gain_smooth", "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 50, AFR },
213 { "gs", "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 50, AFR },
214 { NULL }
215 };
216
217 AVFILTER_DEFINE_CLASS(afftdn);
218
get_band_noise(AudioFFTDeNoiseContext * s,int band,double a,double b,double c)219 static double get_band_noise(AudioFFTDeNoiseContext *s,
220 int band, double a,
221 double b, double c)
222 {
223 double d1, d2, d3;
224
225 d1 = a / s->band_centre[band];
226 d1 = 10.0 * log(1.0 + d1 * d1) / M_LN10;
227 d2 = b / s->band_centre[band];
228 d2 = 10.0 * log(1.0 + d2 * d2) / M_LN10;
229 d3 = s->band_centre[band] / c;
230 d3 = 10.0 * log(1.0 + d3 * d3) / M_LN10;
231
232 return -d1 + d2 - d3;
233 }
234
factor(double * array,int size)235 static void factor(double *array, int size)
236 {
237 for (int i = 0; i < size - 1; i++) {
238 for (int j = i + 1; j < size; j++) {
239 double d = array[j + i * size] / array[i + i * size];
240
241 array[j + i * size] = d;
242 for (int k = i + 1; k < size; k++) {
243 array[j + k * size] -= d * array[i + k * size];
244 }
245 }
246 }
247 }
248
solve(double * matrix,double * vector,int size)249 static void solve(double *matrix, double *vector, int size)
250 {
251 for (int i = 0; i < size - 1; i++) {
252 for (int j = i + 1; j < size; j++) {
253 double d = matrix[j + i * size];
254 vector[j] -= d * vector[i];
255 }
256 }
257
258 vector[size - 1] /= matrix[size * size - 1];
259
260 for (int i = size - 2; i >= 0; i--) {
261 double d = vector[i];
262 for (int j = i + 1; j < size; j++)
263 d -= matrix[i + j * size] * vector[j];
264 vector[i] = d / matrix[i + i * size];
265 }
266 }
267
process_get_band_noise(AudioFFTDeNoiseContext * s,DeNoiseChannel * dnch,int band)268 static double process_get_band_noise(AudioFFTDeNoiseContext *s,
269 DeNoiseChannel *dnch,
270 int band)
271 {
272 double product, sum, f;
273 int i = 0;
274
275 if (band < NB_PROFILE_BANDS)
276 return dnch->band_noise[band];
277
278 for (int j = 0; j < SOLVE_SIZE; j++) {
279 sum = 0.0;
280 for (int k = 0; k < NB_PROFILE_BANDS; k++)
281 sum += s->matrix_b[i++] * dnch->band_noise[k];
282 s->vector_b[j] = sum;
283 }
284
285 solve(s->matrix_a, s->vector_b, SOLVE_SIZE);
286 f = (0.5 * s->sample_rate) / s->band_centre[NB_PROFILE_BANDS-1];
287 f = 15.0 + log(f / 1.5) / log(1.5);
288 sum = 0.0;
289 product = 1.0;
290 for (int j = 0; j < SOLVE_SIZE; j++) {
291 sum += product * s->vector_b[j];
292 product *= f;
293 }
294
295 return sum;
296 }
297
limit_gain(double a,double b)298 static double limit_gain(double a, double b)
299 {
300 if (a > 1.0)
301 return (b * a - 1.0) / (b + a - 2.0);
302 if (a < 1.0)
303 return (b * a - 2.0 * a + 1.0) / (b - a);
304 return 1.0;
305 }
306
spectral_flatness(AudioFFTDeNoiseContext * s,const double * const spectral,double floor,int len,double * rnum,double * rden)307 static void spectral_flatness(AudioFFTDeNoiseContext *s, const double *const spectral,
308 double floor, int len, double *rnum, double *rden)
309 {
310 double num = 0., den = 0.;
311 int size = 0;
312
313 for (int n = 0; n < len; n++) {
314 const double v = spectral[n];
315 if (v > floor) {
316 num += log(v);
317 den += v;
318 size++;
319 }
320 }
321
322 size = FFMAX(size, 1);
323
324 num /= size;
325 den /= size;
326
327 num = exp(num);
328
329 *rnum = num;
330 *rden = den;
331 }
332
333 static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var);
334
floor_offset(const double * S,int size,double mean)335 static double floor_offset(const double *S, int size, double mean)
336 {
337 double offset = 0.0;
338
339 for (int n = 0; n < size; n++) {
340 const double p = S[n] - mean;
341
342 offset = fmax(offset, fabs(p));
343 }
344
345 return offset / mean;
346 }
347
process_frame(AVFilterContext * ctx,AudioFFTDeNoiseContext * s,DeNoiseChannel * dnch,AVComplexFloat * fft_data,double * prior,double * prior_band_excit,int track_noise)348 static void process_frame(AVFilterContext *ctx,
349 AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch,
350 AVComplexFloat *fft_data,
351 double *prior, double *prior_band_excit, int track_noise)
352 {
353 AVFilterLink *outlink = ctx->outputs[0];
354 const double *abs_var = dnch->abs_var;
355 const double ratio = outlink->frame_count_out ? s->ratio : 1.0;
356 const double rratio = 1. - ratio;
357 const int *bin2band = s->bin2band;
358 double *noisy_data = dnch->noisy_data;
359 double *band_excit = dnch->band_excit;
360 double *band_amt = dnch->band_amt;
361 double *smoothed_gain = dnch->smoothed_gain;
362 double *gain = dnch->gain;
363
364 for (int i = 0; i < s->bin_count; i++) {
365 double sqr_new_gain, new_gain, power, mag, mag_abs_var, new_mag_abs_var;
366
367 noisy_data[i] = mag = hypot(fft_data[i].re, fft_data[i].im);
368 power = mag * mag;
369 mag_abs_var = power / abs_var[i];
370 new_mag_abs_var = ratio * prior[i] + rratio * fmax(mag_abs_var - 1.0, 0.0);
371 new_gain = new_mag_abs_var / (1.0 + new_mag_abs_var);
372 sqr_new_gain = new_gain * new_gain;
373 prior[i] = mag_abs_var * sqr_new_gain;
374 dnch->clean_data[i] = power * sqr_new_gain;
375 gain[i] = new_gain;
376 }
377
378 if (track_noise) {
379 double flatness, num, den;
380
381 spectral_flatness(s, noisy_data, s->floor, s->bin_count, &num, &den);
382
383 flatness = num / den;
384 if (flatness > 0.8) {
385 const double offset = s->floor_offset * floor_offset(noisy_data, s->bin_count, den);
386 const double new_floor = av_clipd(10.0 * log10(den) - 100.0 + offset, -90., -20.);
387
388 dnch->noise_floor = 0.1 * new_floor + dnch->noise_floor * 0.9;
389 set_parameters(s, dnch, 1, 1);
390 }
391 }
392
393 for (int i = 0; i < s->number_of_bands; i++) {
394 band_excit[i] = 0.0;
395 band_amt[i] = 0.0;
396 }
397
398 for (int i = 0; i < s->bin_count; i++)
399 band_excit[bin2band[i]] += dnch->clean_data[i];
400
401 for (int i = 0; i < s->number_of_bands; i++) {
402 band_excit[i] = fmax(band_excit[i],
403 s->band_alpha[i] * band_excit[i] +
404 s->band_beta[i] * prior_band_excit[i]);
405 prior_band_excit[i] = band_excit[i];
406 }
407
408 for (int j = 0, i = 0; j < s->number_of_bands; j++) {
409 for (int k = 0; k < s->number_of_bands; k++) {
410 band_amt[j] += dnch->spread_function[i++] * band_excit[k];
411 }
412 }
413
414 for (int i = 0; i < s->bin_count; i++)
415 dnch->amt[i] = band_amt[bin2band[i]];
416
417 for (int i = 0; i < s->bin_count; i++) {
418 if (dnch->amt[i] > abs_var[i]) {
419 gain[i] = 1.0;
420 } else if (dnch->amt[i] > dnch->min_abs_var[i]) {
421 const double limit = sqrt(abs_var[i] / dnch->amt[i]);
422
423 gain[i] = limit_gain(gain[i], limit);
424 } else {
425 gain[i] = limit_gain(gain[i], dnch->max_gain);
426 }
427 }
428
429 memcpy(smoothed_gain, gain, s->bin_count * sizeof(*smoothed_gain));
430 if (s->gain_smooth > 0) {
431 const int r = s->gain_smooth;
432
433 for (int i = r; i < s->bin_count - r; i++) {
434 const double gc = gain[i];
435 double num = 0., den = 0.;
436
437 for (int j = -r; j <= r; j++) {
438 const double g = gain[i + j];
439 const double d = 1. - fabs(g - gc);
440
441 num += g * d;
442 den += d;
443 }
444
445 smoothed_gain[i] = num / den;
446 }
447 }
448
449 for (int i = 0; i < s->bin_count; i++) {
450 const double new_gain = smoothed_gain[i];
451
452 fft_data[i].re *= new_gain;
453 fft_data[i].im *= new_gain;
454 }
455 }
456
freq2bark(double x)457 static double freq2bark(double x)
458 {
459 double d = x / 7500.0;
460
461 return 13.0 * atan(7.6E-4 * x) + 3.5 * atan(d * d);
462 }
463
get_band_centre(AudioFFTDeNoiseContext * s,int band)464 static int get_band_centre(AudioFFTDeNoiseContext *s, int band)
465 {
466 if (band == -1)
467 return lrint(s->band_centre[0] / 1.5);
468
469 return s->band_centre[band];
470 }
471
get_band_edge(AudioFFTDeNoiseContext * s,int band)472 static int get_band_edge(AudioFFTDeNoiseContext *s, int band)
473 {
474 int i;
475
476 if (band == NB_PROFILE_BANDS) {
477 i = lrint(s->band_centre[NB_PROFILE_BANDS - 1] * 1.224745);
478 } else {
479 i = lrint(s->band_centre[band] / 1.224745);
480 }
481
482 return FFMIN(i, s->sample_rate / 2);
483 }
484
set_band_parameters(AudioFFTDeNoiseContext * s,DeNoiseChannel * dnch)485 static void set_band_parameters(AudioFFTDeNoiseContext *s,
486 DeNoiseChannel *dnch)
487 {
488 double band_noise, d2, d3, d4, d5;
489 int i = 0, j = 0, k = 0;
490
491 d5 = 0.0;
492 band_noise = process_get_band_noise(s, dnch, 0);
493 for (int m = j; m < s->bin_count; m++) {
494 if (m == j) {
495 i = j;
496 d5 = band_noise;
497 if (k >= NB_PROFILE_BANDS) {
498 j = s->bin_count;
499 } else {
500 j = s->fft_length * get_band_centre(s, k) / s->sample_rate;
501 }
502 d2 = j - i;
503 band_noise = process_get_band_noise(s, dnch, k);
504 k++;
505 }
506 d3 = (j - m) / d2;
507 d4 = (m - i) / d2;
508 dnch->rel_var[m] = exp((d5 * d3 + band_noise * d4) * C);
509 }
510
511 for (i = 0; i < NB_PROFILE_BANDS; i++)
512 dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);
513 }
514
read_custom_noise(AudioFFTDeNoiseContext * s,int ch)515 static void read_custom_noise(AudioFFTDeNoiseContext *s, int ch)
516 {
517 DeNoiseChannel *dnch = &s->dnch[ch];
518 char *custom_noise_str, *p, *arg, *saveptr = NULL;
519 double band_noise[NB_PROFILE_BANDS] = { 0.f };
520 int ret;
521
522 if (!s->band_noise_str)
523 return;
524
525 custom_noise_str = p = av_strdup(s->band_noise_str);
526 if (!p)
527 return;
528
529 for (int i = 0; i < NB_PROFILE_BANDS; i++) {
530 float noise;
531
532 if (!(arg = av_strtok(p, "| ", &saveptr)))
533 break;
534
535 p = NULL;
536
537 ret = av_sscanf(arg, "%f", &noise);
538 if (ret != 1) {
539 av_log(s, AV_LOG_ERROR, "Custom band noise must be float.\n");
540 break;
541 }
542
543 band_noise[i] = av_clipd(noise, -24., 24.);
544 }
545
546 av_free(custom_noise_str);
547 memcpy(dnch->band_noise, band_noise, sizeof(band_noise));
548 }
549
set_parameters(AudioFFTDeNoiseContext * s,DeNoiseChannel * dnch,int update_var,int update_auto_var)550 static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var)
551 {
552 if (dnch->last_noise_floor != dnch->noise_floor)
553 dnch->last_noise_floor = dnch->noise_floor;
554
555 if (s->track_residual)
556 dnch->last_noise_floor = fmax(dnch->last_noise_floor, dnch->residual_floor);
557
558 dnch->max_var = s->floor * exp((100.0 + dnch->last_noise_floor) * C);
559 if (update_auto_var) {
560 for (int i = 0; i < NB_PROFILE_BANDS; i++)
561 dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);
562 }
563
564 if (s->track_residual) {
565 if (update_var || dnch->last_residual_floor != dnch->residual_floor) {
566 update_var = 1;
567 dnch->last_residual_floor = dnch->residual_floor;
568 dnch->last_noise_reduction = fmax(dnch->last_noise_floor - dnch->last_residual_floor + 100., 0);
569 dnch->max_gain = exp(dnch->last_noise_reduction * (0.5 * C));
570 }
571 } else if (update_var || dnch->noise_reduction != dnch->last_noise_reduction) {
572 update_var = 1;
573 dnch->last_noise_reduction = dnch->noise_reduction;
574 dnch->last_residual_floor = av_clipd(dnch->last_noise_floor - dnch->last_noise_reduction, -80, -20);
575 dnch->max_gain = exp(dnch->last_noise_reduction * (0.5 * C));
576 }
577
578 dnch->gain_scale = 1.0 / (dnch->max_gain * dnch->max_gain);
579
580 if (update_var) {
581 set_band_parameters(s, dnch);
582
583 for (int i = 0; i < s->bin_count; i++) {
584 dnch->abs_var[i] = fmax(dnch->max_var * dnch->rel_var[i], 1.0);
585 dnch->min_abs_var[i] = dnch->gain_scale * dnch->abs_var[i];
586 }
587 }
588 }
589
reduce_mean(double * band_noise)590 static void reduce_mean(double *band_noise)
591 {
592 double mean = 0.f;
593
594 for (int i = 0; i < NB_PROFILE_BANDS; i++)
595 mean += band_noise[i];
596 mean /= NB_PROFILE_BANDS;
597
598 for (int i = 0; i < NB_PROFILE_BANDS; i++)
599 band_noise[i] -= mean;
600 }
601
config_input(AVFilterLink * inlink)602 static int config_input(AVFilterLink *inlink)
603 {
604 AVFilterContext *ctx = inlink->dst;
605 AudioFFTDeNoiseContext *s = ctx->priv;
606 double wscale, sar, sum, sdiv;
607 int i, j, k, m, n, ret;
608
609 s->dnch = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->dnch));
610 if (!s->dnch)
611 return AVERROR(ENOMEM);
612
613 s->channels = inlink->ch_layout.nb_channels;
614 s->sample_rate = inlink->sample_rate;
615 s->sample_advance = s->sample_rate / 80;
616 s->window_length = 3 * s->sample_advance;
617 s->fft_length2 = 1 << (32 - ff_clz(s->window_length));
618 s->fft_length = s->fft_length2;
619 s->buffer_length = s->fft_length * 2;
620 s->bin_count = s->fft_length2 / 2 + 1;
621
622 s->band_centre[0] = 80;
623 for (i = 1; i < NB_PROFILE_BANDS; i++) {
624 s->band_centre[i] = lrint(1.5 * s->band_centre[i - 1] + 5.0);
625 if (s->band_centre[i] < 1000) {
626 s->band_centre[i] = 10 * (s->band_centre[i] / 10);
627 } else if (s->band_centre[i] < 5000) {
628 s->band_centre[i] = 50 * ((s->band_centre[i] + 20) / 50);
629 } else if (s->band_centre[i] < 15000) {
630 s->band_centre[i] = 100 * ((s->band_centre[i] + 45) / 100);
631 } else {
632 s->band_centre[i] = 1000 * ((s->band_centre[i] + 495) / 1000);
633 }
634 }
635
636 for (j = 0; j < SOLVE_SIZE; j++) {
637 for (k = 0; k < SOLVE_SIZE; k++) {
638 s->matrix_a[j + k * SOLVE_SIZE] = 0.0;
639 for (m = 0; m < NB_PROFILE_BANDS; m++)
640 s->matrix_a[j + k * SOLVE_SIZE] += pow(m, j + k);
641 }
642 }
643
644 factor(s->matrix_a, SOLVE_SIZE);
645
646 i = 0;
647 for (j = 0; j < SOLVE_SIZE; j++)
648 for (k = 0; k < NB_PROFILE_BANDS; k++)
649 s->matrix_b[i++] = pow(k, j);
650
651 i = 0;
652 for (j = 0; j < NB_PROFILE_BANDS; j++)
653 for (k = 0; k < SOLVE_SIZE; k++)
654 s->matrix_c[i++] = pow(j, k);
655
656 s->window = av_calloc(s->window_length, sizeof(*s->window));
657 s->bin2band = av_calloc(s->bin_count, sizeof(*s->bin2band));
658 if (!s->window || !s->bin2band)
659 return AVERROR(ENOMEM);
660
661 sdiv = s->band_multiplier;
662 for (i = 0; i < s->bin_count; i++)
663 s->bin2band[i] = lrint(sdiv * freq2bark((0.5 * i * s->sample_rate) / s->fft_length2));
664
665 s->number_of_bands = s->bin2band[s->bin_count - 1] + 1;
666
667 s->band_alpha = av_calloc(s->number_of_bands, sizeof(*s->band_alpha));
668 s->band_beta = av_calloc(s->number_of_bands, sizeof(*s->band_beta));
669 if (!s->band_alpha || !s->band_beta)
670 return AVERROR(ENOMEM);
671
672 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
673 DeNoiseChannel *dnch = &s->dnch[ch];
674 float scale = 1.f;
675
676 switch (s->noise_type) {
677 case WHITE_NOISE:
678 for (i = 0; i < NB_PROFILE_BANDS; i++)
679 dnch->band_noise[i] = 0.;
680 break;
681 case VINYL_NOISE:
682 for (i = 0; i < NB_PROFILE_BANDS; i++)
683 dnch->band_noise[i] = get_band_noise(s, i, 50.0, 500.5, 2125.0);
684 break;
685 case SHELLAC_NOISE:
686 for (i = 0; i < NB_PROFILE_BANDS; i++)
687 dnch->band_noise[i] = get_band_noise(s, i, 1.0, 500.0, 1.0E10);
688 break;
689 case CUSTOM_NOISE:
690 read_custom_noise(s, ch);
691 break;
692 default:
693 return AVERROR_BUG;
694 }
695
696 reduce_mean(dnch->band_noise);
697
698 dnch->amt = av_calloc(s->bin_count, sizeof(*dnch->amt));
699 dnch->band_amt = av_calloc(s->number_of_bands, sizeof(*dnch->band_amt));
700 dnch->band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->band_excit));
701 dnch->gain = av_calloc(s->bin_count, sizeof(*dnch->gain));
702 dnch->smoothed_gain = av_calloc(s->bin_count, sizeof(*dnch->smoothed_gain));
703 dnch->prior = av_calloc(s->bin_count, sizeof(*dnch->prior));
704 dnch->prior_band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->prior_band_excit));
705 dnch->clean_data = av_calloc(s->bin_count, sizeof(*dnch->clean_data));
706 dnch->noisy_data = av_calloc(s->bin_count, sizeof(*dnch->noisy_data));
707 dnch->out_samples = av_calloc(s->buffer_length, sizeof(*dnch->out_samples));
708 dnch->abs_var = av_calloc(s->bin_count, sizeof(*dnch->abs_var));
709 dnch->rel_var = av_calloc(s->bin_count, sizeof(*dnch->rel_var));
710 dnch->min_abs_var = av_calloc(s->bin_count, sizeof(*dnch->min_abs_var));
711 dnch->fft_in = av_calloc(s->fft_length2, sizeof(*dnch->fft_in));
712 dnch->fft_out = av_calloc(s->fft_length2 + 1, sizeof(*dnch->fft_out));
713 ret = av_tx_init(&dnch->fft, &dnch->tx_fn, AV_TX_FLOAT_RDFT, 0, s->fft_length2, &scale, 0);
714 if (ret < 0)
715 return ret;
716 ret = av_tx_init(&dnch->ifft, &dnch->itx_fn, AV_TX_FLOAT_RDFT, 1, s->fft_length2, &scale, 0);
717 if (ret < 0)
718 return ret;
719 dnch->spread_function = av_calloc(s->number_of_bands * s->number_of_bands,
720 sizeof(*dnch->spread_function));
721
722 if (!dnch->amt ||
723 !dnch->band_amt ||
724 !dnch->band_excit ||
725 !dnch->gain ||
726 !dnch->smoothed_gain ||
727 !dnch->prior ||
728 !dnch->prior_band_excit ||
729 !dnch->clean_data ||
730 !dnch->noisy_data ||
731 !dnch->out_samples ||
732 !dnch->fft_in ||
733 !dnch->fft_out ||
734 !dnch->abs_var ||
735 !dnch->rel_var ||
736 !dnch->min_abs_var ||
737 !dnch->spread_function ||
738 !dnch->fft ||
739 !dnch->ifft)
740 return AVERROR(ENOMEM);
741 }
742
743 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
744 DeNoiseChannel *dnch = &s->dnch[ch];
745 double *prior_band_excit = dnch->prior_band_excit;
746 double min, max;
747 double p1, p2;
748
749 p1 = pow(0.1, 2.5 / sdiv);
750 p2 = pow(0.1, 1.0 / sdiv);
751 j = 0;
752 for (m = 0; m < s->number_of_bands; m++) {
753 for (n = 0; n < s->number_of_bands; n++) {
754 if (n < m) {
755 dnch->spread_function[j++] = pow(p2, m - n);
756 } else if (n > m) {
757 dnch->spread_function[j++] = pow(p1, n - m);
758 } else {
759 dnch->spread_function[j++] = 1.0;
760 }
761 }
762 }
763
764 for (m = 0; m < s->number_of_bands; m++) {
765 dnch->band_excit[m] = 0.0;
766 prior_band_excit[m] = 0.0;
767 }
768
769 for (m = 0; m < s->bin_count; m++)
770 dnch->band_excit[s->bin2band[m]] += 1.0;
771
772 j = 0;
773 for (m = 0; m < s->number_of_bands; m++) {
774 for (n = 0; n < s->number_of_bands; n++)
775 prior_band_excit[m] += dnch->spread_function[j++] * dnch->band_excit[n];
776 }
777
778 min = pow(0.1, 2.5);
779 max = pow(0.1, 1.0);
780 for (int i = 0; i < s->number_of_bands; i++) {
781 if (i < lrint(12.0 * sdiv)) {
782 dnch->band_excit[i] = pow(0.1, 1.45 + 0.1 * i / sdiv);
783 } else {
784 dnch->band_excit[i] = pow(0.1, 2.5 - 0.2 * (i / sdiv - 14.0));
785 }
786 dnch->band_excit[i] = av_clipd(dnch->band_excit[i], min, max);
787 }
788
789 for (int i = 0; i < s->buffer_length; i++)
790 dnch->out_samples[i] = 0;
791
792 j = 0;
793 for (int i = 0; i < s->number_of_bands; i++)
794 for (int k = 0; k < s->number_of_bands; k++)
795 dnch->spread_function[j++] *= dnch->band_excit[i] / prior_band_excit[i];
796 }
797
798 j = 0;
799 sar = s->sample_advance / s->sample_rate;
800 for (int i = 0; i < s->bin_count; i++) {
801 if ((i == s->fft_length2) || (s->bin2band[i] > j)) {
802 double d6 = (i - 1) * s->sample_rate / s->fft_length;
803 double d7 = fmin(0.008 + 2.2 / d6, 0.03);
804 s->band_alpha[j] = exp(-sar / d7);
805 s->band_beta[j] = 1.0 - s->band_alpha[j];
806 j = s->bin2band[i];
807 }
808 }
809
810 s->winframe = ff_get_audio_buffer(inlink, s->window_length);
811 if (!s->winframe)
812 return AVERROR(ENOMEM);
813
814 wscale = sqrt(8.0 / (9.0 * s->fft_length));
815 sum = 0.0;
816 for (int i = 0; i < s->window_length; i++) {
817 double d10 = sin(i * M_PI / s->window_length);
818 d10 *= wscale * d10;
819 s->window[i] = d10;
820 sum += d10 * d10;
821 }
822
823 s->window_weight = 0.5 * sum;
824 s->floor = (1LL << 48) * exp(-23.025558369790467) * s->window_weight;
825 s->sample_floor = s->floor * exp(4.144600506562284);
826
827 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
828 DeNoiseChannel *dnch = &s->dnch[ch];
829
830 dnch->noise_reduction = s->noise_reduction;
831 dnch->noise_floor = s->noise_floor;
832 dnch->residual_floor = s->residual_floor;
833
834 set_parameters(s, dnch, 1, 1);
835 }
836
837 s->noise_band_edge[0] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, 0) / s->sample_rate);
838 i = 0;
839 for (int j = 1; j < NB_PROFILE_BANDS + 1; j++) {
840 s->noise_band_edge[j] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, j) / s->sample_rate);
841 if (s->noise_band_edge[j] > lrint(1.1 * s->noise_band_edge[j - 1]))
842 i++;
843 s->noise_band_edge[NB_PROFILE_BANDS + 1] = i;
844 }
845 s->noise_band_count = s->noise_band_edge[NB_PROFILE_BANDS + 1];
846
847 return 0;
848 }
849
init_sample_noise(DeNoiseChannel * dnch)850 static void init_sample_noise(DeNoiseChannel *dnch)
851 {
852 for (int i = 0; i < NB_PROFILE_BANDS; i++) {
853 dnch->noise_band_norm[i] = 0.0;
854 dnch->noise_band_avr[i] = 0.0;
855 dnch->noise_band_avi[i] = 0.0;
856 dnch->noise_band_var[i] = 0.0;
857 }
858 }
859
sample_noise_block(AudioFFTDeNoiseContext * s,DeNoiseChannel * dnch,AVFrame * in,int ch)860 static void sample_noise_block(AudioFFTDeNoiseContext *s,
861 DeNoiseChannel *dnch,
862 AVFrame *in, int ch)
863 {
864 float *src = (float *)in->extended_data[ch];
865 double mag2, var = 0.0, avr = 0.0, avi = 0.0;
866 int edge, j, k, n, edgemax;
867
868 for (int i = 0; i < s->window_length; i++)
869 dnch->fft_in[i] = s->window[i] * src[i] * (1LL << 23);
870
871 for (int i = s->window_length; i < s->fft_length2; i++)
872 dnch->fft_in[i] = 0.0;
873
874 dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, sizeof(float));
875
876 edge = s->noise_band_edge[0];
877 j = edge;
878 k = 0;
879 n = j;
880 edgemax = fmin(s->fft_length2, s->noise_band_edge[NB_PROFILE_BANDS]);
881 for (int i = j; i <= edgemax; i++) {
882 if ((i == j) && (i < edgemax)) {
883 if (j > edge) {
884 dnch->noise_band_norm[k - 1] += j - edge;
885 dnch->noise_band_avr[k - 1] += avr;
886 dnch->noise_band_avi[k - 1] += avi;
887 dnch->noise_band_var[k - 1] += var;
888 }
889 k++;
890 edge = j;
891 j = s->noise_band_edge[k];
892 if (k == NB_PROFILE_BANDS) {
893 j++;
894 }
895 var = 0.0;
896 avr = 0.0;
897 avi = 0.0;
898 }
899 avr += dnch->fft_out[n].re;
900 avi += dnch->fft_out[n].im;
901 mag2 = dnch->fft_out[n].re * dnch->fft_out[n].re +
902 dnch->fft_out[n].im * dnch->fft_out[n].im;
903
904 mag2 = fmax(mag2, s->sample_floor);
905
906 var += mag2;
907 n++;
908 }
909
910 dnch->noise_band_norm[k - 1] += j - edge;
911 dnch->noise_band_avr[k - 1] += avr;
912 dnch->noise_band_avi[k - 1] += avi;
913 dnch->noise_band_var[k - 1] += var;
914 }
915
finish_sample_noise(AudioFFTDeNoiseContext * s,DeNoiseChannel * dnch,double * sample_noise)916 static void finish_sample_noise(AudioFFTDeNoiseContext *s,
917 DeNoiseChannel *dnch,
918 double *sample_noise)
919 {
920 for (int i = 0; i < s->noise_band_count; i++) {
921 dnch->noise_band_avr[i] /= dnch->noise_band_norm[i];
922 dnch->noise_band_avi[i] /= dnch->noise_band_norm[i];
923 dnch->noise_band_var[i] /= dnch->noise_band_norm[i];
924 dnch->noise_band_var[i] -= dnch->noise_band_avr[i] * dnch->noise_band_avr[i] +
925 dnch->noise_band_avi[i] * dnch->noise_band_avi[i];
926 dnch->noise_band_auto_var[i] = dnch->noise_band_var[i];
927 sample_noise[i] = 10.0 * log10(dnch->noise_band_var[i] / s->floor) - 100.0;
928 }
929 if (s->noise_band_count < NB_PROFILE_BANDS) {
930 for (int i = s->noise_band_count; i < NB_PROFILE_BANDS; i++)
931 sample_noise[i] = sample_noise[i - 1];
932 }
933 }
934
set_noise_profile(AudioFFTDeNoiseContext * s,DeNoiseChannel * dnch,double * sample_noise)935 static void set_noise_profile(AudioFFTDeNoiseContext *s,
936 DeNoiseChannel *dnch,
937 double *sample_noise)
938 {
939 double new_band_noise[NB_PROFILE_BANDS];
940 double temp[NB_PROFILE_BANDS];
941 double sum = 0.0;
942
943 for (int m = 0; m < NB_PROFILE_BANDS; m++)
944 temp[m] = sample_noise[m];
945
946 for (int m = 0, i = 0; m < SOLVE_SIZE; m++) {
947 sum = 0.0;
948 for (int n = 0; n < NB_PROFILE_BANDS; n++)
949 sum += s->matrix_b[i++] * temp[n];
950 s->vector_b[m] = sum;
951 }
952 solve(s->matrix_a, s->vector_b, SOLVE_SIZE);
953 for (int m = 0, i = 0; m < NB_PROFILE_BANDS; m++) {
954 sum = 0.0;
955 for (int n = 0; n < SOLVE_SIZE; n++)
956 sum += s->matrix_c[i++] * s->vector_b[n];
957 temp[m] = sum;
958 }
959
960 reduce_mean(temp);
961
962 av_log(s, AV_LOG_INFO, "bn=");
963 for (int m = 0; m < NB_PROFILE_BANDS; m++) {
964 new_band_noise[m] = temp[m];
965 new_band_noise[m] = av_clipd(new_band_noise[m], -24.0, 24.0);
966 av_log(s, AV_LOG_INFO, "%f ", new_band_noise[m]);
967 }
968 av_log(s, AV_LOG_INFO, "\n");
969 memcpy(dnch->band_noise, new_band_noise, sizeof(new_band_noise));
970 }
971
filter_channel(AVFilterContext * ctx,void * arg,int jobnr,int nb_jobs)972 static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
973 {
974 AudioFFTDeNoiseContext *s = ctx->priv;
975 AVFrame *in = arg;
976 const int start = (in->ch_layout.nb_channels * jobnr) / nb_jobs;
977 const int end = (in->ch_layout.nb_channels * (jobnr+1)) / nb_jobs;
978 const int window_length = s->window_length;
979 const double *window = s->window;
980
981 for (int ch = start; ch < end; ch++) {
982 DeNoiseChannel *dnch = &s->dnch[ch];
983 const float *src = (const float *)in->extended_data[ch];
984 double *dst = dnch->out_samples;
985 float *fft_in = dnch->fft_in;
986
987 for (int m = 0; m < window_length; m++)
988 fft_in[m] = window[m] * src[m] * (1LL << 23);
989
990 for (int m = window_length; m < s->fft_length2; m++)
991 fft_in[m] = 0;
992
993 dnch->tx_fn(dnch->fft, dnch->fft_out, fft_in, sizeof(float));
994
995 process_frame(ctx, s, dnch, dnch->fft_out,
996 dnch->prior,
997 dnch->prior_band_excit,
998 s->track_noise);
999
1000 dnch->itx_fn(dnch->ifft, fft_in, dnch->fft_out, sizeof(float));
1001
1002 for (int m = 0; m < window_length; m++)
1003 dst[m] += s->window[m] * fft_in[m] / (1LL << 23);
1004 }
1005
1006 return 0;
1007 }
1008
output_frame(AVFilterLink * inlink,AVFrame * in)1009 static int output_frame(AVFilterLink *inlink, AVFrame *in)
1010 {
1011 AVFilterContext *ctx = inlink->dst;
1012 AVFilterLink *outlink = ctx->outputs[0];
1013 AudioFFTDeNoiseContext *s = ctx->priv;
1014 const int output_mode = ctx->is_disabled ? IN_MODE : s->output_mode;
1015 const int offset = s->window_length - s->sample_advance;
1016 AVFrame *out;
1017
1018 for (int ch = 0; ch < s->channels; ch++) {
1019 float *src = (float *)s->winframe->extended_data[ch];
1020
1021 memmove(src, &src[s->sample_advance], offset * sizeof(float));
1022 memcpy(&src[offset], in->extended_data[ch], in->nb_samples * sizeof(float));
1023 memset(&src[offset + in->nb_samples], 0, (s->sample_advance - in->nb_samples) * sizeof(float));
1024 }
1025
1026 if (s->track_noise) {
1027 double average = 0.0, min = DBL_MAX, max = -DBL_MAX;
1028
1029 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1030 DeNoiseChannel *dnch = &s->dnch[ch];
1031
1032 average += dnch->noise_floor;
1033 max = fmax(max, dnch->noise_floor);
1034 min = fmin(min, dnch->noise_floor);
1035 }
1036
1037 average /= inlink->ch_layout.nb_channels;
1038
1039 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1040 DeNoiseChannel *dnch = &s->dnch[ch];
1041
1042 switch (s->noise_floor_link) {
1043 case MIN_LINK: dnch->noise_floor = min; break;
1044 case MAX_LINK: dnch->noise_floor = max; break;
1045 case AVERAGE_LINK: dnch->noise_floor = average; break;
1046 case NONE_LINK:
1047 default:
1048 break;
1049 }
1050
1051 if (dnch->noise_floor != dnch->last_noise_floor)
1052 set_parameters(s, dnch, 1, 0);
1053 }
1054 }
1055
1056 if (s->sample_noise_mode == SAMPLE_START) {
1057 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1058 DeNoiseChannel *dnch = &s->dnch[ch];
1059
1060 init_sample_noise(dnch);
1061 }
1062 s->sample_noise_mode = SAMPLE_NONE;
1063 s->sample_noise = 1;
1064 s->sample_noise_blocks = 0;
1065 }
1066
1067 if (s->sample_noise) {
1068 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1069 DeNoiseChannel *dnch = &s->dnch[ch];
1070
1071 sample_noise_block(s, dnch, s->winframe, ch);
1072 }
1073 s->sample_noise_blocks++;
1074 }
1075
1076 if (s->sample_noise_mode == SAMPLE_STOP) {
1077 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1078 DeNoiseChannel *dnch = &s->dnch[ch];
1079 double sample_noise[NB_PROFILE_BANDS];
1080
1081 if (s->sample_noise_blocks <= 0)
1082 break;
1083 finish_sample_noise(s, dnch, sample_noise);
1084 set_noise_profile(s, dnch, sample_noise);
1085 set_parameters(s, dnch, 1, 1);
1086 }
1087 s->sample_noise = 0;
1088 s->sample_noise_blocks = 0;
1089 s->sample_noise_mode = SAMPLE_NONE;
1090 }
1091
1092 ff_filter_execute(ctx, filter_channel, s->winframe, NULL,
1093 FFMIN(outlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx)));
1094
1095 if (av_frame_is_writable(in)) {
1096 out = in;
1097 } else {
1098 out = ff_get_audio_buffer(outlink, in->nb_samples);
1099 if (!out) {
1100 av_frame_free(&in);
1101 return AVERROR(ENOMEM);
1102 }
1103
1104 out->pts = in->pts;
1105 }
1106
1107 for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1108 DeNoiseChannel *dnch = &s->dnch[ch];
1109 double *src = dnch->out_samples;
1110 const float *orig = (const float *)s->winframe->extended_data[ch];
1111 float *dst = (float *)out->extended_data[ch];
1112
1113 switch (output_mode) {
1114 case IN_MODE:
1115 for (int m = 0; m < out->nb_samples; m++)
1116 dst[m] = orig[m];
1117 break;
1118 case OUT_MODE:
1119 for (int m = 0; m < out->nb_samples; m++)
1120 dst[m] = src[m];
1121 break;
1122 case NOISE_MODE:
1123 for (int m = 0; m < out->nb_samples; m++)
1124 dst[m] = orig[m] - src[m];
1125 break;
1126 default:
1127 if (in != out)
1128 av_frame_free(&in);
1129 av_frame_free(&out);
1130 return AVERROR_BUG;
1131 }
1132 memmove(src, src + s->sample_advance, (s->window_length - s->sample_advance) * sizeof(*src));
1133 memset(src + (s->window_length - s->sample_advance), 0, s->sample_advance * sizeof(*src));
1134 }
1135
1136 if (out != in)
1137 av_frame_free(&in);
1138 return ff_filter_frame(outlink, out);
1139 }
1140
activate(AVFilterContext * ctx)1141 static int activate(AVFilterContext *ctx)
1142 {
1143 AVFilterLink *inlink = ctx->inputs[0];
1144 AVFilterLink *outlink = ctx->outputs[0];
1145 AudioFFTDeNoiseContext *s = ctx->priv;
1146 AVFrame *in = NULL;
1147 int ret;
1148
1149 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
1150
1151 ret = ff_inlink_consume_samples(inlink, s->sample_advance, s->sample_advance, &in);
1152 if (ret < 0)
1153 return ret;
1154 if (ret > 0)
1155 return output_frame(inlink, in);
1156
1157 if (ff_inlink_queued_samples(inlink) >= s->sample_advance) {
1158 ff_filter_set_ready(ctx, 10);
1159 return 0;
1160 }
1161
1162 FF_FILTER_FORWARD_STATUS(inlink, outlink);
1163 FF_FILTER_FORWARD_WANTED(outlink, inlink);
1164
1165 return FFERROR_NOT_READY;
1166 }
1167
uninit(AVFilterContext * ctx)1168 static av_cold void uninit(AVFilterContext *ctx)
1169 {
1170 AudioFFTDeNoiseContext *s = ctx->priv;
1171
1172 av_freep(&s->window);
1173 av_freep(&s->bin2band);
1174 av_freep(&s->band_alpha);
1175 av_freep(&s->band_beta);
1176 av_frame_free(&s->winframe);
1177
1178 if (s->dnch) {
1179 for (int ch = 0; ch < s->channels; ch++) {
1180 DeNoiseChannel *dnch = &s->dnch[ch];
1181 av_freep(&dnch->amt);
1182 av_freep(&dnch->band_amt);
1183 av_freep(&dnch->band_excit);
1184 av_freep(&dnch->gain);
1185 av_freep(&dnch->smoothed_gain);
1186 av_freep(&dnch->prior);
1187 av_freep(&dnch->prior_band_excit);
1188 av_freep(&dnch->clean_data);
1189 av_freep(&dnch->noisy_data);
1190 av_freep(&dnch->out_samples);
1191 av_freep(&dnch->spread_function);
1192 av_freep(&dnch->abs_var);
1193 av_freep(&dnch->rel_var);
1194 av_freep(&dnch->min_abs_var);
1195 av_freep(&dnch->fft_in);
1196 av_freep(&dnch->fft_out);
1197 av_tx_uninit(&dnch->fft);
1198 av_tx_uninit(&dnch->ifft);
1199 }
1200 av_freep(&s->dnch);
1201 }
1202 }
1203
process_command(AVFilterContext * ctx,const char * cmd,const char * args,char * res,int res_len,int flags)1204 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
1205 char *res, int res_len, int flags)
1206 {
1207 AudioFFTDeNoiseContext *s = ctx->priv;
1208 int ret = 0;
1209
1210 ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
1211 if (ret < 0)
1212 return ret;
1213
1214 if (!strcmp(cmd, "sample_noise") || !strcmp(cmd, "sn"))
1215 return 0;
1216
1217 for (int ch = 0; ch < s->channels; ch++) {
1218 DeNoiseChannel *dnch = &s->dnch[ch];
1219
1220 dnch->noise_reduction = s->noise_reduction;
1221 dnch->noise_floor = s->noise_floor;
1222 dnch->residual_floor = s->residual_floor;
1223
1224 set_parameters(s, dnch, 1, 1);
1225 }
1226
1227 return 0;
1228 }
1229
1230 static const AVFilterPad inputs[] = {
1231 {
1232 .name = "default",
1233 .type = AVMEDIA_TYPE_AUDIO,
1234 .config_props = config_input,
1235 },
1236 };
1237
1238 static const AVFilterPad outputs[] = {
1239 {
1240 .name = "default",
1241 .type = AVMEDIA_TYPE_AUDIO,
1242 },
1243 };
1244
1245 const AVFilter ff_af_afftdn = {
1246 .name = "afftdn",
1247 .description = NULL_IF_CONFIG_SMALL("Denoise audio samples using FFT."),
1248 .priv_size = sizeof(AudioFFTDeNoiseContext),
1249 .priv_class = &afftdn_class,
1250 .activate = activate,
1251 .uninit = uninit,
1252 FILTER_INPUTS(inputs),
1253 FILTER_OUTPUTS(outputs),
1254 FILTER_SINGLE_SAMPLEFMT(AV_SAMPLE_FMT_FLTP),
1255 .process_command = process_command,
1256 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
1257 AVFILTER_FLAG_SLICE_THREADS,
1258 };
1259