1 /*
2 * Copyright (c) 2018 The FFmpeg Project
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <float.h>
22
23 #include "libavutil/audio_fifo.h"
24 #include "libavutil/avstring.h"
25 #include "libavutil/channel_layout.h"
26 #include "libavutil/opt.h"
27 #include "libavcodec/avfft.h"
28 #include "avfilter.h"
29 #include "audio.h"
30 #include "formats.h"
31 #include "filters.h"
32
/* ln(10) / 10: exp(x * C) equals 10^(x / 10), i.e. converts a dB power value to a linear factor. */
#define C       (M_LN10 * 0.1)
/* Weight given to the previous per-bin "prior" value in process_frame(). */
#define RATIO    0.98
/* Complementary weight (1 - RATIO) given to the current frame's estimate. */
#define RRATIO  (1.0 - RATIO)
36
/* Values of the "om" (output mode) option; see afftdn_options[]. */
enum OutModes {
    IN_MODE,    /* option constant "i": input */
    OUT_MODE,   /* option constant "o": output (the option default) */
    NOISE_MODE, /* option constant "n": noise */
    NB_MODES    /* number of modes; NB_MODES-1 is the option's upper bound */
};
43
/* Values of the "nt" (noise type) option; selects how band_noise[] is initialised in config_input(). */
enum NoiseType {
    WHITE_NOISE,   /* option constant "w": all 15 band_noise entries set to 0 */
    VINYL_NOISE,   /* option constant "v": profile computed with get_band_noise() */
    SHELLAC_NOISE, /* option constant "s": profile computed with get_band_noise() */
    CUSTOM_NOISE,  /* option constant "c": profile parsed from the "bn" option string */
    NB_NOISE       /* number of noise types; NB_NOISE-1 is the option's upper bound */
};
51
/* Per-channel denoiser state; one instance per input channel (allocated in config_input()). */
typedef struct DeNoiseChannel {
    int         band_noise[15];          /* noise profile, one value per profile band */
    double      noise_band_auto_var[15]; /* per-band variance; set in set_band_parameters() and
                                          * finish_sample_noise(), rescaled in process_frame()
                                          * when noise tracking is active */
    double      noise_band_sample[15];   /* sfm_results[0] stored per band by process_frame() */
    double     *amt;                     /* bin_count entries: band_amt[] expanded to bins */
    double     *band_amt;                /* number_of_bands entries: spread_function * band_excit */
    double     *band_excit;              /* number_of_bands entries: per-band sum of clean_data[] */
    double     *gain;                    /* bin_count entries: per-bin gain */
    double     *prior;                   /* bin_count entries: recursive per-bin state, initialised to RRATIO */
    double     *prior_band_excit;        /* number_of_bands entries: band_excit[] of the previous frame */
    double     *clean_data;              /* bin_count entries: noisy_data[] scaled by the gain */
    double     *noisy_data;              /* bin_count entries: squared magnitude of the input spectrum */
    double     *out_samples;             /* buffer_length entries, zeroed in config_input() */
    double     *spread_function;         /* number_of_bands x number_of_bands matrix */
    double     *abs_var;                 /* bin_count entries, computed in set_band_parameters() */
    double     *rel_var;                 /* bin_count entries, computed in set_band_parameters() */
    double     *min_abs_var;             /* bin_count entries: gain_scale * abs_var[] */
    FFTComplex *fft_data;                /* fft_length2 + 1 complex values */
    FFTContext *fft, *ifft;              /* forward / inverse transform contexts */

    /* Accumulators filled by sample_noise_block(), reset by init_sample_noise(). */
    double      noise_band_norm[15];
    double      noise_band_avr[15];
    double      noise_band_avi[15];
    double      noise_band_var[15];

    double      sfm_threshold;           /* initialised to 0.8 in config_input() */
    double      sfm_alpha;               /* initialised to 0.05 in config_input() */
    double      sfm_results[3];          /* outputs of calculate_sfm() */
    int         sfm_fail_flags[512];     /* ring of flags indexed by block_count & 0x1FF */
    int         sfm_fail_total;          /* running count of flags set in sfm_fail_flags[] */
} DeNoiseChannel;
83
/* Private context of the afftdn filter. */
typedef struct AudioFFTDeNoiseContext {
    const AVClass *class;

    /* User options, see afftdn_options[]. */
    float   noise_reduction;    /* "nr" */
    float   noise_floor;        /* "nf"; also overwritten by set_noise_profile() when track_noise is set */
    int     noise_type;         /* "nt", one of enum NoiseType */
    char   *band_noise_str;     /* "bn", parsed by read_custom_noise() */
    float   residual_floor;     /* "rf" */
    int     track_noise;        /* "tn" */
    int     track_residual;     /* "tr" */
    int     output_mode;        /* "om", one of enum OutModes */

    /* Values last applied by set_parameters(). */
    float   last_residual_floor;
    float   last_noise_floor;
    float   last_noise_reduction;
    float   last_noise_balance;
    int64_t block_count;        /* low 9 bits index sfm_fail_flags[] */

    int64_t pts;                /* set to AV_NOPTS_VALUE in config_input() */
    int     channels;           /* copied from the input link */
    int     sample_noise;
    int     sample_noise_start;
    int     sample_noise_end;
    float   sample_rate;        /* copied from the input link */
    int     buffer_length;      /* 2 * fft_length */
    int     fft_length;         /* 2 * fft_length2 */
    int     fft_length2;        /* power of two derived from window_length */
    int     bin_count;          /* fft_length2 + 1 */
    int     window_length;      /* 3 * sample_advance */
    int     sample_advance;     /* sample_rate / 80 */
    int     number_of_bands;    /* bin2band[fft_length2] + 1 */

    int     band_centre[15];    /* profile band centres; band_centre[0] is 80 */

    int    *bin2band;           /* bin_count entries: band index of each bin */
    double *window;             /* window_length entries: analysis window */
    double *band_alpha;         /* number_of_bands entries */
    double *band_beta;          /* number_of_bands entries: 1 - band_alpha[] */

    DeNoiseChannel *dnch;       /* one entry per channel */

    double  max_gain;
    double  max_var;
    double  gain_scale;         /* 1 / (max_gain * max_gain) */
    double  window_weight;
    double  floor;
    double  sample_floor;
    double  auto_floor;

    int     noise_band_edge[17]; /* entries 0..15 are band edges in bins; entry 16 holds the band count */
    int     noise_band_count;    /* copy of noise_band_edge[16] */
    double  matrix_a[25];        /* 5x5 matrix, factored in place by factor() in config_input() */
    double  vector_b[5];         /* right-hand side / solution vector used with solve() */
    double  matrix_b[75];        /* 5 rows of 15: pow(k, j) */
    double  matrix_c[75];        /* 15 rows of 5: pow(j, k) */

    AVAudioFifo *fifo;
} AudioFFTDeNoiseContext;
142
/* Byte offset of a field inside the private context, as needed by AVOption entries. */
#define OFFSET(x) offsetof(AudioFFTDeNoiseContext, x)
/* Flags for options without the runtime flag. */
#define AF  AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
/* Same flags plus AV_OPT_FLAG_RUNTIME_PARAM. */
#define AFR AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM

/*
 * Option table of the afftdn filter. Entries of type AV_OPT_TYPE_CONST are the
 * named values accepted by the preceding option sharing the same unit string
 * ("type" for "nt", "mode" for "om").
 */
static const AVOption afftdn_options[] = {
    { "nr", "set the noise reduction",    OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT,  {.dbl = 12},          .01, 97, AFR },
    { "nf", "set the noise floor",        OFFSET(noise_floor),     AV_OPT_TYPE_FLOAT,  {.dbl =-50},          -80,-20, AFR },
    { "nt", "set the noise type",         OFFSET(noise_type),      AV_OPT_TYPE_INT,    {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, "type" },
    {  "w", "white noise",                0,                       AV_OPT_TYPE_CONST,  {.i64 = WHITE_NOISE},   0,  0, AF, "type" },
    {  "v", "vinyl noise",                0,                       AV_OPT_TYPE_CONST,  {.i64 = VINYL_NOISE},   0,  0, AF, "type" },
    {  "s", "shellac noise",              0,                       AV_OPT_TYPE_CONST,  {.i64 = SHELLAC_NOISE}, 0,  0, AF, "type" },
    {  "c", "custom noise",               0,                       AV_OPT_TYPE_CONST,  {.i64 = CUSTOM_NOISE},  0,  0, AF, "type" },
    { "bn", "set the custom bands noise", OFFSET(band_noise_str),  AV_OPT_TYPE_STRING, {.str = 0},             0,  0, AF },
    { "rf", "set the residual floor",     OFFSET(residual_floor),  AV_OPT_TYPE_FLOAT,  {.dbl =-38},          -80,-20, AFR },
    { "tn", "track noise",                OFFSET(track_noise),     AV_OPT_TYPE_BOOL,   {.i64 =  0},            0,  1, AFR },
    { "tr", "track residual",             OFFSET(track_residual),  AV_OPT_TYPE_BOOL,   {.i64 =  0},            0,  1, AFR },
    { "om", "set output mode",            OFFSET(output_mode),     AV_OPT_TYPE_INT,    {.i64 = OUT_MODE},      0,  NB_MODES-1, AFR, "mode" },
    {  "i", "input",                      0,                       AV_OPT_TYPE_CONST,  {.i64 = IN_MODE},       0,  0, AFR, "mode" },
    {  "o", "output",                     0,                       AV_OPT_TYPE_CONST,  {.i64 = OUT_MODE},      0,  0, AFR, "mode" },
    {  "n", "noise",                      0,                       AV_OPT_TYPE_CONST,  {.i64 = NOISE_MODE},    0,  0, AFR, "mode" },
    { NULL }
};

AVFILTER_DEFINE_CLASS(afftdn);
167
/**
 * Evaluate the built-in noise profile for one band.
 *
 * Three terms of the form 10*log10(1 + r^2) are computed, with r being
 * a/centre, b/centre and centre/c respectively (centre is the band centre
 * stored in s->band_centre[band]); the result is -term(a) + term(b) - term(c)
 * rounded to the nearest integer.
 *
 * @param s    filter context (only band_centre[] is read)
 * @param band index into s->band_centre[]
 * @param a    first shaping constant
 * @param b    second shaping constant
 * @param c    third shaping constant
 * @return rounded profile value for the band
 */
static int get_band_noise(AudioFFTDeNoiseContext *s,
                          int band, double a,
                          double b, double c)
{
    const int centre = s->band_centre[band];
    const double ratio_a = a / centre;
    const double ratio_b = b / centre;
    const double ratio_c = centre / c;
    const double term_a  = 10.0 * log(1.0 + ratio_a * ratio_a) / M_LN10;
    const double term_b  = 10.0 * log(1.0 + ratio_b * ratio_b) / M_LN10;
    const double term_c  = 10.0 * log(1.0 + ratio_c * ratio_c) / M_LN10;

    return lrint(-term_a + term_b - term_c);
}
183
/**
 * Factor a square matrix in place by elimination without pivoting.
 *
 * Element (r, c) lives at array[r + c * size]. For every pivot index p, each
 * entry below it (r > p) is replaced by its multiplier array[r + p*size] /
 * array[p + p*size], and that multiple of row p is subtracted from the
 * remaining entries of row r. The result is consumed by solve().
 *
 * @param array size*size matrix, overwritten with its factors
 * @param size  matrix dimension
 */
static void factor(double *array, int size)
{
    for (int p = 0; p < size - 1; p++) {
        /* The pivot itself is never written by the updates below. */
        const double pivot = array[p + p * size];

        for (int r = p + 1; r < size; r++) {
            const double mult = array[r + p * size] / pivot;

            array[r + p * size] = mult;
            for (int c = p + 1; c < size; c++)
                array[r + c * size] -= mult * array[p + c * size];
        }
    }
}
197
/**
 * Solve a linear system using a matrix previously processed by factor().
 *
 * Element (r, c) of the matrix lives at matrix[r + c * size]. The entries
 * below the diagonal are used as multipliers for the forward pass, the
 * diagonal and the entries above it for the backward pass.
 *
 * @param matrix factored size*size matrix (not modified)
 * @param vector right-hand side on entry, solution on return
 * @param size   system dimension
 */
static void solve(double *matrix, double *vector, int size)
{
    const int last = size - 1;

    /* Forward pass: apply the stored multipliers to the right-hand side. */
    for (int p = 0; p < last; p++)
        for (int r = p + 1; r < size; r++)
            vector[r] -= matrix[r + p * size] * vector[p];

    vector[last] /= matrix[size * size - 1];

    /* Backward pass: substitute the already solved unknowns. */
    for (int r = last - 1; r >= 0; r--) {
        double acc = vector[r];

        for (int c = r + 1; c < size; c++)
            acc -= matrix[r + c * size] * vector[c];
        vector[r] = acc / matrix[r + r * size];
    }
}
216
/**
 * Return the noise profile value for a band.
 *
 * For band < 15 this is simply dnch->band_noise[band]. For any other band the
 * 15 stored values are projected through s->matrix_b, the 5x5 system
 * s->matrix_a is solved for five polynomial coefficients, and the polynomial
 * is evaluated at a position derived from half the sample rate relative to
 * s->band_centre[14].
 *
 * Side effect: s->vector_b is overwritten in the extrapolation case.
 *
 * @param s    filter context
 * @param dnch channel whose band_noise[] is read
 * @param band band index
 * @return profile value, rounded to the nearest integer when extrapolated
 */
static int process_get_band_noise(AudioFFTDeNoiseContext *s,
                                  DeNoiseChannel *dnch,
                                  int band)
{
    double power, acc, pos;

    if (band < 15)
        return dnch->band_noise[band];

    /* vector_b = matrix_b (5 rows of 15) times band_noise */
    for (int row = 0; row < 5; row++) {
        const double *coeff = &s->matrix_b[row * 15];

        acc = 0.0;
        for (int col = 0; col < 15; col++)
            acc += coeff[col] * dnch->band_noise[col];
        s->vector_b[row] = acc;
    }

    solve(s->matrix_a, s->vector_b, 5);

    pos = (0.5 * s->sample_rate) / s->band_centre[14];
    pos = 15.0 + log(pos / 1.5) / log(1.5);

    /* Evaluate sum(vector_b[n] * pos^n) with a running power. */
    acc   = 0.0;
    power = 1.0;
    for (int n = 0; n < 5; n++) {
        acc   += power * s->vector_b[n];
        power *= pos;
    }

    return lrint(acc);
}
246
/**
 * Compute mean-based statistics over noisy_data[start..end) and store them in
 * dnch->sfm_results[].
 *
 * Only bins whose value exceeds s->sample_floor are counted. With more than
 * one counted bin:
 *   sfm_results[0] = arithmetic mean,
 *   sfm_results[1] = log of the arithmetic mean,
 *   sfm_results[2] = log(arithmetic mean) - mean of the logs.
 * Otherwise the results fall back to s->auto_floor and dnch->sfm_threshold.
 *
 * @param s     filter context (sample_floor, auto_floor)
 * @param dnch  channel state (noisy_data read, sfm_results written)
 * @param start first bin (inclusive)
 * @param end   last bin (exclusive)
 */
static void calculate_sfm(AudioFFTDeNoiseContext *s,
                          DeNoiseChannel *dnch,
                          int start, int end)
{
    double total = 0.0, product = 1.0;
    int rescale = 0, count = 0;

    for (int bin = start; bin < end; bin++) {
        const double power = dnch->noisy_data[bin];

        if (!(power > s->sample_floor))
            continue;

        count++;
        total   += power;
        product *= power;
        /* Keep the running product in range; rescale counts factors of 1e100. */
        if (product > 1.0E100) {
            product *= 1.0E-100;
            rescale++;
        } else if (product < 1.0E-100) {
            product *= 1.0E100;
            rescale--;
        }
    }

    if (count <= 1) {
        dnch->sfm_results[0] = s->auto_floor;
        dnch->sfm_results[1] = dnch->sfm_threshold;
        dnch->sfm_results[2] = dnch->sfm_threshold;
        return;
    }

    total /= count;
    dnch->sfm_results[0] = total;
    /* 230.2585 is added back once per factor of 1e100 removed above. */
    product  = log(product) + 230.2585 * rescale;
    product /= count;
    total    = log(total);
    dnch->sfm_results[1] = total;
    dnch->sfm_results[2] = total - product;
}
282
/**
 * Remap a gain value using a limit.
 *
 * @param a gain to remap
 * @param b limit parameter
 * @return (b*a - 1) / (b + a - 2) when a > 1,
 *         (b*a - 2*a + 1) / (b - a) when a < 1,
 *         1.0 otherwise (including when a is NaN)
 */
static double limit_gain(double a, double b)
{
    if (a < 1.0) {
        const double num = b * a - 2.0 * a + 1.0;
        const double den = b - a;

        return num / den;
    }

    if (a > 1.0) {
        const double num = b * a - 1.0;
        const double den = b + a - 2.0;

        return num / den;
    }

    return 1.0;
}
291
/**
 * Compute per-bin gains for one spectrum frame and apply them to fft_data in
 * place.
 *
 * The spectrum is handled as bin_count = fft_length2 + 1 real-valued power
 * bins: bin 0 is taken from fft_data[0].re, bin fft_length2 from
 * fft_data[0].im, and bins 1..fft_length2-1 from fft_data[i].
 *
 * @param s                filter context
 * @param dnch             channel state (noisy_data, clean_data, gain, amt,
 *                         band_* arrays and sfm_* fields are updated)
 * @param fft_data         spectrum, scaled in place by the final gains
 * @param prior            per-bin recursive state, updated
 * @param prior_band_excit per-band excitation of the previous frame, updated
 * @param track_noise      non-zero to run the noise-tracking update
 */
static void process_frame(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch,
                          FFTComplex *fft_data,
                          double *prior, double *prior_band_excit, int track_noise)
{
    double d1, d2, d3, gain;
    int n, i1;

    /* Bin 0: power d1, ratio d2 to abs_var, smoothed estimate d3, then gain. */
    d1 = fft_data[0].re * fft_data[0].re;
    dnch->noisy_data[0] = d1;
    d2 = d1 / dnch->abs_var[0];
    d3 = RATIO * prior[0] + RRATIO * fmax(d2 - 1.0, 0.0);
    gain = d3 / (1.0 + d3);
    gain *= (gain + M_PI_4 / fmax(d2, 1.0E-6));
    prior[0] = (d2 * gain);
    dnch->clean_data[0] = (d1 * gain);
    gain = sqrt(gain);
    dnch->gain[0] = gain;
    /* n ends up as the highest bin whose power exceeds sample_floor. */
    n = 0;
    for (int i = 1; i < s->fft_length2; i++) {
        d1 = fft_data[i].re * fft_data[i].re + fft_data[i].im * fft_data[i].im;
        if (d1 > s->sample_floor)
            n = i;

        dnch->noisy_data[i] = d1;
        d2 = d1 / dnch->abs_var[i];
        d3 = RATIO * prior[i] + RRATIO * fmax(d2 - 1.0, 0.0);
        gain = d3 / (1.0 + d3);
        gain *= (gain + M_PI_4 / fmax(d2, 1.0E-6));
        prior[i] = d2 * gain;
        dnch->clean_data[i] = d1 * gain;
        gain = sqrt(gain);
        dnch->gain[i] = gain;
    }
    /* Bin fft_length2 is read from fft_data[0].im. */
    d1 = fft_data[0].im * fft_data[0].im;
    if (d1 > s->sample_floor)
        n = s->fft_length2;

    dnch->noisy_data[s->fft_length2] = d1;
    d2 = d1 / dnch->abs_var[s->fft_length2];
    d3 = RATIO * prior[s->fft_length2] + RRATIO * fmax(d2 - 1.0, 0.0);
    gain = d3 / (1.0 + d3);
    gain *= gain + M_PI_4 / fmax(d2, 1.0E-6);
    prior[s->fft_length2] = d2 * gain;
    dnch->clean_data[s->fft_length2] = d1 * gain;
    gain = sqrt(gain);
    dnch->gain[s->fft_length2] = gain;
    /* i1: index of the highest noise band edge lying clearly below bin n. */
    if (n > s->fft_length2 - 2) {
        n = s->bin_count;
        i1 = s->noise_band_count;
    } else {
        i1 = 0;
        for (int i = 0; i <= s->noise_band_count; i++) {
            if (n > 1.1 * s->noise_band_edge[i]) {
                i1 = i;
            }
        }
    }

    /* Noise tracking only runs when the signal reaches the upper half of the bands. */
    if (track_noise && (i1 > s->noise_band_count / 2)) {
        int j = FFMIN(n, s->noise_band_edge[i1]);
        int m = 3, k;

        /* Walk the bands downwards, stopping at the first one that meets the threshold test. */
        for (k = i1 - 1; k >= 0; k--) {
            int i = s->noise_band_edge[k];
            calculate_sfm(s, dnch, i, j);
            dnch->noise_band_sample[k] = dnch->sfm_results[0];
            if (dnch->sfm_results[2] + 0.013 * m * fmax(0.0, dnch->sfm_results[1] - 20.53) >= dnch->sfm_threshold) {
                break;
            }
            j = i;
            m++;
        }

        if (k < i1 - 1) {
            /* At least one band passed: derive a bounded correction factor. */
            double sum = 0.0, min, max;
            int i;

            for (i = i1 - 1; i > k; i--) {
                min = log(dnch->noise_band_sample[i] / dnch->noise_band_auto_var[i]);
                sum += min;
            }

            /* i is now the number of bands that passed; min/max bound the log correction. */
            i = i1 - k - 1;
            if (i < 5) {
                min = 3.0E-4 * i * i;
            } else {
                min = 3.0E-4 * (8 * i - 16);
            }
            if (i < 3) {
                max = 2.0E-4 * i * i;
            } else {
                max = 2.0E-4 * (4 * i - 4);
            }

            /* With residual tracking the bounds depend on the gap between the two floors. */
            if (s->track_residual) {
                if (s->last_noise_floor > s->last_residual_floor + 9) {
                    min *= 0.5;
                    max *= 0.75;
                } else if (s->last_noise_floor > s->last_residual_floor + 6) {
                    min *= 0.4;
                    max *= 1.0;
                } else if (s->last_noise_floor > s->last_residual_floor + 4) {
                    min *= 0.3;
                    max *= 1.3;
                } else if (s->last_noise_floor > s->last_residual_floor + 2) {
                    min *= 0.2;
                    max *= 1.6;
                } else if (s->last_noise_floor > s->last_residual_floor) {
                    min *= 0.1;
                    max *= 2.0;
                } else {
                    min = 0.0;
                    max *= 2.5;
                }
            }

            sum = av_clipd(sum, -min, max);
            sum = exp(sum);
            /* The same factor is applied to all 15 bands. */
            for (int i = 0; i < 15; i++)
                dnch->noise_band_auto_var[i] *= sum;
        } else if (dnch->sfm_results[2] >= dnch->sfm_threshold) {
            /* The first band already failed: record it in the ring of failure flags. */
            dnch->sfm_fail_flags[s->block_count & 0x1FF] = 1;
            dnch->sfm_fail_total += 1;
        }
    }

    for (int i = 0; i < s->number_of_bands; i++) {
        dnch->band_excit[i] = 0.0;
        dnch->band_amt[i] = 0.0;
    }

    /* Per-band excitation: sum of the cleaned bin powers. */
    for (int i = 0; i < s->bin_count; i++) {
        dnch->band_excit[s->bin2band[i]] += dnch->clean_data[i];
    }

    /* Never fall below the alpha/beta mix of the current and previous excitation. */
    for (int i = 0; i < s->number_of_bands; i++) {
        dnch->band_excit[i] = fmax(dnch->band_excit[i],
                                   s->band_alpha[i] * dnch->band_excit[i] +
                                   s->band_beta[i] * prior_band_excit[i]);
        prior_band_excit[i] = dnch->band_excit[i];
    }

    /* band_amt = spread_function (row-major matrix) times band_excit. */
    for (int j = 0, i = 0; j < s->number_of_bands; j++) {
        for (int k = 0; k < s->number_of_bands; k++) {
            dnch->band_amt[j] += dnch->spread_function[i++] * dnch->band_excit[k];
        }
    }

    for (int i = 0; i < s->bin_count; i++)
        dnch->amt[i] = dnch->band_amt[s->bin2band[i]];

    /*
     * Final gain per bin: 1.0 when amt exceeds abs_var, otherwise the gain is
     * remapped by limit_gain() with a limit of sqrt(abs_var / amt), or
     * max_gain once amt is not above min_abs_var.
     */
    if (dnch->amt[0] > dnch->abs_var[0]) {
        dnch->gain[0] = 1.0;
    } else if (dnch->amt[0] > dnch->min_abs_var[0]) {
        double limit = sqrt(dnch->abs_var[0] / dnch->amt[0]);
        dnch->gain[0] = limit_gain(dnch->gain[0], limit);
    } else {
        dnch->gain[0] = limit_gain(dnch->gain[0], s->max_gain);
    }
    if (dnch->amt[s->fft_length2] > dnch->abs_var[s->fft_length2]) {
        dnch->gain[s->fft_length2] = 1.0;
    } else if (dnch->amt[s->fft_length2] > dnch->min_abs_var[s->fft_length2]) {
        double limit = sqrt(dnch->abs_var[s->fft_length2] / dnch->amt[s->fft_length2]);
        dnch->gain[s->fft_length2] = limit_gain(dnch->gain[s->fft_length2], limit);
    } else {
        dnch->gain[s->fft_length2] = limit_gain(dnch->gain[s->fft_length2], s->max_gain);
    }

    for (int i = 1; i < s->fft_length2; i++) {
        if (dnch->amt[i] > dnch->abs_var[i]) {
            dnch->gain[i] = 1.0;
        } else if (dnch->amt[i] > dnch->min_abs_var[i]) {
            double limit = sqrt(dnch->abs_var[i] / dnch->amt[i]);
            dnch->gain[i] = limit_gain(dnch->gain[i], limit);
        } else {
            dnch->gain[i] = limit_gain(dnch->gain[i], s->max_gain);
        }
    }

    /* Apply the gains to the spectrum and refresh clean_data with the final values. */
    gain = dnch->gain[0];
    dnch->clean_data[0] = (gain * gain * dnch->noisy_data[0]);
    fft_data[0].re *= gain;
    gain = dnch->gain[s->fft_length2];
    dnch->clean_data[s->fft_length2] = (gain * gain * dnch->noisy_data[s->fft_length2]);
    fft_data[0].im *= gain;
    for (int i = 1; i < s->fft_length2; i++) {
        gain = dnch->gain[i];
        dnch->clean_data[i] = (gain * gain * dnch->noisy_data[i]);
        fft_data[i].re *= gain;
        fft_data[i].im *= gain;
    }
}
484
freq2bark(double x)485 static double freq2bark(double x)
486 {
487 double d = x / 7500.0;
488
489 return 13.0 * atan(7.6E-4 * x) + 3.5 * atan(d * d);
490 }
491
/**
 * Look up the centre of a profile band.
 *
 * @param s    filter context
 * @param band band index, or -1 for a virtual band below band 0
 * @return s->band_centre[band], or s->band_centre[0] / 1.5 rounded to the
 *         nearest integer when band is -1
 */
static int get_band_centre(AudioFFTDeNoiseContext *s, int band)
{
    if (band != -1)
        return s->band_centre[band];

    return lrint(s->band_centre[0] / 1.5);
}
499
/**
 * Compute an edge of a profile band.
 *
 * For band 15 the edge is band_centre[14] multiplied by 1.224745; for any
 * other band it is band_centre[band] divided by 1.224745. The rounded value
 * is limited to half the sample rate.
 *
 * @param s    filter context
 * @param band band index, 15 selecting the edge above the last band
 * @return band edge, at most s->sample_rate / 2
 */
static int get_band_edge(AudioFFTDeNoiseContext *s, int band)
{
    int edge = band == 15 ? lrint(s->band_centre[14] * 1.224745)
                          : lrint(s->band_centre[band] / 1.224745);

    return FFMIN(edge, s->sample_rate / 2);
}
512
/**
 * Recompute the per-bin variance tables of one channel from its noise profile.
 *
 * Fills dnch->rel_var[], dnch->abs_var[], dnch->min_abs_var[] (bins
 * 0..fft_length2) and dnch->noise_band_auto_var[] (15 bands) using
 * s->max_var and s->gain_scale.
 *
 * @param s    filter context
 * @param dnch channel to update
 */
static void set_band_parameters(AudioFFTDeNoiseContext *s,
                                DeNoiseChannel *dnch)
{
    double band_noise, d2, d3, d4, d5;
    int i = 0, j = 0, k = 0;

    d5 = 0.0;
    band_noise = process_get_band_noise(s, dnch, 0);
    /*
     * Walk all bins; [i, j) is the current segment between two band centres,
     * d5 the profile value at its start and band_noise the value at its end.
     */
    for (int m = j; m <= s->fft_length2; m++) {
        if (m == j) {
            /* Reached the end of the segment: advance to the next band centre. */
            i = j;
            d5 = band_noise;
            if (k == 15) {
                j = s->bin_count;
            } else {
                j = s->fft_length * get_band_centre(s, k) / s->sample_rate;
            }
            d2 = j - i;
            band_noise = process_get_band_noise(s, dnch, k);
            k++;
        }
        /* Linear interpolation weights between the two segment ends. */
        d3 = (j - m) / d2;
        d4 = (m - i) / d2;
        dnch->rel_var[m] = exp((d5 * d3 + band_noise * d4) * C);
    }
    /* The last bin always takes the most recent band value without interpolation. */
    dnch->rel_var[s->fft_length2] = exp(band_noise * C);

    for (i = 0; i < 15; i++)
        dnch->noise_band_auto_var[i] = s->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);

    for (i = 0; i <= s->fft_length2; i++) {
        /* abs_var is kept at 1.0 or above. */
        dnch->abs_var[i] = fmax(s->max_var * dnch->rel_var[i], 1.0);
        dnch->min_abs_var[i] = s->gain_scale * dnch->abs_var[i];
    }
}
548
/**
 * Parse the custom noise profile string into a channel's band_noise[].
 *
 * s->band_noise_str is split on '|' and ' '; up to 15 integer tokens are
 * read, each clipped to [-24, 24]. Bands without a token are set to 0.
 * Parsing stops at the first token that is not an integer (an error is
 * logged); the values read so far are still stored.
 *
 * Nothing is changed when the option string is unset or the working copy
 * cannot be allocated.
 *
 * @param s  filter context
 * @param ch index of the channel in s->dnch[] to update
 */
static void read_custom_noise(AudioFFTDeNoiseContext *s, int ch)
{
    DeNoiseChannel *dnch = &s->dnch[ch];
    char *custom_noise_str, *p, *arg, *saveptr = NULL;
    int i, ret, band_noise[15] = { 0 };

    if (!s->band_noise_str)
        return;

    /*
     * av_strtok() modifies its input, so tokenize a private copy. The start
     * of the copy is kept separately because p is reset to NULL after the
     * first token and can no longer be used to free the buffer.
     */
    custom_noise_str = p = av_strdup(s->band_noise_str);
    if (!p)
        return;

    for (i = 0; i < 15; i++) {
        if (!(arg = av_strtok(p, "| ", &saveptr)))
            break;

        /* Subsequent calls continue from saveptr. */
        p = NULL;

        ret = av_sscanf(arg, "%d", &band_noise[i]);
        if (ret != 1) {
            av_log(s, AV_LOG_ERROR, "Custom band noise must be integer.\n");
            break;
        }

        band_noise[i] = av_clip(band_noise[i], -24, 24);
    }

    av_free(custom_noise_str);
    memcpy(dnch->band_noise, band_noise, sizeof(band_noise));
}
580
/**
 * Derive the internal gain and variance parameters from the current options
 * and refresh the per-channel variance tables.
 *
 * With residual tracking enabled the noise floor is raised to at least the
 * residual floor and the noise reduction becomes the (non-negative) gap
 * between the two. Otherwise the residual floor and max_gain are recomputed
 * only when the noise reduction option changed since the last call.
 *
 * @param s filter context
 */
static void set_parameters(AudioFFTDeNoiseContext *s)
{
    if (s->last_noise_floor != s->noise_floor)
        s->last_noise_floor = s->noise_floor;

    if (s->track_residual) {
        s->last_noise_floor     = fmaxf(s->last_noise_floor, s->residual_floor);
        s->last_residual_floor  = s->residual_floor;
        s->last_noise_reduction = fmax(s->last_noise_floor - s->last_residual_floor, 0);
        s->max_gain             = exp(s->last_noise_reduction * (0.5 * C));
    } else if (s->noise_reduction != s->last_noise_reduction) {
        s->last_noise_reduction = s->noise_reduction;
        s->last_residual_floor  = av_clipf(s->last_noise_floor - s->last_noise_reduction, -80, -20);
        s->max_gain             = exp(s->last_noise_reduction * (0.5 * C));
    }

    /* Depends only on the (possibly raised) noise floor. */
    s->max_var    = s->floor * exp((100.0 + s->last_noise_floor) * C);
    s->gain_scale = 1.0 / (s->max_gain * s->max_gain);

    for (int ch = 0; ch < s->channels; ch++)
        set_band_parameters(s, &s->dnch[ch]);
}
609
/**
 * Configure the filter for the input link: derive all sizes from the sample
 * rate, build the lookup tables and allocate per-channel state.
 *
 * @param inlink input link (channels, sample_rate and format are read)
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure, AVERROR_BUG
 *         for an unknown noise type
 */
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    AudioFFTDeNoiseContext *s = ctx->priv;
    double wscale, sar, sum, sdiv;
    int i, j, k, m, n;

    s->dnch = av_calloc(inlink->channels, sizeof(*s->dnch));
    if (!s->dnch)
        return AVERROR(ENOMEM);

    /* All frame and transform sizes follow from the sample rate. */
    s->pts            = AV_NOPTS_VALUE;
    s->channels       = inlink->channels;
    s->sample_rate    = inlink->sample_rate;
    s->sample_advance = s->sample_rate / 80;
    s->window_length  = 3 * s->sample_advance;
    /* presumably the smallest power of two above window_length - depends on ff_clz() semantics */
    s->fft_length2    = 1 << (32 - ff_clz(s->window_length));
    s->fft_length     = s->fft_length2 * 2;
    s->buffer_length  = s->fft_length * 2;
    s->bin_count      = s->fft_length2 + 1;

    /* Band centres start at 80 and grow by about 1.5x, rounded to "nice" steps. */
    s->band_centre[0] = 80;
    for (i = 1; i < 15; i++) {
        s->band_centre[i] = lrint(1.5 * s->band_centre[i - 1] + 5.0);
        if (s->band_centre[i] < 1000) {
            s->band_centre[i] = 10 * (s->band_centre[i] / 10);
        } else if (s->band_centre[i] < 5000) {
            s->band_centre[i] = 50 * ((s->band_centre[i] + 20) / 50);
        } else if (s->band_centre[i] < 15000) {
            s->band_centre[i] = 100 * ((s->band_centre[i] + 45) / 100);
        } else {
            s->band_centre[i] = 1000 * ((s->band_centre[i] + 495) / 1000);
        }
    }

    /* matrix_a[j + 5k] = sum over m = 0..14 of m^(j+k); factored once for solve(). */
    for (j = 0; j < 5; j++) {
        for (k = 0; k < 5; k++) {
            s->matrix_a[j + k * 5] = 0.0;
            for (m = 0; m < 15; m++)
                s->matrix_a[j + k * 5] += pow(m, j + k);
        }
    }

    factor(s->matrix_a, 5);

    /* matrix_b: 5 rows of 15 holding k^j. */
    i = 0;
    for (j = 0; j < 5; j++)
        for (k = 0; k < 15; k++)
            s->matrix_b[i++] = pow(k, j);

    /* matrix_c: 15 rows of 5 holding j^k. */
    i = 0;
    for (j = 0; j < 15; j++)
        for (k = 0; k < 5; k++)
            s->matrix_c[i++] = pow(j, k);

    s->window   = av_calloc(s->window_length, sizeof(*s->window));
    s->bin2band = av_calloc(s->bin_count, sizeof(*s->bin2band));
    if (!s->window || !s->bin2band)
        return AVERROR(ENOMEM);

    /* Map every bin to a band via freq2bark(), scaled by sdiv. */
    sdiv = s->sample_rate / 17640.0;
    for (i = 0; i <= s->fft_length2; i++)
        s->bin2band[i] = lrint(sdiv * freq2bark((0.5 * i * s->sample_rate) / s->fft_length2));

    s->number_of_bands = s->bin2band[s->fft_length2] + 1;

    s->band_alpha = av_calloc(s->number_of_bands, sizeof(*s->band_alpha));
    s->band_beta  = av_calloc(s->number_of_bands, sizeof(*s->band_beta));
    if (!s->band_alpha || !s->band_beta)
        return AVERROR(ENOMEM);

    /* First pass over the channels: noise profile, tracking state and buffers. */
    for (int ch = 0; ch < inlink->channels; ch++) {
        DeNoiseChannel *dnch = &s->dnch[ch];

        switch (s->noise_type) {
        case WHITE_NOISE:
            for (i = 0; i < 15; i++)
                dnch->band_noise[i] = 0;
            break;
        case VINYL_NOISE:
            for (i = 0; i < 15; i++)
                dnch->band_noise[i] = get_band_noise(s, i, 50.0, 500.5, 2125.0) + FFMAX(i - 7, 0);
            break;
        case SHELLAC_NOISE:
            for (i = 0; i < 15; i++)
                dnch->band_noise[i] = get_band_noise(s, i, 1.0, 500.0, 1.0E10) + FFMAX(i - 12, -5);
            break;
        case CUSTOM_NOISE:
            read_custom_noise(s, ch);
            break;
        default:
            return AVERROR_BUG;
        }


        dnch->sfm_threshold = 0.8;
        dnch->sfm_alpha = 0.05;
        for (i = 0; i < 512; i++)
            dnch->sfm_fail_flags[i] = 0;

        /* Pre-set every j-th failure flag so that sfm_fail_total starts non-zero. */
        dnch->sfm_fail_total = 0;
        j = FFMAX((int)(10.0 * (1.3 - dnch->sfm_threshold)), 1);

        for (i = 0; i < 512; i += j) {
            dnch->sfm_fail_flags[i] = 1;
            dnch->sfm_fail_total += 1;
        }

        dnch->amt = av_calloc(s->bin_count, sizeof(*dnch->amt));
        dnch->band_amt = av_calloc(s->number_of_bands, sizeof(*dnch->band_amt));
        dnch->band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->band_excit));
        dnch->gain = av_calloc(s->bin_count, sizeof(*dnch->gain));
        dnch->prior = av_calloc(s->bin_count, sizeof(*dnch->prior));
        dnch->prior_band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->prior_band_excit));
        dnch->clean_data = av_calloc(s->bin_count, sizeof(*dnch->clean_data));
        dnch->noisy_data = av_calloc(s->bin_count, sizeof(*dnch->noisy_data));
        dnch->out_samples = av_calloc(s->buffer_length, sizeof(*dnch->out_samples));
        dnch->abs_var = av_calloc(s->bin_count, sizeof(*dnch->abs_var));
        dnch->rel_var = av_calloc(s->bin_count, sizeof(*dnch->rel_var));
        dnch->min_abs_var = av_calloc(s->bin_count, sizeof(*dnch->min_abs_var));
        dnch->fft_data = av_calloc(s->fft_length2 + 1, sizeof(*dnch->fft_data));
        dnch->fft  = av_fft_init(av_log2(s->fft_length2), 0);
        dnch->ifft = av_fft_init(av_log2(s->fft_length2), 1);
        dnch->spread_function = av_calloc(s->number_of_bands * s->number_of_bands,
                                          sizeof(*dnch->spread_function));

        /* All results are checked together; nothing is freed here on failure. */
        if (!dnch->amt ||
            !dnch->band_amt ||
            !dnch->band_excit ||
            !dnch->gain ||
            !dnch->prior ||
            !dnch->prior_band_excit ||
            !dnch->clean_data ||
            !dnch->noisy_data ||
            !dnch->out_samples ||
            !dnch->fft_data ||
            !dnch->abs_var ||
            !dnch->rel_var ||
            !dnch->min_abs_var ||
            !dnch->spread_function ||
            !dnch->fft ||
            !dnch->ifft)
            return AVERROR(ENOMEM);
    }

    /* Second pass: build the spreading matrix and initial state of each channel. */
    for (int ch = 0; ch < inlink->channels; ch++) {
        DeNoiseChannel *dnch = &s->dnch[ch];
        double *prior_band_excit = dnch->prior_band_excit;
        double *prior = dnch->prior;
        double min, max;
        double p1, p2;

        /* Entry (m, n) decays as p2^(m-n) below the diagonal and p1^(n-m) above it. */
        p1 = pow(0.1, 2.5 / sdiv);
        p2 = pow(0.1, 1.0 / sdiv);
        j = 0;
        for (m = 0; m < s->number_of_bands; m++) {
            for (n = 0; n < s->number_of_bands; n++) {
                if (n < m) {
                    dnch->spread_function[j++] = pow(p2, m - n);
                } else if (n > m) {
                    dnch->spread_function[j++] = pow(p1, n - m);
                } else {
                    dnch->spread_function[j++] = 1.0;
                }
            }
        }

        for (m = 0; m < s->number_of_bands; m++) {
            dnch->band_excit[m] = 0.0;
            prior_band_excit[m] = 0.0;
        }

        /* band_excit temporarily holds the number of bins in each band. */
        for (m = 0; m <= s->fft_length2; m++)
            dnch->band_excit[s->bin2band[m]] += 1.0;

        /* prior_band_excit = spread_function times the bin counts; used as row normaliser below. */
        j = 0;
        for (m = 0; m < s->number_of_bands; m++) {
            for (n = 0; n < s->number_of_bands; n++)
                prior_band_excit[m] += dnch->spread_function[j++] * dnch->band_excit[n];
        }

        /* band_excit is now reused for a per-band weight clipped to [min, max]. */
        min = pow(0.1, 2.5);
        max = pow(0.1, 1.0);
        for (int i = 0; i < s->number_of_bands; i++) {
            if (i < lrint(12.0 * sdiv)) {
                dnch->band_excit[i] = pow(0.1, 1.45 + 0.1 * i / sdiv);
            } else {
                dnch->band_excit[i] = pow(0.1, 2.5 - 0.2 * (i / sdiv - 14.0));
            }
            dnch->band_excit[i] = av_clipd(dnch->band_excit[i], min, max);
        }

        for (int i = 0; i <= s->fft_length2; i++)
            prior[i] = RRATIO;
        for (int i = 0; i < s->buffer_length; i++)
            dnch->out_samples[i] = 0;

        /* Scale every row i of the spreading matrix by weight[i] / normaliser[i]. */
        j = 0;
        for (int i = 0; i < s->number_of_bands; i++)
            for (int k = 0; k < s->number_of_bands; k++)
                dnch->spread_function[j++] *= dnch->band_excit[i] / prior_band_excit[i];
    }

    /* Per-band smoothing coefficients, computed at each bin where the band index changes. */
    j = 0;
    sar = s->sample_advance / s->sample_rate;
    for (int i = 0; i <= s->fft_length2; i++) {
        if ((i == s->fft_length2) || (s->bin2band[i] > j)) {
            double d6 = (i - 1) * s->sample_rate / s->fft_length;
            double d7 = fmin(0.008 + 2.2 / d6, 0.03);
            s->band_alpha[j] = exp(-sar / d7);
            s->band_beta[j] = 1.0 - s->band_alpha[j];
            j = s->bin2band[i];
        }
    }

    /* Analysis window: wscale * sin^2(pi * i / window_length). */
    wscale = sqrt(16.0 / (9.0 * s->fft_length));
    sum = 0.0;
    for (int i = 0; i < s->window_length; i++) {
        double d10 = sin(i * M_PI / s->window_length);
        d10 *= wscale * d10;
        s->window[i] = d10;
        sum += d10 * d10;
    }

    /* Floors are expressed relative to the window energy. */
    s->window_weight = 0.5 * sum;
    s->floor = (1LL << 48) * exp(-23.025558369790467) * s->window_weight;
    s->sample_floor = s->floor * exp(4.144600506562284);
    s->auto_floor = s->floor * exp(6.907667510937141);

    set_parameters(s);

    /* Band edges in bins, limited to fft_length2; i counts edges more than ~10% above the previous one. */
    s->noise_band_edge[0] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, 0) / s->sample_rate);
    i = 0;
    for (int j = 1; j < 16; j++) {
        s->noise_band_edge[j] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, j) / s->sample_rate);
        if (s->noise_band_edge[j] > lrint(1.1 * s->noise_band_edge[j - 1]))
            i++;
        s->noise_band_edge[16] = i;
    }
    s->noise_band_count = s->noise_band_edge[16];

    s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->fft_length);
    if (!s->fifo)
        return AVERROR(ENOMEM);

    return 0;
}
857
/**
 * Combine mirrored bin pairs of a complex transform result in place.
 *
 * Each pair (i, len/2 - i), for i = 1 .. len/4 - 1, is recombined using a
 * rotating factor (cos(i*theta), sin(i*theta)) with theta = 2*pi/len, which
 * is advanced by a trigonometric recurrence instead of calling sin/cos per
 * bin. Finally in[0] is replaced by (re + im, re - im).
 *
 * @param in  spectrum buffer, modified in place
 * @param len transform length the rotation step is based on
 */
static void preprocess(FFTComplex *in, int len)
{
    const double theta    = 2.0 * M_PI / len;
    const double half_sin = sin(0.5 * theta);
    const double cos_m1   = -2.0 * half_sin * half_sin; /* cos(theta) - 1 */
    const double sin_step = sin(theta);
    const int    half     = len / 2;
    double rot_cos = 1.0 + cos_m1;
    double rot_sin = sin_step;
    double dc;

    for (int lo = 1; lo < len / 4; lo++) {
        const int    hi      = half - lo;
        const double re_avg  = 0.5 * (in[lo].re + in[hi].re);
        const double im_half = 0.5 * (in[lo].im - in[hi].im);
        const double im_avg  = 0.5 * (in[lo].im + in[hi].im);
        const double re_half = 0.5 * (in[hi].re - in[lo].re);
        const double old_cos = rot_cos;

        in[lo].re =  re_avg  + rot_cos * im_avg  + rot_sin * re_half;
        in[lo].im =  im_half + rot_cos * re_half - rot_sin * im_avg;
        in[hi].re =  re_avg  - rot_cos * im_avg  - rot_sin * re_half;
        in[hi].im = -im_half + rot_cos * re_half - rot_sin * im_avg;

        /* Advance the rotation by one step of theta. */
        rot_cos += rot_cos * cos_m1 - rot_sin * sin_step;
        rot_sin += rot_sin * cos_m1 + old_cos * sin_step;
    }

    dc = in[0].re;
    in[0].re = dc + in[0].im;
    in[0].im = dc - in[0].im;
}
890
/**
 * Counterpart of preprocess(): recombine mirrored bin pairs in place with the
 * opposite sign convention.
 *
 * Each pair (i, len/2 - i), for i = 1 .. len/4 - 1, is recombined using a
 * rotating factor (cos(i*theta), sin(i*theta)) with theta = 2*pi/len,
 * advanced by a trigonometric recurrence. Finally in[0] is replaced by
 * (0.5 * (re + im), 0.5 * (re - im)).
 *
 * @param in  spectrum buffer, modified in place
 * @param len transform length the rotation step is based on
 */
static void postprocess(FFTComplex *in, int len)
{
    const double theta    = 2.0 * M_PI / len;
    const double half_sin = sin(0.5 * theta);
    const double cos_m1   = -2.0 * half_sin * half_sin; /* cos(theta) - 1 */
    const double sin_step = sin(theta);
    const int    half     = len / 2;
    double rot_cos = 1.0 + cos_m1;
    double rot_sin = sin_step;
    double dc;

    for (int lo = 1; lo < len / 4; lo++) {
        const int    hi      = half - lo;
        const double re_avg  = 0.5 * (in[lo].re + in[hi].re);
        const double im_half = 0.5 * (in[lo].im - in[hi].im);
        const double re_half = 0.5 * (in[lo].re - in[hi].re);
        const double im_avg  = 0.5 * (in[lo].im + in[hi].im);
        const double old_cos = rot_cos;

        in[lo].re =  re_avg  - rot_cos * im_avg  - rot_sin * re_half;
        in[lo].im =  im_half + rot_cos * re_half - rot_sin * im_avg;
        in[hi].re =  re_avg  + rot_cos * im_avg  + rot_sin * re_half;
        in[hi].im = -im_half + rot_cos * re_half - rot_sin * im_avg;

        /* Advance the rotation by one step of theta. */
        rot_cos += rot_cos * cos_m1 - rot_sin * sin_step;
        rot_sin += rot_sin * cos_m1 + old_cos * sin_step;
    }

    dc = in[0].re;
    in[0].re = 0.5 * (dc + in[0].im);
    in[0].im = 0.5 * (dc - in[0].im);
}
921
/**
 * Reset the four per-band accumulators that sample_noise_block() adds to.
 *
 * @param dnch channel whose noise_band_norm/avr/avi/var arrays are zeroed
 */
static void init_sample_noise(DeNoiseChannel *dnch)
{
    int band = 15;

    while (band-- > 0) {
        dnch->noise_band_norm[band] = 0.0;
        dnch->noise_band_avr[band]  = 0.0;
        dnch->noise_band_avi[band]  = 0.0;
        dnch->noise_band_var[band]  = 0.0;
    }
}
931
/**
 * Analyse one block of input samples and add its per-band statistics to the
 * channel's noise_band_* accumulators.
 *
 * The block is windowed, scaled by 2^24, zero-padded to fft_length2 and
 * transformed; bin powers (not lower than sample_floor) are also stored in
 * dnch->noisy_data[].
 *
 * @param s    filter context
 * @param dnch channel state to update
 * @param in   input frame; channel data is read as float samples
 * @param ch   channel index into in->extended_data[]
 */
static void sample_noise_block(AudioFFTDeNoiseContext *s,
                               DeNoiseChannel *dnch,
                               AVFrame *in, int ch)
{
    float *src = (float *)in->extended_data[ch];
    double mag2, var = 0.0, avr = 0.0, avi = 0.0;
    int edge, j, k, n, edgemax;

    for (int i = 0; i < s->window_length; i++) {
        dnch->fft_data[i].re = s->window[i] * src[i] * (1LL << 24);
        dnch->fft_data[i].im = 0.0;
    }

    for (int i = s->window_length; i < s->fft_length2; i++) {
        dnch->fft_data[i].re = 0.0;
        dnch->fft_data[i].im = 0.0;
    }

    av_fft_permute(dnch->fft, dnch->fft_data);
    av_fft_calc(dnch->fft, dnch->fft_data);

    preprocess(dnch->fft_data, s->fft_length);

    /*
     * [edge, j) is the current band in bins, k the number of bands entered so
     * far, n the bin being read (it advances in step with i).
     */
    edge = s->noise_band_edge[0];
    j = edge;
    k = 0;
    n = j;
    edgemax = fmin(s->fft_length2, s->noise_band_edge[15]);
    /* Move the value held in fft_data[0].im to its own entry at index fft_length2. */
    dnch->fft_data[s->fft_length2].re = dnch->fft_data[0].im;
    dnch->fft_data[0].im = 0.0;
    dnch->fft_data[s->fft_length2].im = 0.0;

    for (int i = j; i <= edgemax; i++) {
        if ((i == j) && (i < edgemax)) {
            /* Band boundary: flush the sums of the band just finished, if any. */
            if (j > edge) {
                dnch->noise_band_norm[k - 1] += j - edge;
                dnch->noise_band_avr[k - 1] += avr;
                dnch->noise_band_avi[k - 1] += avi;
                dnch->noise_band_var[k - 1] += var;
            }
            k++;
            edge = j;
            j = s->noise_band_edge[k];
            /* The last band also includes its upper edge bin. */
            if (k == 15) {
                j++;
            }
            var = 0.0;
            avr = 0.0;
            avi = 0.0;
        }
        avr += dnch->fft_data[n].re;
        avi += dnch->fft_data[n].im;
        mag2 = dnch->fft_data[n].re * dnch->fft_data[n].re +
               dnch->fft_data[n].im * dnch->fft_data[n].im;

        mag2 = fmax(mag2, s->sample_floor);

        dnch->noisy_data[i] = mag2;
        var += mag2;
        n++;
    }

    /* Flush the sums of the final band. */
    dnch->noise_band_norm[k - 1] += j - edge;
    dnch->noise_band_avr[k - 1] += avr;
    dnch->noise_band_avi[k - 1] += avi;
    dnch->noise_band_var[k - 1] += var;
}
999
/**
 * Turn the sums gathered by sample_noise_block() into per-band statistics.
 *
 * For each of the first noise_band_count bands the accumulated real/imaginary
 * sums and power sum are divided by the accumulated bin count, the squared
 * magnitude of the mean is subtracted from the mean power, and the result is
 * stored both as noise_band_var[] and noise_band_auto_var[]. sample_noise[]
 * receives that variance relative to s->floor on the dB-like scale defined by
 * C, minus 100. Remaining entries up to 15 repeat the previous entry.
 *
 * @param s            filter context (noise_band_count, floor)
 * @param dnch         channel whose accumulators are normalised in place
 * @param sample_noise output array of 15 values
 */
static void finish_sample_noise(AudioFFTDeNoiseContext *s,
                                DeNoiseChannel *dnch,
                                double *sample_noise)
{
    const int used = s->noise_band_count;

    for (int band = 0; band < used; band++) {
        const double norm    = dnch->noise_band_norm[band];
        const double mean_re = dnch->noise_band_avr[band] / norm;
        const double mean_im = dnch->noise_band_avi[band] / norm;
        double variance      = dnch->noise_band_var[band] / norm;

        variance -= mean_re * mean_re + mean_im * mean_im;

        dnch->noise_band_avr[band]      = mean_re;
        dnch->noise_band_avi[band]      = mean_im;
        dnch->noise_band_var[band]      = variance;
        dnch->noise_band_auto_var[band] = variance;
        sample_noise[band] = (1.0 / C) * log(variance / s->floor) - 100.0;
    }

    /* Bands that were not measured repeat the value of the band before them. */
    for (int band = used; band < 15; band++)
        sample_noise[band] = sample_noise[band - 1];
}
1018
/**
 * Derive a noise floor, and optionally a new band profile, from measured
 * per-band noise values.
 *
 * @param s            filter context; noise_floor is overwritten when
 *                     track_noise is set, vector_b is clobbered when
 *                     new_profile is set
 * @param dnch         channel whose band_noise[] is read and, with
 *                     new_profile, replaced
 * @param sample_noise 15 measured values (not modified)
 * @param new_profile  non-zero to fit, log and store a new band profile
 */
static void set_noise_profile(AudioFFTDeNoiseContext *s,
                              DeNoiseChannel *dnch,
                              double *sample_noise,
                              int new_profile)
{
    int new_band_noise[15];
    double temp[15];
    double sum = 0.0, d1;
    float new_noise_floor;
    int i, n;

    for (int m = 0; m < 15; m++)
        temp[m] = sample_noise[m];

    if (new_profile) {
        /* Smooth the measurements: fit five coefficients (matrix_b, solve) ... */
        i = 0;
        for (int m = 0; m < 5; m++) {
            sum = 0.0;
            for (n = 0; n < 15; n++)
                sum += s->matrix_b[i++] * temp[n];
            s->vector_b[m] = sum;
        }
        solve(s->matrix_a, s->vector_b, 5);
        /* ... and evaluate the fit at the 15 band positions (matrix_c). */
        i = 0;
        for (int m = 0; m < 15; m++) {
            sum = 0.0;
            for (n = 0; n < 5; n++)
                sum += s->matrix_c[i++] * s->vector_b[n];
            temp[m] = sum;
        }
    }

    sum = 0.0;
    for (int m = 0; m < 15; m++)
        sum += temp[m];

    /* d1: truncated mean of the 15 values, offset by -0.5. */
    d1 = (int)(sum / 15.0 - 0.5);
    if (!new_profile)
        i = lrint(temp[7] - d1);

    /*
     * NOTE(review): when new_profile is non-zero, i is not assigned above and
     * still holds the running matrix_c index from the evaluation loop (75) -
     * confirm this is intended.
     * The empty loop lowers d1 in steps of 1.0 until it is no longer above -20.
     */
    for (d1 -= dnch->band_noise[7] - i; d1 > -20.0; d1 -= 1.0)
        ;

    for (int m = 0; m < 15; m++)
        temp[m] -= d1;

    new_noise_floor = d1 + 2.5;

    if (new_profile) {
        /* Log the profile in the syntax of the "bn" option and store it. */
        av_log(s, AV_LOG_INFO, "bn=");
        for (int m = 0; m < 15; m++) {
            new_band_noise[m] = lrint(temp[m]);
            new_band_noise[m] = av_clip(new_band_noise[m], -24, 24);
            av_log(s, AV_LOG_INFO, "%d ", new_band_noise[m]);
        }
        av_log(s, AV_LOG_INFO, "\n");
        memcpy(dnch->band_noise, new_band_noise, sizeof(new_band_noise));
    }

    if (s->track_noise)
        s->noise_floor = new_noise_floor;
}
1081
1082 typedef struct ThreadData {
1083 AVFrame *in;
1084 } ThreadData;
1085
/**
 * Slice-threading worker: denoise one window for a range of channels.
 *
 * Channels are split evenly between jobs. For each channel the input window
 * is multiplied by s->window, scaled by 2^24 and zero-padded to fft_length2.
 * It then goes through the forward FFT, process_frame() and the inverse FFT.
 * The windowed result is scaled back by 2^24 and added onto
 * dnch->out_samples.
 *
 * @param ctx     filter context
 * @param arg     ThreadData carrying the input frame
 * @param jobnr   index of this job
 * @param nb_jobs total number of jobs
 * @return always 0
 */
static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    AudioFFTDeNoiseContext *s = ctx->priv;
    ThreadData *td = arg;
    AVFrame *in = td->in;
    const int first_ch = (in->channels * jobnr) / nb_jobs;
    const int last_ch  = (in->channels * (jobnr + 1)) / nb_jobs;
    int ch = first_ch;

    while (ch < last_ch) {
        DeNoiseChannel *dnch = &s->dnch[ch];
        const float *input = (const float *)in->extended_data[ch];
        double *overlap = dnch->out_samples;
        int n;

        if (s->track_noise) {
            /* 512-entry ring of failure flags indexed by the block counter. */
            const int slot = s->block_count & 0x1FF;

            if (dnch->sfm_fail_flags[slot])
                dnch->sfm_fail_total--;
            dnch->sfm_fail_flags[slot] = 0;
            dnch->sfm_threshold *= 1.0 - dnch->sfm_alpha;
            dnch->sfm_threshold += dnch->sfm_alpha * (0.5 + (1.0 / 640) * dnch->sfm_fail_total);
        }

        /* Windowed, scaled input followed by zero padding. */
        for (n = 0; n < s->window_length; n++) {
            dnch->fft_data[n].re = s->window[n] * input[n] * (1LL << 24);
            dnch->fft_data[n].im = 0;
        }
        for (n = s->window_length; n < s->fft_length2; n++) {
            dnch->fft_data[n].re = 0;
            dnch->fft_data[n].im = 0;
        }

        av_fft_permute(dnch->fft, dnch->fft_data);
        av_fft_calc(dnch->fft, dnch->fft_data);

        preprocess(dnch->fft_data, s->fft_length);
        process_frame(s, dnch, dnch->fft_data,
                      dnch->prior,
                      dnch->prior_band_excit,
                      s->track_noise);
        postprocess(dnch->fft_data, s->fft_length);

        av_fft_permute(dnch->ifft, dnch->fft_data);
        av_fft_calc(dnch->ifft, dnch->fft_data);

        /* Overlap-add: accumulate the windowed output, undoing the 2^24 scale. */
        for (n = 0; n < s->window_length; n++)
            overlap[n] += s->window[n] * dnch->fft_data[n].re / (1LL << 24);

        ch++;
    }

    return 0;
}
1138
/**
 * Report the current automatically tracked noise level of every band.
 *
 * Each active band yields 10*log10(noise_band_auto_var / s->floor) - 100.
 * Entries beyond the last active band repeat the last computed level. When
 * there is no active band at all, every entry is -100.
 *
 * @param s      filter context; noise_band_count and floor are read
 * @param dnch   channel whose noise_band_auto_var is read
 * @param levels output array of 15 levels
 */
static void get_auto_noise_levels(AudioFFTDeNoiseContext *s,
                                  DeNoiseChannel *dnch,
                                  double *levels)
{
    const int nb_bands = s->noise_band_count;
    int band = 0;

    if (nb_bands <= 0) {
        while (band < 15)
            levels[band++] = -100.0;
        return;
    }

    for (; band < nb_bands; band++)
        levels[band] = (1.0 / C) * log(dnch->noise_band_auto_var[band] / s->floor) - 100.0;

    /* band == nb_bands >= 1 here, so band - 1 is always a valid index. */
    for (; band < 15; band++)
        levels[band] = levels[band - 1];
}
1157
/**
 * Process one analysis window from the FIFO and emit sample_advance samples.
 *
 * Steps, in order:
 *  - peek window_length samples from the FIFO without consuming them;
 *  - when noise tracking is on, refresh the noise floor from the tracked
 *    band variances and re-run set_parameters() if it changed;
 *  - advance the noise sampling state (start / accumulate / finish), driven
 *    by the sample_noise_start and sample_noise_end flags;
 *  - run filter_channel() over all channels via the threading executor;
 *  - copy sample_advance samples to a new output frame according to
 *    output_mode, shift each channel's overlap buffer and drain the FIFO.
 *
 * @param inlink input link of the filter
 * @return 0 or the (non-negative) result of ff_filter_frame() on success,
 *         a negative AVERROR code on failure
 */
static int output_frame(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    AVFilterLink *outlink = ctx->outputs[0];
    AudioFFTDeNoiseContext *s = ctx->priv;
    AVFrame *out = NULL, *in = NULL;
    ThreadData td;
    int ret = 0;

    in = ff_get_audio_buffer(outlink, s->window_length);
    if (!in)
        return AVERROR(ENOMEM);

    /* Peek only: windows overlap, so just sample_advance samples get drained. */
    ret = av_audio_fifo_peek(s->fifo, (void **)in->extended_data, s->window_length);
    if (ret < 0)
        goto end;

    if (s->track_noise) {
        for (int ch = 0; ch < inlink->channels; ch++) {
            DeNoiseChannel *dnch = &s->dnch[ch];
            double levels[15];

            get_auto_noise_levels(s, dnch, levels);
            set_noise_profile(s, dnch, levels, 0);
        }

        if (s->noise_floor != s->last_noise_floor)
            set_parameters(s);
    }

    if (s->sample_noise_start) {
        for (int ch = 0; ch < inlink->channels; ch++) {
            DeNoiseChannel *dnch = &s->dnch[ch];

            init_sample_noise(dnch);
        }
        s->sample_noise_start = 0;
        s->sample_noise = 1;
    }

    if (s->sample_noise) {
        for (int ch = 0; ch < inlink->channels; ch++) {
            DeNoiseChannel *dnch = &s->dnch[ch];

            sample_noise_block(s, dnch, in, ch);
        }
    }

    if (s->sample_noise_end) {
        /* Only build a profile if sampling was actually running. A "stop"
         * with no sampled block would make finish_sample_noise() divide
         * accumulators that were never filled for this run, and would
         * install a bogus profile. */
        if (s->sample_noise) {
            for (int ch = 0; ch < inlink->channels; ch++) {
                DeNoiseChannel *dnch = &s->dnch[ch];
                double sample_noise[15];

                finish_sample_noise(s, dnch, sample_noise);
                set_noise_profile(s, dnch, sample_noise, 1);
                set_band_parameters(s, dnch);
            }
        }
        s->sample_noise = 0;
        s->sample_noise_end = 0;
    }

    s->block_count++;
    td.in = in;
    ctx->internal->execute(ctx, filter_channel, &td, NULL,
                           FFMIN(outlink->channels, ff_filter_get_nb_threads(ctx)));

    out = ff_get_audio_buffer(outlink, s->sample_advance);
    if (!out) {
        ret = AVERROR(ENOMEM);
        goto end;
    }

    for (int ch = 0; ch < inlink->channels; ch++) {
        DeNoiseChannel *dnch = &s->dnch[ch];
        double *src = dnch->out_samples;
        float *orig = (float *)in->extended_data[ch];
        float *dst = (float *)out->extended_data[ch];

        switch (s->output_mode) {
        case IN_MODE:       /* pass the unprocessed input through */
            for (int m = 0; m < s->sample_advance; m++)
                dst[m] = orig[m];
            break;
        case OUT_MODE:      /* denoised signal */
            for (int m = 0; m < s->sample_advance; m++)
                dst[m] = src[m];
            break;
        case NOISE_MODE:    /* what was removed: input minus denoised */
            for (int m = 0; m < s->sample_advance; m++)
                dst[m] = orig[m] - src[m];
            break;
        default:
            av_frame_free(&out);
            ret = AVERROR_BUG;
            goto end;
        }

        /* Slide the overlap buffer forward and clear the vacated tail so the
         * next window can be accumulated onto it. */
        memmove(src, src + s->sample_advance, (s->window_length - s->sample_advance) * sizeof(*src));
        memset(src + (s->window_length - s->sample_advance), 0, s->sample_advance * sizeof(*src));
    }

    av_audio_fifo_drain(s->fifo, s->sample_advance);

    out->pts = s->pts;
    ret = ff_filter_frame(outlink, out);
    if (ret < 0)
        goto end;
    s->pts += av_rescale_q(s->sample_advance, (AVRational){1, outlink->sample_rate}, outlink->time_base);

end:
    av_frame_free(&in);

    return ret;
}
1270
/**
 * Activation callback: buffer incoming audio and produce output when a full
 * window is available.
 *
 * Any consumed input frame is appended to the FIFO; the first frame's pts
 * seeds s->pts. Once the FIFO holds at least window_length samples one
 * block is produced through output_frame(). Otherwise the input status is
 * forwarded and, if the output wants data, another input frame is requested.
 *
 * @param ctx filter context
 * @return 0 or the result of output_frame() when work was done,
 *         FFERROR_NOT_READY when nothing could be done,
 *         a negative AVERROR code on failure
 */
static int activate(AVFilterContext *ctx)
{
    AudioFFTDeNoiseContext *s = ctx->priv;
    AVFilterLink *inlink  = ctx->inputs[0];
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *pending = NULL;
    int status;

    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

    status = ff_inlink_consume_frame(inlink, &pending);
    if (status < 0)
        return status;

    if (status > 0) {
        if (s->pts == AV_NOPTS_VALUE)
            s->pts = pending->pts;

        status = av_audio_fifo_write(s->fifo, (void **)pending->extended_data,
                                     pending->nb_samples);
        av_frame_free(&pending);
        if (status < 0)
            return status;
    }

    if (av_audio_fifo_size(s->fifo) >= s->window_length)
        return output_frame(inlink);

    FF_FILTER_FORWARD_STATUS(inlink, outlink);

    if (!ff_outlink_frame_wanted(outlink) ||
        av_audio_fifo_size(s->fifo) >= s->window_length)
        return FFERROR_NOT_READY;

    ff_inlink_request_frame(inlink);
    return 0;
}
1307
uninit(AVFilterContext * ctx)1308 static av_cold void uninit(AVFilterContext *ctx)
1309 {
1310 AudioFFTDeNoiseContext *s = ctx->priv;
1311
1312 av_freep(&s->window);
1313 av_freep(&s->bin2band);
1314 av_freep(&s->band_alpha);
1315 av_freep(&s->band_beta);
1316
1317 if (s->dnch) {
1318 for (int ch = 0; ch < s->channels; ch++) {
1319 DeNoiseChannel *dnch = &s->dnch[ch];
1320 av_freep(&dnch->amt);
1321 av_freep(&dnch->band_amt);
1322 av_freep(&dnch->band_excit);
1323 av_freep(&dnch->gain);
1324 av_freep(&dnch->prior);
1325 av_freep(&dnch->prior_band_excit);
1326 av_freep(&dnch->clean_data);
1327 av_freep(&dnch->noisy_data);
1328 av_freep(&dnch->out_samples);
1329 av_freep(&dnch->spread_function);
1330 av_freep(&dnch->abs_var);
1331 av_freep(&dnch->rel_var);
1332 av_freep(&dnch->min_abs_var);
1333 av_freep(&dnch->fft_data);
1334 av_fft_end(dnch->fft);
1335 dnch->fft = NULL;
1336 av_fft_end(dnch->ifft);
1337 dnch->ifft = NULL;
1338 }
1339 av_freep(&s->dnch);
1340 }
1341
1342 av_audio_fifo_free(s->fifo);
1343 }
1344
/**
 * Declare the formats the filter supports: planar float samples, every
 * channel count and every sample rate.
 *
 * @param ctx filter context
 * @return the result of ff_set_common_samplerates() on success,
 *         a negative AVERROR code on failure
 */
static int query_formats(AVFilterContext *ctx)
{
    static const enum AVSampleFormat sample_fmts[] = {
        AV_SAMPLE_FMT_FLTP,
        AV_SAMPLE_FMT_NONE
    };
    AVFilterFormats *fmt_list;
    AVFilterChannelLayouts *layout_list;
    int err;

    fmt_list = ff_make_format_list(sample_fmts);
    if (!fmt_list)
        return AVERROR(ENOMEM);
    if ((err = ff_set_common_formats(ctx, fmt_list)) < 0)
        return err;

    layout_list = ff_all_channel_counts();
    if (!layout_list)
        return AVERROR(ENOMEM);
    if ((err = ff_set_common_channel_layouts(ctx, layout_list)) < 0)
        return err;

    fmt_list = ff_all_samplerates();
    return ff_set_common_samplerates(ctx, fmt_list);
}
1373
/**
 * Handle runtime commands.
 *
 * "sample_noise" / "sn" with argument "start" arms noise sampling; with
 * "end" or "stop" it requests that sampling be finished. Any other argument
 * is ignored. Every other command is forwarded to
 * ff_filter_process_command(); on success set_parameters() is re-run.
 *
 * @param ctx     filter context
 * @param cmd     command name
 * @param args    command argument
 * @param res     buffer for a textual result (forwarded)
 * @param res_len size of res (forwarded)
 * @param flags   command flags (forwarded)
 * @return 0 on success, a negative AVERROR code if the generic handler fails
 */
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
                           char *res, int res_len, int flags)
{
    AudioFFTDeNoiseContext *s = ctx->priv;
    int ret;

    if (!strcmp(cmd, "sample_noise") || !strcmp(cmd, "sn")) {
        if (!strcmp(args, "start")) {
            s->sample_noise_start = 1;
            s->sample_noise_end   = 0;
        } else if (!strcmp(args, "end") || !strcmp(args, "stop")) {
            s->sample_noise_start = 0;
            s->sample_noise_end   = 1;
        }
        return 0;
    }

    ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
    if (ret < 0)
        return ret;

    /* Changed values only take effect once derived parameters are rebuilt. */
    set_parameters(s);

    return 0;
}
1403
1404 static const AVFilterPad inputs[] = {
1405 {
1406 .name = "default",
1407 .type = AVMEDIA_TYPE_AUDIO,
1408 .config_props = config_input,
1409 },
1410 { NULL }
1411 };
1412
1413 static const AVFilterPad outputs[] = {
1414 {
1415 .name = "default",
1416 .type = AVMEDIA_TYPE_AUDIO,
1417 },
1418 { NULL }
1419 };
1420
1421 AVFilter ff_af_afftdn = {
1422 .name = "afftdn",
1423 .description = NULL_IF_CONFIG_SMALL("Denoise audio samples using FFT."),
1424 .query_formats = query_formats,
1425 .priv_size = sizeof(AudioFFTDeNoiseContext),
1426 .priv_class = &afftdn_class,
1427 .activate = activate,
1428 .uninit = uninit,
1429 .inputs = inputs,
1430 .outputs = outputs,
1431 .process_command = process_command,
1432 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC |
1433 AVFILTER_FLAG_SLICE_THREADS,
1434 };
1435