/*
 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

/*
 * The core AEC algorithm, which is presented with time-aligned signals.
 */

#include "aec_core.h"

#include <assert.h>
#include <math.h>
#include <stddef.h>  // size_t
#include <stdlib.h>
#include <string.h>

#include "aec_rdft.h"
#include "delay_estimator_wrapper.h"
#include "ring_buffer.h"
#include "system_wrappers/interface/cpu_features_wrapper.h"
#include "typedefs.h"

// Buffer size (samples)
static const size_t kBufSizePartitions = 250;  // 1 second of audio at 16 kHz.

// Noise suppression
static const int converged = 250;

// Metrics
static const int subCountLen = 4;
static const int countLen = 50;

// Quantities to control H band scaling for SWB input
static const int flagHbandCn = 1;  // flag for adding comfort noise in H band
static const float cnScaleHband = (float)0.4;  // scale for comfort noise in H band
// Initial bin for averaging nlp gain in low band
static const int freqAvgIc = PART_LEN / 2;

// Matlab code to produce table:
// win = sqrt(hanning(127)); win = [0 ; win(1:64)];
// fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);
static const float sqrtHanning[65] = {
    0.00000000000000f, 0.02454122852291f, 0.04906767432742f,
    0.07356456359967f, 0.09801714032956f, 0.12241067519922f,
    0.14673047445536f, 0.17096188876030f, 0.19509032201613f,
    0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
    0.29028467725446f, 0.31368174039889f, 0.33688985339222f,
    0.35989503653499f, 0.38268343236509f, 0.40524131400499f,
    0.42755509343028f, 0.44961132965461f, 0.47139673682600f,
    0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
    0.55557023301960f, 0.57580819141785f, 0.59569930449243f,
    0.61523159058063f, 0.63439328416365f, 0.65317284295378f,
    0.67155895484702f, 0.68954054473707f, 0.70710678118655f,
    0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
    0.77301045336274f, 0.78834642762661f, 0.80320753148064f,
    0.81758481315158f, 0.83146961230255f, 0.84485356524971f,
    0.85772861000027f, 0.87008699110871f, 0.88192126434835f,
    0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
    0.92387953251129f, 0.93299279883474f, 0.94154406518302f,
    0.94952818059304f, 0.95694033573221f, 0.96377606579544f,
    0.97003125319454f, 0.97570213003853f, 0.98078528040323f,
    0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
    0.99518472667220f, 0.99729045667869f, 0.99879545620517f,
    0.99969881869620f, 1.00000000000000f
};

// Matlab code to produce table:
// weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1];
// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve);
const float WebRtcAec_weightCurve[65] = {
    0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f,
    0.1845f, 0.1926f, 0.2000f, 0.2069f, 0.2134f, 0.2195f,
    0.2254f, 0.2309f, 0.2363f, 0.2414f, 0.2464f, 0.2512f,
    0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f,
    0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f,
    0.3035f, 0.3070f, 0.3104f, 0.3138f, 0.3171f, 0.3204f,
    0.3236f, 0.3268f, 0.3299f, 0.3330f, 0.3360f, 0.3390f,
    0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f,
    0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f,
    0.3752f, 0.3777f, 0.3803f, 0.3828f, 0.3854f, 0.3878f,
    0.3903f, 0.3928f, 0.3952f, 0.3976f, 0.4000f
};

// Matlab code to produce table:
// overDriveCurve = [sqrt(linspace(0,1,65))' + 1];
// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve);
const float WebRtcAec_overDriveCurve[65] = {
    1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f,
    1.3062f, 1.3307f, 1.3536f, 1.3750f, 1.3953f, 1.4146f,
    1.4330f, 1.4507f, 1.4677f, 1.4841f, 1.5000f, 1.5154f,
    1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f,
    1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f,
    1.6847f, 1.6960f, 1.7071f, 1.7181f, 1.7289f, 1.7395f,
    1.7500f, 1.7603f, 1.7706f, 1.7806f, 1.7906f, 1.8004f,
    1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f,
    1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f,
    1.9186f, 1.9270f, 1.9354f, 1.9437f, 1.9520f, 1.9601f,
    1.9682f, 1.9763f, 1.9843f, 1.9922f, 2.0000f
};

// "Private" function prototypes.
static void ProcessBlock(aec_t* aec);

static void NonLinearProcessing(aec_t *aec, short *output, short *outputH);

static void GetHighbandGain(const float *lambda, float *nlpGainHband);

// ComfortNoise also computes noise for the H band, returned in
// comfortNoiseHband.
static void ComfortNoise(aec_t *aec, float efw[2][PART_LEN1],
                         complex_t *comfortNoiseHband,
                         const float *noisePow, const float *lambda);

static void WebRtcAec_InitLevel(power_level_t *level);
static void WebRtcAec_InitStats(stats_t *stats);
static void UpdateLevel(power_level_t* level, float in[2][PART_LEN1]);
static void UpdateMetrics(aec_t *aec);
// Convert from time domain to frequency domain. Note that |time_data| is
// overwritten.
static void TimeToFrequency(float time_data[PART_LEN2],
                            float freq_data[2][PART_LEN1],
                            int window);

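// Small helpers for complex arithmetic on split (re, im) data: MulRe/MulIm
// return the real and imaginary parts of (aRe + j*aIm) * (bRe + j*bIm).
// Passing a negated imaginary part, as done in FilterAdaptation(), in effect
// multiplies by the complex conjugate.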
__inline static float MulRe(float aRe, float aIm, float bRe, float bIm)
{
    return aRe * bRe - aIm * bIm;
}

__inline static float MulIm(float aRe, float aIm, float bRe, float bIm)
{
    return aRe * bIm + aIm * bRe;
}

static int CmpFloat(const void *a, const void *b)
{
    const float *da = (const float *)a;
    const float *db = (const float *)b;

    return (*da > *db) - (*da < *db);
}

int WebRtcAec_CreateAec(aec_t **aecInst)
{
    aec_t *aec = malloc(sizeof(aec_t));
    *aecInst = aec;
    if (aec == NULL) {
        return -1;
    }

    if (WebRtc_CreateBuffer(&aec->nearFrBuf,
                            FRAME_LEN + PART_LEN,
                            sizeof(int16_t)) == -1) {
        WebRtcAec_FreeAec(aec);
        aec = NULL;
        return -1;
    }

    if (WebRtc_CreateBuffer(&aec->outFrBuf,
                            FRAME_LEN + PART_LEN,
                            sizeof(int16_t)) == -1) {
        WebRtcAec_FreeAec(aec);
        aec = NULL;
        return -1;
    }

    if (WebRtc_CreateBuffer(&aec->nearFrBufH,
                            FRAME_LEN + PART_LEN,
                            sizeof(int16_t)) == -1) {
        WebRtcAec_FreeAec(aec);
        aec = NULL;
        return -1;
    }

    if (WebRtc_CreateBuffer(&aec->outFrBufH,
                            FRAME_LEN + PART_LEN,
                            sizeof(int16_t)) == -1) {
        WebRtcAec_FreeAec(aec);
        aec = NULL;
        return -1;
    }

    // Create far-end buffers.
    if (WebRtc_CreateBuffer(&aec->far_buf, kBufSizePartitions,
                            sizeof(float) * 2 * PART_LEN1) == -1) {
        WebRtcAec_FreeAec(aec);
        aec = NULL;
        return -1;
    }
    if (WebRtc_CreateBuffer(&aec->far_buf_windowed, kBufSizePartitions,
                            sizeof(float) * 2 * PART_LEN1) == -1) {
        WebRtcAec_FreeAec(aec);
        aec = NULL;
        return -1;
    }
#ifdef WEBRTC_AEC_DEBUG_DUMP
    if (WebRtc_CreateBuffer(&aec->far_time_buf, kBufSizePartitions,
                            sizeof(int16_t) * PART_LEN) == -1) {
        WebRtcAec_FreeAec(aec);
        aec = NULL;
        return -1;
    }
#endif
    if (WebRtc_CreateDelayEstimator(&aec->delay_estimator,
                                    PART_LEN1,
                                    kMaxDelayBlocks,
                                    kLookaheadBlocks) == -1) {
        WebRtcAec_FreeAec(aec);
        aec = NULL;
        return -1;
    }

    return 0;
}

int WebRtcAec_FreeAec(aec_t *aec)
{
    if (aec == NULL) {
        return -1;
    }

    WebRtc_FreeBuffer(aec->nearFrBuf);
    WebRtc_FreeBuffer(aec->outFrBuf);

    WebRtc_FreeBuffer(aec->nearFrBufH);
    WebRtc_FreeBuffer(aec->outFrBufH);

    WebRtc_FreeBuffer(aec->far_buf);
    WebRtc_FreeBuffer(aec->far_buf_windowed);
#ifdef WEBRTC_AEC_DEBUG_DUMP
    WebRtc_FreeBuffer(aec->far_time_buf);
#endif
    WebRtc_FreeDelayEstimator(aec->delay_estimator);

    free(aec);
    return 0;
}

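// Accumulates the frequency-domain echo estimate over all stored far-end
// partitions: in effect yf(k) += X_i(k) * W_i(k) for every bin k and
// partition i, where xfBufBlockPos is a circular index to the most recently
// stored partition.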
static void FilterFar(aec_t *aec, float yf[2][PART_LEN1])
{
    int i;
    for (i = 0; i < NR_PART; i++) {
        int j;
        int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
        int pos = i * PART_LEN1;
        // Check for wrap
        if (i + aec->xfBufBlockPos >= NR_PART) {
            xPos -= NR_PART * (PART_LEN1);
        }

        for (j = 0; j < PART_LEN1; j++) {
            yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
                              aec->wfBuf[0][pos + j], aec->wfBuf[1][pos + j]);
            yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
                              aec->wfBuf[0][pos + j], aec->wfBuf[1][pos + j]);
        }
    }
}

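// Normalizes the error spectrum in an NLMS fashion: each bin is divided by a
// regularized far-end power estimate, its magnitude is clamped to
// aec->errThresh, and the result is scaled by the step size aec->mu, so the
// update is roughly mu * E(k) / (|X(k)|^2 + 1e-10) per bin.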
static void ScaleErrorSignal(aec_t *aec, float ef[2][PART_LEN1])
{
    int i;
    float absEf;
    for (i = 0; i < (PART_LEN1); i++) {
        ef[0][i] /= (aec->xPow[i] + 1e-10f);
        ef[1][i] /= (aec->xPow[i] + 1e-10f);
        absEf = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);

        if (absEf > aec->errThresh) {
            absEf = aec->errThresh / (absEf + 1e-10f);
            ef[0][i] *= absEf;
            ef[1][i] *= absEf;
        }

        // Stepsize factor
        ef[0][i] *= aec->mu;
        ef[1][i] *= aec->mu;
    }
}

// Time-unconstrained filter adaptation.
// TODO(andrew): consider for a low-complexity mode.
//static void FilterAdaptationUnconstrained(aec_t *aec, float *fft,
//                                          float ef[2][PART_LEN1]) {
//  int i, j;
//  for (i = 0; i < NR_PART; i++) {
//    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
//    int pos;
//    // Check for wrap
//    if (i + aec->xfBufBlockPos >= NR_PART) {
//      xPos -= NR_PART * PART_LEN1;
//    }
//
//    pos = i * PART_LEN1;
//
//    for (j = 0; j < PART_LEN1; j++) {
//      aec->wfBuf[pos + j][0] += MulRe(aec->xfBuf[xPos + j][0],
//                                      -aec->xfBuf[xPos + j][1],
//                                      ef[j][0], ef[j][1]);
//      aec->wfBuf[pos + j][1] += MulIm(aec->xfBuf[xPos + j][0],
//                                      -aec->xfBuf[xPos + j][1],
//                                      ef[j][0], ef[j][1]);
//    }
//  }
//}

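// Constrained (gradient-projection) filter update: the conjugate far-end
// spectrum is multiplied by the scaled error, transformed to the time domain,
// the second half of the update is zeroed (the constraint that distinguishes
// this from the unconstrained version above), and the result is transformed
// back and accumulated into wfBuf.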
static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1]) {
    int i, j;
    for (i = 0; i < NR_PART; i++) {
        int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
        int pos;
        // Check for wrap
        if (i + aec->xfBufBlockPos >= NR_PART) {
            xPos -= NR_PART * PART_LEN1;
        }

        pos = i * PART_LEN1;

        for (j = 0; j < PART_LEN; j++) {
            fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j],
                               -aec->xfBuf[1][xPos + j],
                               ef[0][j], ef[1][j]);
            fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j],
                                   -aec->xfBuf[1][xPos + j],
                                   ef[0][j], ef[1][j]);
        }
        fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
                       -aec->xfBuf[1][xPos + PART_LEN],
                       ef[0][PART_LEN], ef[1][PART_LEN]);

        aec_rdft_inverse_128(fft);
        memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);

        // fft scaling
        {
            float scale = 2.0f / PART_LEN2;
            for (j = 0; j < PART_LEN; j++) {
                fft[j] *= scale;
            }
        }
        aec_rdft_forward_128(fft);

        aec->wfBuf[0][pos] += fft[0];
        aec->wfBuf[0][pos + PART_LEN] += fft[1];

        for (j = 1; j < PART_LEN; j++) {
            aec->wfBuf[0][pos + j] += fft[2 * j];
            aec->wfBuf[1][pos + j] += fft[2 * j + 1];
        }
    }
}

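// Applies the suppression gains: bins above the feedback level hNlFb are
// pulled towards it using weightCurve, each gain is raised to the power
// overDriveSm * overDriveCurve[i] (stronger suppression at higher
// frequencies), and the error spectrum is scaled accordingly.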
static void OverdriveAndSuppress(aec_t *aec, float hNl[PART_LEN1],
                                 const float hNlFb,
                                 float efw[2][PART_LEN1]) {
    int i;
    for (i = 0; i < PART_LEN1; i++) {
        // Weight subbands
        if (hNl[i] > hNlFb) {
            hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
                     (1 - WebRtcAec_weightCurve[i]) * hNl[i];
        }
        hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);

        // Suppress error signal
        efw[0][i] *= hNl[i];
        efw[1][i] *= hNl[i];

        // Ooura fft returns incorrect sign on imaginary component. It matters
        // here because we are making an additive change with comfort noise.
        efw[1][i] *= -1;
    }
}

WebRtcAec_FilterFar_t WebRtcAec_FilterFar;
WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal;
WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation;
WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress;

int WebRtcAec_InitAec(aec_t *aec, int sampFreq)
{
    int i;

    aec->sampFreq = sampFreq;

    if (sampFreq == 8000) {
        aec->mu = 0.6f;
        aec->errThresh = 2e-6f;
    }
    else {
        aec->mu = 0.5f;
        aec->errThresh = 1.5e-6f;
    }

    if (WebRtc_InitBuffer(aec->nearFrBuf) == -1) {
        return -1;
    }

    if (WebRtc_InitBuffer(aec->outFrBuf) == -1) {
        return -1;
    }

    if (WebRtc_InitBuffer(aec->nearFrBufH) == -1) {
        return -1;
    }

    if (WebRtc_InitBuffer(aec->outFrBufH) == -1) {
        return -1;
    }

    // Initialize far-end buffers.
    if (WebRtc_InitBuffer(aec->far_buf) == -1) {
        return -1;
    }
    if (WebRtc_InitBuffer(aec->far_buf_windowed) == -1) {
        return -1;
    }
#ifdef WEBRTC_AEC_DEBUG_DUMP
    if (WebRtc_InitBuffer(aec->far_time_buf) == -1) {
        return -1;
    }
#endif
    aec->system_delay = 0;

    if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) {
        return -1;
    }
    aec->delay_logging_enabled = 0;
    memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram));

    // Default target suppression level
    aec->targetSupp = -11.5;
    aec->minOverDrive = 2.0;

    // Sampling frequency multiplier
    // SWB is processed with a frame size of 160.
    if (aec->sampFreq == 32000) {
        aec->mult = (short)aec->sampFreq / 16000;
    }
    else {
        aec->mult = (short)aec->sampFreq / 8000;
    }

    aec->farBufWritePos = 0;
    aec->farBufReadPos = 0;

    aec->inSamples = 0;
    aec->outSamples = 0;
    aec->knownDelay = 0;

    // Initialize buffers
    memset(aec->dBuf, 0, sizeof(aec->dBuf));
    memset(aec->eBuf, 0, sizeof(aec->eBuf));
    // For H band
    memset(aec->dBufH, 0, sizeof(aec->dBufH));

    memset(aec->xPow, 0, sizeof(aec->xPow));
    memset(aec->dPow, 0, sizeof(aec->dPow));
    memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow));
    aec->noisePow = aec->dInitMinPow;
    aec->noiseEstCtr = 0;

    // Initial comfort noise power
    for (i = 0; i < PART_LEN1; i++) {
        aec->dMinPow[i] = 1.0e6f;
    }

    // Holds the last block written to
    aec->xfBufBlockPos = 0;
    // TODO: Investigate need for these initializations. Deleting them doesn't
    // change the output at all and yields 0.4% overall speedup.
    memset(aec->xfBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
    memset(aec->wfBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
    memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1);
    memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1);
    memset(aec->xfwBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
    memset(aec->se, 0, sizeof(float) * PART_LEN1);

    // To prevent numerical instability in the first block.
    for (i = 0; i < PART_LEN1; i++) {
        aec->sd[i] = 1;
    }
    for (i = 0; i < PART_LEN1; i++) {
        aec->sx[i] = 1;
    }

    memset(aec->hNs, 0, sizeof(aec->hNs));
    memset(aec->outBuf, 0, sizeof(float) * PART_LEN);

    aec->hNlFbMin = 1;
    aec->hNlFbLocalMin = 1;
    aec->hNlXdAvgMin = 1;
    aec->hNlNewMin = 0;
    aec->hNlMinCtr = 0;
    aec->overDrive = 2;
    aec->overDriveSm = 2;
    aec->delayIdx = 0;
    aec->stNearState = 0;
    aec->echoState = 0;
    aec->divergeState = 0;

    aec->seed = 777;
    aec->delayEstCtr = 0;

    // Metrics disabled by default
    aec->metricsMode = 0;
    WebRtcAec_InitMetrics(aec);

    // Assembly optimization
    WebRtcAec_FilterFar = FilterFar;
    WebRtcAec_ScaleErrorSignal = ScaleErrorSignal;
    WebRtcAec_FilterAdaptation = FilterAdaptation;
    WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
    if (WebRtc_GetCPUInfo(kSSE2)) {
#if defined(WEBRTC_USE_SSE2)
        WebRtcAec_InitAec_SSE2();
#endif
    }
    aec_rdft_init();

    return 0;
}

void WebRtcAec_InitMetrics(aec_t *aec)
{
    aec->stateCounter = 0;
    WebRtcAec_InitLevel(&aec->farlevel);
    WebRtcAec_InitLevel(&aec->nearlevel);
    WebRtcAec_InitLevel(&aec->linoutlevel);
    WebRtcAec_InitLevel(&aec->nlpoutlevel);

    WebRtcAec_InitStats(&aec->erl);
    WebRtcAec_InitStats(&aec->erle);
    WebRtcAec_InitStats(&aec->aNlp);
    WebRtcAec_InitStats(&aec->rerl);
}

void WebRtcAec_BufferFarendPartition(aec_t *aec, const float* farend) {
    float fft[PART_LEN2];
    float xf[2][PART_LEN1];

    // Check if the buffer is full, and in that case flush the oldest data.
    if (WebRtc_available_write(aec->far_buf) < 1) {
        WebRtc_MoveReadPtr(aec->far_buf, 1);
        WebRtc_MoveReadPtr(aec->far_buf_windowed, 1);
        aec->system_delay -= PART_LEN;
#ifdef WEBRTC_AEC_DEBUG_DUMP
        WebRtc_MoveReadPtr(aec->far_time_buf, 1);
#endif
    }
    // Convert far-end partition to the frequency domain without windowing.
    memcpy(fft, farend, sizeof(float) * PART_LEN2);
    TimeToFrequency(fft, xf, 0);
    WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1);

    // Convert far-end partition to the frequency domain with windowing.
    memcpy(fft, farend, sizeof(float) * PART_LEN2);
    TimeToFrequency(fft, xf, 1);
    WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1);
}

void WebRtcAec_ProcessFrame(aec_t *aec,
                            const short *nearend,
                            const short *nearendH,
                            int knownDelay)
{
    // For each frame the process is as follows:
    // 1) If the |system_delay| indicates that there is too little far-end
    //    data to process a frame, we stuff the buffer with enough data for
    //    10 ms.
    // 2) Adjust the buffer to the system delay, by moving the read pointer.
    // 3) If we can't move the read pointer due to buffer size limitations, we
    //    flush/stuff the buffer.
    // 4) Process as many partitions as possible.
    // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN
    //    samples. Even though we will have data left to process (we work with
    //    partitions) we consider updating a whole frame, since that's the
    //    amount of data we input and output in audio_processing.
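    //
    // As an illustration (assuming FRAME_LEN = 80 and PART_LEN = 64): each
    // call adds 80 near-end samples, so most calls process one 64-sample
    // block and roughly every fourth call processes two, with the remainder
    // carried over in |nearFrBuf| to the next frame.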

    // TODO(bjornv): Investigate how we should round the delay difference;
    // right now we know that the incoming |knownDelay| is underestimated when
    // it's less than |aec->knownDelay|. We therefore round (-32) in that
    // direction. In the other direction, we don't have this situation, but
    // might flush one partition too little. This can cause non-causality,
    // which should be investigated. Maybe, allow for a non-symmetric
    // rounding, like -16.
    int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN;
    int moved_elements = 0;

    // TODO(bjornv): Change the near-end buffer handling to be the same as for
    // far-end, that is, with a near_pre_buf.
    // Buffer the near-end frame.
    WebRtc_WriteBuffer(aec->nearFrBuf, nearend, FRAME_LEN);
    // For H band
    if (aec->sampFreq == 32000) {
        WebRtc_WriteBuffer(aec->nearFrBufH, nearendH, FRAME_LEN);
    }

    // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we
    //    have enough far-end data for that by stuffing the buffer if the
    //    |system_delay| indicates otherwise.
    if (aec->system_delay < FRAME_LEN) {
        // We don't have enough data so we rewind 10 ms.
        WebRtc_MoveReadPtr(aec->far_buf_windowed, -(aec->mult + 1));
        aec->system_delay -= WebRtc_MoveReadPtr(aec->far_buf,
                                                -(aec->mult + 1)) * PART_LEN;
#ifdef WEBRTC_AEC_DEBUG_DUMP
        WebRtc_MoveReadPtr(aec->far_time_buf, -(aec->mult + 1));
#endif
    }

    // 2) Compensate for a possible change in the system delay.
    WebRtc_MoveReadPtr(aec->far_buf_windowed, move_elements);
    moved_elements = WebRtc_MoveReadPtr(aec->far_buf, move_elements);
    aec->knownDelay -= moved_elements * PART_LEN;
#ifdef WEBRTC_AEC_DEBUG_DUMP
    WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
#endif

    // 4) Process as many blocks as possible.
    while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) {
        ProcessBlock(aec);
    }

    // 5) Update system delay with respect to the entire frame.
    aec->system_delay -= FRAME_LEN;
}

static void ProcessBlock(aec_t* aec) {
    int i;
    float d[PART_LEN], y[PART_LEN], e[PART_LEN], dH[PART_LEN];
    float scale;

    float fft[PART_LEN2];
    float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1];
    float df[2][PART_LEN1];
    float far_spectrum = 0.0f;
    float near_spectrum = 0.0f;
    float abs_far_spectrum[PART_LEN1];
    float abs_near_spectrum[PART_LEN1];

    const float gPow[2] = {0.9f, 0.1f};

    // Noise estimate constants.
    const int noiseInitBlocks = 500 * aec->mult;
    const float step = 0.1f;
    const float ramp = 1.0002f;
    const float gInitNoise[2] = {0.999f, 0.001f};

    int16_t nearend[PART_LEN];
    int16_t* nearend_ptr = NULL;
    int16_t output[PART_LEN];
    int16_t outputH[PART_LEN];

    float* xf_ptr = NULL;

    memset(dH, 0, sizeof(dH));
    if (aec->sampFreq == 32000) {
        // Get the upper band first so we can reuse |nearend|.
        WebRtc_ReadBuffer(aec->nearFrBufH,
                          (void**) &nearend_ptr,
                          nearend,
                          PART_LEN);
        for (i = 0; i < PART_LEN; i++) {
            dH[i] = (float) (nearend_ptr[i]);
        }
        memcpy(aec->dBufH + PART_LEN, dH, sizeof(float) * PART_LEN);
    }
    WebRtc_ReadBuffer(aec->nearFrBuf, (void**) &nearend_ptr, nearend, PART_LEN);

    // ---------- Ooura fft ----------
    // Concatenate old and new nearend blocks.
    for (i = 0; i < PART_LEN; i++) {
        d[i] = (float) (nearend_ptr[i]);
    }
    memcpy(aec->dBuf + PART_LEN, d, sizeof(float) * PART_LEN);

#ifdef WEBRTC_AEC_DEBUG_DUMP
    {
        int16_t farend[PART_LEN];
        int16_t* farend_ptr = NULL;
        WebRtc_ReadBuffer(aec->far_time_buf, (void**) &farend_ptr, farend, 1);
        fwrite(farend_ptr, sizeof(int16_t), PART_LEN, aec->farFile);
        fwrite(nearend_ptr, sizeof(int16_t), PART_LEN, aec->nearFile);
    }
#endif

    // We should always have at least one element stored in |far_buf|.
    assert(WebRtc_available_read(aec->far_buf) > 0);
    WebRtc_ReadBuffer(aec->far_buf, (void**) &xf_ptr, &xf[0][0], 1);

    // Near fft
    memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
    TimeToFrequency(fft, df, 0);

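    // The spectra below are smoothed with a single-pole recursion using
    // gPow = {0.9, 0.1}; the far-end power is additionally scaled by NR_PART,
    // presumably to make |xPow| comparable to the energy over the whole
    // extended filter length.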
    // Power smoothing
    for (i = 0; i < PART_LEN1; i++) {
        far_spectrum = (xf_ptr[i] * xf_ptr[i]) +
            (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]);
        aec->xPow[i] = gPow[0] * aec->xPow[i] + gPow[1] * NR_PART * far_spectrum;
        // Calculate absolute spectra
        abs_far_spectrum[i] = sqrtf(far_spectrum);

        near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i];
        aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum;
        // Calculate absolute spectra
        abs_near_spectrum[i] = sqrtf(near_spectrum);
    }

    // Estimate noise power. Wait until dPow is more stable.
    if (aec->noiseEstCtr > 50) {
        for (i = 0; i < PART_LEN1; i++) {
            if (aec->dPow[i] < aec->dMinPow[i]) {
                aec->dMinPow[i] = (aec->dPow[i] + step * (aec->dMinPow[i] -
                                                          aec->dPow[i])) * ramp;
            }
            else {
                aec->dMinPow[i] *= ramp;
            }
        }
    }

    // Smooth increasing noise power from zero at the start,
    // to avoid a sudden burst of comfort noise.
    if (aec->noiseEstCtr < noiseInitBlocks) {
        aec->noiseEstCtr++;
        for (i = 0; i < PART_LEN1; i++) {
            if (aec->dMinPow[i] > aec->dInitMinPow[i]) {
                aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] +
                                      gInitNoise[1] * aec->dMinPow[i];
            }
            else {
                aec->dInitMinPow[i] = aec->dMinPow[i];
            }
        }
        aec->noisePow = aec->dInitMinPow;
    }
    else {
        aec->noisePow = aec->dMinPow;
    }

    // Block wise delay estimation used for logging
    if (aec->delay_logging_enabled) {
        int delay_estimate = 0;
        // Estimate the delay
        delay_estimate = WebRtc_DelayEstimatorProcessFloat(aec->delay_estimator,
                                                           abs_far_spectrum,
                                                           abs_near_spectrum,
                                                           PART_LEN1);
        if (delay_estimate >= 0) {
            // Update delay estimate buffer.
            aec->delay_histogram[delay_estimate]++;
        }
    }

    // Update the xfBuf block position.
    aec->xfBufBlockPos--;
    if (aec->xfBufBlockPos == -1) {
        aec->xfBufBlockPos = NR_PART - 1;
    }

    // Buffer xf
    memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1, xf_ptr,
           sizeof(float) * PART_LEN1);
    memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1, &xf_ptr[PART_LEN1],
           sizeof(float) * PART_LEN1);

    memset(yf[0], 0, sizeof(float) * (PART_LEN1 * 2));

    // Filter far
    WebRtcAec_FilterFar(aec, yf);

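    // Overlap-save: only the last PART_LEN samples of the 128-point inverse
    // transform are kept as the echo estimate |y|, and the 2 / PART_LEN2
    // factor compensates for the scaling convention of aec_rdft.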
    // Inverse fft to obtain echo estimate and error.
    fft[0] = yf[0][0];
    fft[1] = yf[0][PART_LEN];
    for (i = 1; i < PART_LEN; i++) {
        fft[2 * i] = yf[0][i];
        fft[2 * i + 1] = yf[1][i];
    }
    aec_rdft_inverse_128(fft);

    scale = 2.0f / PART_LEN2;
    for (i = 0; i < PART_LEN; i++) {
        y[i] = fft[PART_LEN + i] * scale;  // fft scaling
    }

    for (i = 0; i < PART_LEN; i++) {
        e[i] = d[i] - y[i];
    }

    // Error fft
    memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
    memset(fft, 0, sizeof(float) * PART_LEN);
    memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
    // TODO(bjornv): Change to use TimeToFrequency().
    aec_rdft_forward_128(fft);

    ef[1][0] = 0;
    ef[1][PART_LEN] = 0;
    ef[0][0] = fft[0];
    ef[0][PART_LEN] = fft[1];
    for (i = 1; i < PART_LEN; i++) {
        ef[0][i] = fft[2 * i];
        ef[1][i] = fft[2 * i + 1];
    }

    if (aec->metricsMode == 1) {
        // Note that the first PART_LEN samples in fft (before transformation)
        // are zero. Hence, the scaling by two in UpdateLevel() should not be
        // performed. That scaling is taken care of in UpdateMetrics() instead.
        UpdateLevel(&aec->linoutlevel, ef);
    }

    // Scale error signal inversely with far power.
    WebRtcAec_ScaleErrorSignal(aec, ef);
    WebRtcAec_FilterAdaptation(aec, fft, ef);
    NonLinearProcessing(aec, output, outputH);

    if (aec->metricsMode == 1) {
        // Update power levels and echo metrics
        UpdateLevel(&aec->farlevel, (float (*)[PART_LEN1]) xf_ptr);
        UpdateLevel(&aec->nearlevel, df);
        UpdateMetrics(aec);
    }

    // Store the output block.
    WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN);
    // For H band
    if (aec->sampFreq == 32000) {
        WebRtc_WriteBuffer(aec->outFrBufH, outputH, PART_LEN);
    }

#ifdef WEBRTC_AEC_DEBUG_DUMP
    {
        int16_t eInt16[PART_LEN];
        for (i = 0; i < PART_LEN; i++) {
            eInt16[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, e[i],
                                                WEBRTC_SPL_WORD16_MIN);
        }

        fwrite(eInt16, sizeof(int16_t), PART_LEN, aec->outLinearFile);
        fwrite(output, sizeof(int16_t), PART_LEN, aec->outFile);
    }
#endif
}

static void NonLinearProcessing(aec_t *aec, short *output, short *outputH)
{
    float efw[2][PART_LEN1], dfw[2][PART_LEN1], xfw[2][PART_LEN1];
    complex_t comfortNoiseHband[PART_LEN1];
    float fft[PART_LEN2];
    float scale, dtmp;
    float nlpGainHband;
    int i, j, pos;

    // Coherence and non-linear filter
    float cohde[PART_LEN1], cohxd[PART_LEN1];
    float hNlDeAvg, hNlXdAvg;
    float hNl[PART_LEN1];
    float hNlPref[PREF_BAND_SIZE];
    float hNlFb = 0, hNlFbLow = 0;
    const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f;
    const int prefBandSize = PREF_BAND_SIZE / aec->mult;
    const int minPrefBand = 4 / aec->mult;

    // Near and error power sums
    float sdSum = 0, seSum = 0;

    // Power estimate smoothing coefficients
    const float gCoh[2][2] = {{0.9f, 0.1f}, {0.93f, 0.07f}};
    const float *ptrGCoh = gCoh[aec->mult - 1];

    // Filter energy
    float wfEnMax = 0, wfEn = 0;
    const int delayEstInterval = 10 * aec->mult;

    float* xfw_ptr = NULL;

    aec->delayEstCtr++;
    if (aec->delayEstCtr == delayEstInterval) {
        aec->delayEstCtr = 0;
    }

    // initialize comfort noise for H band
    memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband));
    nlpGainHband = (float)0.0;
    dtmp = (float)0.0;

    // Measure energy in each filter partition to determine delay.
    // TODO: Spread by computing one partition per block?
    if (aec->delayEstCtr == 0) {
        wfEnMax = 0;
        aec->delayIdx = 0;
        for (i = 0; i < NR_PART; i++) {
            pos = i * PART_LEN1;
            wfEn = 0;
            for (j = 0; j < PART_LEN1; j++) {
                wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
                        aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
            }

            if (wfEn > wfEnMax) {
                wfEnMax = wfEn;
                aec->delayIdx = i;
            }
        }
    }

    // We should always have at least one element stored in |far_buf|.
    assert(WebRtc_available_read(aec->far_buf_windowed) > 0);
    // NLP
    WebRtc_ReadBuffer(aec->far_buf_windowed, (void**) &xfw_ptr, &xfw[0][0], 1);

    // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of
    // |xfwBuf|.
    // Buffer far.
    memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);

    // Use delayed far.
    memcpy(xfw, aec->xfwBuf + aec->delayIdx * PART_LEN1, sizeof(xfw));

    // Windowed near fft
    for (i = 0; i < PART_LEN; i++) {
        fft[i] = aec->dBuf[i] * sqrtHanning[i];
        fft[PART_LEN + i] = aec->dBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
    }
    aec_rdft_forward_128(fft);

    dfw[1][0] = 0;
    dfw[1][PART_LEN] = 0;
    dfw[0][0] = fft[0];
    dfw[0][PART_LEN] = fft[1];
    for (i = 1; i < PART_LEN; i++) {
        dfw[0][i] = fft[2 * i];
        dfw[1][i] = fft[2 * i + 1];
    }

    // Windowed error fft
    for (i = 0; i < PART_LEN; i++) {
        fft[i] = aec->eBuf[i] * sqrtHanning[i];
        fft[PART_LEN + i] = aec->eBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
    }
    aec_rdft_forward_128(fft);
    efw[1][0] = 0;
    efw[1][PART_LEN] = 0;
    efw[0][0] = fft[0];
    efw[0][PART_LEN] = fft[1];
    for (i = 1; i < PART_LEN; i++) {
        efw[0][i] = fft[2 * i];
        efw[1][i] = fft[2 * i + 1];
    }

    // Smoothed PSD
    for (i = 0; i < PART_LEN1; i++) {
        aec->sd[i] = ptrGCoh[0] * aec->sd[i] + ptrGCoh[1] *
            (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
        aec->se[i] = ptrGCoh[0] * aec->se[i] + ptrGCoh[1] *
            (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
        // We threshold here to protect against the ill-effects of a zero farend.
        // The threshold is not arbitrarily chosen, but balances protection and
        // adverse interaction with the algorithm's tuning.
        // TODO: investigate further why this is so sensitive.
        aec->sx[i] = ptrGCoh[0] * aec->sx[i] + ptrGCoh[1] *
            WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], 15);

        aec->sde[i][0] = ptrGCoh[0] * aec->sde[i][0] + ptrGCoh[1] *
            (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
        aec->sde[i][1] = ptrGCoh[0] * aec->sde[i][1] + ptrGCoh[1] *
            (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);

        aec->sxd[i][0] = ptrGCoh[0] * aec->sxd[i][0] + ptrGCoh[1] *
            (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
        aec->sxd[i][1] = ptrGCoh[0] * aec->sxd[i][1] + ptrGCoh[1] *
            (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);

        sdSum += aec->sd[i];
        seSum += aec->se[i];
    }

    // Divergent filter safeguard.
    if (aec->divergeState == 0) {
        if (seSum > sdSum) {
            aec->divergeState = 1;
        }
    }
    else {
        if (seSum * 1.05f < sdSum) {
            aec->divergeState = 0;
        }
    }

    if (aec->divergeState == 1) {
        memcpy(efw, dfw, sizeof(efw));
    }

    // Reset if error is significantly larger than nearend (13 dB).
    if (seSum > (19.95f * sdSum)) {
        memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
    }

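    // The magnitude-squared coherence below is computed per bin as
    // |S_de|^2 / (S_d * S_e) and |S_xd|^2 / (S_x * S_d), i.e. how closely the
    // error follows the near end (cohde) and how closely the near end follows
    // the far end (cohxd).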
    // Subband coherence
    for (i = 0; i < PART_LEN1; i++) {
        cohde[i] = (aec->sde[i][0] * aec->sde[i][0] +
                    aec->sde[i][1] * aec->sde[i][1]) /
                   (aec->sd[i] * aec->se[i] + 1e-10f);
        cohxd[i] = (aec->sxd[i][0] * aec->sxd[i][0] +
                    aec->sxd[i][1] * aec->sxd[i][1]) /
                   (aec->sx[i] * aec->sd[i] + 1e-10f);
    }

    hNlXdAvg = 0;
    for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
        hNlXdAvg += cohxd[i];
    }
    hNlXdAvg /= prefBandSize;
    hNlXdAvg = 1 - hNlXdAvg;

    hNlDeAvg = 0;
    for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
        hNlDeAvg += cohde[i];
    }
    hNlDeAvg /= prefBandSize;

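    // Decision logic (roughly): high near/error coherence together with low
    // far/near coherence indicates near-end activity, while a persistently
    // high far/near coherence (tracked through hNlXdAvgMin) indicates echo,
    // in which case the per-bin gain is limited by both coherence measures.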
    if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) {
        aec->hNlXdAvgMin = hNlXdAvg;
    }

    if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) {
        aec->stNearState = 1;
    }
    else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) {
        aec->stNearState = 0;
    }

    if (aec->hNlXdAvgMin == 1) {
        aec->echoState = 0;
        aec->overDrive = aec->minOverDrive;

        if (aec->stNearState == 1) {
            memcpy(hNl, cohde, sizeof(hNl));
            hNlFb = hNlDeAvg;
            hNlFbLow = hNlDeAvg;
        }
        else {
            for (i = 0; i < PART_LEN1; i++) {
                hNl[i] = 1 - cohxd[i];
            }
            hNlFb = hNlXdAvg;
            hNlFbLow = hNlXdAvg;
        }
    }
    else {
        if (aec->stNearState == 1) {
            aec->echoState = 0;
            memcpy(hNl, cohde, sizeof(hNl));
            hNlFb = hNlDeAvg;
            hNlFbLow = hNlDeAvg;
        }
        else {
            aec->echoState = 1;
            for (i = 0; i < PART_LEN1; i++) {
                hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]);
            }

            // Select an order statistic from the preferred bands.
            // TODO: Using quicksort now, but a selection algorithm may be
            // preferred.
            memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize);
            qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat);
            hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))];
            hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))];
        }
    }

    // Track the local filter minimum to determine suppression overdrive.
    if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) {
        aec->hNlFbLocalMin = hNlFbLow;
        aec->hNlFbMin = hNlFbLow;
        aec->hNlNewMin = 1;
        aec->hNlMinCtr = 0;
    }
    aec->hNlFbLocalMin = WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult,
                                        1);
    aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1);

    if (aec->hNlNewMin == 1) {
        aec->hNlMinCtr++;
    }
    if (aec->hNlMinCtr == 2) {
        aec->hNlNewMin = 0;
        aec->hNlMinCtr = 0;
        aec->overDrive = WEBRTC_SPL_MAX(aec->targetSupp /
            ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f), aec->minOverDrive);
    }

    // Smooth the overdrive.
    if (aec->overDrive < aec->overDriveSm) {
        aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive;
    }
    else {
        aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive;
    }

    WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw);

    // Add comfort noise.
    ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl);

    // TODO(bjornv): Investigate how to take the windowing below into account
    // if needed.
    if (aec->metricsMode == 1) {
        // Note that we have a scaling by two in the time domain |eBuf|.
        // In addition the time domain signal is windowed before transformation,
        // losing half the energy on the average. We take care of the first
        // scaling only in UpdateMetrics().
        UpdateLevel(&aec->nlpoutlevel, efw);
    }
    // Inverse error fft.
    fft[0] = efw[0][0];
    fft[1] = efw[0][PART_LEN];
    for (i = 1; i < PART_LEN; i++) {
        fft[2 * i] = efw[0][i];
        // Sign change required by Ooura fft.
        fft[2 * i + 1] = -efw[1][i];
    }
    aec_rdft_inverse_128(fft);

    // Overlap and add to obtain output.
    scale = 2.0f / PART_LEN2;
    for (i = 0; i < PART_LEN; i++) {
        fft[i] *= scale;  // fft scaling
        fft[i] = fft[i] * sqrtHanning[i] + aec->outBuf[i];

        // Saturation protection
        output[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fft[i],
                                          WEBRTC_SPL_WORD16_MIN);

        fft[PART_LEN + i] *= scale;  // fft scaling
        aec->outBuf[i] = fft[PART_LEN + i] * sqrtHanning[PART_LEN - i];
    }

    // For H band
    if (aec->sampFreq == 32000) {

        // H band gain
        // average nlp over low band: average over second half of freq spectrum
        // (4->8khz)
        GetHighbandGain(hNl, &nlpGainHband);

        // Inverse comfort_noise
        if (flagHbandCn == 1) {
            fft[0] = comfortNoiseHband[0][0];
            fft[1] = comfortNoiseHband[PART_LEN][0];
            for (i = 1; i < PART_LEN; i++) {
                fft[2 * i] = comfortNoiseHband[i][0];
                fft[2 * i + 1] = comfortNoiseHband[i][1];
            }
            aec_rdft_inverse_128(fft);
            scale = 2.0f / PART_LEN2;
        }

        // compute gain factor
        for (i = 0; i < PART_LEN; i++) {
            dtmp = (float)aec->dBufH[i];
            dtmp = (float)dtmp * nlpGainHband;  // for variable gain

            // add some comfort noise where Hband is attenuated
            if (flagHbandCn == 1) {
                fft[i] *= scale;  // fft scaling
                dtmp += cnScaleHband * fft[i];
            }

            // Saturation protection
            outputH[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, dtmp,
                                               WEBRTC_SPL_WORD16_MIN);
        }
    }

    // Copy the current block to the old position.
    memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN);
    memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN);

    // Copy the current block to the old position for H band
    if (aec->sampFreq == 32000) {
        memcpy(aec->dBufH, aec->dBufH + PART_LEN, sizeof(float) * PART_LEN);
    }

    memmove(aec->xfwBuf + PART_LEN1, aec->xfwBuf, sizeof(aec->xfwBuf) -
            sizeof(complex_t) * PART_LEN1);
}

static void GetHighbandGain(const float *lambda, float *nlpGainHband)
{
    int i;

    nlpGainHband[0] = (float)0.0;
    for (i = freqAvgIc; i < PART_LEN1 - 1; i++) {
        nlpGainHband[0] += lambda[i];
    }
    nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc);
}

static void ComfortNoise(aec_t *aec, float efw[2][PART_LEN1],
                         complex_t *comfortNoiseHband, const float *noisePow,
                         const float *lambda)
{
    int i, num;
    float rand[PART_LEN];
    float noise, noiseAvg, tmp, tmpAvg;
    WebRtc_Word16 randW16[PART_LEN];
    complex_t u[PART_LEN1];

    const float pi2 = 6.28318530717959f;

    // Generate a uniform random array on [0 1]
    WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
    for (i = 0; i < PART_LEN; i++) {
        rand[i] = ((float)randW16[i]) / 32768;
    }

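    // The comfort noise is built with random phase and magnitude
    // sqrt(noisePow[i]); the DC bin is zeroed below to reject low-frequency
    // noise, and the noise is later mixed in with weight
    // sqrt(1 - lambda[i]^2), so that heavily suppressed bins are filled up to
    // approximately the estimated background noise level.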
    // Reject LF noise
    u[0][0] = 0;
    u[0][1] = 0;
    for (i = 1; i < PART_LEN1; i++) {
        tmp = pi2 * rand[i - 1];

        noise = sqrtf(noisePow[i]);
        u[i][0] = noise * (float)cos(tmp);
        u[i][1] = -noise * (float)sin(tmp);
    }
    u[PART_LEN][1] = 0;

    for (i = 0; i < PART_LEN1; i++) {
        // This is the proper weighting to match the background noise power
        tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
        //tmp = 1 - lambda[i];
        efw[0][i] += tmp * u[i][0];
        efw[1][i] += tmp * u[i][1];
    }

    // For H band comfort noise
    // TODO: don't compute noise and "tmp" twice. Use the previous results.
    noiseAvg = 0.0;
    tmpAvg = 0.0;
    num = 0;
    if (aec->sampFreq == 32000 && flagHbandCn == 1) {

        // average noise scale
        // average over second half of freq spectrum (i.e., 4->8khz)
        // TODO: we shouldn't need num. We know how many elements we're summing.
        for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
            num++;
            noiseAvg += sqrtf(noisePow[i]);
        }
        noiseAvg /= (float)num;

        // average nlp scale
        // average over second half of freq spectrum (i.e., 4->8khz)
        // TODO: we shouldn't need num. We know how many elements we're summing.
        num = 0;
        for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
            num++;
            tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
        }
        tmpAvg /= (float)num;

        // Use average noise for H band
        // TODO: we should probably have a new random vector here.
        // Reject LF noise
        u[0][0] = 0;
        u[0][1] = 0;
        for (i = 1; i < PART_LEN1; i++) {
            tmp = pi2 * rand[i - 1];

            // Use average noise for H band
            u[i][0] = noiseAvg * (float)cos(tmp);
            u[i][1] = -noiseAvg * (float)sin(tmp);
        }
        u[PART_LEN][1] = 0;

        for (i = 0; i < PART_LEN1; i++) {
            // Use average NLP weight for H band
            comfortNoiseHband[i][0] = tmpAvg * u[i][0];
            comfortNoiseHband[i][1] = tmpAvg * u[i][1];
        }
    }
}

static void WebRtcAec_InitLevel(power_level_t *level)
{
    const float bigFloat = 1E17f;

    level->averagelevel = 0;
    level->framelevel = 0;
    level->minlevel = bigFloat;
    level->frsum = 0;
    level->sfrsum = 0;
    level->frcounter = 0;
    level->sfrcounter = 0;
}

static void WebRtcAec_InitStats(stats_t *stats)
{
    stats->instant = offsetLevel;
    stats->average = offsetLevel;
    stats->max = offsetLevel;
    stats->min = offsetLevel * (-1);
    stats->sum = 0;
    stats->hisum = 0;
    stats->himean = offsetLevel;
    stats->counter = 0;
    stats->hicounter = 0;
}

static void UpdateLevel(power_level_t* level, float in[2][PART_LEN1]) {
    // Do the energy calculation in the frequency domain. The FFT is performed
    // on a segment of PART_LEN2 samples due to overlap, but we only want the
    // energy of half that data (the last PART_LEN samples). Parseval's
    // relation states that the energy is preserved according to
    //
    //  \sum_{n=0}^{N-1} |x(n)|^2 = 1/N * \sum_{n=0}^{N-1} |X(n)|^2
    //                            = ENERGY,
    //
    // where N = PART_LEN2. Since we are only interested in calculating the
    // energy for the last PART_LEN samples we approximate by calculating
    // ENERGY and dividing by 2,
    //
    //  \sum_{n=N/2}^{N-1} |x(n)|^2 ~= ENERGY / 2
    //
    // Since we deal with real valued time domain signals we only store
    // frequency bins [0, PART_LEN], which is what |in| consists of. To
    // calculate ENERGY we need to add the contribution from the missing part
    // in [PART_LEN+1, PART_LEN2-1]. These values are, up to a phase shift,
    // identical with the values in [1, PART_LEN-1], hence multiply those
    // values by 2. These are the values in the for loop below, but the
    // multiplication by 2 and the division by 2 cancel.

    // TODO(bjornv): Investigate reusing energy calculations performed at
    // other places in the code.
    int k = 1;
    // Imaginary parts are zero at end points and left out of the calculation.
    float energy = (in[0][0] * in[0][0]) / 2;
    energy += (in[0][PART_LEN] * in[0][PART_LEN]) / 2;

    for (k = 1; k < PART_LEN; k++) {
        energy += (in[0][k] * in[0][k] + in[1][k] * in[1][k]);
    }
    energy /= PART_LEN2;

    level->sfrsum += energy;
    level->sfrcounter++;

    if (level->sfrcounter > subCountLen) {
        level->framelevel = level->sfrsum / (subCountLen * PART_LEN);
        level->sfrsum = 0;
        level->sfrcounter = 0;
        if (level->framelevel > 0) {
            if (level->framelevel < level->minlevel) {
                level->minlevel = level->framelevel;  // New minimum.
            } else {
                level->minlevel *= (1 + 0.001f);  // Small increase.
            }
        }
        level->frcounter++;
        level->frsum += level->framelevel;
        if (level->frcounter > countLen) {
            level->averagelevel = level->frsum / countLen;
            level->frsum = 0;
            level->frcounter = 0;
        }
    }
}

static void UpdateMetrics(aec_t *aec)
{
    float dtmp, dtmp2;

    const float actThresholdNoisy = 8.0f;
    const float actThresholdClean = 40.0f;
    const float safety = 0.99995f;
    const float noisyPower = 300000.0f;

    float actThreshold;
    float echo, suppressedEcho;

    if (aec->echoState) {   // Check if echo is likely present
        aec->stateCounter++;
    }

    if (aec->farlevel.frcounter == 0) {

        if (aec->farlevel.minlevel < noisyPower) {
            actThreshold = actThresholdClean;
        }
        else {
            actThreshold = actThresholdNoisy;
        }

        if ((aec->stateCounter > (0.5f * countLen * subCountLen))
            && (aec->farlevel.sfrcounter == 0)

            // Estimate in active far-end segments only
            && (aec->farlevel.averagelevel >
                (actThreshold * aec->farlevel.minlevel))
            ) {

            // Subtract noise power
            echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel;

            // ERL
            dtmp = 10 * (float)log10(aec->farlevel.averagelevel /
                                     aec->nearlevel.averagelevel + 1e-10f);
            dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f);

            aec->erl.instant = dtmp;
            if (dtmp > aec->erl.max) {
                aec->erl.max = dtmp;
            }

            if (dtmp < aec->erl.min) {
                aec->erl.min = dtmp;
            }

            aec->erl.counter++;
            aec->erl.sum += dtmp;
            aec->erl.average = aec->erl.sum / aec->erl.counter;

            // Upper mean
            if (dtmp > aec->erl.average) {
                aec->erl.hicounter++;
                aec->erl.hisum += dtmp;
                aec->erl.himean = aec->erl.hisum / aec->erl.hicounter;
            }

            // A_NLP
            dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
                                     (2 * aec->linoutlevel.averagelevel) +
                                     1e-10f);

            // subtract noise power
            suppressedEcho = 2 * (aec->linoutlevel.averagelevel -
                                  safety * aec->linoutlevel.minlevel);

            dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);

            aec->aNlp.instant = dtmp2;
            if (dtmp > aec->aNlp.max) {
                aec->aNlp.max = dtmp;
            }

            if (dtmp < aec->aNlp.min) {
                aec->aNlp.min = dtmp;
            }

            aec->aNlp.counter++;
            aec->aNlp.sum += dtmp;
            aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter;

            // Upper mean
            if (dtmp > aec->aNlp.average) {
                aec->aNlp.hicounter++;
                aec->aNlp.hisum += dtmp;
                aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter;
            }

            // ERLE

            // subtract noise power
            suppressedEcho = 2 * (aec->nlpoutlevel.averagelevel -
                                  safety * aec->nlpoutlevel.minlevel);

            dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
                                     (2 * aec->nlpoutlevel.averagelevel) +
                                     1e-10f);
            dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);

            dtmp = dtmp2;
            aec->erle.instant = dtmp;
            if (dtmp > aec->erle.max) {
                aec->erle.max = dtmp;
            }

            if (dtmp < aec->erle.min) {
                aec->erle.min = dtmp;
            }

            aec->erle.counter++;
            aec->erle.sum += dtmp;
            aec->erle.average = aec->erle.sum / aec->erle.counter;

            // Upper mean
            if (dtmp > aec->erle.average) {
                aec->erle.hicounter++;
                aec->erle.hisum += dtmp;
                aec->erle.himean = aec->erle.hisum / aec->erle.hicounter;
            }
        }

        aec->stateCounter = 0;
    }
}

static void TimeToFrequency(float time_data[PART_LEN2],
                            float freq_data[2][PART_LEN1],
                            int window) {
    int i = 0;

    // TODO(bjornv): Should we have a different function/wrapper for windowed
    // FFT?
    if (window) {
        for (i = 0; i < PART_LEN; i++) {
            time_data[i] *= sqrtHanning[i];
            time_data[PART_LEN + i] *= sqrtHanning[PART_LEN - i];
        }
    }

    aec_rdft_forward_128(time_data);
    // Reorder.
    freq_data[1][0] = 0;
    freq_data[1][PART_LEN] = 0;
    freq_data[0][0] = time_data[0];
    freq_data[0][PART_LEN] = time_data[1];
    for (i = 1; i < PART_LEN; i++) {
        freq_data[0][i] = time_data[2 * i];
        freq_data[1][i] = time_data[2 * i + 1];
    }
}