• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 /*
12  * The core AEC algorithm, which is presented with time-aligned signals.
13  */
14 
15 #include "aec_core.h"
16 
17 #include <assert.h>
18 #include <math.h>
19 #include <stddef.h>  // size_t
20 #include <stdlib.h>
21 #include <string.h>
22 
23 #include "aec_rdft.h"
24 #include "delay_estimator_wrapper.h"
25 #include "ring_buffer.h"
26 #include "system_wrappers/interface/cpu_features_wrapper.h"
27 #include "typedefs.h"
28 
29 // Buffer size (samples)
30 static const size_t kBufSizePartitions = 250;  // 1 second of audio in 16 kHz.
31 
32 // Noise suppression
33 static const int converged = 250;
34 
35 // Metrics
36 static const int subCountLen = 4;
37 static const int countLen = 50;
38 
39 // Quantities to control H band scaling for SWB input
40 static const int flagHbandCn = 1; // flag for adding comfort noise in H band
41 static const float cnScaleHband = (float)0.4; // scale for comfort noise in H band
42 // Initial bin for averaging nlp gain in low band
43 static const int freqAvgIc = PART_LEN / 2;
44 
45 // Matlab code to produce table:
46 // win = sqrt(hanning(63)); win = [0 ; win(1:32)];
47 // fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);
48 static const float sqrtHanning[65] = {
49     0.00000000000000f, 0.02454122852291f, 0.04906767432742f,
50     0.07356456359967f, 0.09801714032956f, 0.12241067519922f,
51     0.14673047445536f, 0.17096188876030f, 0.19509032201613f,
52     0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
53     0.29028467725446f, 0.31368174039889f, 0.33688985339222f,
54     0.35989503653499f, 0.38268343236509f, 0.40524131400499f,
55     0.42755509343028f, 0.44961132965461f, 0.47139673682600f,
56     0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
57     0.55557023301960f, 0.57580819141785f, 0.59569930449243f,
58     0.61523159058063f, 0.63439328416365f, 0.65317284295378f,
59     0.67155895484702f, 0.68954054473707f, 0.70710678118655f,
60     0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
61     0.77301045336274f, 0.78834642762661f, 0.80320753148064f,
62     0.81758481315158f, 0.83146961230255f, 0.84485356524971f,
63     0.85772861000027f, 0.87008699110871f, 0.88192126434835f,
64     0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
65     0.92387953251129f, 0.93299279883474f, 0.94154406518302f,
66     0.94952818059304f, 0.95694033573221f, 0.96377606579544f,
67     0.97003125319454f, 0.97570213003853f, 0.98078528040323f,
68     0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
69     0.99518472667220f, 0.99729045667869f, 0.99879545620517f,
70     0.99969881869620f, 1.00000000000000f
71 };
72 
73 // Matlab code to produce table:
74 // weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1];
75 // fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve);
76 const float WebRtcAec_weightCurve[65] = {
77     0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f,
78     0.1845f, 0.1926f, 0.2000f, 0.2069f, 0.2134f, 0.2195f,
79     0.2254f, 0.2309f, 0.2363f, 0.2414f, 0.2464f, 0.2512f,
80     0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f,
81     0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f,
82     0.3035f, 0.3070f, 0.3104f, 0.3138f, 0.3171f, 0.3204f,
83     0.3236f, 0.3268f, 0.3299f, 0.3330f, 0.3360f, 0.3390f,
84     0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f,
85     0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f,
86     0.3752f, 0.3777f, 0.3803f, 0.3828f, 0.3854f, 0.3878f,
87     0.3903f, 0.3928f, 0.3952f, 0.3976f, 0.4000f
88 };
89 
90 // Matlab code to produce table:
91 // overDriveCurve = [sqrt(linspace(0,1,65))' + 1];
92 // fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve);
93 const float WebRtcAec_overDriveCurve[65] = {
94     1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f,
95     1.3062f, 1.3307f, 1.3536f, 1.3750f, 1.3953f, 1.4146f,
96     1.4330f, 1.4507f, 1.4677f, 1.4841f, 1.5000f, 1.5154f,
97     1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f,
98     1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f,
99     1.6847f, 1.6960f, 1.7071f, 1.7181f, 1.7289f, 1.7395f,
100     1.7500f, 1.7603f, 1.7706f, 1.7806f, 1.7906f, 1.8004f,
101     1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f,
102     1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f,
103     1.9186f, 1.9270f, 1.9354f, 1.9437f, 1.9520f, 1.9601f,
104     1.9682f, 1.9763f, 1.9843f, 1.9922f, 2.0000f
105 };
106 
107 // "Private" function prototypes.
108 static void ProcessBlock(aec_t* aec);
109 
110 static void NonLinearProcessing(aec_t *aec, short *output, short *outputH);
111 
112 static void GetHighbandGain(const float *lambda, float *nlpGainHband);
113 
114 // Comfort_noise also computes noise for H band returned in comfortNoiseHband
115 static void ComfortNoise(aec_t *aec, float efw[2][PART_LEN1],
116                                   complex_t *comfortNoiseHband,
117                                   const float *noisePow, const float *lambda);
118 
119 static void WebRtcAec_InitLevel(power_level_t *level);
120 static void WebRtcAec_InitStats(stats_t *stats);
121 static void UpdateLevel(power_level_t* level, float in[2][PART_LEN1]);
122 static void UpdateMetrics(aec_t *aec);
123 // Convert from time domain to frequency domain. Note that |time_data| are
124 // overwritten.
125 static void TimeToFrequency(float time_data[PART_LEN2],
126                             float freq_data[2][PART_LEN1],
127                             int window);
128 
MulRe(float aRe,float aIm,float bRe,float bIm)129 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm)
130 {
131     return aRe * bRe - aIm * bIm;
132 }
133 
MulIm(float aRe,float aIm,float bRe,float bIm)134 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm)
135 {
136     return aRe * bIm + aIm * bRe;
137 }
138 
CmpFloat(const void * a,const void * b)139 static int CmpFloat(const void *a, const void *b)
140 {
141     const float *da = (const float *)a;
142     const float *db = (const float *)b;
143 
144     return (*da > *db) - (*da < *db);
145 }
146 
WebRtcAec_CreateAec(aec_t ** aecInst)147 int WebRtcAec_CreateAec(aec_t **aecInst)
148 {
149     aec_t *aec = malloc(sizeof(aec_t));
150     *aecInst = aec;
151     if (aec == NULL) {
152         return -1;
153     }
154 
155     if (WebRtc_CreateBuffer(&aec->nearFrBuf,
156                             FRAME_LEN + PART_LEN,
157                             sizeof(int16_t)) == -1) {
158         WebRtcAec_FreeAec(aec);
159         aec = NULL;
160         return -1;
161     }
162 
163     if (WebRtc_CreateBuffer(&aec->outFrBuf,
164                             FRAME_LEN + PART_LEN,
165                             sizeof(int16_t)) == -1) {
166         WebRtcAec_FreeAec(aec);
167         aec = NULL;
168         return -1;
169     }
170 
171     if (WebRtc_CreateBuffer(&aec->nearFrBufH,
172                             FRAME_LEN + PART_LEN,
173                             sizeof(int16_t)) == -1) {
174         WebRtcAec_FreeAec(aec);
175         aec = NULL;
176         return -1;
177     }
178 
179     if (WebRtc_CreateBuffer(&aec->outFrBufH,
180                             FRAME_LEN + PART_LEN,
181                             sizeof(int16_t)) == -1) {
182         WebRtcAec_FreeAec(aec);
183         aec = NULL;
184         return -1;
185     }
186 
187     // Create far-end buffers.
188     if (WebRtc_CreateBuffer(&aec->far_buf, kBufSizePartitions,
189                             sizeof(float) * 2 * PART_LEN1) == -1) {
190         WebRtcAec_FreeAec(aec);
191         aec = NULL;
192         return -1;
193     }
194     if (WebRtc_CreateBuffer(&aec->far_buf_windowed, kBufSizePartitions,
195                             sizeof(float) * 2 * PART_LEN1) == -1) {
196         WebRtcAec_FreeAec(aec);
197         aec = NULL;
198         return -1;
199     }
200 #ifdef WEBRTC_AEC_DEBUG_DUMP
201     if (WebRtc_CreateBuffer(&aec->far_time_buf, kBufSizePartitions,
202                             sizeof(int16_t) * PART_LEN) == -1) {
203         WebRtcAec_FreeAec(aec);
204         aec = NULL;
205         return -1;
206     }
207 #endif
208     if (WebRtc_CreateDelayEstimator(&aec->delay_estimator,
209                                     PART_LEN1,
210                                     kMaxDelayBlocks,
211                                     kLookaheadBlocks) == -1) {
212       WebRtcAec_FreeAec(aec);
213       aec = NULL;
214       return -1;
215     }
216 
217     return 0;
218 }
219 
WebRtcAec_FreeAec(aec_t * aec)220 int WebRtcAec_FreeAec(aec_t *aec)
221 {
222     if (aec == NULL) {
223         return -1;
224     }
225 
226     WebRtc_FreeBuffer(aec->nearFrBuf);
227     WebRtc_FreeBuffer(aec->outFrBuf);
228 
229     WebRtc_FreeBuffer(aec->nearFrBufH);
230     WebRtc_FreeBuffer(aec->outFrBufH);
231 
232     WebRtc_FreeBuffer(aec->far_buf);
233     WebRtc_FreeBuffer(aec->far_buf_windowed);
234 #ifdef WEBRTC_AEC_DEBUG_DUMP
235     WebRtc_FreeBuffer(aec->far_time_buf);
236 #endif
237     WebRtc_FreeDelayEstimator(aec->delay_estimator);
238 
239     free(aec);
240     return 0;
241 }
242 
FilterFar(aec_t * aec,float yf[2][PART_LEN1])243 static void FilterFar(aec_t *aec, float yf[2][PART_LEN1])
244 {
245   int i;
246   for (i = 0; i < NR_PART; i++) {
247     int j;
248     int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
249     int pos = i * PART_LEN1;
250     // Check for wrap
251     if (i + aec->xfBufBlockPos >= NR_PART) {
252       xPos -= NR_PART*(PART_LEN1);
253     }
254 
255     for (j = 0; j < PART_LEN1; j++) {
256       yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
257                         aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
258       yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
259                         aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
260     }
261   }
262 }
263 
ScaleErrorSignal(aec_t * aec,float ef[2][PART_LEN1])264 static void ScaleErrorSignal(aec_t *aec, float ef[2][PART_LEN1])
265 {
266   int i;
267   float absEf;
268   for (i = 0; i < (PART_LEN1); i++) {
269     ef[0][i] /= (aec->xPow[i] + 1e-10f);
270     ef[1][i] /= (aec->xPow[i] + 1e-10f);
271     absEf = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
272 
273     if (absEf > aec->errThresh) {
274       absEf = aec->errThresh / (absEf + 1e-10f);
275       ef[0][i] *= absEf;
276       ef[1][i] *= absEf;
277     }
278 
279     // Stepsize factor
280     ef[0][i] *= aec->mu;
281     ef[1][i] *= aec->mu;
282   }
283 }
284 
285 // Time-unconstrined filter adaptation.
286 // TODO(andrew): consider for a low-complexity mode.
287 //static void FilterAdaptationUnconstrained(aec_t *aec, float *fft,
288 //                                          float ef[2][PART_LEN1]) {
289 //  int i, j;
290 //  for (i = 0; i < NR_PART; i++) {
291 //    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
292 //    int pos;
293 //    // Check for wrap
294 //    if (i + aec->xfBufBlockPos >= NR_PART) {
295 //      xPos -= NR_PART * PART_LEN1;
296 //    }
297 //
298 //    pos = i * PART_LEN1;
299 //
300 //    for (j = 0; j < PART_LEN1; j++) {
301 //      aec->wfBuf[pos + j][0] += MulRe(aec->xfBuf[xPos + j][0],
302 //                                      -aec->xfBuf[xPos + j][1],
303 //                                      ef[j][0], ef[j][1]);
304 //      aec->wfBuf[pos + j][1] += MulIm(aec->xfBuf[xPos + j][0],
305 //                                      -aec->xfBuf[xPos + j][1],
306 //                                      ef[j][0], ef[j][1]);
307 //    }
308 //  }
309 //}
310 
FilterAdaptation(aec_t * aec,float * fft,float ef[2][PART_LEN1])311 static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1]) {
312   int i, j;
313   for (i = 0; i < NR_PART; i++) {
314     int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
315     int pos;
316     // Check for wrap
317     if (i + aec->xfBufBlockPos >= NR_PART) {
318       xPos -= NR_PART * PART_LEN1;
319     }
320 
321     pos = i * PART_LEN1;
322 
323     for (j = 0; j < PART_LEN; j++) {
324 
325       fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j],
326                          -aec->xfBuf[1][xPos + j],
327                          ef[0][j], ef[1][j]);
328       fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j],
329                              -aec->xfBuf[1][xPos + j],
330                              ef[0][j], ef[1][j]);
331     }
332     fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
333                    -aec->xfBuf[1][xPos + PART_LEN],
334                    ef[0][PART_LEN], ef[1][PART_LEN]);
335 
336     aec_rdft_inverse_128(fft);
337     memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
338 
339     // fft scaling
340     {
341       float scale = 2.0f / PART_LEN2;
342       for (j = 0; j < PART_LEN; j++) {
343         fft[j] *= scale;
344       }
345     }
346     aec_rdft_forward_128(fft);
347 
348     aec->wfBuf[0][pos] += fft[0];
349     aec->wfBuf[0][pos + PART_LEN] += fft[1];
350 
351     for (j = 1; j < PART_LEN; j++) {
352       aec->wfBuf[0][pos + j] += fft[2 * j];
353       aec->wfBuf[1][pos + j] += fft[2 * j + 1];
354     }
355   }
356 }
357 
OverdriveAndSuppress(aec_t * aec,float hNl[PART_LEN1],const float hNlFb,float efw[2][PART_LEN1])358 static void OverdriveAndSuppress(aec_t *aec, float hNl[PART_LEN1],
359                                  const float hNlFb,
360                                  float efw[2][PART_LEN1]) {
361   int i;
362   for (i = 0; i < PART_LEN1; i++) {
363     // Weight subbands
364     if (hNl[i] > hNlFb) {
365       hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
366           (1 - WebRtcAec_weightCurve[i]) * hNl[i];
367     }
368     hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
369 
370     // Suppress error signal
371     efw[0][i] *= hNl[i];
372     efw[1][i] *= hNl[i];
373 
374     // Ooura fft returns incorrect sign on imaginary component. It matters here
375     // because we are making an additive change with comfort noise.
376     efw[1][i] *= -1;
377   }
378 }
379 
380 WebRtcAec_FilterFar_t WebRtcAec_FilterFar;
381 WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal;
382 WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation;
383 WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress;
384 
WebRtcAec_InitAec(aec_t * aec,int sampFreq)385 int WebRtcAec_InitAec(aec_t *aec, int sampFreq)
386 {
387     int i;
388 
389     aec->sampFreq = sampFreq;
390 
391     if (sampFreq == 8000) {
392         aec->mu = 0.6f;
393         aec->errThresh = 2e-6f;
394     }
395     else {
396         aec->mu = 0.5f;
397         aec->errThresh = 1.5e-6f;
398     }
399 
400     if (WebRtc_InitBuffer(aec->nearFrBuf) == -1) {
401         return -1;
402     }
403 
404     if (WebRtc_InitBuffer(aec->outFrBuf) == -1) {
405         return -1;
406     }
407 
408     if (WebRtc_InitBuffer(aec->nearFrBufH) == -1) {
409         return -1;
410     }
411 
412     if (WebRtc_InitBuffer(aec->outFrBufH) == -1) {
413         return -1;
414     }
415 
416     // Initialize far-end buffers.
417     if (WebRtc_InitBuffer(aec->far_buf) == -1) {
418         return -1;
419     }
420     if (WebRtc_InitBuffer(aec->far_buf_windowed) == -1) {
421         return -1;
422     }
423 #ifdef WEBRTC_AEC_DEBUG_DUMP
424     if (WebRtc_InitBuffer(aec->far_time_buf) == -1) {
425         return -1;
426     }
427 #endif
428     aec->system_delay = 0;
429 
430     if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) {
431       return -1;
432     }
433     aec->delay_logging_enabled = 0;
434     memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram));
435 
436     // Default target suppression level
437     aec->targetSupp = -11.5;
438     aec->minOverDrive = 2.0;
439 
440     // Sampling frequency multiplier
441     // SWB is processed as 160 frame size
442     if (aec->sampFreq == 32000) {
443       aec->mult = (short)aec->sampFreq / 16000;
444     }
445     else {
446         aec->mult = (short)aec->sampFreq / 8000;
447     }
448 
449     aec->farBufWritePos = 0;
450     aec->farBufReadPos = 0;
451 
452     aec->inSamples = 0;
453     aec->outSamples = 0;
454     aec->knownDelay = 0;
455 
456     // Initialize buffers
457     memset(aec->dBuf, 0, sizeof(aec->dBuf));
458     memset(aec->eBuf, 0, sizeof(aec->eBuf));
459     // For H band
460     memset(aec->dBufH, 0, sizeof(aec->dBufH));
461 
462     memset(aec->xPow, 0, sizeof(aec->xPow));
463     memset(aec->dPow, 0, sizeof(aec->dPow));
464     memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow));
465     aec->noisePow = aec->dInitMinPow;
466     aec->noiseEstCtr = 0;
467 
468     // Initial comfort noise power
469     for (i = 0; i < PART_LEN1; i++) {
470         aec->dMinPow[i] = 1.0e6f;
471     }
472 
473     // Holds the last block written to
474     aec->xfBufBlockPos = 0;
475     // TODO: Investigate need for these initializations. Deleting them doesn't
476     //       change the output at all and yields 0.4% overall speedup.
477     memset(aec->xfBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
478     memset(aec->wfBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
479     memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1);
480     memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1);
481     memset(aec->xfwBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
482     memset(aec->se, 0, sizeof(float) * PART_LEN1);
483 
484     // To prevent numerical instability in the first block.
485     for (i = 0; i < PART_LEN1; i++) {
486         aec->sd[i] = 1;
487     }
488     for (i = 0; i < PART_LEN1; i++) {
489         aec->sx[i] = 1;
490     }
491 
492     memset(aec->hNs, 0, sizeof(aec->hNs));
493     memset(aec->outBuf, 0, sizeof(float) * PART_LEN);
494 
495     aec->hNlFbMin = 1;
496     aec->hNlFbLocalMin = 1;
497     aec->hNlXdAvgMin = 1;
498     aec->hNlNewMin = 0;
499     aec->hNlMinCtr = 0;
500     aec->overDrive = 2;
501     aec->overDriveSm = 2;
502     aec->delayIdx = 0;
503     aec->stNearState = 0;
504     aec->echoState = 0;
505     aec->divergeState = 0;
506 
507     aec->seed = 777;
508     aec->delayEstCtr = 0;
509 
510     // Metrics disabled by default
511     aec->metricsMode = 0;
512     WebRtcAec_InitMetrics(aec);
513 
514     // Assembly optimization
515     WebRtcAec_FilterFar = FilterFar;
516     WebRtcAec_ScaleErrorSignal = ScaleErrorSignal;
517     WebRtcAec_FilterAdaptation = FilterAdaptation;
518     WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
519     if (WebRtc_GetCPUInfo(kSSE2)) {
520 #if defined(WEBRTC_USE_SSE2)
521       WebRtcAec_InitAec_SSE2();
522 #endif
523     }
524     aec_rdft_init();
525 
526     return 0;
527 }
528 
WebRtcAec_InitMetrics(aec_t * aec)529 void WebRtcAec_InitMetrics(aec_t *aec)
530 {
531     aec->stateCounter = 0;
532     WebRtcAec_InitLevel(&aec->farlevel);
533     WebRtcAec_InitLevel(&aec->nearlevel);
534     WebRtcAec_InitLevel(&aec->linoutlevel);
535     WebRtcAec_InitLevel(&aec->nlpoutlevel);
536 
537     WebRtcAec_InitStats(&aec->erl);
538     WebRtcAec_InitStats(&aec->erle);
539     WebRtcAec_InitStats(&aec->aNlp);
540     WebRtcAec_InitStats(&aec->rerl);
541 }
542 
543 
WebRtcAec_BufferFarendPartition(aec_t * aec,const float * farend)544 void WebRtcAec_BufferFarendPartition(aec_t *aec, const float* farend) {
545   float fft[PART_LEN2];
546   float xf[2][PART_LEN1];
547 
548   // Check if the buffer is full, and in that case flush the oldest data.
549   if (WebRtc_available_write(aec->far_buf) < 1) {
550     WebRtc_MoveReadPtr(aec->far_buf, 1);
551     WebRtc_MoveReadPtr(aec->far_buf_windowed, 1);
552     aec->system_delay -= PART_LEN;
553 #ifdef WEBRTC_AEC_DEBUG_DUMP
554     WebRtc_MoveReadPtr(aec->far_time_buf, 1);
555 #endif
556   }
557   // Convert far-end partition to the frequency domain without windowing.
558   memcpy(fft, farend, sizeof(float) * PART_LEN2);
559   TimeToFrequency(fft, xf, 0);
560   WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1);
561 
562   // Convert far-end partition to the frequency domain with windowing.
563   memcpy(fft, farend, sizeof(float) * PART_LEN2);
564   TimeToFrequency(fft, xf, 1);
565   WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1);
566 }
567 
WebRtcAec_ProcessFrame(aec_t * aec,const short * nearend,const short * nearendH,int knownDelay)568 void WebRtcAec_ProcessFrame(aec_t *aec,
569                             const short *nearend,
570                             const short *nearendH,
571                             int knownDelay)
572 {
573     // For each frame the process is as follows:
574     // 1) If the system_delay indicates on being too small for processing a
575     //    frame we stuff the buffer with enough data for 10 ms.
576     // 2) Adjust the buffer to the system delay, by moving the read pointer.
577     // 3) If we can't move read pointer due to buffer size limitations we
578     //    flush/stuff the buffer.
579     // 4) Process as many partitions as possible.
580     // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN
581     //    samples. Even though we will have data left to process (we work with
582     //    partitions) we consider updating a whole frame, since that's the
583     //    amount of data we input and output in audio_processing.
584 
585     // TODO(bjornv): Investigate how we should round the delay difference; right
586     // now we know that incoming |knownDelay| is underestimated when it's less
587     // than |aec->knownDelay|. We therefore, round (-32) in that direction. In
588     // the other direction, we don't have this situation, but might flush one
589     // partition too little. This can cause non-causality, which should be
590     // investigated. Maybe, allow for a non-symmetric rounding, like -16.
591     int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN;
592     int moved_elements = 0;
593 
594     // TODO(bjornv): Change the near-end buffer handling to be the same as for
595     // far-end, that is, with a near_pre_buf.
596     // Buffer the near-end frame.
597     WebRtc_WriteBuffer(aec->nearFrBuf, nearend, FRAME_LEN);
598     // For H band
599     if (aec->sampFreq == 32000) {
600         WebRtc_WriteBuffer(aec->nearFrBufH, nearendH, FRAME_LEN);
601     }
602 
603     // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we
604     // have enough far-end data for that by stuffing the buffer if the
605     // |system_delay| indicates others.
606     if (aec->system_delay < FRAME_LEN) {
607       // We don't have enough data so we rewind 10 ms.
608       WebRtc_MoveReadPtr(aec->far_buf_windowed, -(aec->mult + 1));
609       aec->system_delay -= WebRtc_MoveReadPtr(aec->far_buf, -(aec->mult + 1)) *
610           PART_LEN;
611 #ifdef WEBRTC_AEC_DEBUG_DUMP
612       WebRtc_MoveReadPtr(aec->far_time_buf, -(aec->mult + 1));
613 #endif
614     }
615 
616     // 2) Compensate for a possible change in the system delay.
617 
618     WebRtc_MoveReadPtr(aec->far_buf_windowed, move_elements);
619     moved_elements = WebRtc_MoveReadPtr(aec->far_buf, move_elements);
620     aec->knownDelay -= moved_elements * PART_LEN;
621 #ifdef WEBRTC_AEC_DEBUG_DUMP
622     WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
623 #endif
624 
625     // 4) Process as many blocks as possible.
626     while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) {
627         ProcessBlock(aec);
628     }
629 
630     // 5) Update system delay with respect to the entire frame.
631     aec->system_delay -= FRAME_LEN;
632 }
633 
ProcessBlock(aec_t * aec)634 static void ProcessBlock(aec_t* aec) {
635     int i;
636     float d[PART_LEN], y[PART_LEN], e[PART_LEN], dH[PART_LEN];
637     float scale;
638 
639     float fft[PART_LEN2];
640     float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1];
641     float df[2][PART_LEN1];
642     float far_spectrum = 0.0f;
643     float near_spectrum = 0.0f;
644     float abs_far_spectrum[PART_LEN1];
645     float abs_near_spectrum[PART_LEN1];
646 
647     const float gPow[2] = {0.9f, 0.1f};
648 
649     // Noise estimate constants.
650     const int noiseInitBlocks = 500 * aec->mult;
651     const float step = 0.1f;
652     const float ramp = 1.0002f;
653     const float gInitNoise[2] = {0.999f, 0.001f};
654 
655     int16_t nearend[PART_LEN];
656     int16_t* nearend_ptr = NULL;
657     int16_t output[PART_LEN];
658     int16_t outputH[PART_LEN];
659 
660     float* xf_ptr = NULL;
661 
662     memset(dH, 0, sizeof(dH));
663     if (aec->sampFreq == 32000) {
664       // Get the upper band first so we can reuse |nearend|.
665       WebRtc_ReadBuffer(aec->nearFrBufH,
666                         (void**) &nearend_ptr,
667                         nearend,
668                         PART_LEN);
669       for (i = 0; i < PART_LEN; i++) {
670           dH[i] = (float) (nearend_ptr[i]);
671       }
672       memcpy(aec->dBufH + PART_LEN, dH, sizeof(float) * PART_LEN);
673     }
674     WebRtc_ReadBuffer(aec->nearFrBuf, (void**) &nearend_ptr, nearend, PART_LEN);
675 
676     // ---------- Ooura fft ----------
677     // Concatenate old and new nearend blocks.
678     for (i = 0; i < PART_LEN; i++) {
679         d[i] = (float) (nearend_ptr[i]);
680     }
681     memcpy(aec->dBuf + PART_LEN, d, sizeof(float) * PART_LEN);
682 
683 #ifdef WEBRTC_AEC_DEBUG_DUMP
684     {
685         int16_t farend[PART_LEN];
686         int16_t* farend_ptr = NULL;
687         WebRtc_ReadBuffer(aec->far_time_buf, (void**) &farend_ptr, farend, 1);
688         fwrite(farend_ptr, sizeof(int16_t), PART_LEN, aec->farFile);
689         fwrite(nearend_ptr, sizeof(int16_t), PART_LEN, aec->nearFile);
690     }
691 #endif
692 
693     // We should always have at least one element stored in |far_buf|.
694     assert(WebRtc_available_read(aec->far_buf) > 0);
695     WebRtc_ReadBuffer(aec->far_buf, (void**) &xf_ptr, &xf[0][0], 1);
696 
697     // Near fft
698     memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
699     TimeToFrequency(fft, df, 0);
700 
701     // Power smoothing
702     for (i = 0; i < PART_LEN1; i++) {
703       far_spectrum = (xf_ptr[i] * xf_ptr[i]) +
704           (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]);
705       aec->xPow[i] = gPow[0] * aec->xPow[i] + gPow[1] * NR_PART * far_spectrum;
706       // Calculate absolute spectra
707       abs_far_spectrum[i] = sqrtf(far_spectrum);
708 
709       near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i];
710       aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum;
711       // Calculate absolute spectra
712       abs_near_spectrum[i] = sqrtf(near_spectrum);
713     }
714 
715     // Estimate noise power. Wait until dPow is more stable.
716     if (aec->noiseEstCtr > 50) {
717         for (i = 0; i < PART_LEN1; i++) {
718             if (aec->dPow[i] < aec->dMinPow[i]) {
719                 aec->dMinPow[i] = (aec->dPow[i] + step * (aec->dMinPow[i] -
720                     aec->dPow[i])) * ramp;
721             }
722             else {
723                 aec->dMinPow[i] *= ramp;
724             }
725         }
726     }
727 
728     // Smooth increasing noise power from zero at the start,
729     // to avoid a sudden burst of comfort noise.
730     if (aec->noiseEstCtr < noiseInitBlocks) {
731         aec->noiseEstCtr++;
732         for (i = 0; i < PART_LEN1; i++) {
733             if (aec->dMinPow[i] > aec->dInitMinPow[i]) {
734                 aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] +
735                     gInitNoise[1] * aec->dMinPow[i];
736             }
737             else {
738                 aec->dInitMinPow[i] = aec->dMinPow[i];
739             }
740         }
741         aec->noisePow = aec->dInitMinPow;
742     }
743     else {
744         aec->noisePow = aec->dMinPow;
745     }
746 
747     // Block wise delay estimation used for logging
748     if (aec->delay_logging_enabled) {
749       int delay_estimate = 0;
750       // Estimate the delay
751       delay_estimate = WebRtc_DelayEstimatorProcessFloat(aec->delay_estimator,
752                                                          abs_far_spectrum,
753                                                          abs_near_spectrum,
754                                                          PART_LEN1);
755       if (delay_estimate >= 0) {
756         // Update delay estimate buffer.
757         aec->delay_histogram[delay_estimate]++;
758       }
759     }
760 
761     // Update the xfBuf block position.
762     aec->xfBufBlockPos--;
763     if (aec->xfBufBlockPos == -1) {
764         aec->xfBufBlockPos = NR_PART - 1;
765     }
766 
767     // Buffer xf
768     memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1, xf_ptr,
769            sizeof(float) * PART_LEN1);
770     memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1, &xf_ptr[PART_LEN1],
771            sizeof(float) * PART_LEN1);
772 
773     memset(yf[0], 0, sizeof(float) * (PART_LEN1 * 2));
774 
775     // Filter far
776     WebRtcAec_FilterFar(aec, yf);
777 
778     // Inverse fft to obtain echo estimate and error.
779     fft[0] = yf[0][0];
780     fft[1] = yf[0][PART_LEN];
781     for (i = 1; i < PART_LEN; i++) {
782         fft[2 * i] = yf[0][i];
783         fft[2 * i + 1] = yf[1][i];
784     }
785     aec_rdft_inverse_128(fft);
786 
787     scale = 2.0f / PART_LEN2;
788     for (i = 0; i < PART_LEN; i++) {
789         y[i] = fft[PART_LEN + i] * scale; // fft scaling
790     }
791 
792     for (i = 0; i < PART_LEN; i++) {
793         e[i] = d[i] - y[i];
794     }
795 
796     // Error fft
797     memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
798     memset(fft, 0, sizeof(float) * PART_LEN);
799     memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
800     // TODO(bjornv): Change to use TimeToFrequency().
801     aec_rdft_forward_128(fft);
802 
803     ef[1][0] = 0;
804     ef[1][PART_LEN] = 0;
805     ef[0][0] = fft[0];
806     ef[0][PART_LEN] = fft[1];
807     for (i = 1; i < PART_LEN; i++) {
808         ef[0][i] = fft[2 * i];
809         ef[1][i] = fft[2 * i + 1];
810     }
811 
812     if (aec->metricsMode == 1) {
813       // Note that the first PART_LEN samples in fft (before transformation) are
814       // zero. Hence, the scaling by two in UpdateLevel() should not be
815       // performed. That scaling is taken care of in UpdateMetrics() instead.
816       UpdateLevel(&aec->linoutlevel, ef);
817     }
818 
819     // Scale error signal inversely with far power.
820     WebRtcAec_ScaleErrorSignal(aec, ef);
821     WebRtcAec_FilterAdaptation(aec, fft, ef);
822     NonLinearProcessing(aec, output, outputH);
823 
824     if (aec->metricsMode == 1) {
825         // Update power levels and echo metrics
826         UpdateLevel(&aec->farlevel, (float (*)[PART_LEN1]) xf_ptr);
827         UpdateLevel(&aec->nearlevel, df);
828         UpdateMetrics(aec);
829     }
830 
831     // Store the output block.
832     WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN);
833     // For H band
834     if (aec->sampFreq == 32000) {
835         WebRtc_WriteBuffer(aec->outFrBufH, outputH, PART_LEN);
836     }
837 
838 #ifdef WEBRTC_AEC_DEBUG_DUMP
839     {
840         int16_t eInt16[PART_LEN];
841         for (i = 0; i < PART_LEN; i++) {
842             eInt16[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, e[i],
843                 WEBRTC_SPL_WORD16_MIN);
844         }
845 
846         fwrite(eInt16, sizeof(int16_t), PART_LEN, aec->outLinearFile);
847         fwrite(output, sizeof(int16_t), PART_LEN, aec->outFile);
848     }
849 #endif
850 }
851 
NonLinearProcessing(aec_t * aec,short * output,short * outputH)852 static void NonLinearProcessing(aec_t *aec, short *output, short *outputH)
853 {
854     float efw[2][PART_LEN1], dfw[2][PART_LEN1], xfw[2][PART_LEN1];
855     complex_t comfortNoiseHband[PART_LEN1];
856     float fft[PART_LEN2];
857     float scale, dtmp;
858     float nlpGainHband;
859     int i, j, pos;
860 
861     // Coherence and non-linear filter
862     float cohde[PART_LEN1], cohxd[PART_LEN1];
863     float hNlDeAvg, hNlXdAvg;
864     float hNl[PART_LEN1];
865     float hNlPref[PREF_BAND_SIZE];
866     float hNlFb = 0, hNlFbLow = 0;
867     const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f;
868     const int prefBandSize = PREF_BAND_SIZE / aec->mult;
869     const int minPrefBand = 4 / aec->mult;
870 
871     // Near and error power sums
872     float sdSum = 0, seSum = 0;
873 
874     // Power estimate smoothing coefficients
875     const float gCoh[2][2] = {{0.9f, 0.1f}, {0.93f, 0.07f}};
876     const float *ptrGCoh = gCoh[aec->mult - 1];
877 
878     // Filter energy
879     float wfEnMax = 0, wfEn = 0;
880     const int delayEstInterval = 10 * aec->mult;
881 
882     float* xfw_ptr = NULL;
883 
884     aec->delayEstCtr++;
885     if (aec->delayEstCtr == delayEstInterval) {
886         aec->delayEstCtr = 0;
887     }
888 
889     // initialize comfort noise for H band
890     memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband));
891     nlpGainHband = (float)0.0;
892     dtmp = (float)0.0;
893 
894     // Measure energy in each filter partition to determine delay.
895     // TODO: Spread by computing one partition per block?
896     if (aec->delayEstCtr == 0) {
897         wfEnMax = 0;
898         aec->delayIdx = 0;
899         for (i = 0; i < NR_PART; i++) {
900             pos = i * PART_LEN1;
901             wfEn = 0;
902             for (j = 0; j < PART_LEN1; j++) {
903                 wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
904                     aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
905             }
906 
907             if (wfEn > wfEnMax) {
908                 wfEnMax = wfEn;
909                 aec->delayIdx = i;
910             }
911         }
912     }
913 
914     // We should always have at least one element stored in |far_buf|.
915     assert(WebRtc_available_read(aec->far_buf_windowed) > 0);
916     // NLP
917     WebRtc_ReadBuffer(aec->far_buf_windowed, (void**) &xfw_ptr, &xfw[0][0], 1);
918 
919     // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of
920     // |xfwBuf|.
921     // Buffer far.
922     memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);
923 
924     // Use delayed far.
925     memcpy(xfw, aec->xfwBuf + aec->delayIdx * PART_LEN1, sizeof(xfw));
926 
927     // Windowed near fft
928     for (i = 0; i < PART_LEN; i++) {
929         fft[i] = aec->dBuf[i] * sqrtHanning[i];
930         fft[PART_LEN + i] = aec->dBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
931     }
932     aec_rdft_forward_128(fft);
933 
934     dfw[1][0] = 0;
935     dfw[1][PART_LEN] = 0;
936     dfw[0][0] = fft[0];
937     dfw[0][PART_LEN] = fft[1];
938     for (i = 1; i < PART_LEN; i++) {
939         dfw[0][i] = fft[2 * i];
940         dfw[1][i] = fft[2 * i + 1];
941     }
942 
943     // Windowed error fft
944     for (i = 0; i < PART_LEN; i++) {
945         fft[i] = aec->eBuf[i] * sqrtHanning[i];
946         fft[PART_LEN + i] = aec->eBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
947     }
948     aec_rdft_forward_128(fft);
949     efw[1][0] = 0;
950     efw[1][PART_LEN] = 0;
951     efw[0][0] = fft[0];
952     efw[0][PART_LEN] = fft[1];
953     for (i = 1; i < PART_LEN; i++) {
954         efw[0][i] = fft[2 * i];
955         efw[1][i] = fft[2 * i + 1];
956     }
957 
958     // Smoothed PSD
959     for (i = 0; i < PART_LEN1; i++) {
960         aec->sd[i] = ptrGCoh[0] * aec->sd[i] + ptrGCoh[1] *
961             (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
962         aec->se[i] = ptrGCoh[0] * aec->se[i] + ptrGCoh[1] *
963             (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
964         // We threshold here to protect against the ill-effects of a zero farend.
965         // The threshold is not arbitrarily chosen, but balances protection and
966         // adverse interaction with the algorithm's tuning.
967         // TODO: investigate further why this is so sensitive.
968         aec->sx[i] = ptrGCoh[0] * aec->sx[i] + ptrGCoh[1] *
969             WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], 15);
970 
971         aec->sde[i][0] = ptrGCoh[0] * aec->sde[i][0] + ptrGCoh[1] *
972             (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
973         aec->sde[i][1] = ptrGCoh[0] * aec->sde[i][1] + ptrGCoh[1] *
974             (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
975 
976         aec->sxd[i][0] = ptrGCoh[0] * aec->sxd[i][0] + ptrGCoh[1] *
977             (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
978         aec->sxd[i][1] = ptrGCoh[0] * aec->sxd[i][1] + ptrGCoh[1] *
979             (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
980 
981         sdSum += aec->sd[i];
982         seSum += aec->se[i];
983     }
984 
985     // Divergent filter safeguard.
986     if (aec->divergeState == 0) {
987         if (seSum > sdSum) {
988             aec->divergeState = 1;
989         }
990     }
991     else {
992         if (seSum * 1.05f < sdSum) {
993             aec->divergeState = 0;
994         }
995     }
996 
997     if (aec->divergeState == 1) {
998         memcpy(efw, dfw, sizeof(efw));
999     }
1000 
1001     // Reset if error is significantly larger than nearend (13 dB).
1002     if (seSum > (19.95f * sdSum)) {
1003         memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
1004     }
1005 
1006     // Subband coherence
1007     for (i = 0; i < PART_LEN1; i++) {
1008         cohde[i] = (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
1009             (aec->sd[i] * aec->se[i] + 1e-10f);
1010         cohxd[i] = (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
1011             (aec->sx[i] * aec->sd[i] + 1e-10f);
1012     }
1013 
1014     hNlXdAvg = 0;
1015     for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
1016         hNlXdAvg += cohxd[i];
1017     }
1018     hNlXdAvg /= prefBandSize;
1019     hNlXdAvg = 1 - hNlXdAvg;
1020 
1021     hNlDeAvg = 0;
1022     for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
1023         hNlDeAvg += cohde[i];
1024     }
1025     hNlDeAvg /= prefBandSize;
1026 
1027     if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) {
1028         aec->hNlXdAvgMin = hNlXdAvg;
1029     }
1030 
1031     if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) {
1032         aec->stNearState = 1;
1033     }
1034     else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) {
1035         aec->stNearState = 0;
1036     }
1037 
1038     if (aec->hNlXdAvgMin == 1) {
1039         aec->echoState = 0;
1040         aec->overDrive = aec->minOverDrive;
1041 
1042         if (aec->stNearState == 1) {
1043             memcpy(hNl, cohde, sizeof(hNl));
1044             hNlFb = hNlDeAvg;
1045             hNlFbLow = hNlDeAvg;
1046         }
1047         else {
1048             for (i = 0; i < PART_LEN1; i++) {
1049                 hNl[i] = 1 - cohxd[i];
1050             }
1051             hNlFb = hNlXdAvg;
1052             hNlFbLow = hNlXdAvg;
1053         }
1054     }
1055     else {
1056 
1057         if (aec->stNearState == 1) {
1058             aec->echoState = 0;
1059             memcpy(hNl, cohde, sizeof(hNl));
1060             hNlFb = hNlDeAvg;
1061             hNlFbLow = hNlDeAvg;
1062         }
1063         else {
1064             aec->echoState = 1;
1065             for (i = 0; i < PART_LEN1; i++) {
1066                 hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]);
1067             }
1068 
1069             // Select an order statistic from the preferred bands.
1070             // TODO: Using quicksort now, but a selection algorithm may be preferred.
1071             memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize);
1072             qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat);
1073             hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))];
1074             hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))];
1075         }
1076     }
1077 
1078     // Track the local filter minimum to determine suppression overdrive.
1079     if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) {
1080         aec->hNlFbLocalMin = hNlFbLow;
1081         aec->hNlFbMin = hNlFbLow;
1082         aec->hNlNewMin = 1;
1083         aec->hNlMinCtr = 0;
1084     }
1085     aec->hNlFbLocalMin = WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1);
1086     aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1);
1087 
1088     if (aec->hNlNewMin == 1) {
1089         aec->hNlMinCtr++;
1090     }
1091     if (aec->hNlMinCtr == 2) {
1092         aec->hNlNewMin = 0;
1093         aec->hNlMinCtr = 0;
1094         aec->overDrive = WEBRTC_SPL_MAX(aec->targetSupp /
1095             ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f), aec->minOverDrive);
1096     }
1097 
1098     // Smooth the overdrive.
1099     if (aec->overDrive < aec->overDriveSm) {
1100       aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive;
1101     }
1102     else {
1103       aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive;
1104     }
1105 
1106     WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw);
1107 
1108     // Add comfort noise.
1109     ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl);
1110 
1111     // TODO(bjornv): Investigate how to take the windowing below into account if
1112     // needed.
1113     if (aec->metricsMode == 1) {
1114       // Note that we have a scaling by two in the time domain |eBuf|.
1115       // In addition the time domain signal is windowed before transformation,
1116       // losing half the energy on the average. We take care of the first
1117       // scaling only in UpdateMetrics().
1118       UpdateLevel(&aec->nlpoutlevel, efw);
1119     }
1120     // Inverse error fft.
1121     fft[0] = efw[0][0];
1122     fft[1] = efw[0][PART_LEN];
1123     for (i = 1; i < PART_LEN; i++) {
1124         fft[2*i] = efw[0][i];
1125         // Sign change required by Ooura fft.
1126         fft[2*i + 1] = -efw[1][i];
1127     }
1128     aec_rdft_inverse_128(fft);
1129 
1130     // Overlap and add to obtain output.
1131     scale = 2.0f / PART_LEN2;
1132     for (i = 0; i < PART_LEN; i++) {
1133         fft[i] *= scale; // fft scaling
1134         fft[i] = fft[i]*sqrtHanning[i] + aec->outBuf[i];
1135 
1136         // Saturation protection
1137         output[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fft[i],
1138             WEBRTC_SPL_WORD16_MIN);
1139 
1140         fft[PART_LEN + i] *= scale; // fft scaling
1141         aec->outBuf[i] = fft[PART_LEN + i] * sqrtHanning[PART_LEN - i];
1142     }
1143 
1144     // For H band
1145     if (aec->sampFreq == 32000) {
1146 
1147         // H band gain
1148         // average nlp over low band: average over second half of freq spectrum
1149         // (4->8khz)
1150         GetHighbandGain(hNl, &nlpGainHband);
1151 
1152         // Inverse comfort_noise
1153         if (flagHbandCn == 1) {
1154             fft[0] = comfortNoiseHband[0][0];
1155             fft[1] = comfortNoiseHband[PART_LEN][0];
1156             for (i = 1; i < PART_LEN; i++) {
1157                 fft[2*i] = comfortNoiseHband[i][0];
1158                 fft[2*i + 1] = comfortNoiseHband[i][1];
1159             }
1160             aec_rdft_inverse_128(fft);
1161             scale = 2.0f / PART_LEN2;
1162         }
1163 
1164         // compute gain factor
1165         for (i = 0; i < PART_LEN; i++) {
1166             dtmp = (float)aec->dBufH[i];
1167             dtmp = (float)dtmp * nlpGainHband; // for variable gain
1168 
1169             // add some comfort noise where Hband is attenuated
1170             if (flagHbandCn == 1) {
1171                 fft[i] *= scale; // fft scaling
1172                 dtmp += cnScaleHband * fft[i];
1173             }
1174 
1175             // Saturation protection
1176             outputH[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, dtmp,
1177                 WEBRTC_SPL_WORD16_MIN);
1178          }
1179     }
1180 
1181     // Copy the current block to the old position.
1182     memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN);
1183     memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN);
1184 
1185     // Copy the current block to the old position for H band
1186     if (aec->sampFreq == 32000) {
1187         memcpy(aec->dBufH, aec->dBufH + PART_LEN, sizeof(float) * PART_LEN);
1188     }
1189 
1190     memmove(aec->xfwBuf + PART_LEN1, aec->xfwBuf, sizeof(aec->xfwBuf) -
1191         sizeof(complex_t) * PART_LEN1);
1192 }
1193 
GetHighbandGain(const float * lambda,float * nlpGainHband)1194 static void GetHighbandGain(const float *lambda, float *nlpGainHband)
1195 {
1196     int i;
1197 
1198     nlpGainHband[0] = (float)0.0;
1199     for (i = freqAvgIc; i < PART_LEN1 - 1; i++) {
1200         nlpGainHband[0] += lambda[i];
1201     }
1202     nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc);
1203 }
1204 
ComfortNoise(aec_t * aec,float efw[2][PART_LEN1],complex_t * comfortNoiseHband,const float * noisePow,const float * lambda)1205 static void ComfortNoise(aec_t *aec, float efw[2][PART_LEN1],
1206     complex_t *comfortNoiseHband, const float *noisePow, const float *lambda)
1207 {
1208     int i, num;
1209     float rand[PART_LEN];
1210     float noise, noiseAvg, tmp, tmpAvg;
1211     WebRtc_Word16 randW16[PART_LEN];
1212     complex_t u[PART_LEN1];
1213 
1214     const float pi2 = 6.28318530717959f;
1215 
1216     // Generate a uniform random array on [0 1]
1217     WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
1218     for (i = 0; i < PART_LEN; i++) {
1219         rand[i] = ((float)randW16[i]) / 32768;
1220     }
1221 
1222     // Reject LF noise
1223     u[0][0] = 0;
1224     u[0][1] = 0;
1225     for (i = 1; i < PART_LEN1; i++) {
1226         tmp = pi2 * rand[i - 1];
1227 
1228         noise = sqrtf(noisePow[i]);
1229         u[i][0] = noise * (float)cos(tmp);
1230         u[i][1] = -noise * (float)sin(tmp);
1231     }
1232     u[PART_LEN][1] = 0;
1233 
1234     for (i = 0; i < PART_LEN1; i++) {
1235         // This is the proper weighting to match the background noise power
1236         tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
1237         //tmp = 1 - lambda[i];
1238         efw[0][i] += tmp * u[i][0];
1239         efw[1][i] += tmp * u[i][1];
1240     }
1241 
1242     // For H band comfort noise
1243     // TODO: don't compute noise and "tmp" twice. Use the previous results.
1244     noiseAvg = 0.0;
1245     tmpAvg = 0.0;
1246     num = 0;
1247     if (aec->sampFreq == 32000 && flagHbandCn == 1) {
1248 
1249         // average noise scale
1250         // average over second half of freq spectrum (i.e., 4->8khz)
1251         // TODO: we shouldn't need num. We know how many elements we're summing.
1252         for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
1253             num++;
1254             noiseAvg += sqrtf(noisePow[i]);
1255         }
1256         noiseAvg /= (float)num;
1257 
1258         // average nlp scale
1259         // average over second half of freq spectrum (i.e., 4->8khz)
1260         // TODO: we shouldn't need num. We know how many elements we're summing.
1261         num = 0;
1262         for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
1263             num++;
1264             tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
1265         }
1266         tmpAvg /= (float)num;
1267 
1268         // Use average noise for H band
1269         // TODO: we should probably have a new random vector here.
1270         // Reject LF noise
1271         u[0][0] = 0;
1272         u[0][1] = 0;
1273         for (i = 1; i < PART_LEN1; i++) {
1274             tmp = pi2 * rand[i - 1];
1275 
1276             // Use average noise for H band
1277             u[i][0] = noiseAvg * (float)cos(tmp);
1278             u[i][1] = -noiseAvg * (float)sin(tmp);
1279         }
1280         u[PART_LEN][1] = 0;
1281 
1282         for (i = 0; i < PART_LEN1; i++) {
1283             // Use average NLP weight for H band
1284             comfortNoiseHband[i][0] = tmpAvg * u[i][0];
1285             comfortNoiseHband[i][1] = tmpAvg * u[i][1];
1286         }
1287     }
1288 }
1289 
WebRtcAec_InitLevel(power_level_t * level)1290 static void WebRtcAec_InitLevel(power_level_t *level)
1291 {
1292     const float bigFloat = 1E17f;
1293 
1294     level->averagelevel = 0;
1295     level->framelevel = 0;
1296     level->minlevel = bigFloat;
1297     level->frsum = 0;
1298     level->sfrsum = 0;
1299     level->frcounter = 0;
1300     level->sfrcounter = 0;
1301 }
1302 
WebRtcAec_InitStats(stats_t * stats)1303 static void WebRtcAec_InitStats(stats_t *stats)
1304 {
1305     stats->instant = offsetLevel;
1306     stats->average = offsetLevel;
1307     stats->max = offsetLevel;
1308     stats->min = offsetLevel * (-1);
1309     stats->sum = 0;
1310     stats->hisum = 0;
1311     stats->himean = offsetLevel;
1312     stats->counter = 0;
1313     stats->hicounter = 0;
1314 }
1315 
UpdateLevel(power_level_t * level,float in[2][PART_LEN1])1316 static void UpdateLevel(power_level_t* level, float in[2][PART_LEN1]) {
1317   // Do the energy calculation in the frequency domain. The FFT is performed on
1318   // a segment of PART_LEN2 samples due to overlap, but we only want the energy
1319   // of half that data (the last PART_LEN samples). Parseval's relation states
1320   // that the energy is preserved according to
1321   //
1322   // \sum_{n=0}^{N-1} |x(n)|^2 = 1/N * \sum_{n=0}^{N-1} |X(n)|^2
1323   //                           = ENERGY,
1324   //
1325   // where N = PART_LEN2. Since we are only interested in calculating the energy
1326   // for the last PART_LEN samples we approximate by calculating ENERGY and
1327   // divide by 2,
1328   //
1329   // \sum_{n=N/2}^{N-1} |x(n)|^2 ~= ENERGY / 2
1330   //
1331   // Since we deal with real valued time domain signals we only store frequency
1332   // bins [0, PART_LEN], which is what |in| consists of. To calculate ENERGY we
1333   // need to add the contribution from the missing part in
1334   // [PART_LEN+1, PART_LEN2-1]. These values are, up to a phase shift, identical
1335   // with the values in [1, PART_LEN-1], hence multiply those values by 2. This
1336   // is the values in the for loop below, but multiplication by 2 and division
1337   // by 2 cancel.
1338 
1339   // TODO(bjornv): Investigate reusing energy calculations performed at other
1340   // places in the code.
1341   int k = 1;
1342   // Imaginary parts are zero at end points and left out of the calculation.
1343   float energy = (in[0][0] * in[0][0]) / 2;
1344   energy += (in[0][PART_LEN] * in[0][PART_LEN]) / 2;
1345 
1346   for (k = 1; k < PART_LEN; k++) {
1347     energy += (in[0][k] * in[0][k] + in[1][k] * in[1][k]);
1348   }
1349   energy /= PART_LEN2;
1350 
1351   level->sfrsum += energy;
1352   level->sfrcounter++;
1353 
1354   if (level->sfrcounter > subCountLen) {
1355     level->framelevel = level->sfrsum / (subCountLen * PART_LEN);
1356     level->sfrsum = 0;
1357     level->sfrcounter = 0;
1358     if (level->framelevel > 0) {
1359       if (level->framelevel < level->minlevel) {
1360         level->minlevel = level->framelevel;  // New minimum.
1361       } else {
1362         level->minlevel *= (1 + 0.001f);  // Small increase.
1363       }
1364     }
1365     level->frcounter++;
1366     level->frsum += level->framelevel;
1367     if (level->frcounter > countLen) {
1368       level->averagelevel = level->frsum / countLen;
1369       level->frsum = 0;
1370       level->frcounter = 0;
1371     }
1372   }
1373 }
1374 
UpdateMetrics(aec_t * aec)1375 static void UpdateMetrics(aec_t *aec)
1376 {
1377     float dtmp, dtmp2;
1378 
1379     const float actThresholdNoisy = 8.0f;
1380     const float actThresholdClean = 40.0f;
1381     const float safety = 0.99995f;
1382     const float noisyPower = 300000.0f;
1383 
1384     float actThreshold;
1385     float echo, suppressedEcho;
1386 
1387     if (aec->echoState) {   // Check if echo is likely present
1388         aec->stateCounter++;
1389     }
1390 
1391     if (aec->farlevel.frcounter == 0) {
1392 
1393         if (aec->farlevel.minlevel < noisyPower) {
1394             actThreshold = actThresholdClean;
1395         }
1396         else {
1397             actThreshold = actThresholdNoisy;
1398         }
1399 
1400         if ((aec->stateCounter > (0.5f * countLen * subCountLen))
1401             && (aec->farlevel.sfrcounter == 0)
1402 
1403             // Estimate in active far-end segments only
1404             && (aec->farlevel.averagelevel > (actThreshold * aec->farlevel.minlevel))
1405             ) {
1406 
1407             // Subtract noise power
1408             echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel;
1409 
1410             // ERL
1411             dtmp = 10 * (float)log10(aec->farlevel.averagelevel /
1412                 aec->nearlevel.averagelevel + 1e-10f);
1413             dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f);
1414 
1415             aec->erl.instant = dtmp;
1416             if (dtmp > aec->erl.max) {
1417                 aec->erl.max = dtmp;
1418             }
1419 
1420             if (dtmp < aec->erl.min) {
1421                 aec->erl.min = dtmp;
1422             }
1423 
1424             aec->erl.counter++;
1425             aec->erl.sum += dtmp;
1426             aec->erl.average = aec->erl.sum / aec->erl.counter;
1427 
1428             // Upper mean
1429             if (dtmp > aec->erl.average) {
1430                 aec->erl.hicounter++;
1431                 aec->erl.hisum += dtmp;
1432                 aec->erl.himean = aec->erl.hisum / aec->erl.hicounter;
1433             }
1434 
1435             // A_NLP
1436             dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
1437                 (2 * aec->linoutlevel.averagelevel) + 1e-10f);
1438 
1439             // subtract noise power
1440             suppressedEcho = 2 * (aec->linoutlevel.averagelevel -
1441                 safety * aec->linoutlevel.minlevel);
1442 
1443             dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
1444 
1445             aec->aNlp.instant = dtmp2;
1446             if (dtmp > aec->aNlp.max) {
1447                 aec->aNlp.max = dtmp;
1448             }
1449 
1450             if (dtmp < aec->aNlp.min) {
1451                 aec->aNlp.min = dtmp;
1452             }
1453 
1454             aec->aNlp.counter++;
1455             aec->aNlp.sum += dtmp;
1456             aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter;
1457 
1458             // Upper mean
1459             if (dtmp > aec->aNlp.average) {
1460                 aec->aNlp.hicounter++;
1461                 aec->aNlp.hisum += dtmp;
1462                 aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter;
1463             }
1464 
1465             // ERLE
1466 
1467             // subtract noise power
1468             suppressedEcho = 2 * (aec->nlpoutlevel.averagelevel -
1469                 safety * aec->nlpoutlevel.minlevel);
1470 
1471             dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
1472                 (2 * aec->nlpoutlevel.averagelevel) + 1e-10f);
1473             dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
1474 
1475             dtmp = dtmp2;
1476             aec->erle.instant = dtmp;
1477             if (dtmp > aec->erle.max) {
1478                 aec->erle.max = dtmp;
1479             }
1480 
1481             if (dtmp < aec->erle.min) {
1482                 aec->erle.min = dtmp;
1483             }
1484 
1485             aec->erle.counter++;
1486             aec->erle.sum += dtmp;
1487             aec->erle.average = aec->erle.sum / aec->erle.counter;
1488 
1489             // Upper mean
1490             if (dtmp > aec->erle.average) {
1491                 aec->erle.hicounter++;
1492                 aec->erle.hisum += dtmp;
1493                 aec->erle.himean = aec->erle.hisum / aec->erle.hicounter;
1494             }
1495         }
1496 
1497         aec->stateCounter = 0;
1498     }
1499 }
1500 
TimeToFrequency(float time_data[PART_LEN2],float freq_data[2][PART_LEN1],int window)1501 static void TimeToFrequency(float time_data[PART_LEN2],
1502                             float freq_data[2][PART_LEN1],
1503                             int window) {
1504   int i = 0;
1505 
1506   // TODO(bjornv): Should we have a different function/wrapper for windowed FFT?
1507   if (window) {
1508     for (i = 0; i < PART_LEN; i++) {
1509       time_data[i] *= sqrtHanning[i];
1510       time_data[PART_LEN + i] *= sqrtHanning[PART_LEN - i];
1511     }
1512   }
1513 
1514   aec_rdft_forward_128(time_data);
1515   // Reorder.
1516   freq_data[1][0] = 0;
1517   freq_data[1][PART_LEN] = 0;
1518   freq_data[0][0] = time_data[0];
1519   freq_data[0][PART_LEN] = time_data[1];
1520   for (i = 1; i < PART_LEN; i++) {
1521     freq_data[0][i] = time_data[2 * i];
1522     freq_data[1][i] = time_data[2 * i + 1];
1523   }
1524 }
1525