• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* aec.cpp
2  *
3  * Copyright (C) DFS Deutsche Flugsicherung (2004, 2005).
4  * All Rights Reserved.
5  *
6  * Acoustic Echo Cancellation NLMS-pw algorithm
7  *
8  * Version 0.3 filter created with www.dsptutor.freeuk.com
9  * Version 0.3.1 Allow change of stability parameter delta
10  * Version 0.4 Leaky Normalized LMS - pre whitening algorithm
11  */
12 
13 #ifndef _GNU_SOURCE
14 #define _GNU_SOURCE
15 #endif
16 
17 #ifdef HAVE_CONFIG_H
18 #include <config.h>
19 #endif
20 
21 #include <math.h>
22 #include <string.h>
23 #include <stdint.h>
24 
25 #include <pulse/xmalloc.h>
26 
27 #include "adrian-aec.h"
28 
29 #ifndef DISABLE_ORC
30 #include "adrian-aec-orc-gen.h"
31 #endif
32 
33 #ifdef __SSE__
34 #include <xmmintrin.h>
35 #endif
36 
37 /* Vector Dot Product */
dotp(REAL a[],REAL b[])38 static REAL dotp(REAL a[], REAL b[])
39 {
40   REAL sum0 = 0.0f, sum1 = 0.0f;
41   int j;
42 
43   for (j = 0; j < NLMS_LEN; j += 2) {
44     // optimize: partial loop unrolling
45     sum0 += a[j] * b[j];
46     sum1 += a[j + 1] * b[j + 1];
47   }
48   return sum0 + sum1;
49 }
50 
dotp_sse(REAL a[],REAL b[])51 static REAL dotp_sse(REAL a[], REAL b[])
52 {
53 #ifdef __SSE__
54   /* This is taken from speex's inner product implementation */
55   int j;
56   REAL sum;
57   __m128 acc = _mm_setzero_ps();
58 
59   for (j=0;j<NLMS_LEN;j+=8)
60   {
61     acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j), _mm_loadu_ps(b+j)));
62     acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j+4), _mm_loadu_ps(b+j+4)));
63   }
64   acc = _mm_add_ps(acc, _mm_movehl_ps(acc, acc));
65   acc = _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 0x55));
66   _mm_store_ss(&sum, acc);
67 
68   return sum;
69 #else
70   return dotp(a, b);
71 #endif
72 }
73 
74 
AEC_init(int RATE,int have_vector)75 AEC* AEC_init(int RATE, int have_vector)
76 {
77   AEC *a = pa_xnew0(AEC, 1);
78   a->j = NLMS_EXT;
79   AEC_setambient(a, NoiseFloor);
80   a->dfast = a->dslow = M75dB_PCM;
81   a->xfast = a->xslow = M80dB_PCM;
82   a->gain = 1.0f;
83   a->Fx = IIR1_init(2000.0f/RATE);
84   a->Fe = IIR1_init(2000.0f/RATE);
85   a->cutoff = FIR_HP_300Hz_init();
86   a->acMic = IIR_HP_init();
87   a->acSpk = IIR_HP_init();
88 
89   a->aes_y2 = M0dB;
90 
91   a->fdwdisplay = -1;
92 
93   if (have_vector) {
94       /* Get a 16-byte aligned location */
95       a->w = (REAL *) (((uintptr_t) a->w_arr) - (((uintptr_t) a->w_arr) % 16) + 16);
96       a->dotp = dotp_sse;
97   } else {
98       /* We don't care about alignment, just use the array as-is */
99       a->w = a->w_arr;
100       a->dotp = dotp;
101   }
102 
103   return a;
104 }
105 
AEC_done(AEC * a)106 void AEC_done(AEC *a) {
107     pa_assert(a);
108 
109     pa_xfree(a->Fx);
110     pa_xfree(a->Fe);
111     pa_xfree(a->acMic);
112     pa_xfree(a->acSpk);
113     pa_xfree(a->cutoff);
114     pa_xfree(a);
115 }
116 
117 // Adrian soft decision DTD
118 // (Dual Average Near-End to Far-End signal Ratio DTD)
119 // This algorithm uses exponential smoothing with different
120 // ageing parameters to get fast and slow near-end and far-end
121 // signal averages. The ratio of NFRs term
122 // (dfast / xfast) / (dslow / xslow) is used to compute the stepsize
123 // A ratio value of 2.5 is mapped to stepsize 0, a ratio of 0 is
124 // mapped to 1.0 with a limited linear function.
AEC_dtd(AEC * a,REAL d,REAL x)125 static float AEC_dtd(AEC *a, REAL d, REAL x)
126 {
127   float ratio, stepsize;
128 
129   // fast near-end and far-end average
130   a->dfast += ALPHAFAST * (fabsf(d) - a->dfast);
131   a->xfast += ALPHAFAST * (fabsf(x) - a->xfast);
132 
133   // slow near-end and far-end average
134   a->dslow += ALPHASLOW * (fabsf(d) - a->dslow);
135   a->xslow += ALPHASLOW * (fabsf(x) - a->xslow);
136 
137   if (a->xfast < M70dB_PCM) {
138     return 0.0f;   // no Spk signal
139   }
140 
141   if (a->dfast < M70dB_PCM) {
142     return 0.0f;   // no Mic signal
143   }
144 
145   // ratio of NFRs
146   ratio = (a->dfast * a->xslow) / (a->dslow * a->xfast);
147 
148   // Linear interpolation with clamping at the limits
149   if (ratio < STEPX1)
150     stepsize = STEPY1;
151   else if (ratio > STEPX2)
152     stepsize = STEPY2;
153   else
154     stepsize = STEPY1 + (STEPY2 - STEPY1) * (ratio - STEPX1) / (STEPX2 - STEPX1);
155 
156   return stepsize;
157 }
158 
159 
AEC_leaky(AEC * a)160 static void AEC_leaky(AEC *a)
161 // The xfast signal is used to charge the hangover timer to Thold.
162 // When hangover expires (no Spk signal for some time) the vector w
163 // is erased. This is my implementation of Leaky NLMS.
164 {
165   if (a->xfast >= M70dB_PCM) {
166     // vector w is valid for hangover Thold time
167     a->hangover = Thold;
168   } else {
169     if (a->hangover > 1) {
170       --(a->hangover);
171     } else if (1 == a->hangover) {
172       --(a->hangover);
173       // My Leaky NLMS is to erase vector w when hangover expires
174       memset(a->w_arr, 0, sizeof(a->w_arr));
175     }
176   }
177 }
178 
179 
180 #if 0
181 void AEC::openwdisplay() {
182   // open TCP connection to program wdisplay.tcl
183   fdwdisplay = socket_async("127.0.0.1", 50999);
184 };
185 #endif
186 
187 
AEC_nlms_pw(AEC * a,REAL d,REAL x_,float stepsize)188 static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize)
189 {
190   REAL e;
191   REAL ef;
192   a->x[a->j] = x_;
193   a->xf[a->j] = IIR1_highpass(a->Fx, x_);     // pre-whitening of x
194 
195   // calculate error value
196   // (mic signal - estimated mic signal from spk signal)
197   e = d;
198   if (a->hangover > 0) {
199     e -= a->dotp(a->w, a->x + a->j);
200   }
201   ef = IIR1_highpass(a->Fe, e);     // pre-whitening of e
202 
203   // optimize: iterative dotp(xf, xf)
204   a->dotp_xf_xf += (a->xf[a->j] * a->xf[a->j] - a->xf[a->j + NLMS_LEN - 1] * a->xf[a->j + NLMS_LEN - 1]);
205 
206   if (stepsize > 0.0f) {
207     // calculate variable step size
208     REAL mikro_ef = stepsize * ef / a->dotp_xf_xf;
209 
210 #ifdef DISABLE_ORC
211     // update tap weights (filter learning)
212     int i;
213     for (i = 0; i < NLMS_LEN; i += 2) {
214       // optimize: partial loop unrolling
215       a->w[i] += mikro_ef * a->xf[i + a->j];
216       a->w[i + 1] += mikro_ef * a->xf[i + a->j + 1];
217     }
218 #else
219     update_tap_weights(a->w, &a->xf[a->j], mikro_ef, NLMS_LEN);
220 #endif
221   }
222 
223   if (--(a->j) < 0) {
224     // optimize: decrease number of memory copies
225     a->j = NLMS_EXT;
226     memmove(a->x + a->j + 1, a->x, (NLMS_LEN - 1) * sizeof(REAL));
227     memmove(a->xf + a->j + 1, a->xf, (NLMS_LEN - 1) * sizeof(REAL));
228   }
229 
230   // Saturation
231   if (e > MAXPCM) {
232     return MAXPCM;
233   } else if (e < -MAXPCM) {
234     return -MAXPCM;
235   } else {
236     return e;
237   }
238 }
239 
240 
AEC_doAEC(AEC * a,int d_,int x_)241 int AEC_doAEC(AEC *a, int d_, int x_)
242 {
243   REAL d = (REAL) d_;
244   REAL x = (REAL) x_;
245 
246   // Mic Highpass Filter - to remove DC
247   d = IIR_HP_highpass(a->acMic, d);
248 
249   // Mic Highpass Filter - cut-off below 300Hz
250   d = FIR_HP_300Hz_highpass(a->cutoff, d);
251 
252   // Amplify, for e.g. Soundcards with -6dB max. volume
253   d *= a->gain;
254 
255   // Spk Highpass Filter - to remove DC
256   x = IIR_HP_highpass(a->acSpk, x);
257 
258   // Double Talk Detector
259   a->stepsize = AEC_dtd(a, d, x);
260 
261   // Leaky (ageing of vector w)
262   AEC_leaky(a);
263 
264   // Acoustic Echo Cancellation
265   d = AEC_nlms_pw(a, d, x, a->stepsize);
266 
267 #if 0
268   if (fdwdisplay >= 0) {
269     if (++dumpcnt >= (WIDEB*RATE/10)) {
270       // wdisplay creates 10 dumps per seconds = large CPU load!
271       dumpcnt = 0;
272       write(fdwdisplay, ws, DUMP_LEN*sizeof(float));
273       // we don't check return value. This is not production quality!!!
274       memset(ws, 0, sizeof(ws));
275     } else {
276       int i;
277       for (i = 0; i < DUMP_LEN; i += 2) {
278         // optimize: partial loop unrolling
279         ws[i] += w[i];
280         ws[i + 1] += w[i + 1];
281       }
282     }
283   }
284 #endif
285 
286   return (int) d;
287 }
288