1 /* aec.cpp
2 *
3 * Copyright (C) DFS Deutsche Flugsicherung (2004, 2005).
4 * All Rights Reserved.
5 *
6 * Acoustic Echo Cancellation NLMS-pw algorithm
7 *
8 * Version 0.3 filter created with www.dsptutor.freeuk.com
9 * Version 0.3.1 Allow change of stability parameter delta
10 * Version 0.4 Leaky Normalized LMS - pre whitening algorithm
11 */
12
13 #ifndef _GNU_SOURCE
14 #define _GNU_SOURCE
15 #endif
16
17 #ifdef HAVE_CONFIG_H
18 #include <config.h>
19 #endif
20
21 #include <math.h>
22 #include <string.h>
23 #include <stdint.h>
24
25 #include <pulse/xmalloc.h>
26
27 #include "adrian-aec.h"
28
29 #ifndef DISABLE_ORC
30 #include "adrian-aec-orc-gen.h"
31 #endif
32
33 #ifdef __SSE__
34 #include <xmmintrin.h>
35 #endif
36
37 /* Vector Dot Product */
dotp(REAL a[],REAL b[])38 static REAL dotp(REAL a[], REAL b[])
39 {
40 REAL sum0 = 0.0f, sum1 = 0.0f;
41 int j;
42
43 for (j = 0; j < NLMS_LEN; j += 2) {
44 // optimize: partial loop unrolling
45 sum0 += a[j] * b[j];
46 sum1 += a[j + 1] * b[j + 1];
47 }
48 return sum0 + sum1;
49 }
50
dotp_sse(REAL a[],REAL b[])51 static REAL dotp_sse(REAL a[], REAL b[])
52 {
53 #ifdef __SSE__
54 /* This is taken from speex's inner product implementation */
55 int j;
56 REAL sum;
57 __m128 acc = _mm_setzero_ps();
58
59 for (j=0;j<NLMS_LEN;j+=8)
60 {
61 acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j), _mm_loadu_ps(b+j)));
62 acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j+4), _mm_loadu_ps(b+j+4)));
63 }
64 acc = _mm_add_ps(acc, _mm_movehl_ps(acc, acc));
65 acc = _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 0x55));
66 _mm_store_ss(&sum, acc);
67
68 return sum;
69 #else
70 return dotp(a, b);
71 #endif
72 }
73
74
AEC_init(int RATE,int have_vector)75 AEC* AEC_init(int RATE, int have_vector)
76 {
77 AEC *a = pa_xnew0(AEC, 1);
78 a->j = NLMS_EXT;
79 AEC_setambient(a, NoiseFloor);
80 a->dfast = a->dslow = M75dB_PCM;
81 a->xfast = a->xslow = M80dB_PCM;
82 a->gain = 1.0f;
83 a->Fx = IIR1_init(2000.0f/RATE);
84 a->Fe = IIR1_init(2000.0f/RATE);
85 a->cutoff = FIR_HP_300Hz_init();
86 a->acMic = IIR_HP_init();
87 a->acSpk = IIR_HP_init();
88
89 a->aes_y2 = M0dB;
90
91 a->fdwdisplay = -1;
92
93 if (have_vector) {
94 /* Get a 16-byte aligned location */
95 a->w = (REAL *) (((uintptr_t) a->w_arr) - (((uintptr_t) a->w_arr) % 16) + 16);
96 a->dotp = dotp_sse;
97 } else {
98 /* We don't care about alignment, just use the array as-is */
99 a->w = a->w_arr;
100 a->dotp = dotp;
101 }
102
103 return a;
104 }
105
AEC_done(AEC * a)106 void AEC_done(AEC *a) {
107 pa_assert(a);
108
109 pa_xfree(a->Fx);
110 pa_xfree(a->Fe);
111 pa_xfree(a->acMic);
112 pa_xfree(a->acSpk);
113 pa_xfree(a->cutoff);
114 pa_xfree(a);
115 }
116
117 // Adrian soft decision DTD
118 // (Dual Average Near-End to Far-End signal Ratio DTD)
119 // This algorithm uses exponential smoothing with different
120 // ageing parameters to get fast and slow near-end and far-end
121 // signal averages. The ratio of NFRs term
122 // (dfast / xfast) / (dslow / xslow) is used to compute the stepsize
123 // A ratio value of 2.5 is mapped to stepsize 0, a ratio of 0 is
124 // mapped to 1.0 with a limited linear function.
AEC_dtd(AEC * a,REAL d,REAL x)125 static float AEC_dtd(AEC *a, REAL d, REAL x)
126 {
127 float ratio, stepsize;
128
129 // fast near-end and far-end average
130 a->dfast += ALPHAFAST * (fabsf(d) - a->dfast);
131 a->xfast += ALPHAFAST * (fabsf(x) - a->xfast);
132
133 // slow near-end and far-end average
134 a->dslow += ALPHASLOW * (fabsf(d) - a->dslow);
135 a->xslow += ALPHASLOW * (fabsf(x) - a->xslow);
136
137 if (a->xfast < M70dB_PCM) {
138 return 0.0f; // no Spk signal
139 }
140
141 if (a->dfast < M70dB_PCM) {
142 return 0.0f; // no Mic signal
143 }
144
145 // ratio of NFRs
146 ratio = (a->dfast * a->xslow) / (a->dslow * a->xfast);
147
148 // Linear interpolation with clamping at the limits
149 if (ratio < STEPX1)
150 stepsize = STEPY1;
151 else if (ratio > STEPX2)
152 stepsize = STEPY2;
153 else
154 stepsize = STEPY1 + (STEPY2 - STEPY1) * (ratio - STEPX1) / (STEPX2 - STEPX1);
155
156 return stepsize;
157 }
158
159
AEC_leaky(AEC * a)160 static void AEC_leaky(AEC *a)
161 // The xfast signal is used to charge the hangover timer to Thold.
162 // When hangover expires (no Spk signal for some time) the vector w
163 // is erased. This is my implementation of Leaky NLMS.
164 {
165 if (a->xfast >= M70dB_PCM) {
166 // vector w is valid for hangover Thold time
167 a->hangover = Thold;
168 } else {
169 if (a->hangover > 1) {
170 --(a->hangover);
171 } else if (1 == a->hangover) {
172 --(a->hangover);
173 // My Leaky NLMS is to erase vector w when hangover expires
174 memset(a->w_arr, 0, sizeof(a->w_arr));
175 }
176 }
177 }
178
179
180 #if 0
181 void AEC::openwdisplay() {
182 // open TCP connection to program wdisplay.tcl
183 fdwdisplay = socket_async("127.0.0.1", 50999);
184 };
185 #endif
186
187
AEC_nlms_pw(AEC * a,REAL d,REAL x_,float stepsize)188 static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize)
189 {
190 REAL e;
191 REAL ef;
192 a->x[a->j] = x_;
193 a->xf[a->j] = IIR1_highpass(a->Fx, x_); // pre-whitening of x
194
195 // calculate error value
196 // (mic signal - estimated mic signal from spk signal)
197 e = d;
198 if (a->hangover > 0) {
199 e -= a->dotp(a->w, a->x + a->j);
200 }
201 ef = IIR1_highpass(a->Fe, e); // pre-whitening of e
202
203 // optimize: iterative dotp(xf, xf)
204 a->dotp_xf_xf += (a->xf[a->j] * a->xf[a->j] - a->xf[a->j + NLMS_LEN - 1] * a->xf[a->j + NLMS_LEN - 1]);
205
206 if (stepsize > 0.0f) {
207 // calculate variable step size
208 REAL mikro_ef = stepsize * ef / a->dotp_xf_xf;
209
210 #ifdef DISABLE_ORC
211 // update tap weights (filter learning)
212 int i;
213 for (i = 0; i < NLMS_LEN; i += 2) {
214 // optimize: partial loop unrolling
215 a->w[i] += mikro_ef * a->xf[i + a->j];
216 a->w[i + 1] += mikro_ef * a->xf[i + a->j + 1];
217 }
218 #else
219 update_tap_weights(a->w, &a->xf[a->j], mikro_ef, NLMS_LEN);
220 #endif
221 }
222
223 if (--(a->j) < 0) {
224 // optimize: decrease number of memory copies
225 a->j = NLMS_EXT;
226 memmove(a->x + a->j + 1, a->x, (NLMS_LEN - 1) * sizeof(REAL));
227 memmove(a->xf + a->j + 1, a->xf, (NLMS_LEN - 1) * sizeof(REAL));
228 }
229
230 // Saturation
231 if (e > MAXPCM) {
232 return MAXPCM;
233 } else if (e < -MAXPCM) {
234 return -MAXPCM;
235 } else {
236 return e;
237 }
238 }
239
240
AEC_doAEC(AEC * a,int d_,int x_)241 int AEC_doAEC(AEC *a, int d_, int x_)
242 {
243 REAL d = (REAL) d_;
244 REAL x = (REAL) x_;
245
246 // Mic Highpass Filter - to remove DC
247 d = IIR_HP_highpass(a->acMic, d);
248
249 // Mic Highpass Filter - cut-off below 300Hz
250 d = FIR_HP_300Hz_highpass(a->cutoff, d);
251
252 // Amplify, for e.g. Soundcards with -6dB max. volume
253 d *= a->gain;
254
255 // Spk Highpass Filter - to remove DC
256 x = IIR_HP_highpass(a->acSpk, x);
257
258 // Double Talk Detector
259 a->stepsize = AEC_dtd(a, d, x);
260
261 // Leaky (ageing of vector w)
262 AEC_leaky(a);
263
264 // Acoustic Echo Cancellation
265 d = AEC_nlms_pw(a, d, x, a->stepsize);
266
267 #if 0
268 if (fdwdisplay >= 0) {
269 if (++dumpcnt >= (WIDEB*RATE/10)) {
270 // wdisplay creates 10 dumps per seconds = large CPU load!
271 dumpcnt = 0;
272 write(fdwdisplay, ws, DUMP_LEN*sizeof(float));
273 // we don't check return value. This is not production quality!!!
274 memset(ws, 0, sizeof(ws));
275 } else {
276 int i;
277 for (i = 0; i < DUMP_LEN; i += 2) {
278 // optimize: partial loop unrolling
279 ws[i] += w[i];
280 ws[i + 1] += w[i + 1];
281 }
282 }
283 }
284 #endif
285
286 return (int) d;
287 }
288