1 /* Copyright (C) 2003-2008 Jean-Marc Valin
2
3 File: mdf.c
4 Echo canceller based on the MDF algorithm (see below)
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are
8 met:
9
10 1. Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer.
12
13 2. Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16
17 3. The name of the author may not be used to endorse or promote products
18 derived from this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 The echo canceller is based on the MDF algorithm described in:
35
36 J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
37 IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
38 February 1990.
39
40 We use the Alternatively Updated MDF (AUMDF) variant. Robustness to
41 double-talk is achieved using a variable learning rate as described in:
42
43 Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo
44 Cancellation With Double-Talk. IEEE Transactions on Audio,
45 Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007.
46 http://people.xiph.org/~jm/papers/valin_taslp2006.pdf
47
48 There is no explicit double-talk detection, but a continuous variation
49 in the learning rate based on residual echo, double-talk and background
50 noise.
51
52 About the fixed-point version:
53 All the signals are represented with 16-bit words. The filter weights
54 are represented with 32-bit words, but only the top 16 bits are used
55 in most cases. The lower 16 bits are completely unreliable (due to the
56 fact that the update is done only on the top bits), but help in the
57 adaptation -- probably by removing a "threshold effect" due to
58 quantization (rounding going to zero) when the gradient is small.
59
60 Another kludge that seems to work good: when performing the weight
61 update, we only move half the way toward the "goal" this seems to
62 reduce the effect of quantization noise in the update phase. This
63 can be seen as applying a gradient descent on a "soft constraint"
64 instead of having a hard constraint.
65
66 */
67
68 #ifdef HAVE_CONFIG_H
69 #include "config.h"
70 #endif
71
72 #include "arch.h"
73 #include "speex/speex_echo.h"
74 #include "fftwrap.h"
75 #include "pseudofloat.h"
76 #include "math_approx.h"
77 #include "os_support.h"
78
79 #ifndef M_PI
80 #define M_PI 3.14159265358979323846
81 #endif
82
83 #ifdef FIXED_POINT
84 #define WEIGHT_SHIFT 11
85 #define NORMALIZE_SCALEDOWN 5
86 #define NORMALIZE_SCALEUP 3
87 #else
88 #define WEIGHT_SHIFT 0
89 #endif
90
91 #ifdef FIXED_POINT
92 #define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))
93 #else
94 #define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))
95 #endif
96
97 /* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk
98 and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */
99 #define TWO_PATH
100
101 #ifdef FIXED_POINT
102 static const spx_float_t MIN_LEAK = {20972, -22};
103
104 /* Constants for the two-path filter */
105 static const spx_float_t VAR1_SMOOTH = {23593, -16};
106 static const spx_float_t VAR2_SMOOTH = {23675, -15};
107 static const spx_float_t VAR1_UPDATE = {16384, -15};
108 static const spx_float_t VAR2_UPDATE = {16384, -16};
109 static const spx_float_t VAR_BACKTRACK = {16384, -12};
110 #define TOP16(x) ((x)>>16)
111
112 #else
113
114 static const spx_float_t MIN_LEAK = .005f;
115
116 /* Constants for the two-path filter */
117 static const spx_float_t VAR1_SMOOTH = .36f;
118 static const spx_float_t VAR2_SMOOTH = .7225f;
119 static const spx_float_t VAR1_UPDATE = .5f;
120 static const spx_float_t VAR2_UPDATE = .25f;
121 static const spx_float_t VAR_BACKTRACK = 4.f;
122 #define TOP16(x) (x)
123 #endif
124
125
126 #define PLAYBACK_DELAY 2
127
128 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
129
130
131 /** Speex echo cancellation state. */
132 struct SpeexEchoState_ {
133 int frame_size; /**< Number of samples processed each time */
134 int window_size;
135 int M;
136 int cancel_count;
137 int adapted;
138 int saturated;
139 int screwed_up;
140 int C; /** Number of input channels (microphones) */
141 int K; /** Number of output channels (loudspeakers) */
142 spx_int32_t sampling_rate;
143 spx_word16_t spec_average;
144 spx_word16_t beta0;
145 spx_word16_t beta_max;
146 spx_word32_t sum_adapt;
147 spx_word16_t leak_estimate;
148
149 spx_word16_t *e; /* scratch */
150 spx_word16_t *x; /* Far-end input buffer (2N) */
151 spx_word16_t *X; /* Far-end buffer (M+1 frames) in frequency domain */
152 spx_word16_t *input; /* scratch */
153 spx_word16_t *y; /* scratch */
154 spx_word16_t *last_y;
155 spx_word16_t *Y; /* scratch */
156 spx_word16_t *E;
157 spx_word32_t *PHI; /* scratch */
158 spx_word32_t *W; /* (Background) filter weights */
159 #ifdef TWO_PATH
160 spx_word16_t *foreground; /* Foreground filter weights */
161 spx_word32_t Davg1; /* 1st recursive average of the residual power difference */
162 spx_word32_t Davg2; /* 2nd recursive average of the residual power difference */
163 spx_float_t Dvar1; /* Estimated variance of 1st estimator */
164 spx_float_t Dvar2; /* Estimated variance of 2nd estimator */
165 #endif
166 spx_word32_t *power; /* Power of the far-end signal */
167 spx_float_t *power_1;/* Inverse power of far-end */
168 spx_word16_t *wtmp; /* scratch */
169 #ifdef FIXED_POINT
170 spx_word16_t *wtmp2; /* scratch */
171 #endif
172 spx_word32_t *Rf; /* scratch */
173 spx_word32_t *Yf; /* scratch */
174 spx_word32_t *Xf; /* scratch */
175 spx_word32_t *Eh;
176 spx_word32_t *Yh;
177 spx_float_t Pey;
178 spx_float_t Pyy;
179 spx_word16_t *window;
180 spx_word16_t *prop;
181 void *fft_table;
182 spx_word16_t *memX, *memD, *memE;
183 spx_word16_t preemph;
184 spx_word16_t notch_radius;
185 spx_mem_t *notch_mem;
186
187 /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */
188 spx_int16_t *play_buf;
189 int play_buf_pos;
190 int play_buf_started;
191 };
192
filter_dc_notch16(const spx_int16_t * in,spx_word16_t radius,spx_word16_t * out,int len,spx_mem_t * mem,int stride)193 static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride)
194 {
195 int i;
196 spx_word16_t den2;
197 #ifdef FIXED_POINT
198 den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius));
199 #else
200 den2 = radius*radius + .7*(1-radius)*(1-radius);
201 #endif
202 /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/
203 for (i=0;i<len;i++)
204 {
205 spx_word16_t vin = in[i*stride];
206 spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15);
207 #ifdef FIXED_POINT
208 mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1);
209 #else
210 mem[0] = mem[1] + 2*(-vin + radius*vout);
211 #endif
212 mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout);
213 out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767);
214 }
215 }
216
217 /* This inner product is slightly different from the codec version because of fixed-point */
mdf_inner_prod(const spx_word16_t * x,const spx_word16_t * y,int len)218 static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
219 {
220 spx_word32_t sum=0;
221 len >>= 1;
222 while(len--)
223 {
224 spx_word32_t part=0;
225 part = MAC16_16(part,*x++,*y++);
226 part = MAC16_16(part,*x++,*y++);
227 /* HINT: If you had a 40-bit accumulator, you could shift only at the end */
228 sum = ADD32(sum,SHR32(part,6));
229 }
230 return sum;
231 }
232
233 /** Compute power spectrum of a half-complex (packed) vector */
power_spectrum(const spx_word16_t * X,spx_word32_t * ps,int N)234 static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N)
235 {
236 int i, j;
237 ps[0]=MULT16_16(X[0],X[0]);
238 for (i=1,j=1;i<N-1;i+=2,j++)
239 {
240 ps[j] = MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
241 }
242 ps[j]=MULT16_16(X[i],X[i]);
243 }
244
245 /** Compute power spectrum of a half-complex (packed) vector and accumulate */
power_spectrum_accum(const spx_word16_t * X,spx_word32_t * ps,int N)246 static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N)
247 {
248 int i, j;
249 ps[0]+=MULT16_16(X[0],X[0]);
250 for (i=1,j=1;i<N-1;i+=2,j++)
251 {
252 ps[j] += MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
253 }
254 ps[j]+=MULT16_16(X[i],X[i]);
255 }
256
257 /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
258 #ifdef FIXED_POINT
spectral_mul_accum(const spx_word16_t * X,const spx_word32_t * Y,spx_word16_t * acc,int N,int M)259 static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
260 {
261 int i,j;
262 spx_word32_t tmp1=0,tmp2=0;
263 for (j=0;j<M;j++)
264 {
265 tmp1 = MAC16_16(tmp1, X[j*N],TOP16(Y[j*N]));
266 }
267 acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
268 for (i=1;i<N-1;i+=2)
269 {
270 tmp1 = tmp2 = 0;
271 for (j=0;j<M;j++)
272 {
273 tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],TOP16(Y[j*N+i])), MULT16_16(X[j*N+i+1],TOP16(Y[j*N+i+1])));
274 tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],TOP16(Y[j*N+i])), X[j*N+i], TOP16(Y[j*N+i+1]));
275 }
276 acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
277 acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
278 }
279 tmp1 = tmp2 = 0;
280 for (j=0;j<M;j++)
281 {
282 tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],TOP16(Y[(j+1)*N-1]));
283 }
284 acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
285 }
spectral_mul_accum16(const spx_word16_t * X,const spx_word16_t * Y,spx_word16_t * acc,int N,int M)286 static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M)
287 {
288 int i,j;
289 spx_word32_t tmp1=0,tmp2=0;
290 for (j=0;j<M;j++)
291 {
292 tmp1 = MAC16_16(tmp1, X[j*N],Y[j*N]);
293 }
294 acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
295 for (i=1;i<N-1;i+=2)
296 {
297 tmp1 = tmp2 = 0;
298 for (j=0;j<M;j++)
299 {
300 tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],Y[j*N+i]), MULT16_16(X[j*N+i+1],Y[j*N+i+1]));
301 tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],Y[j*N+i]), X[j*N+i], Y[j*N+i+1]);
302 }
303 acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
304 acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
305 }
306 tmp1 = tmp2 = 0;
307 for (j=0;j<M;j++)
308 {
309 tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],Y[(j+1)*N-1]);
310 }
311 acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
312 }
313
314 #else
spectral_mul_accum(const spx_word16_t * X,const spx_word32_t * Y,spx_word16_t * acc,int N,int M)315 static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
316 {
317 int i,j;
318 for (i=0;i<N;i++)
319 acc[i] = 0;
320 for (j=0;j<M;j++)
321 {
322 acc[0] += X[0]*Y[0];
323 for (i=1;i<N-1;i+=2)
324 {
325 acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]);
326 acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]);
327 }
328 acc[i] += X[i]*Y[i];
329 X += N;
330 Y += N;
331 }
332 }
333 #define spectral_mul_accum16 spectral_mul_accum
334 #endif
335
336 /** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */
weighted_spectral_mul_conj(const spx_float_t * w,const spx_float_t p,const spx_word16_t * X,const spx_word16_t * Y,spx_word32_t * prod,int N)337 static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N)
338 {
339 int i, j;
340 spx_float_t W;
341 W = FLOAT_AMULT(p, w[0]);
342 prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0]));
343 for (i=1,j=1;i<N-1;i+=2,j++)
344 {
345 W = FLOAT_AMULT(p, w[j]);
346 prod[i] = FLOAT_MUL32(W,MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1]));
347 prod[i+1] = FLOAT_MUL32(W,MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1]));
348 }
349 W = FLOAT_AMULT(p, w[j]);
350 prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i]));
351 }
352
mdf_adjust_prop(const spx_word32_t * W,int N,int M,int P,spx_word16_t * prop)353 static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop)
354 {
355 int i, j, p;
356 spx_word16_t max_sum = 1;
357 spx_word32_t prop_sum = 1;
358 for (i=0;i<M;i++)
359 {
360 spx_word32_t tmp = 1;
361 for (p=0;p<P;p++)
362 for (j=0;j<N;j++)
363 tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18)));
364 #ifdef FIXED_POINT
365 /* Just a security in case an overflow were to occur */
366 tmp = MIN32(ABS32(tmp), 536870912);
367 #endif
368 prop[i] = spx_sqrt(tmp);
369 if (prop[i] > max_sum)
370 max_sum = prop[i];
371 }
372 for (i=0;i<M;i++)
373 {
374 prop[i] += MULT16_16_Q15(QCONST16(.1f,15),max_sum);
375 prop_sum += EXTEND32(prop[i]);
376 }
377 for (i=0;i<M;i++)
378 {
379 prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), prop[i]),prop_sum);
380 /*printf ("%f ", prop[i]);*/
381 }
382 /*printf ("\n");*/
383 }
384
385 #ifdef DUMP_ECHO_CANCEL_DATA
386 #include <stdio.h>
387 static FILE *rFile=NULL, *pFile=NULL, *oFile=NULL;
388
dump_audio(const spx_int16_t * rec,const spx_int16_t * play,const spx_int16_t * out,int len)389 static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const spx_int16_t *out, int len)
390 {
391 if (!(rFile && pFile && oFile))
392 {
393 speex_fatal("Dump files not open");
394 }
395 fwrite(rec, sizeof(spx_int16_t), len, rFile);
396 fwrite(play, sizeof(spx_int16_t), len, pFile);
397 fwrite(out, sizeof(spx_int16_t), len, oFile);
398 }
399 #endif
400
401 /** Creates a new echo canceller state */
speex_echo_state_init(int frame_size,int filter_length)402 EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
403 {
404 return speex_echo_state_init_mc(frame_size, filter_length, 1, 1);
405 }
406
speex_echo_state_init_mc(int frame_size,int filter_length,int nb_mic,int nb_speakers)407 EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers)
408 {
409 int i,N,M, C, K;
410 SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));
411
412 st->K = nb_speakers;
413 st->C = nb_mic;
414 C=st->C;
415 K=st->K;
416 #ifdef DUMP_ECHO_CANCEL_DATA
417 if (rFile || pFile || oFile)
418 speex_fatal("Opening dump files twice");
419 rFile = fopen("aec_rec.sw", "wb");
420 pFile = fopen("aec_play.sw", "wb");
421 oFile = fopen("aec_out.sw", "wb");
422 #endif
423
424 st->frame_size = frame_size;
425 st->window_size = 2*frame_size;
426 N = st->window_size;
427 M = st->M = (filter_length+st->frame_size-1)/frame_size;
428 st->cancel_count=0;
429 st->sum_adapt = 0;
430 st->saturated = 0;
431 st->screwed_up = 0;
432 /* This is the default sampling rate */
433 st->sampling_rate = 8000;
434 st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
435 #ifdef FIXED_POINT
436 st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
437 st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
438 #else
439 st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
440 st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
441 #endif
442 st->leak_estimate = 0;
443
444 st->fft_table = spx_fft_init(N);
445
446 st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
447 st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t));
448 st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t));
449 st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
450 st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
451 st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
452 st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
453 st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
454 st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
455 st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
456
457 st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t));
458 st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
459 st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
460 st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t));
461 #ifdef TWO_PATH
462 st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t));
463 #endif
464 st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
465 st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
466 st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t));
467 st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
468 st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t));
469 st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
470 #ifdef FIXED_POINT
471 st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
472 for (i=0;i<N>>1;i++)
473 {
474 st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1));
475 st->window[N-i-1] = st->window[i];
476 }
477 #else
478 for (i=0;i<N;i++)
479 st->window[i] = .5-.5*cos(2*M_PI*i/N);
480 #endif
481 for (i=0;i<=st->frame_size;i++)
482 st->power_1[i] = FLOAT_ONE;
483 for (i=0;i<N*M*K*C;i++)
484 st->W[i] = 0;
485 {
486 spx_word32_t sum = 0;
487 /* Ratio of ~10 between adaptation rate of first and last block */
488 spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1);
489 st->prop[0] = QCONST16(.7, 15);
490 sum = EXTEND32(st->prop[0]);
491 for (i=1;i<M;i++)
492 {
493 st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay);
494 sum = ADD32(sum, EXTEND32(st->prop[i]));
495 }
496 for (i=M-1;i>=0;i--)
497 {
498 st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum);
499 }
500 }
501
502 st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t));
503 st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
504 st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
505 st->preemph = QCONST16(.9,15);
506 if (st->sampling_rate<12000)
507 st->notch_radius = QCONST16(.9, 15);
508 else if (st->sampling_rate<24000)
509 st->notch_radius = QCONST16(.982, 15);
510 else
511 st->notch_radius = QCONST16(.992, 15);
512
513 st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t));
514 st->adapted = 0;
515 st->Pey = st->Pyy = FLOAT_ONE;
516
517 #ifdef TWO_PATH
518 st->Davg1 = st->Davg2 = 0;
519 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
520 #endif
521
522 st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
523 st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
524 st->play_buf_started = 0;
525
526 return st;
527 }
528
529 /** Resets echo canceller state */
speex_echo_state_reset(SpeexEchoState * st)530 EXPORT void speex_echo_state_reset(SpeexEchoState *st)
531 {
532 int i, M, N, C, K;
533 st->cancel_count=0;
534 st->screwed_up = 0;
535 N = st->window_size;
536 M = st->M;
537 C=st->C;
538 K=st->K;
539 for (i=0;i<N*M;i++)
540 st->W[i] = 0;
541 #ifdef TWO_PATH
542 for (i=0;i<N*M;i++)
543 st->foreground[i] = 0;
544 #endif
545 for (i=0;i<N*(M+1);i++)
546 st->X[i] = 0;
547 for (i=0;i<=st->frame_size;i++)
548 {
549 st->power[i] = 0;
550 st->power_1[i] = FLOAT_ONE;
551 st->Eh[i] = 0;
552 st->Yh[i] = 0;
553 }
554 for (i=0;i<st->frame_size;i++)
555 {
556 st->last_y[i] = 0;
557 }
558 for (i=0;i<N*C;i++)
559 {
560 st->E[i] = 0;
561 }
562 for (i=0;i<N*K;i++)
563 {
564 st->x[i] = 0;
565 }
566 for (i=0;i<2*C;i++)
567 st->notch_mem[i] = 0;
568 for (i=0;i<C;i++)
569 st->memD[i]=st->memE[i]=0;
570 for (i=0;i<K;i++)
571 st->memX[i]=0;
572
573 st->saturated = 0;
574 st->adapted = 0;
575 st->sum_adapt = 0;
576 st->Pey = st->Pyy = FLOAT_ONE;
577 #ifdef TWO_PATH
578 st->Davg1 = st->Davg2 = 0;
579 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
580 #endif
581 for (i=0;i<3*st->frame_size;i++)
582 st->play_buf[i] = 0;
583 st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
584 st->play_buf_started = 0;
585
586 }
587
588 /** Destroys an echo canceller state */
speex_echo_state_destroy(SpeexEchoState * st)589 EXPORT void speex_echo_state_destroy(SpeexEchoState *st)
590 {
591 spx_fft_destroy(st->fft_table);
592
593 speex_free(st->e);
594 speex_free(st->x);
595 speex_free(st->input);
596 speex_free(st->y);
597 speex_free(st->last_y);
598 speex_free(st->Yf);
599 speex_free(st->Rf);
600 speex_free(st->Xf);
601 speex_free(st->Yh);
602 speex_free(st->Eh);
603
604 speex_free(st->X);
605 speex_free(st->Y);
606 speex_free(st->E);
607 speex_free(st->W);
608 #ifdef TWO_PATH
609 speex_free(st->foreground);
610 #endif
611 speex_free(st->PHI);
612 speex_free(st->power);
613 speex_free(st->power_1);
614 speex_free(st->window);
615 speex_free(st->prop);
616 speex_free(st->wtmp);
617 #ifdef FIXED_POINT
618 speex_free(st->wtmp2);
619 #endif
620 speex_free(st->memX);
621 speex_free(st->memD);
622 speex_free(st->memE);
623 speex_free(st->notch_mem);
624
625 speex_free(st->play_buf);
626 speex_free(st);
627
628 #ifdef DUMP_ECHO_CANCEL_DATA
629 fclose(rFile);
630 fclose(pFile);
631 fclose(oFile);
632 rFile = pFile = oFile = NULL;
633 #endif
634 }
635
speex_echo_capture(SpeexEchoState * st,const spx_int16_t * rec,spx_int16_t * out)636 EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
637 {
638 int i;
639 /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/
640 st->play_buf_started = 1;
641 if (st->play_buf_pos>=st->frame_size)
642 {
643 speex_echo_cancellation(st, rec, st->play_buf, out);
644 st->play_buf_pos -= st->frame_size;
645 for (i=0;i<st->play_buf_pos;i++)
646 st->play_buf[i] = st->play_buf[i+st->frame_size];
647 } else {
648 speex_warning("No playback frame available (your application is buggy and/or got xruns)");
649 if (st->play_buf_pos!=0)
650 {
651 speex_warning("internal playback buffer corruption?");
652 st->play_buf_pos = 0;
653 }
654 for (i=0;i<st->frame_size;i++)
655 out[i] = rec[i];
656 }
657 }
658
speex_echo_playback(SpeexEchoState * st,const spx_int16_t * play)659 EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
660 {
661 /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/
662 if (!st->play_buf_started)
663 {
664 speex_warning("discarded first playback frame");
665 return;
666 }
667 if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size)
668 {
669 int i;
670 for (i=0;i<st->frame_size;i++)
671 st->play_buf[st->play_buf_pos+i] = play[i];
672 st->play_buf_pos += st->frame_size;
673 if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size)
674 {
675 speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)");
676 for (i=0;i<st->frame_size;i++)
677 st->play_buf[st->play_buf_pos+i] = play[i];
678 st->play_buf_pos += st->frame_size;
679 }
680 } else {
681 speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)");
682 }
683 }
684
685 /** Performs echo cancellation on a frame (deprecated, last arg now ignored) */
speex_echo_cancel(SpeexEchoState * st,const spx_int16_t * in,const spx_int16_t * far_end,spx_int16_t * out,spx_int32_t * Yout)686 EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)
687 {
688 speex_echo_cancellation(st, in, far_end, out);
689 }
690
691 /** Performs echo cancellation on a frame */
speex_echo_cancellation(SpeexEchoState * st,const spx_int16_t * in,const spx_int16_t * far_end,spx_int16_t * out)692 EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
693 {
694 int i,j, chan, speak;
695 int N,M, C, K;
696 spx_word32_t Syy,See,Sxx,Sdd, Sff;
697 #ifdef TWO_PATH
698 spx_word32_t Dbf;
699 int update_foreground;
700 #endif
701 spx_word32_t Sey;
702 spx_word16_t ss, ss_1;
703 spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE;
704 spx_float_t alpha, alpha_1;
705 spx_word16_t RER;
706 spx_word32_t tmp32;
707
708 N = st->window_size;
709 M = st->M;
710 C = st->C;
711 K = st->K;
712
713 st->cancel_count++;
714 #ifdef FIXED_POINT
715 ss=DIV32_16(11469,M);
716 ss_1 = SUB16(32767,ss);
717 #else
718 ss=.35/M;
719 ss_1 = 1-ss;
720 #endif
721
722 for (chan = 0; chan < C; chan++)
723 {
724 /* Apply a notch filter to make sure DC doesn't end up causing problems */
725 filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C);
726 /* Copy input data to buffer and apply pre-emphasis */
727 /* Copy input data to buffer */
728 for (i=0;i<st->frame_size;i++)
729 {
730 spx_word32_t tmp32;
731 /* FIXME: This core has changed a bit, need to merge properly */
732 tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan])));
733 #ifdef FIXED_POINT
734 if (tmp32 > 32767)
735 {
736 tmp32 = 32767;
737 if (st->saturated == 0)
738 st->saturated = 1;
739 }
740 if (tmp32 < -32767)
741 {
742 tmp32 = -32767;
743 if (st->saturated == 0)
744 st->saturated = 1;
745 }
746 #endif
747 st->memD[chan] = st->input[chan*st->frame_size+i];
748 st->input[chan*st->frame_size+i] = EXTRACT16(tmp32);
749 }
750 }
751
752 for (speak = 0; speak < K; speak++)
753 {
754 for (i=0;i<st->frame_size;i++)
755 {
756 spx_word32_t tmp32;
757 st->x[speak*N+i] = st->x[speak*N+i+st->frame_size];
758 tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak])));
759 #ifdef FIXED_POINT
760 /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */
761 if (tmp32 > 32767)
762 {
763 tmp32 = 32767;
764 st->saturated = M+1;
765 }
766 if (tmp32 < -32767)
767 {
768 tmp32 = -32767;
769 st->saturated = M+1;
770 }
771 #endif
772 st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32);
773 st->memX[speak] = far_end[i*K+speak];
774 }
775 }
776
777 for (speak = 0; speak < K; speak++)
778 {
779 /* Shift memory: this could be optimized eventually*/
780 for (j=M-1;j>=0;j--)
781 {
782 for (i=0;i<N;i++)
783 st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i];
784 }
785 /* Convert x (echo input) to frequency domain */
786 spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]);
787 }
788
789 Sxx = 0;
790 for (speak = 0; speak < K; speak++)
791 {
792 Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
793 power_spectrum_accum(st->X+speak*N, st->Xf, N);
794 }
795
796 Sff = 0;
797 for (chan = 0; chan < C; chan++)
798 {
799 #ifdef TWO_PATH
800 /* Compute foreground filter */
801 spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K);
802 spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N);
803 for (i=0;i<st->frame_size;i++)
804 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]);
805 Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
806 #endif
807 }
808
809 /* Adjust proportional adaption rate */
810 /* FIXME: Adjust that for C, K*/
811 if (st->adapted)
812 mdf_adjust_prop (st->W, N, M, C*K, st->prop);
813 /* Compute weight gradient */
814 if (st->saturated == 0)
815 {
816 for (chan = 0; chan < C; chan++)
817 {
818 for (speak = 0; speak < K; speak++)
819 {
820 for (j=M-1;j>=0;j--)
821 {
822 weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N);
823 for (i=0;i<N;i++)
824 st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i];
825 }
826 }
827 }
828 } else {
829 st->saturated--;
830 }
831
832 /* FIXME: MC conversion required */
833 /* Update weight to prevent circular convolution (MDF / AUMDF) */
834 for (chan = 0; chan < C; chan++)
835 {
836 for (speak = 0; speak < K; speak++)
837 {
838 for (j=0;j<M;j++)
839 {
840 /* This is a variant of the Alternatively Updated MDF (AUMDF) */
841 /* Remove the "if" to make this an MDF filter */
842 if (j==0 || st->cancel_count%(M-1) == j-1)
843 {
844 #ifdef FIXED_POINT
845 for (i=0;i<N;i++)
846 st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16));
847 spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
848 for (i=0;i<st->frame_size;i++)
849 {
850 st->wtmp[i]=0;
851 }
852 for (i=st->frame_size;i<N;i++)
853 {
854 st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
855 }
856 spx_fft(st->fft_table, st->wtmp, st->wtmp2);
857 /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
858 for (i=0;i<N;i++)
859 st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
860 #else
861 spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp);
862 for (i=st->frame_size;i<N;i++)
863 {
864 st->wtmp[i]=0;
865 }
866 spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]);
867 #endif
868 }
869 }
870 }
871 }
872
873 /* So we can use power_spectrum_accum */
874 for (i=0;i<=st->frame_size;i++)
875 st->Rf[i] = st->Yf[i] = st->Xf[i] = 0;
876
877 Dbf = 0;
878 See = 0;
879 #ifdef TWO_PATH
880 /* Difference in response, this is used to estimate the variance of our residual power estimate */
881 for (chan = 0; chan < C; chan++)
882 {
883 spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K);
884 spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N);
885 for (i=0;i<st->frame_size;i++)
886 st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]);
887 Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
888 for (i=0;i<st->frame_size;i++)
889 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
890 See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
891 }
892 #endif
893
894 #ifndef TWO_PATH
895 Sff = See;
896 #endif
897
898 #ifdef TWO_PATH
899 /* Logic for updating the foreground filter */
900
901 /* For two time windows, compute the mean of the energy difference, as well as the variance */
902 st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See)));
903 st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See)));
904 st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
905 st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
906
907 /* Equivalent float code:
908 st->Davg1 = .6*st->Davg1 + .4*(Sff-See);
909 st->Davg2 = .85*st->Davg2 + .15*(Sff-See);
910 st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf;
911 st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf;
912 */
913
914 update_foreground = 0;
915 /* Check if we have a statistically significant reduction in the residual echo */
916 /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */
917 if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf)))
918 update_foreground = 1;
919 else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1))))
920 update_foreground = 1;
921 else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2))))
922 update_foreground = 1;
923
924 /* Do we update? */
925 if (update_foreground)
926 {
927 st->Davg1 = st->Davg2 = 0;
928 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
929 /* Copy background filter to foreground filter */
930 for (i=0;i<N*M*C*K;i++)
931 st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16));
932 /* Apply a smooth transition so as to not introduce blocking artifacts */
933 for (chan = 0; chan < C; chan++)
934 for (i=0;i<st->frame_size;i++)
935 st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]);
936 } else {
937 int reset_background=0;
938 /* Otherwise, check if the background filter is significantly worse */
939 if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf))))
940 reset_background = 1;
941 if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1)))
942 reset_background = 1;
943 if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2)))
944 reset_background = 1;
945 if (reset_background)
946 {
947 /* Copy foreground filter to background filter */
948 for (i=0;i<N*M*C*K;i++)
949 st->W[i] = SHL32(EXTEND32(st->foreground[i]),16);
950 /* We also need to copy the output so as to get correct adaptation */
951 for (chan = 0; chan < C; chan++)
952 {
953 for (i=0;i<st->frame_size;i++)
954 st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size];
955 for (i=0;i<st->frame_size;i++)
956 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
957 }
958 See = Sff;
959 st->Davg1 = st->Davg2 = 0;
960 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
961 }
962 }
963 #endif
964
965 Sey = Syy = Sdd = 0;
966 for (chan = 0; chan < C; chan++)
967 {
968 /* Compute error signal (for the output with de-emphasis) */
969 for (i=0;i<st->frame_size;i++)
970 {
971 spx_word32_t tmp_out;
972 #ifdef TWO_PATH
973 tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size]));
974 #else
975 tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size]));
976 #endif
977 tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan])));
978 /* This is an arbitrary test for saturation in the microphone signal */
979 if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000)
980 {
981 if (st->saturated == 0)
982 st->saturated = 1;
983 }
984 out[i*C+chan] = WORD2INT(tmp_out);
985 st->memE[chan] = tmp_out;
986 }
987
988 #ifdef DUMP_ECHO_CANCEL_DATA
989 dump_audio(in, far_end, out, st->frame_size);
990 #endif
991
992 /* Compute error signal (filter update version) */
993 for (i=0;i<st->frame_size;i++)
994 {
995 st->e[chan*N+i+st->frame_size] = st->e[chan*N+i];
996 st->e[chan*N+i] = 0;
997 }
998
999 /* Compute a bunch of correlations */
1000 /* FIXME: bad merge */
1001 Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
1002 Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
1003 Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size);
1004
1005 /* Convert error to frequency domain */
1006 spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N);
1007 for (i=0;i<st->frame_size;i++)
1008 st->y[i+chan*N] = 0;
1009 spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N);
1010
1011 /* Compute power spectrum of echo (X), error (E) and filter response (Y) */
1012 power_spectrum_accum(st->E+chan*N, st->Rf, N);
1013 power_spectrum_accum(st->Y+chan*N, st->Yf, N);
1014
1015 }
1016
1017 /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/
1018
1019 /* Do some sanity check */
1020 if (!(Syy>=0 && Sxx>=0 && See >= 0)
1021 #ifndef FIXED_POINT
1022 || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9)
1023 #endif
1024 )
1025 {
1026 /* Things have gone really bad */
1027 st->screwed_up += 50;
1028 for (i=0;i<st->frame_size*C;i++)
1029 out[i] = 0;
1030 } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)))
1031 {
1032 /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */
1033 st->screwed_up++;
1034 } else {
1035 /* Everything's fine */
1036 st->screwed_up=0;
1037 }
1038 if (st->screwed_up>=50)
1039 {
1040 speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
1041 speex_echo_state_reset(st);
1042 return;
1043 }
1044
1045 /* Add a small noise floor to make sure not to have problems when dividing */
1046 See = MAX32(See, SHR32(MULT16_16(N, 100),6));
1047
1048 for (speak = 0; speak < K; speak++)
1049 {
1050 Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
1051 power_spectrum_accum(st->X+speak*N, st->Xf, N);
1052 }
1053
1054
1055 /* Smooth far end energy estimate over time */
1056 for (j=0;j<=st->frame_size;j++)
1057 st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);
1058
1059 /* Compute filtered spectra and (cross-)correlations */
1060 for (j=st->frame_size;j>=0;j--)
1061 {
1062 spx_float_t Eh, Yh;
1063 Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]);
1064 Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]);
1065 Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
1066 Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
1067 #ifdef FIXED_POINT
1068 st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]);
1069 st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]);
1070 #else
1071 st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j];
1072 st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j];
1073 #endif
1074 }
1075
1076 Pyy = FLOAT_SQRT(Pyy);
1077 Pey = FLOAT_DIVU(Pey,Pyy);
1078
1079 /* Compute correlation updatete rate */
1080 tmp32 = MULT16_32_Q15(st->beta0,Syy);
1081 if (tmp32 > MULT16_32_Q15(st->beta_max,See))
1082 tmp32 = MULT16_32_Q15(st->beta_max,See);
1083 alpha = FLOAT_DIV32(tmp32, See);
1084 alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
1085 /* Update correlations (recursive average) */
1086 st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey));
1087 st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy));
1088 if (FLOAT_LT(st->Pyy, FLOAT_ONE))
1089 st->Pyy = FLOAT_ONE;
1090 /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */
1091 if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy)))
1092 st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy);
1093 if (FLOAT_GT(st->Pey, st->Pyy))
1094 st->Pey = st->Pyy;
1095 /* leak_estimate is the linear regression result */
1096 st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14));
1097 /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */
1098 if (st->leak_estimate > 16383)
1099 st->leak_estimate = 32767;
1100 else
1101 st->leak_estimate = SHL16(st->leak_estimate,1);
1102 /*printf ("%f\n", st->leak_estimate);*/
1103
1104 /* Compute Residual to Error Ratio */
1105 #ifdef FIXED_POINT
1106 tmp32 = MULT16_32_Q15(st->leak_estimate,Syy);
1107 tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1)));
1108 /* Check for y in e (lower bound on RER) */
1109 {
1110 spx_float_t bound = PSEUDOFLOAT(Sey);
1111 bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy)));
1112 if (FLOAT_GT(bound, PSEUDOFLOAT(See)))
1113 tmp32 = See;
1114 else if (tmp32 < FLOAT_EXTRACT32(bound))
1115 tmp32 = FLOAT_EXTRACT32(bound);
1116 }
1117 if (tmp32 > SHR32(See,1))
1118 tmp32 = SHR32(See,1);
1119 RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15));
1120 #else
1121 RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See;
1122 /* Check for y in e (lower bound on RER) */
1123 if (RER < Sey*Sey/(1+See*Syy))
1124 RER = Sey*Sey/(1+See*Syy);
1125 if (RER > .5)
1126 RER = .5;
1127 #endif
1128
1129 /* We consider that the filter has had minimal adaptation if the following is true*/
1130 if (!st->adapted && st->sum_adapt > SHL32(EXTEND32(M),15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy))
1131 {
1132 st->adapted = 1;
1133 }
1134
1135 if (st->adapted)
1136 {
1137 /* Normal learning rate calculation once we're past the minimal adaptation phase */
1138 for (i=0;i<=st->frame_size;i++)
1139 {
1140 spx_word32_t r, e;
1141 /* Compute frequency-domain adaptation mask */
1142 r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3));
1143 e = SHL32(st->Rf[i],3)+1;
1144 #ifdef FIXED_POINT
1145 if (r>SHR32(e,1))
1146 r = SHR32(e,1);
1147 #else
1148 if (r>.5*e)
1149 r = .5*e;
1150 #endif
1151 r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
1152 /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/
1153 st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16);
1154 }
1155 } else {
1156 /* Temporary adaption rate if filter is not yet adapted enough */
1157 spx_word16_t adapt_rate=0;
1158
1159 if (Sxx > SHR32(MULT16_16(N, 1000),6))
1160 {
1161 tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
1162 #ifdef FIXED_POINT
1163 if (tmp32 > SHR32(See,2))
1164 tmp32 = SHR32(See,2);
1165 #else
1166 if (tmp32 > .25*See)
1167 tmp32 = .25*See;
1168 #endif
1169 adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15));
1170 }
1171 for (i=0;i<=st->frame_size;i++)
1172 st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1);
1173
1174
1175 /* How much have we adapted so far? */
1176 st->sum_adapt = ADD32(st->sum_adapt,adapt_rate);
1177 }
1178
1179 /* FIXME: MC conversion required */
1180 for (i=0;i<st->frame_size;i++)
1181 st->last_y[i] = st->last_y[st->frame_size+i];
1182 if (st->adapted)
1183 {
1184 /* If the filter is adapted, take the filtered echo */
1185 for (i=0;i<st->frame_size;i++)
1186 st->last_y[st->frame_size+i] = in[i]-out[i];
1187 } else {
1188 /* If filter isn't adapted yet, all we can do is take the far end signal directly */
1189 /* moved earlier: for (i=0;i<N;i++)
1190 st->last_y[i] = st->x[i];*/
1191 }
1192
1193 }
1194
1195 /* Compute spectrum of estimated echo for use in an echo post-filter */
speex_echo_get_residual(SpeexEchoState * st,spx_word32_t * residual_echo,int len)1196 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len)
1197 {
1198 int i;
1199 spx_word16_t leak2;
1200 int N;
1201
1202 N = st->window_size;
1203
1204 /* Apply hanning window (should pre-compute it)*/
1205 for (i=0;i<N;i++)
1206 st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]);
1207
1208 /* Compute power spectrum of the echo */
1209 spx_fft(st->fft_table, st->y, st->Y);
1210 power_spectrum(st->Y, residual_echo, N);
1211
1212 #ifdef FIXED_POINT
1213 if (st->leak_estimate > 16383)
1214 leak2 = 32767;
1215 else
1216 leak2 = SHL16(st->leak_estimate, 1);
1217 #else
1218 if (st->leak_estimate>.5)
1219 leak2 = 1;
1220 else
1221 leak2 = 2*st->leak_estimate;
1222 #endif
1223 /* Estimate residual echo */
1224 for (i=0;i<=st->frame_size;i++)
1225 residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]);
1226
1227 }
1228
speex_echo_ctl(SpeexEchoState * st,int request,void * ptr)1229 EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
1230 {
1231 switch(request)
1232 {
1233
1234 case SPEEX_ECHO_GET_FRAME_SIZE:
1235 (*(int*)ptr) = st->frame_size;
1236 break;
1237 case SPEEX_ECHO_SET_SAMPLING_RATE:
1238 st->sampling_rate = (*(int*)ptr);
1239 st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
1240 #ifdef FIXED_POINT
1241 st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
1242 st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
1243 #else
1244 st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
1245 st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
1246 #endif
1247 if (st->sampling_rate<12000)
1248 st->notch_radius = QCONST16(.9, 15);
1249 else if (st->sampling_rate<24000)
1250 st->notch_radius = QCONST16(.982, 15);
1251 else
1252 st->notch_radius = QCONST16(.992, 15);
1253 break;
1254 case SPEEX_ECHO_GET_SAMPLING_RATE:
1255 (*(int*)ptr) = st->sampling_rate;
1256 break;
1257 case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE:
1258 /*FIXME: Implement this for multiple channels */
1259 *((spx_int32_t *)ptr) = st->M * st->frame_size;
1260 break;
1261 case SPEEX_ECHO_GET_IMPULSE_RESPONSE:
1262 {
1263 int M = st->M, N = st->window_size, n = st->frame_size, i, j;
1264 spx_int32_t *filt = (spx_int32_t *) ptr;
1265 for(j=0;j<M;j++)
1266 {
1267 /*FIXME: Implement this for multiple channels */
1268 #ifdef FIXED_POINT
1269 for (i=0;i<N;i++)
1270 st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN));
1271 spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
1272 #else
1273 spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
1274 #endif
1275 for(i=0;i<n;i++)
1276 filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN);
1277 }
1278 }
1279 break;
1280 default:
1281 speex_warning_int("Unknown speex_echo_ctl request: ", request);
1282 return -1;
1283 }
1284 return 0;
1285 }
1286