1 /* Copyright (C) 2003 Epic Games (written by Jean-Marc Valin)
2 Copyright (C) 2004-2006 Epic Games
3
4 File: preprocess.c
5 Preprocessor with denoising based on the algorithm by Ephraim and Malah
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 1. Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 3. The name of the author may not be used to endorse or promote products
19 derived from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
25 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
29 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 POSSIBILITY OF SUCH DAMAGE.
32 */
33
34
35 /*
36 Recommended papers:
37
38 Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error
39 short-time spectral amplitude estimator". IEEE Transactions on Acoustics,
40 Speech and Signal Processing, vol. ASSP-32, no. 6, pp. 1109-1121, 1984.
41
42 Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error
43 log-spectral amplitude estimator". IEEE Transactions on Acoustics, Speech and
44 Signal Processing, vol. ASSP-33, no. 2, pp. 443-445, 1985.
45
46 I. Cohen and B. Berdugo, "Speech enhancement for non-stationary noise environments".
47 Signal Processing, vol. 81, no. 2, pp. 2403-2418, 2001.
48
49 Stefan Gustafsson, Rainer Martin, Peter Jax, and Peter Vary. "A psychoacoustic
50 approach to combined acoustic echo cancellation and noise reduction". IEEE
51 Transactions on Speech and Audio Processing, 2002.
52
53 J.-M. Valin, J. Rouat, and F. Michaud, "Microphone array post-filter for separation
54 of simultaneous non-stationary sources". In Proceedings IEEE International
55 Conference on Acoustics, Speech, and Signal Processing, 2004.
56 */
57
58 #ifdef HAVE_CONFIG_H
59 #include "config.h"
60 #endif
61
62 #include <math.h>
63 #include "speex/speex_preprocess.h"
64 #include "speex/speex_echo.h"
65 #include "arch.h"
66 #include "fftwrap.h"
67 #include "filterbank.h"
68 #include "math_approx.h"
69 #include "os_support.h"
70
71 #define LOUDNESS_EXP 5.f
72 #define AMP_SCALE .001f
73 #define AMP_SCALE_1 1000.f
74
75 #define NB_BANDS 24
76
77 #define SPEECH_PROB_START_DEFAULT QCONST16(0.35f,15)
78 #define SPEECH_PROB_CONTINUE_DEFAULT QCONST16(0.20f,15)
79 #define NOISE_SUPPRESS_DEFAULT -15
80 #define ECHO_SUPPRESS_DEFAULT -40
81 #define ECHO_SUPPRESS_ACTIVE_DEFAULT -15
82
83 #ifndef NULL
84 #define NULL 0
85 #endif
86
87 #define SQR(x) ((x)*(x))
88 #define SQR16(x) (MULT16_16((x),(x)))
89 #define SQR16_Q15(x) (MULT16_16_Q15((x),(x)))
90
91 #ifdef FIXED_POINT
DIV32_16_Q8(spx_word32_t a,spx_word32_t b)92 static inline spx_word16_t DIV32_16_Q8(spx_word32_t a, spx_word32_t b)
93 {
94 if (SHR32(a,7) >= b)
95 {
96 return 32767;
97 } else {
98 if (b>=QCONST32(1,23))
99 {
100 a = SHR32(a,8);
101 b = SHR32(b,8);
102 }
103 if (b>=QCONST32(1,19))
104 {
105 a = SHR32(a,4);
106 b = SHR32(b,4);
107 }
108 if (b>=QCONST32(1,15))
109 {
110 a = SHR32(a,4);
111 b = SHR32(b,4);
112 }
113 a = SHL32(a,8);
114 return PDIV32_16(a,b);
115 }
116
117 }
DIV32_16_Q15(spx_word32_t a,spx_word32_t b)118 static inline spx_word16_t DIV32_16_Q15(spx_word32_t a, spx_word32_t b)
119 {
120 if (SHR32(a,15) >= b)
121 {
122 return 32767;
123 } else {
124 if (b>=QCONST32(1,23))
125 {
126 a = SHR32(a,8);
127 b = SHR32(b,8);
128 }
129 if (b>=QCONST32(1,19))
130 {
131 a = SHR32(a,4);
132 b = SHR32(b,4);
133 }
134 if (b>=QCONST32(1,15))
135 {
136 a = SHR32(a,4);
137 b = SHR32(b,4);
138 }
139 a = SHL32(a,15)-a;
140 return DIV32_16(a,b);
141 }
142 }
143 #define SNR_SCALING 256.f
144 #define SNR_SCALING_1 0.0039062f
145 #define SNR_SHIFT 8
146
147 #define FRAC_SCALING 32767.f
148 #define FRAC_SCALING_1 3.0518e-05
149 #define FRAC_SHIFT 1
150
151 #define EXPIN_SCALING 2048.f
152 #define EXPIN_SCALING_1 0.00048828f
153 #define EXPIN_SHIFT 11
154 #define EXPOUT_SCALING_1 1.5259e-05
155
156 #define NOISE_SHIFT 7
157
158 #else
159
160 #define DIV32_16_Q8(a,b) ((a)/(b))
161 #define DIV32_16_Q15(a,b) ((a)/(b))
162 #define SNR_SCALING 1.f
163 #define SNR_SCALING_1 1.f
164 #define SNR_SHIFT 0
165 #define FRAC_SCALING 1.f
166 #define FRAC_SCALING_1 1.f
167 #define FRAC_SHIFT 0
168 #define NOISE_SHIFT 0
169
170 #define EXPIN_SCALING 1.f
171 #define EXPIN_SCALING_1 1.f
172 #define EXPOUT_SCALING_1 1.f
173
174 #endif
175
176 /** Speex pre-processor state. */
177 struct SpeexPreprocessState_ {
178 /* Basic info */
179 int frame_size; /**< Number of samples processed each time */
180 int ps_size; /**< Number of points in the power spectrum */
181 int sampling_rate; /**< Sampling rate of the input/output */
182 int nbands;
183 FilterBank *bank;
184
185 /* Parameters */
186 int denoise_enabled;
187 int vad_enabled;
188 int dereverb_enabled;
189 spx_word16_t reverb_decay;
190 spx_word16_t reverb_level;
191 spx_word16_t speech_prob_start;
192 spx_word16_t speech_prob_continue;
193 int noise_suppress;
194 int echo_suppress;
195 int echo_suppress_active;
196 SpeexEchoState *echo_state;
197
198 spx_word16_t speech_prob; /**< Probability last frame was speech */
199
200 /* DSP-related arrays */
201 spx_word16_t *frame; /**< Processing frame (2*ps_size) */
202 spx_word16_t *ft; /**< Processing frame in freq domain (2*ps_size) */
203 spx_word32_t *ps; /**< Current power spectrum */
204 spx_word16_t *gain2; /**< Adjusted gains */
205 spx_word16_t *gain_floor; /**< Minimum gain allowed */
206 spx_word16_t *window; /**< Analysis/Synthesis window */
207 spx_word32_t *noise; /**< Noise estimate */
208 spx_word32_t *reverb_estimate; /**< Estimate of reverb energy */
209 spx_word32_t *old_ps; /**< Power spectrum for last frame */
210 spx_word16_t *gain; /**< Ephraim Malah gain */
211 spx_word16_t *prior; /**< A-priori SNR */
212 spx_word16_t *post; /**< A-posteriori SNR */
213
214 spx_word32_t *S; /**< Smoothed power spectrum */
215 spx_word32_t *Smin; /**< See Cohen paper */
216 spx_word32_t *Stmp; /**< See Cohen paper */
217 int *update_prob; /**< Probability of speech presence for noise update */
218
219 spx_word16_t *zeta; /**< Smoothed a priori SNR */
220 spx_word32_t *echo_noise;
221 spx_word32_t *residual_echo;
222
223 /* Misc */
224 spx_word16_t *inbuf; /**< Input buffer (overlapped analysis) */
225 spx_word16_t *outbuf; /**< Output buffer (for overlap and add) */
226
227 /* AGC stuff, only for floating point for now */
228 #ifndef FIXED_POINT
229 int agc_enabled;
230 float agc_level;
231 float loudness_accum;
232 float *loudness_weight; /**< Perceptual loudness curve */
233 float loudness; /**< Loudness estimate */
234 float agc_gain; /**< Current AGC gain */
235 float max_gain; /**< Maximum gain allowed */
236 float max_increase_step; /**< Maximum increase in gain from one frame to another */
237 float max_decrease_step; /**< Maximum decrease in gain from one frame to another */
238 float prev_loudness; /**< Loudness of previous frame */
239 float init_max; /**< Current gain limit during initialisation */
240 #endif
241 int nb_adapt; /**< Number of frames used for adaptation so far */
242 int was_speech;
243 int min_count; /**< Number of frames processed so far */
244 void *fft_lookup; /**< Lookup table for the FFT */
245 #ifdef FIXED_POINT
246 int frame_shift;
247 #endif
248 };
249
250
conj_window(spx_word16_t * w,int len)251 static void conj_window(spx_word16_t *w, int len)
252 {
253 int i;
254 for (i=0;i<len;i++)
255 {
256 spx_word16_t tmp;
257 #ifdef FIXED_POINT
258 spx_word16_t x = DIV32_16(MULT16_16(32767,i),len);
259 #else
260 spx_word16_t x = DIV32_16(MULT16_16(QCONST16(4.f,13),i),len);
261 #endif
262 int inv=0;
263 if (x<QCONST16(1.f,13))
264 {
265 } else if (x<QCONST16(2.f,13))
266 {
267 x=QCONST16(2.f,13)-x;
268 inv=1;
269 } else if (x<QCONST16(3.f,13))
270 {
271 x=x-QCONST16(2.f,13);
272 inv=1;
273 } else {
274 x=QCONST16(2.f,13)-x+QCONST16(2.f,13); /* 4 - x */
275 }
276 x = MULT16_16_Q14(QCONST16(1.271903f,14), x);
277 tmp = SQR16_Q15(QCONST16(.5f,15)-MULT16_16_P15(QCONST16(.5f,15),spx_cos_norm(SHL32(EXTEND32(x),2))));
278 if (inv)
279 tmp=SUB16(Q15_ONE,tmp);
280 w[i]=spx_sqrt(SHL32(EXTEND32(tmp),15));
281 }
282 }
283
284
285 #ifdef FIXED_POINT
286 /* This function approximates the gain function
287 y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x)
288 which multiplied by xi/(1+xi) is the optimal gain
289 in the loudness domain ( sqrt[amplitude] )
290 Input in Q11 format, output in Q15
291 */
hypergeom_gain(spx_word32_t xx)292 static inline spx_word32_t hypergeom_gain(spx_word32_t xx)
293 {
294 int ind;
295 spx_word16_t frac;
296 /* Q13 table */
297 static const spx_word16_t table[21] = {
298 6730, 8357, 9868, 11267, 12563, 13770, 14898,
299 15959, 16961, 17911, 18816, 19682, 20512, 21311,
300 22082, 22827, 23549, 24250, 24931, 25594, 26241};
301 ind = SHR32(xx,10);
302 if (ind<0)
303 return Q15_ONE;
304 if (ind>19)
305 return ADD32(EXTEND32(Q15_ONE),EXTEND32(DIV32_16(QCONST32(.1296,23), SHR32(xx,EXPIN_SHIFT-SNR_SHIFT))));
306 frac = SHL32(xx-SHL32(ind,10),5);
307 return SHL32(DIV32_16(PSHR32(MULT16_16(Q15_ONE-frac,table[ind]) + MULT16_16(frac,table[ind+1]),7),(spx_sqrt(SHL32(xx,15)+6711))),7);
308 }
309
qcurve(spx_word16_t x)310 static inline spx_word16_t qcurve(spx_word16_t x)
311 {
312 x = MAX16(x, 1);
313 return DIV32_16(SHL32(EXTEND32(32767),9),ADD16(512,MULT16_16_Q15(QCONST16(.60f,15),DIV32_16(32767,x))));
314 }
315
316 /* Compute the gain floor based on different floors for the background noise and residual echo */
compute_gain_floor(int noise_suppress,int effective_echo_suppress,spx_word32_t * noise,spx_word32_t * echo,spx_word16_t * gain_floor,int len)317 static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len)
318 {
319 int i;
320
321 if (noise_suppress > effective_echo_suppress)
322 {
323 spx_word16_t noise_gain, gain_ratio;
324 noise_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),noise_suppress)),1)));
325 gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),effective_echo_suppress-noise_suppress)),1)));
326
327 /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */
328 for (i=0;i<len;i++)
329 gain_floor[i] = MULT16_16_Q15(noise_gain,
330 spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(PSHR32(noise[i],NOISE_SHIFT) + MULT16_32_Q15(gain_ratio,echo[i]),
331 (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15)));
332 } else {
333 spx_word16_t echo_gain, gain_ratio;
334 echo_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),effective_echo_suppress)),1)));
335 gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),noise_suppress-effective_echo_suppress)),1)));
336
337 /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */
338 for (i=0;i<len;i++)
339 gain_floor[i] = MULT16_16_Q15(echo_gain,
340 spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(MULT16_32_Q15(gain_ratio,PSHR32(noise[i],NOISE_SHIFT)) + echo[i],
341 (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15)));
342 }
343 }
344
345 #else
346 /* This function approximates the gain function
347 y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x)
348 which multiplied by xi/(1+xi) is the optimal gain
349 in the loudness domain ( sqrt[amplitude] )
350 */
hypergeom_gain(spx_word32_t xx)351 static inline spx_word32_t hypergeom_gain(spx_word32_t xx)
352 {
353 int ind;
354 float integer, frac;
355 float x;
356 static const float table[21] = {
357 0.82157f, 1.02017f, 1.20461f, 1.37534f, 1.53363f, 1.68092f, 1.81865f,
358 1.94811f, 2.07038f, 2.18638f, 2.29688f, 2.40255f, 2.50391f, 2.60144f,
359 2.69551f, 2.78647f, 2.87458f, 2.96015f, 3.04333f, 3.12431f, 3.20326f};
360 x = EXPIN_SCALING_1*xx;
361 integer = floor(2*x);
362 ind = (int)integer;
363 if (ind<0)
364 return FRAC_SCALING;
365 if (ind>19)
366 return FRAC_SCALING*(1+.1296/x);
367 frac = 2*x-integer;
368 return FRAC_SCALING*((1-frac)*table[ind] + frac*table[ind+1])/sqrt(x+.0001f);
369 }
370
qcurve(spx_word16_t x)371 static inline spx_word16_t qcurve(spx_word16_t x)
372 {
373 return 1.f/(1.f+.15f/(SNR_SCALING_1*x));
374 }
375
compute_gain_floor(int noise_suppress,int effective_echo_suppress,spx_word32_t * noise,spx_word32_t * echo,spx_word16_t * gain_floor,int len)376 static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len)
377 {
378 int i;
379 float echo_floor;
380 float noise_floor;
381
382 noise_floor = exp(.2302585f*noise_suppress);
383 echo_floor = exp(.2302585f*effective_echo_suppress);
384
385 /* Compute the gain floor based on different floors for the background noise and residual echo */
386 for (i=0;i<len;i++)
387 gain_floor[i] = FRAC_SCALING*sqrt(noise_floor*PSHR32(noise[i],NOISE_SHIFT) + echo_floor*echo[i])/sqrt(1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]);
388 }
389
390 #endif
speex_preprocess_state_init(int frame_size,int sampling_rate)391 EXPORT SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate)
392 {
393 int i;
394 int N, N3, N4, M;
395
396 SpeexPreprocessState *st = (SpeexPreprocessState *)speex_alloc(sizeof(SpeexPreprocessState));
397 st->frame_size = frame_size;
398
399 /* Round ps_size down to the nearest power of two */
400 #if 0
401 i=1;
402 st->ps_size = st->frame_size;
403 while(1)
404 {
405 if (st->ps_size & ~i)
406 {
407 st->ps_size &= ~i;
408 i<<=1;
409 } else {
410 break;
411 }
412 }
413
414
415 if (st->ps_size < 3*st->frame_size/4)
416 st->ps_size = st->ps_size * 3 / 2;
417 #else
418 st->ps_size = st->frame_size;
419 #endif
420
421 N = st->ps_size;
422 N3 = 2*N - st->frame_size;
423 N4 = st->frame_size - N3;
424
425 st->sampling_rate = sampling_rate;
426 st->denoise_enabled = 1;
427 st->vad_enabled = 0;
428 st->dereverb_enabled = 0;
429 st->reverb_decay = 0;
430 st->reverb_level = 0;
431 st->noise_suppress = NOISE_SUPPRESS_DEFAULT;
432 st->echo_suppress = ECHO_SUPPRESS_DEFAULT;
433 st->echo_suppress_active = ECHO_SUPPRESS_ACTIVE_DEFAULT;
434
435 st->speech_prob_start = SPEECH_PROB_START_DEFAULT;
436 st->speech_prob_continue = SPEECH_PROB_CONTINUE_DEFAULT;
437
438 st->echo_state = NULL;
439
440 st->nbands = NB_BANDS;
441 M = st->nbands;
442 st->bank = filterbank_new(M, sampling_rate, N, 1);
443
444 st->frame = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
445 st->window = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
446 st->ft = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
447
448 st->ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
449 st->noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
450 st->echo_noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
451 st->residual_echo = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
452 st->reverb_estimate = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
453 st->old_ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
454 st->prior = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
455 st->post = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
456 st->gain = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
457 st->gain2 = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
458 st->gain_floor = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
459 st->zeta = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
460
461 st->S = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
462 st->Smin = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
463 st->Stmp = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
464 st->update_prob = (int*)speex_alloc(N*sizeof(int));
465
466 st->inbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t));
467 st->outbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t));
468
469 conj_window(st->window, 2*N3);
470 for (i=2*N3;i<2*st->ps_size;i++)
471 st->window[i]=Q15_ONE;
472
473 if (N4>0)
474 {
475 for (i=N3-1;i>=0;i--)
476 {
477 st->window[i+N3+N4]=st->window[i+N3];
478 st->window[i+N3]=1;
479 }
480 }
481 for (i=0;i<N+M;i++)
482 {
483 st->noise[i]=QCONST32(1.f,NOISE_SHIFT);
484 st->reverb_estimate[i]=0;
485 st->old_ps[i]=1;
486 st->gain[i]=Q15_ONE;
487 st->post[i]=SHL16(1, SNR_SHIFT);
488 st->prior[i]=SHL16(1, SNR_SHIFT);
489 }
490
491 for (i=0;i<N;i++)
492 st->update_prob[i] = 1;
493 for (i=0;i<N3;i++)
494 {
495 st->inbuf[i]=0;
496 st->outbuf[i]=0;
497 }
498 #ifndef FIXED_POINT
499 st->agc_enabled = 0;
500 st->agc_level = 8000;
501 st->loudness_weight = (float*)speex_alloc(N*sizeof(float));
502 for (i=0;i<N;i++)
503 {
504 float ff=((float)i)*.5*sampling_rate/((float)N);
505 /*st->loudness_weight[i] = .5f*(1.f/(1.f+ff/8000.f))+1.f*exp(-.5f*(ff-3800.f)*(ff-3800.f)/9e5f);*/
506 st->loudness_weight[i] = .35f-.35f*ff/16000.f+.73f*exp(-.5f*(ff-3800)*(ff-3800)/9e5f);
507 if (st->loudness_weight[i]<.01f)
508 st->loudness_weight[i]=.01f;
509 st->loudness_weight[i] *= st->loudness_weight[i];
510 }
511 /*st->loudness = pow(AMP_SCALE*st->agc_level,LOUDNESS_EXP);*/
512 st->loudness = 1e-15;
513 st->agc_gain = 1;
514 st->max_gain = 30;
515 st->max_increase_step = exp(0.11513f * 12.*st->frame_size / st->sampling_rate);
516 st->max_decrease_step = exp(-0.11513f * 40.*st->frame_size / st->sampling_rate);
517 st->prev_loudness = 1;
518 st->init_max = 1;
519 #endif
520 st->was_speech = 0;
521
522 st->fft_lookup = spx_fft_init(2*N);
523
524 st->nb_adapt=0;
525 st->min_count=0;
526 return st;
527 }
528
speex_preprocess_state_destroy(SpeexPreprocessState * st)529 EXPORT void speex_preprocess_state_destroy(SpeexPreprocessState *st)
530 {
531 speex_free(st->frame);
532 speex_free(st->ft);
533 speex_free(st->ps);
534 speex_free(st->gain2);
535 speex_free(st->gain_floor);
536 speex_free(st->window);
537 speex_free(st->noise);
538 speex_free(st->reverb_estimate);
539 speex_free(st->old_ps);
540 speex_free(st->gain);
541 speex_free(st->prior);
542 speex_free(st->post);
543 #ifndef FIXED_POINT
544 speex_free(st->loudness_weight);
545 #endif
546 speex_free(st->echo_noise);
547 speex_free(st->residual_echo);
548
549 speex_free(st->S);
550 speex_free(st->Smin);
551 speex_free(st->Stmp);
552 speex_free(st->update_prob);
553 speex_free(st->zeta);
554
555 speex_free(st->inbuf);
556 speex_free(st->outbuf);
557
558 spx_fft_destroy(st->fft_lookup);
559 filterbank_destroy(st->bank);
560 speex_free(st);
561 }
562
563 /* FIXME: The AGC doesn't work yet with fixed-point*/
564 #ifndef FIXED_POINT
speex_compute_agc(SpeexPreprocessState * st,spx_word16_t Pframe,spx_word16_t * ft)565 static void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx_word16_t *ft)
566 {
567 int i;
568 int N = st->ps_size;
569 float target_gain;
570 float loudness=1.f;
571 float rate;
572
573 for (i=2;i<N;i++)
574 {
575 loudness += 2.f*N*st->ps[i]* st->loudness_weight[i];
576 }
577 loudness=sqrt(loudness);
578 /*if (loudness < 2*pow(st->loudness, 1.0/LOUDNESS_EXP) &&
579 loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/
580 if (Pframe>.3f)
581 {
582 /*rate=2.0f*Pframe*Pframe/(1+st->nb_loudness_adapt);*/
583 rate = .03*Pframe*Pframe;
584 st->loudness = (1-rate)*st->loudness + (rate)*pow(AMP_SCALE*loudness, LOUDNESS_EXP);
585 st->loudness_accum = (1-rate)*st->loudness_accum + rate;
586 if (st->init_max < st->max_gain && st->nb_adapt > 20)
587 st->init_max *= 1.f + .1f*Pframe*Pframe;
588 }
589 /*printf ("%f %f %f %f\n", Pframe, loudness, pow(st->loudness, 1.0f/LOUDNESS_EXP), st->loudness2);*/
590
591 target_gain = AMP_SCALE*st->agc_level*pow(st->loudness/(1e-4+st->loudness_accum), -1.0f/LOUDNESS_EXP);
592
593 if ((Pframe>.5 && st->nb_adapt > 20) || target_gain < st->agc_gain)
594 {
595 if (target_gain > st->max_increase_step*st->agc_gain)
596 target_gain = st->max_increase_step*st->agc_gain;
597 if (target_gain < st->max_decrease_step*st->agc_gain && loudness < 10*st->prev_loudness)
598 target_gain = st->max_decrease_step*st->agc_gain;
599 if (target_gain > st->max_gain)
600 target_gain = st->max_gain;
601 if (target_gain > st->init_max)
602 target_gain = st->init_max;
603
604 st->agc_gain = target_gain;
605 }
606 /*fprintf (stderr, "%f %f %f\n", loudness, (float)AMP_SCALE_1*pow(st->loudness, 1.0f/LOUDNESS_EXP), st->agc_gain);*/
607
608 for (i=0;i<2*N;i++)
609 ft[i] *= st->agc_gain;
610 st->prev_loudness = loudness;
611 }
612 #endif
613
preprocess_analysis(SpeexPreprocessState * st,spx_int16_t * x)614 static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x)
615 {
616 int i;
617 int N = st->ps_size;
618 int N3 = 2*N - st->frame_size;
619 int N4 = st->frame_size - N3;
620 spx_word32_t *ps=st->ps;
621
622 /* 'Build' input frame */
623 for (i=0;i<N3;i++)
624 st->frame[i]=st->inbuf[i];
625 for (i=0;i<st->frame_size;i++)
626 st->frame[N3+i]=x[i];
627
628 /* Update inbuf */
629 for (i=0;i<N3;i++)
630 st->inbuf[i]=x[N4+i];
631
632 /* Windowing */
633 for (i=0;i<2*N;i++)
634 st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);
635
636 #ifdef FIXED_POINT
637 {
638 spx_word16_t max_val=0;
639 for (i=0;i<2*N;i++)
640 max_val = MAX16(max_val, ABS16(st->frame[i]));
641 st->frame_shift = 14-spx_ilog2(EXTEND32(max_val));
642 for (i=0;i<2*N;i++)
643 st->frame[i] = SHL16(st->frame[i], st->frame_shift);
644 }
645 #endif
646
647 /* Perform FFT */
648 spx_fft(st->fft_lookup, st->frame, st->ft);
649
650 /* Power spectrum */
651 ps[0]=MULT16_16(st->ft[0],st->ft[0]);
652 for (i=1;i<N;i++)
653 ps[i]=MULT16_16(st->ft[2*i-1],st->ft[2*i-1]) + MULT16_16(st->ft[2*i],st->ft[2*i]);
654 for (i=0;i<N;i++)
655 st->ps[i] = PSHR32(st->ps[i], 2*st->frame_shift);
656
657 filterbank_compute_bank32(st->bank, ps, ps+N);
658 }
659
update_noise_prob(SpeexPreprocessState * st)660 static void update_noise_prob(SpeexPreprocessState *st)
661 {
662 int i;
663 int min_range;
664 int N = st->ps_size;
665
666 for (i=1;i<N-1;i++)
667 st->S[i] = MULT16_32_Q15(QCONST16(.8f,15),st->S[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i-1])
668 + MULT16_32_Q15(QCONST16(.1f,15),st->ps[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i+1]);
669 st->S[0] = MULT16_32_Q15(QCONST16(.8f,15),st->S[0]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[0]);
670 st->S[N-1] = MULT16_32_Q15(QCONST16(.8f,15),st->S[N-1]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[N-1]);
671
672 if (st->nb_adapt==1)
673 {
674 for (i=0;i<N;i++)
675 st->Smin[i] = st->Stmp[i] = 0;
676 }
677
678 if (st->nb_adapt < 100)
679 min_range = 15;
680 else if (st->nb_adapt < 1000)
681 min_range = 50;
682 else if (st->nb_adapt < 10000)
683 min_range = 150;
684 else
685 min_range = 300;
686 if (st->min_count > min_range)
687 {
688 st->min_count = 0;
689 for (i=0;i<N;i++)
690 {
691 st->Smin[i] = MIN32(st->Stmp[i], st->S[i]);
692 st->Stmp[i] = st->S[i];
693 }
694 } else {
695 for (i=0;i<N;i++)
696 {
697 st->Smin[i] = MIN32(st->Smin[i], st->S[i]);
698 st->Stmp[i] = MIN32(st->Stmp[i], st->S[i]);
699 }
700 }
701 for (i=0;i<N;i++)
702 {
703 if (MULT16_32_Q15(QCONST16(.4f,15),st->S[i]) > st->Smin[i])
704 st->update_prob[i] = 1;
705 else
706 st->update_prob[i] = 0;
707 /*fprintf (stderr, "%f ", st->S[i]/st->Smin[i]);*/
708 /*fprintf (stderr, "%f ", st->update_prob[i]);*/
709 }
710
711 }
712
713 #define NOISE_OVERCOMPENS 1.
714
715 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
716
speex_preprocess(SpeexPreprocessState * st,spx_int16_t * x,spx_int32_t * echo)717 EXPORT int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo)
718 {
719 return speex_preprocess_run(st, x);
720 }
721
speex_preprocess_run(SpeexPreprocessState * st,spx_int16_t * x)722 EXPORT int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x)
723 {
724 int i;
725 int M;
726 int N = st->ps_size;
727 int N3 = 2*N - st->frame_size;
728 int N4 = st->frame_size - N3;
729 spx_word32_t *ps=st->ps;
730 spx_word32_t Zframe;
731 spx_word16_t Pframe;
732 spx_word16_t beta, beta_1;
733 spx_word16_t effective_echo_suppress;
734
735 st->nb_adapt++;
736 if (st->nb_adapt>20000)
737 st->nb_adapt = 20000;
738 st->min_count++;
739
740 beta = MAX16(QCONST16(.03,15),DIV32_16(Q15_ONE,st->nb_adapt));
741 beta_1 = Q15_ONE-beta;
742 M = st->nbands;
743 /* Deal with residual echo if provided */
744 if (st->echo_state)
745 {
746 speex_echo_get_residual(st->echo_state, st->residual_echo, N);
747 #ifndef FIXED_POINT
748 /* If there are NaNs or ridiculous values, it'll show up in the DC and we just reset everything to zero */
749 if (!(st->residual_echo[0] >=0 && st->residual_echo[0]<N*1e9f))
750 {
751 for (i=0;i<N;i++)
752 st->residual_echo[i] = 0;
753 }
754 #endif
755 for (i=0;i<N;i++)
756 st->echo_noise[i] = MAX32(MULT16_32_Q15(QCONST16(.6f,15),st->echo_noise[i]), st->residual_echo[i]);
757 filterbank_compute_bank32(st->bank, st->echo_noise, st->echo_noise+N);
758 } else {
759 for (i=0;i<N+M;i++)
760 st->echo_noise[i] = 0;
761 }
762 preprocess_analysis(st, x);
763
764 update_noise_prob(st);
765
766 /* Noise estimation always updated for the 10 first frames */
767 /*if (st->nb_adapt<10)
768 {
769 for (i=1;i<N-1;i++)
770 st->update_prob[i] = 0;
771 }
772 */
773
774 /* Update the noise estimate for the frequencies where it can be */
775 for (i=0;i<N;i++)
776 {
777 if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i], NOISE_SHIFT))
778 st->noise[i] = MAX32(EXTEND32(0),MULT16_32_Q15(beta_1,st->noise[i]) + MULT16_32_Q15(beta,SHL32(st->ps[i],NOISE_SHIFT)));
779 }
780 filterbank_compute_bank32(st->bank, st->noise, st->noise+N);
781
782 /* Special case for first frame */
783 if (st->nb_adapt==1)
784 for (i=0;i<N+M;i++)
785 st->old_ps[i] = ps[i];
786
787 /* Compute a posteriori SNR */
788 for (i=0;i<N+M;i++)
789 {
790 spx_word16_t gamma;
791
792 /* Total noise estimate including residual echo and reverberation */
793 spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]);
794
795 /* A posteriori SNR = ps/noise - 1*/
796 st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT));
797 st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT));
798
799 /* Computing update gamma = .1 + .9*(old/(old+noise))^2 */
800 gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise))));
801
802 /* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */
803 st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15));
804 st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT));
805 }
806
807 /*print_vec(st->post, N+M, "");*/
808
809 /* Recursive average of the a priori SNR. A bit smoothed for the psd components */
810 st->zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[0]), MULT16_16(QCONST16(.3f,15),st->prior[0])),15);
811 for (i=1;i<N-1;i++)
812 st->zeta[i] = PSHR32(ADD32(ADD32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.15f,15),st->prior[i])),
813 MULT16_16(QCONST16(.075f,15),st->prior[i-1])), MULT16_16(QCONST16(.075f,15),st->prior[i+1])),15);
814 for (i=N-1;i<N+M;i++)
815 st->zeta[i] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.3f,15),st->prior[i])),15);
816
817 /* Speech probability of presence for the entire frame is based on the average filterbank a priori SNR */
818 Zframe = 0;
819 for (i=N;i<N+M;i++)
820 Zframe = ADD32(Zframe, EXTEND32(st->zeta[i]));
821 Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,st->nbands)));
822
823 effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress), MULT16_16(Pframe, st->echo_suppress_active)),15));
824
825 compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M);
826
827 /* Compute Ephraim & Malah gain speech probability of presence for each critical band (Bark scale)
828 Technically this is actually wrong because the EM gaim assumes a slightly different probability
829 distribution */
830 for (i=N;i<N+M;i++)
831 {
832 /* See EM and Cohen papers*/
833 spx_word32_t theta;
834 /* Gain from hypergeometric function */
835 spx_word32_t MM;
836 /* Weiner filter gain */
837 spx_word16_t prior_ratio;
838 /* a priority probability of speech presence based on Bark sub-band alone */
839 spx_word16_t P1;
840 /* Speech absence a priori probability (considering sub-band and frame) */
841 spx_word16_t q;
842 #ifdef FIXED_POINT
843 spx_word16_t tmp;
844 #endif
845
846 prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
847 theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));
848
849 MM = hypergeom_gain(theta);
850 /* Gain with bound */
851 st->gain[i] = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
852 /* Save old Bark power spectrum */
853 st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
854
855 P1 = QCONST16(.199f,15)+MULT16_16_Q15(QCONST16(.8f,15),qcurve (st->zeta[i]));
856 q = Q15_ONE-MULT16_16_Q15(Pframe,P1);
857 #ifdef FIXED_POINT
858 theta = MIN32(theta, EXTEND32(32767));
859 /*Q8*/tmp = MULT16_16_Q15((SHL32(1,SNR_SHIFT)+st->prior[i]),EXTRACT16(MIN32(Q15ONE,SHR32(spx_exp(-EXTRACT16(theta)),1))));
860 tmp = MIN16(QCONST16(3.,SNR_SHIFT), tmp); /* Prevent overflows in the next line*/
861 /*Q8*/tmp = EXTRACT16(PSHR32(MULT16_16(PDIV32_16(SHL32(EXTEND32(q),8),(Q15_ONE-q)),tmp),8));
862 st->gain2[i]=DIV32_16(SHL32(EXTEND32(32767),SNR_SHIFT), ADD16(256,tmp));
863 #else
864 st->gain2[i]=1/(1.f + (q/(1.f-q))*(1+st->prior[i])*exp(-theta));
865 #endif
866 }
867 /* Convert the EM gains and speech prob to linear frequency */
868 filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
869 filterbank_compute_psd16(st->bank,st->gain+N, st->gain);
870
871 /* Use 1 for linear gain resolution (best) or 0 for Bark gain resolution (faster) */
872 if (1)
873 {
874 filterbank_compute_psd16(st->bank,st->gain_floor+N, st->gain_floor);
875
876 /* Compute gain according to the Ephraim-Malah algorithm -- linear frequency */
877 for (i=0;i<N;i++)
878 {
879 spx_word32_t MM;
880 spx_word32_t theta;
881 spx_word16_t prior_ratio;
882 spx_word16_t tmp;
883 spx_word16_t p;
884 spx_word16_t g;
885
886 /* Wiener filter gain */
887 prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
888 theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));
889
890 /* Optimal estimator for loudness domain */
891 MM = hypergeom_gain(theta);
892 /* EM gain with bound */
893 g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
894 /* Interpolated speech probability of presence */
895 p = st->gain2[i];
896
897 /* Constrain the gain to be close to the Bark scale gain */
898 if (MULT16_16_Q15(QCONST16(.333f,15),g) > st->gain[i])
899 g = MULT16_16(3,st->gain[i]);
900 st->gain[i] = g;
901
902 /* Save old power spectrum */
903 st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
904
905 /* Apply gain floor */
906 if (st->gain[i] < st->gain_floor[i])
907 st->gain[i] = st->gain_floor[i];
908
909 /* Exponential decay model for reverberation (unused) */
910 /*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/
911
912 /* Take into account speech probability of presence (loudness domain MMSE estimator) */
913 /* gain2 = [p*sqrt(gain)+(1-p)*sqrt(gain _floor) ]^2 */
914 tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
915 st->gain2[i]=SQR16_Q15(tmp);
916
917 /* Use this if you want a log-domain MMSE estimator instead */
918 /*st->gain2[i] = pow(st->gain[i], p) * pow(st->gain_floor[i],1.f-p);*/
919 }
920 } else {
921 for (i=N;i<N+M;i++)
922 {
923 spx_word16_t tmp;
924 spx_word16_t p = st->gain2[i];
925 st->gain[i] = MAX16(st->gain[i], st->gain_floor[i]);
926 tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
927 st->gain2[i]=SQR16_Q15(tmp);
928 }
929 filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
930 }
931
932 /* If noise suppression is off, don't apply the gain (but then why call this in the first place!) */
933 if (!st->denoise_enabled)
934 {
935 for (i=0;i<N+M;i++)
936 st->gain2[i]=Q15_ONE;
937 }
938
939 /* Apply computed gain */
940 for (i=1;i<N;i++)
941 {
942 st->ft[2*i-1] = MULT16_16_P15(st->gain2[i],st->ft[2*i-1]);
943 st->ft[2*i] = MULT16_16_P15(st->gain2[i],st->ft[2*i]);
944 }
945 st->ft[0] = MULT16_16_P15(st->gain2[0],st->ft[0]);
946 st->ft[2*N-1] = MULT16_16_P15(st->gain2[N-1],st->ft[2*N-1]);
947
948 /*FIXME: This *will* not work for fixed-point */
949 #ifndef FIXED_POINT
950 if (st->agc_enabled)
951 speex_compute_agc(st, Pframe, st->ft);
952 #endif
953
954 /* Inverse FFT with 1/N scaling */
955 spx_ifft(st->fft_lookup, st->ft, st->frame);
956 /* Scale back to original (lower) amplitude */
957 for (i=0;i<2*N;i++)
958 st->frame[i] = PSHR16(st->frame[i], st->frame_shift);
959
960 /*FIXME: This *will* not work for fixed-point */
961 #ifndef FIXED_POINT
962 if (st->agc_enabled)
963 {
964 float max_sample=0;
965 for (i=0;i<2*N;i++)
966 if (fabs(st->frame[i])>max_sample)
967 max_sample = fabs(st->frame[i]);
968 if (max_sample>28000.f)
969 {
970 float damp = 28000.f/max_sample;
971 for (i=0;i<2*N;i++)
972 st->frame[i] *= damp;
973 }
974 }
975 #endif
976
977 /* Synthesis window (for WOLA) */
978 for (i=0;i<2*N;i++)
979 st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);
980
981 /* Perform overlap and add */
982 for (i=0;i<N3;i++)
983 x[i] = st->outbuf[i] + st->frame[i];
984 for (i=0;i<N4;i++)
985 x[N3+i] = st->frame[N3+i];
986
987 /* Update outbuf */
988 for (i=0;i<N3;i++)
989 st->outbuf[i] = st->frame[st->frame_size+i];
990
991 /* FIXME: This VAD is a kludge */
992 st->speech_prob = Pframe;
993 if (st->vad_enabled)
994 {
995 if (st->speech_prob > st->speech_prob_start || (st->was_speech && st->speech_prob > st->speech_prob_continue))
996 {
997 st->was_speech=1;
998 return 1;
999 } else
1000 {
1001 st->was_speech=0;
1002 return 0;
1003 }
1004 } else {
1005 return 1;
1006 }
1007 }
1008
speex_preprocess_estimate_update(SpeexPreprocessState * st,spx_int16_t * x)1009 EXPORT void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x)
1010 {
1011 int i;
1012 int N = st->ps_size;
1013 int N3 = 2*N - st->frame_size;
1014 int M;
1015 spx_word32_t *ps=st->ps;
1016
1017 M = st->nbands;
1018 st->min_count++;
1019
1020 preprocess_analysis(st, x);
1021
1022 update_noise_prob(st);
1023
1024 for (i=1;i<N-1;i++)
1025 {
1026 if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i],NOISE_SHIFT))
1027 {
1028 st->noise[i] = MULT16_32_Q15(QCONST16(.95f,15),st->noise[i]) + MULT16_32_Q15(QCONST16(.05f,15),SHL32(st->ps[i],NOISE_SHIFT));
1029 }
1030 }
1031
1032 for (i=0;i<N3;i++)
1033 st->outbuf[i] = MULT16_16_Q15(x[st->frame_size-N3+i],st->window[st->frame_size+i]);
1034
1035 /* Save old power spectrum */
1036 for (i=0;i<N+M;i++)
1037 st->old_ps[i] = ps[i];
1038
1039 for (i=0;i<N;i++)
1040 st->reverb_estimate[i] = MULT16_32_Q15(st->reverb_decay, st->reverb_estimate[i]);
1041 }
1042
1043
speex_preprocess_ctl(SpeexPreprocessState * state,int request,void * ptr)1044 EXPORT int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr)
1045 {
1046 int i;
1047 SpeexPreprocessState *st;
1048 st=(SpeexPreprocessState*)state;
1049 switch(request)
1050 {
1051 case SPEEX_PREPROCESS_SET_DENOISE:
1052 st->denoise_enabled = (*(spx_int32_t*)ptr);
1053 break;
1054 case SPEEX_PREPROCESS_GET_DENOISE:
1055 (*(spx_int32_t*)ptr) = st->denoise_enabled;
1056 break;
1057 #ifndef FIXED_POINT
1058 case SPEEX_PREPROCESS_SET_AGC:
1059 st->agc_enabled = (*(spx_int32_t*)ptr);
1060 break;
1061 case SPEEX_PREPROCESS_GET_AGC:
1062 (*(spx_int32_t*)ptr) = st->agc_enabled;
1063 break;
1064 #ifndef DISABLE_FLOAT_API
1065 case SPEEX_PREPROCESS_SET_AGC_LEVEL:
1066 st->agc_level = (*(float*)ptr);
1067 if (st->agc_level<1)
1068 st->agc_level=1;
1069 if (st->agc_level>32768)
1070 st->agc_level=32768;
1071 break;
1072 case SPEEX_PREPROCESS_GET_AGC_LEVEL:
1073 (*(float*)ptr) = st->agc_level;
1074 break;
1075 #endif /* #ifndef DISABLE_FLOAT_API */
1076 case SPEEX_PREPROCESS_SET_AGC_INCREMENT:
1077 st->max_increase_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate);
1078 break;
1079 case SPEEX_PREPROCESS_GET_AGC_INCREMENT:
1080 (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_increase_step)*st->sampling_rate/st->frame_size);
1081 break;
1082 case SPEEX_PREPROCESS_SET_AGC_DECREMENT:
1083 st->max_decrease_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate);
1084 break;
1085 case SPEEX_PREPROCESS_GET_AGC_DECREMENT:
1086 (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_decrease_step)*st->sampling_rate/st->frame_size);
1087 break;
1088 case SPEEX_PREPROCESS_SET_AGC_MAX_GAIN:
1089 st->max_gain = exp(0.11513f * (*(spx_int32_t*)ptr));
1090 break;
1091 case SPEEX_PREPROCESS_GET_AGC_MAX_GAIN:
1092 (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_gain));
1093 break;
1094 #endif
1095 case SPEEX_PREPROCESS_SET_VAD:
1096 speex_warning("The VAD has been replaced by a hack pending a complete rewrite");
1097 st->vad_enabled = (*(spx_int32_t*)ptr);
1098 break;
1099 case SPEEX_PREPROCESS_GET_VAD:
1100 (*(spx_int32_t*)ptr) = st->vad_enabled;
1101 break;
1102
1103 case SPEEX_PREPROCESS_SET_DEREVERB:
1104 st->dereverb_enabled = (*(spx_int32_t*)ptr);
1105 for (i=0;i<st->ps_size;i++)
1106 st->reverb_estimate[i]=0;
1107 break;
1108 case SPEEX_PREPROCESS_GET_DEREVERB:
1109 (*(spx_int32_t*)ptr) = st->dereverb_enabled;
1110 break;
1111
1112 case SPEEX_PREPROCESS_SET_DEREVERB_LEVEL:
1113 /* FIXME: Re-enable when de-reverberation is actually enabled again */
1114 /*st->reverb_level = (*(float*)ptr);*/
1115 break;
1116 case SPEEX_PREPROCESS_GET_DEREVERB_LEVEL:
1117 /* FIXME: Re-enable when de-reverberation is actually enabled again */
1118 /*(*(float*)ptr) = st->reverb_level;*/
1119 break;
1120
1121 case SPEEX_PREPROCESS_SET_DEREVERB_DECAY:
1122 /* FIXME: Re-enable when de-reverberation is actually enabled again */
1123 /*st->reverb_decay = (*(float*)ptr);*/
1124 break;
1125 case SPEEX_PREPROCESS_GET_DEREVERB_DECAY:
1126 /* FIXME: Re-enable when de-reverberation is actually enabled again */
1127 /*(*(float*)ptr) = st->reverb_decay;*/
1128 break;
1129
1130 case SPEEX_PREPROCESS_SET_PROB_START:
1131 *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr));
1132 st->speech_prob_start = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100);
1133 break;
1134 case SPEEX_PREPROCESS_GET_PROB_START:
1135 (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_start, 100);
1136 break;
1137
1138 case SPEEX_PREPROCESS_SET_PROB_CONTINUE:
1139 *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr));
1140 st->speech_prob_continue = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100);
1141 break;
1142 case SPEEX_PREPROCESS_GET_PROB_CONTINUE:
1143 (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_continue, 100);
1144 break;
1145
1146 case SPEEX_PREPROCESS_SET_NOISE_SUPPRESS:
1147 st->noise_suppress = -ABS(*(spx_int32_t*)ptr);
1148 break;
1149 case SPEEX_PREPROCESS_GET_NOISE_SUPPRESS:
1150 (*(spx_int32_t*)ptr) = st->noise_suppress;
1151 break;
1152 case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS:
1153 st->echo_suppress = -ABS(*(spx_int32_t*)ptr);
1154 break;
1155 case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS:
1156 (*(spx_int32_t*)ptr) = st->echo_suppress;
1157 break;
1158 case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE:
1159 st->echo_suppress_active = -ABS(*(spx_int32_t*)ptr);
1160 break;
1161 case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE:
1162 (*(spx_int32_t*)ptr) = st->echo_suppress_active;
1163 break;
1164 case SPEEX_PREPROCESS_SET_ECHO_STATE:
1165 st->echo_state = (SpeexEchoState*)ptr;
1166 break;
1167 case SPEEX_PREPROCESS_GET_ECHO_STATE:
1168 (*(SpeexEchoState**)ptr) = (SpeexEchoState*)st->echo_state;
1169 break;
1170 #ifndef FIXED_POINT
1171 case SPEEX_PREPROCESS_GET_AGC_LOUDNESS:
1172 (*(spx_int32_t*)ptr) = pow(st->loudness, 1.0/LOUDNESS_EXP);
1173 break;
1174 case SPEEX_PREPROCESS_GET_AGC_GAIN:
1175 (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->agc_gain));
1176 break;
1177 #endif
1178 case SPEEX_PREPROCESS_GET_PSD_SIZE:
1179 case SPEEX_PREPROCESS_GET_NOISE_PSD_SIZE:
1180 (*(spx_int32_t*)ptr) = st->ps_size;
1181 break;
1182 case SPEEX_PREPROCESS_GET_PSD:
1183 for(i=0;i<st->ps_size;i++)
1184 ((spx_int32_t *)ptr)[i] = (spx_int32_t) st->ps[i];
1185 break;
1186 case SPEEX_PREPROCESS_GET_NOISE_PSD:
1187 for(i=0;i<st->ps_size;i++)
1188 ((spx_int32_t *)ptr)[i] = (spx_int32_t) PSHR32(st->noise[i], NOISE_SHIFT);
1189 break;
1190 case SPEEX_PREPROCESS_GET_PROB:
1191 (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob, 100);
1192 break;
1193 #ifndef FIXED_POINT
1194 case SPEEX_PREPROCESS_SET_AGC_TARGET:
1195 st->agc_level = (*(spx_int32_t*)ptr);
1196 if (st->agc_level<1)
1197 st->agc_level=1;
1198 if (st->agc_level>32768)
1199 st->agc_level=32768;
1200 break;
1201 case SPEEX_PREPROCESS_GET_AGC_TARGET:
1202 (*(spx_int32_t*)ptr) = st->agc_level;
1203 break;
1204 #endif
1205 default:
1206 speex_warning_int("Unknown speex_preprocess_ctl request: ", request);
1207 return -1;
1208 }
1209 return 0;
1210 }
1211
1212 #ifdef FIXED_DEBUG
1213 long long spx_mips=0;
1214 #endif
1215
1216