1 /* Copyright (C) 2002-2006 Jean-Marc Valin
2 File: nb_celp.c
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 - Neither the name of the Xiph.org Foundation nor the names of its
16 contributors may be used to endorse or promote products derived from
17 this software without specific prior written permission.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
23 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include <math.h>
37 #include "nb_celp.h"
38 #include "lpc.h"
39 #include "lsp.h"
40 #include "ltp.h"
41 #include "quant_lsp.h"
42 #include "cb_search.h"
43 #include "filters.h"
44 #include "stack_alloc.h"
45 #include "vq.h"
46 #include <speex/speex_bits.h>
47 #include "vbr.h"
48 #include "arch.h"
49 #include "math_approx.h"
50 #include "os_support.h"
51 #include <speex/speex_callbacks.h>
52
53 #ifdef VORBIS_PSYCHO
54 #include "vorbis_psy.h"
55 #endif
56
57 #ifndef M_PI
58 #define M_PI 3.14159265358979323846 /* pi */
59 #endif
60
61 #ifndef NULL
62 #define NULL 0
63 #endif
64
65 #define SUBMODE(x) st->submodes[st->submodeID]->x
66
67 /* Default size for the encoder and decoder stack (can be changed at compile time).
68 This does not apply when using variable-size arrays or alloca. */
69 #ifndef NB_ENC_STACK
70 #define NB_ENC_STACK (8000*sizeof(spx_sig_t))
71 #endif
72
73 #ifndef NB_DEC_STACK
74 #define NB_DEC_STACK (4000*sizeof(spx_sig_t))
75 #endif
76
77
78 #ifdef FIXED_POINT
79 const spx_word32_t ol_gain_table[32]={18900, 25150, 33468, 44536, 59265, 78865, 104946, 139653, 185838, 247297, 329081, 437913, 582736, 775454, 1031906, 1373169, 1827293, 2431601, 3235761, 4305867, 5729870, 7624808, 10146425, 13501971, 17967238, 23909222, 31816294, 42338330, 56340132, 74972501, 99766822, 132760927};
80 const spx_word16_t exc_gain_quant_scal3_bound[7]={1841, 3883, 6051, 8062, 10444, 13580, 18560};
81 const spx_word16_t exc_gain_quant_scal3[8]={1002, 2680, 5086, 7016, 9108, 11781, 15380, 21740};
82 const spx_word16_t exc_gain_quant_scal1_bound[1]={14385};
83 const spx_word16_t exc_gain_quant_scal1[2]={11546, 17224};
84
85 #define LSP_MARGIN 16
86 #define LSP_DELTA1 6553
87 #define LSP_DELTA2 1638
88
89 #else
90
91 const float exc_gain_quant_scal3_bound[7]={0.112338f, 0.236980f, 0.369316f, 0.492054f, 0.637471f, 0.828874f, 1.132784f};
92 const float exc_gain_quant_scal3[8]={0.061130f, 0.163546f, 0.310413f, 0.428220f, 0.555887f, 0.719055f, 0.938694f, 1.326874f};
93 const float exc_gain_quant_scal1_bound[1]={0.87798f};
94 const float exc_gain_quant_scal1[2]={0.70469f, 1.05127f};
95
96 #define LSP_MARGIN .002f
97 #define LSP_DELTA1 .2f
98 #define LSP_DELTA2 .05f
99
100 #endif
101
102 #ifdef VORBIS_PSYCHO
103 #define EXTRA_BUFFER 100
104 #else
105 #define EXTRA_BUFFER 0
106 #endif
107
108
109 #define sqr(x) ((x)*(x))
110
111 extern const spx_word16_t lag_window[];
112 extern const spx_word16_t lpc_window[];
113
nb_encoder_init(const SpeexMode * m)114 void *nb_encoder_init(const SpeexMode *m)
115 {
116 EncState *st;
117 const SpeexNBMode *mode;
118 int i;
119
120 mode=(const SpeexNBMode *)m->mode;
121 st = (EncState*)speex_alloc(sizeof(EncState));
122 if (!st)
123 return NULL;
124 #if defined(VAR_ARRAYS) || defined (USE_ALLOCA)
125 st->stack = NULL;
126 #else
127 st->stack = (char*)speex_alloc_scratch(NB_ENC_STACK);
128 #endif
129
130 st->mode=m;
131
132 st->frameSize = mode->frameSize;
133 st->nbSubframes=mode->frameSize/mode->subframeSize;
134 st->subframeSize=mode->subframeSize;
135 st->windowSize = st->frameSize+st->subframeSize;
136 st->lpcSize = mode->lpcSize;
137 st->gamma1=mode->gamma1;
138 st->gamma2=mode->gamma2;
139 st->min_pitch=mode->pitchStart;
140 st->max_pitch=mode->pitchEnd;
141 st->lpc_floor = mode->lpc_floor;
142
143 st->submodes=mode->submodes;
144 st->submodeID=st->submodeSelect=mode->defaultSubmode;
145 st->bounded_pitch = 1;
146
147 st->encode_submode = 1;
148
149 #ifdef VORBIS_PSYCHO
150 st->psy = vorbis_psy_init(8000, 256);
151 st->curve = (float*)speex_alloc(128*sizeof(float));
152 st->old_curve = (float*)speex_alloc(128*sizeof(float));
153 st->psy_window = (float*)speex_alloc(256*sizeof(float));
154 #endif
155
156 st->cumul_gain = 1024;
157
158 /* Allocating input buffer */
159 st->winBuf = (spx_word16_t*)speex_alloc((st->windowSize-st->frameSize)*sizeof(spx_word16_t));
160 /* Allocating excitation buffer */
161 st->excBuf = (spx_word16_t*)speex_alloc((mode->frameSize+mode->pitchEnd+2)*sizeof(spx_word16_t));
162 st->exc = st->excBuf + mode->pitchEnd + 2;
163 st->swBuf = (spx_word16_t*)speex_alloc((mode->frameSize+mode->pitchEnd+2)*sizeof(spx_word16_t));
164 st->sw = st->swBuf + mode->pitchEnd + 2;
165
166 st->window= lpc_window;
167
168 /* Create the window for autocorrelation (lag-windowing) */
169 st->lagWindow = lag_window;
170
171 st->old_lsp = (spx_lsp_t*)speex_alloc((st->lpcSize)*sizeof(spx_lsp_t));
172 st->old_qlsp = (spx_lsp_t*)speex_alloc((st->lpcSize)*sizeof(spx_lsp_t));
173 st->first = 1;
174 for (i=0;i<st->lpcSize;i++)
175 st->old_lsp[i]= DIV32(MULT16_16(QCONST16(3.1415927f, LSP_SHIFT), i+1), st->lpcSize+1);
176
177 st->mem_sp = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
178 st->mem_sw = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
179 st->mem_sw_whole = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
180 st->mem_exc = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
181 st->mem_exc2 = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
182
183 st->pi_gain = (spx_word32_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word32_t));
184 st->innov_rms_save = NULL;
185
186 st->pitch = (int*)speex_alloc((st->nbSubframes)*sizeof(int));
187
188 #ifndef DISABLE_VBR
189 st->vbr = (VBRState*)speex_alloc(sizeof(VBRState));
190 vbr_init(st->vbr);
191 st->vbr_quality = 8;
192 st->vbr_enabled = 0;
193 st->vbr_max = 0;
194 st->vad_enabled = 0;
195 st->dtx_enabled = 0;
196 st->dtx_count=0;
197 st->abr_enabled = 0;
198 st->abr_drift = 0;
199 st->abr_drift2 = 0;
200 #endif /* #ifndef DISABLE_VBR */
201
202 st->plc_tuning = 2;
203 st->complexity=2;
204 st->sampling_rate=8000;
205 st->isWideband = 0;
206 st->highpass_enabled = 1;
207
208 #ifdef ENABLE_VALGRIND
209 VALGRIND_MAKE_READABLE(st, NB_ENC_STACK);
210 #endif
211 return st;
212 }
213
nb_encoder_destroy(void * state)214 void nb_encoder_destroy(void *state)
215 {
216 EncState *st=(EncState *)state;
217 /* Free all allocated memory */
218 #if !(defined(VAR_ARRAYS) || defined (USE_ALLOCA))
219 speex_free_scratch(st->stack);
220 #endif
221
222 speex_free (st->winBuf);
223 speex_free (st->excBuf);
224 speex_free (st->old_qlsp);
225 speex_free (st->swBuf);
226
227 speex_free (st->old_lsp);
228 speex_free (st->mem_sp);
229 speex_free (st->mem_sw);
230 speex_free (st->mem_sw_whole);
231 speex_free (st->mem_exc);
232 speex_free (st->mem_exc2);
233 speex_free (st->pi_gain);
234 speex_free (st->pitch);
235
236 #ifndef DISABLE_VBR
237 vbr_destroy(st->vbr);
238 speex_free (st->vbr);
239 #endif /* #ifndef DISABLE_VBR */
240
241 #ifdef VORBIS_PSYCHO
242 vorbis_psy_destroy(st->psy);
243 speex_free (st->curve);
244 speex_free (st->old_curve);
245 speex_free (st->psy_window);
246 #endif
247
248 /*Free state memory... should be last*/
249 speex_free(st);
250 }
251
/* Encode one narrowband frame.
 *
 * state: encoder state created by nb_encoder_init() (an EncState).
 * vin:   the frame to encode, read as st->frameSize spx_word16_t samples.
 *        When highpass_enabled is set the samples are filtered in place,
 *        so the caller's buffer is modified.
 * bits:  bit-stream that receives the packed frame parameters.
 *
 * Returns 0 when the selected sub-mode is the null mode (nothing beyond
 * the optional mode header is written) and 1 after a complete frame has
 * been coded.
 *
 * The function runs, in order: LPC analysis and LSP conversion, whole-frame
 * open-loop pitch/gain estimation, optional VBR/VAD/DTX mode decision,
 * LSP and open-loop parameter quantisation, then a per-sub-frame loop doing
 * the pitch (long-term predictor) search and the innovation codebook search.
 */
int nb_encode(void *state, void *vin, SpeexBits *bits)
{
   EncState *st;
   int i, sub, roots;
   int ol_pitch;
   spx_word16_t ol_pitch_coef;
   spx_word32_t ol_gain;
   VARDECL(spx_word16_t *ringing);
   VARDECL(spx_word16_t *target);
   VARDECL(spx_sig_t *innov);
   VARDECL(spx_word32_t *exc32);
   VARDECL(spx_mem_t *mem);
   VARDECL(spx_coef_t *bw_lpc1);
   VARDECL(spx_coef_t *bw_lpc2);
   VARDECL(spx_coef_t *lpc);
   VARDECL(spx_lsp_t *lsp);
   VARDECL(spx_lsp_t *qlsp);
   VARDECL(spx_lsp_t *interp_lsp);
   VARDECL(spx_lsp_t *interp_qlsp);
   VARDECL(spx_coef_t *interp_lpc);
   VARDECL(spx_coef_t *interp_qlpc);
   char *stack;
   VARDECL(spx_word16_t *syn_resp);
   VARDECL(spx_word16_t *real_exc);

   spx_word32_t ener=0;
   spx_word16_t fine_gain;
   spx_word16_t *in = (spx_word16_t*)vin;

   st=(EncState *)state;
   stack=st->stack;

   ALLOC(lpc, st->lpcSize, spx_coef_t);
   ALLOC(bw_lpc1, st->lpcSize, spx_coef_t);
   ALLOC(bw_lpc2, st->lpcSize, spx_coef_t);
   ALLOC(lsp, st->lpcSize, spx_lsp_t);
   ALLOC(qlsp, st->lpcSize, spx_lsp_t);
   ALLOC(interp_lsp, st->lpcSize, spx_lsp_t);
   ALLOC(interp_qlsp, st->lpcSize, spx_lsp_t);
   ALLOC(interp_lpc, st->lpcSize, spx_coef_t);
   ALLOC(interp_qlpc, st->lpcSize, spx_coef_t);

   /* Move signals 1 frame towards the past */
   SPEEX_MOVE(st->excBuf, st->excBuf+st->frameSize, st->max_pitch+2);
   SPEEX_MOVE(st->swBuf, st->swBuf+st->frameSize, st->max_pitch+2);

   if (st->highpass_enabled)
      highpass(in, in, st->frameSize, (st->isWideband?HIGHPASS_WIDEBAND:HIGHPASS_NARROWBAND)|HIGHPASS_INPUT, st->mem_hp);

   {
      VARDECL(spx_word16_t *w_sig);
      VARDECL(spx_word16_t *autocorr);
      ALLOC(w_sig, st->windowSize, spx_word16_t);
      ALLOC(autocorr, st->lpcSize+1, spx_word16_t);
      /* Window for analysis: saved tail of the previous frame, then the new frame */
      for (i=0;i<st->windowSize-st->frameSize;i++)
         w_sig[i] = EXTRACT16(SHR32(MULT16_16(st->winBuf[i],st->window[i]),SIG_SHIFT));
      for (;i<st->windowSize;i++)
         w_sig[i] = EXTRACT16(SHR32(MULT16_16(in[i-st->windowSize+st->frameSize],st->window[i]),SIG_SHIFT));
      /* Compute auto-correlation */
      _spx_autocorr(w_sig, autocorr, st->lpcSize+1, st->windowSize);
      autocorr[0] = ADD16(autocorr[0],MULT16_16_Q15(autocorr[0],st->lpc_floor)); /* Noise floor in auto-correlation domain */

      /* Lag windowing: equivalent to filtering in the power-spectrum domain */
      for (i=0;i<st->lpcSize+1;i++)
         autocorr[i] = MULT16_16_Q14(autocorr[i],st->lagWindow[i]);

      /* Levinson-Durbin */
      _spx_lpc(lpc, autocorr, st->lpcSize);
      /* LPC to LSPs (x-domain) transform */
      roots=lpc_to_lsp (lpc, st->lpcSize, lsp, 10, LSP_DELTA1, stack);
      /* Check if we found all the roots */
      if (roots!=st->lpcSize)
      {
         /*If we can't find all LSP's, do some damage control and use previous filter*/
         for (i=0;i<st->lpcSize;i++)
         {
            lsp[i]=st->old_lsp[i];
         }
      }
   }




   /* Whole frame analysis (open-loop estimation of pitch and excitation gain) */
   {
      int diff = st->windowSize-st->frameSize;
      if (st->first)
         for (i=0;i<st->lpcSize;i++)
            interp_lsp[i] = lsp[i];
      else
         lsp_interpolate(st->old_lsp, lsp, interp_lsp, st->lpcSize, st->nbSubframes, st->nbSubframes<<1);

      lsp_enforce_margin(interp_lsp, st->lpcSize, LSP_MARGIN);

      /* Compute interpolated LPCs (unquantized) for whole frame*/
      lsp_to_lpc(interp_lsp, interp_lpc, st->lpcSize,stack);


      /*Open-loop pitch*/
      if (!st->submodes[st->submodeID] || (st->complexity>2 && SUBMODE(have_subframe_gain)<3) || SUBMODE(forced_pitch_gain) || SUBMODE(lbr_pitch) != -1
#ifndef DISABLE_VBR
           || st->vbr_enabled || st->vad_enabled
#endif
                  )
      {
         int nol_pitch[6];
         spx_word16_t nol_pitch_coef[6];

         bw_lpc(st->gamma1, interp_lpc, bw_lpc1, st->lpcSize);
         bw_lpc(st->gamma2, interp_lpc, bw_lpc2, st->lpcSize);

         SPEEX_COPY(st->sw, st->winBuf, diff);
         SPEEX_COPY(st->sw+diff, in, st->frameSize-diff);
         filter_mem16(st->sw, bw_lpc1, bw_lpc2, st->sw, st->frameSize, st->lpcSize, st->mem_sw_whole, stack);

         open_loop_nbest_pitch(st->sw, st->min_pitch, st->max_pitch, st->frameSize,
                               nol_pitch, nol_pitch_coef, 6, stack);
         ol_pitch=nol_pitch[0];
         ol_pitch_coef = nol_pitch_coef[0];
         /*Try to remove pitch multiples*/
         for (i=1;i<6;i++)
         {
#ifdef FIXED_POINT
            if ((nol_pitch_coef[i]>MULT16_16_Q15(nol_pitch_coef[0],27853)) &&
#else
            if ((nol_pitch_coef[i]>.85*nol_pitch_coef[0]) &&
#endif
                (ABS(2*nol_pitch[i]-ol_pitch)<=2 || ABS(3*nol_pitch[i]-ol_pitch)<=3 ||
                 ABS(4*nol_pitch[i]-ol_pitch)<=4 || ABS(5*nol_pitch[i]-ol_pitch)<=5))
            {
               /*ol_pitch_coef=nol_pitch_coef[i];*/
               ol_pitch = nol_pitch[i];
            }
         }
         /*if (ol_pitch>50)
           ol_pitch/=2;*/
         /*ol_pitch_coef = sqrt(ol_pitch_coef);*/

      } else {
         ol_pitch=0;
         ol_pitch_coef=0;
      }

      /*Compute "real" excitation*/
      SPEEX_COPY(st->exc, st->winBuf, diff);
      SPEEX_COPY(st->exc+diff, in, st->frameSize-diff);
      fir_mem16(st->exc, interp_lpc, st->exc, st->frameSize, st->lpcSize, st->mem_exc, stack);

      /* Compute open-loop excitation gain */
      {
         spx_word16_t g = compute_rms16(st->exc, st->frameSize);
         if (st->submodeID!=1 && ol_pitch>0)
            ol_gain = MULT16_16(g, MULT16_16_Q14(QCONST16(1.1,14),
                                spx_sqrt(QCONST32(1.,28)-MULT16_32_Q15(QCONST16(.8,15),SHL32(MULT16_16(ol_pitch_coef,ol_pitch_coef),16)))));
         else
            ol_gain = SHL32(EXTEND32(g),SIG_SHIFT);
      }
   }

#ifdef VORBIS_PSYCHO
   SPEEX_MOVE(st->psy_window, st->psy_window+st->frameSize, 256-st->frameSize);
   SPEEX_COPY(&st->psy_window[256-st->frameSize], in, st->frameSize);
   compute_curve(st->psy, st->psy_window, st->curve);
   /*print_vec(st->curve, 128, "curve");*/
   if (st->first)
      SPEEX_COPY(st->old_curve, st->curve, 128);
#endif

   /*VBR stuff*/
#ifndef DISABLE_VBR
   if (st->vbr && (st->vbr_enabled||st->vad_enabled))
   {
      float lsp_dist=0;
      for (i=0;i<st->lpcSize;i++)
         lsp_dist += (st->old_lsp[i] - lsp[i])*(st->old_lsp[i] - lsp[i]);
      lsp_dist /= LSP_SCALING*LSP_SCALING;

      if (st->abr_enabled)
      {
         float qual_change=0;
         if (st->abr_drift2 * st->abr_drift > 0)
         {
            /* Only adapt if long-term and short-term drift are the same sign */
            qual_change = -.00001*st->abr_drift/(1+st->abr_count);
            if (qual_change>.05)
               qual_change=.05;
            if (qual_change<-.05)
               qual_change=-.05;
         }
         st->vbr_quality += qual_change;
         if (st->vbr_quality>10)
            st->vbr_quality=10;
         if (st->vbr_quality<0)
            st->vbr_quality=0;
      }

      st->relative_quality = vbr_analysis(st->vbr, in, st->frameSize, ol_pitch, GAIN_SCALING_1*ol_pitch_coef);
      /*if (delta_qual<0)*/
      /*  delta_qual*=.1*(3+st->vbr_quality);*/
      if (st->vbr_enabled)
      {
         spx_int32_t mode;
         int choice=0;
         float min_diff=100;
         mode = 8;
         /* Pick the mode whose threshold lies closest below relative_quality */
         while (mode)
         {
            int v1;
            float thresh;
            v1=(int)floor(st->vbr_quality);
            if (v1==10)
               thresh = vbr_nb_thresh[mode][v1];
            else
               thresh = (st->vbr_quality-v1)*vbr_nb_thresh[mode][v1+1] + (1+v1-st->vbr_quality)*vbr_nb_thresh[mode][v1];
            if (st->relative_quality > thresh &&
                st->relative_quality-thresh<min_diff)
            {
               choice = mode;
               min_diff = st->relative_quality-thresh;
            }
            mode--;
         }
         mode=choice;
         if (mode==0)
         {
            if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20)
            {
               mode=1;
               st->dtx_count=1;
            } else {
               mode=0;
               st->dtx_count++;
            }
         } else {
            st->dtx_count=0;
         }

         speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);
         if (st->vbr_max>0)
         {
            spx_int32_t rate;
            speex_encoder_ctl(state, SPEEX_GET_BITRATE, &rate);
            if (rate > st->vbr_max)
            {
               rate = st->vbr_max;
               speex_encoder_ctl(state, SPEEX_SET_BITRATE, &rate);
            }
         }

         if (st->abr_enabled)
         {
            spx_int32_t bitrate;
            speex_encoder_ctl(state, SPEEX_GET_BITRATE, &bitrate);
            st->abr_drift+=(bitrate-st->abr_enabled);
            st->abr_drift2 = .95*st->abr_drift2 + .05*(bitrate-st->abr_enabled);
            st->abr_count += 1.0;
         }

      } else {
         /*VAD only case*/
         int mode;
         if (st->relative_quality<2)
         {
            if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20)
            {
               st->dtx_count=1;
               mode=1;
            } else {
               mode=0;
               st->dtx_count++;
            }
         } else {
            st->dtx_count = 0;
            mode=st->submodeSelect;
         }
         /*speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);*/
         st->submodeID=mode;
      }
   } else {
      st->relative_quality = -1;
   }
#endif /* #ifndef DISABLE_VBR */

   if (st->encode_submode)
   {
      /* First, transmit a zero for narrowband */
      speex_bits_pack(bits, 0, 1);

      /* Transmit the sub-mode we use for this frame */
      speex_bits_pack(bits, st->submodeID, NB_SUBMODE_BITS);

   }

   /* If null mode (no transmission), just set a couple things to zero*/
   if (st->submodes[st->submodeID] == NULL)
   {
      for (i=0;i<st->frameSize;i++)
         st->exc[i]=st->sw[i]=VERY_SMALL;

      for (i=0;i<st->lpcSize;i++)
         st->mem_sw[i]=0;
      st->first=1;
      st->bounded_pitch = 1;

      /* Keep the tail of this frame for the next analysis window */
      SPEEX_COPY(st->winBuf, in+2*st->frameSize-st->windowSize, st->windowSize-st->frameSize);

      /* Clear memory (no need to really compute it) */
      for (i=0;i<st->lpcSize;i++)
         st->mem_sp[i] = 0;
      return 0;

   }

   /* LSP Quantization */
   if (st->first)
   {
      for (i=0;i<st->lpcSize;i++)
         st->old_lsp[i] = lsp[i];
   }


   /*Quantize LSPs*/
#if 1 /*0 for unquantized*/
   SUBMODE(lsp_quant)(lsp, qlsp, st->lpcSize, bits);
#else
   for (i=0;i<st->lpcSize;i++)
      qlsp[i]=lsp[i];
#endif

   /*If we use low bit-rate pitch mode, transmit open-loop pitch*/
   if (SUBMODE(lbr_pitch)!=-1)
   {
      speex_bits_pack(bits, ol_pitch-st->min_pitch, 7);
   }

   if (SUBMODE(forced_pitch_gain))
   {
      int quant;
      /* This just damps the pitch a bit, because it tends to be too aggressive when forced */
      ol_pitch_coef = MULT16_16_Q15(QCONST16(.9,15), ol_pitch_coef);
#ifdef FIXED_POINT
      quant = PSHR16(MULT16_16_16(15, ol_pitch_coef),GAIN_SHIFT);
#else
      quant = (int)floor(.5+15*ol_pitch_coef*GAIN_SCALING_1);
#endif
      if (quant>15)
         quant=15;
      if (quant<0)
         quant=0;
      speex_bits_pack(bits, quant, 4);
      /* Continue with the value the decoder will reconstruct */
      ol_pitch_coef=MULT16_16_P15(QCONST16(0.066667,15),SHL16(quant,GAIN_SHIFT));
   }


   /*Quantize and transmit open-loop excitation gain*/
#ifdef FIXED_POINT
   {
      int qe = scal_quant32(ol_gain, ol_gain_table, 32);
      /*ol_gain = exp(qe/3.5)*SIG_SCALING;*/
      ol_gain = MULT16_32_Q15(28406,ol_gain_table[qe]);
      speex_bits_pack(bits, qe, 5);
   }
#else
   {
      int qe = (int)(floor(.5+3.5*log(ol_gain*1.0/SIG_SCALING)));
      if (qe<0)
         qe=0;
      if (qe>31)
         qe=31;
      ol_gain = exp(qe/3.5)*SIG_SCALING;
      speex_bits_pack(bits, qe, 5);
   }
#endif



   /* Special case for first frame */
   if (st->first)
   {
      for (i=0;i<st->lpcSize;i++)
         st->old_qlsp[i] = qlsp[i];
   }

   /* Target signal */
   ALLOC(target, st->subframeSize, spx_word16_t);
   ALLOC(innov, st->subframeSize, spx_sig_t);
   ALLOC(exc32, st->subframeSize, spx_word32_t);
   ALLOC(ringing, st->subframeSize, spx_word16_t);
   ALLOC(syn_resp, st->subframeSize, spx_word16_t);
   ALLOC(real_exc, st->subframeSize, spx_word16_t);
   ALLOC(mem, st->lpcSize, spx_mem_t);

   /* Loop on sub-frames */
   for (sub=0;sub<st->nbSubframes;sub++)
   {
      int   offset;
      spx_word16_t *sw;
      spx_word16_t *exc;
      int pitch;
      int response_bound = st->subframeSize;

      /* Offset relative to start of frame */
      offset = st->subframeSize*sub;
      /* Excitation */
      exc=st->exc+offset;
      /* Weighted signal */
      sw=st->sw+offset;

      /* LSP interpolation (quantized and unquantized) */
      lsp_interpolate(st->old_lsp, lsp, interp_lsp, st->lpcSize, sub, st->nbSubframes);
      lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpcSize, sub, st->nbSubframes);

      /* Make sure the filters are stable */
      lsp_enforce_margin(interp_lsp, st->lpcSize, LSP_MARGIN);
      lsp_enforce_margin(interp_qlsp, st->lpcSize, LSP_MARGIN);

      /* Compute interpolated LPCs (quantized and unquantized) */
      lsp_to_lpc(interp_lsp, interp_lpc, st->lpcSize,stack);

      lsp_to_lpc(interp_qlsp, interp_qlpc, st->lpcSize, stack);

      /* Compute analysis filter gain at w=pi (for use in SB-CELP) */
      {
         spx_word32_t pi_g=LPC_SCALING;
         for (i=0;i<st->lpcSize;i+=2)
         {
            /*pi_g += -st->interp_qlpc[i] +  st->interp_qlpc[i+1];*/
            pi_g = ADD32(pi_g, SUB32(EXTEND32(interp_qlpc[i+1]),EXTEND32(interp_qlpc[i])));
         }
         st->pi_gain[sub] = pi_g;
      }

#ifdef VORBIS_PSYCHO
      {
         float curr_curve[128];
         float fact = ((float)sub+1.0f)/st->nbSubframes;
         for (i=0;i<128;i++)
            curr_curve[i] = (1.0f-fact)*st->old_curve[i] + fact*st->curve[i];
         curve_to_lpc(st->psy, curr_curve, bw_lpc1, bw_lpc2, 10);
      }
#else
      /* Compute bandwidth-expanded (unquantized) LPCs for perceptual weighting */
      bw_lpc(st->gamma1, interp_lpc, bw_lpc1, st->lpcSize);
      if (st->gamma2>=0)
         bw_lpc(st->gamma2, interp_lpc, bw_lpc2, st->lpcSize);
      else
      {
         for (i=0;i<st->lpcSize;i++)
            bw_lpc2[i]=0;
      }
      /*print_vec(st->bw_lpc1, 10, "bw_lpc");*/
#endif

      /*FIXME: This will break if we change the window size */
      speex_assert(st->windowSize-st->frameSize == st->subframeSize);
      /* The first sub-frame comes from the saved tail of the previous frame,
         the following ones from the current input shifted by one sub-frame */
      if (sub==0)
      {
         for (i=0;i<st->subframeSize;i++)
            real_exc[i] = sw[i] = st->winBuf[i];
      } else {
         for (i=0;i<st->subframeSize;i++)
            real_exc[i] = sw[i] = in[i+((sub-1)*st->subframeSize)];
      }
      fir_mem16(real_exc, interp_qlpc, real_exc, st->subframeSize, st->lpcSize, st->mem_exc2, stack);

      if (st->complexity==0)
         response_bound >>= 1;
      compute_impulse_response(interp_qlpc, bw_lpc1, bw_lpc2, syn_resp, response_bound, st->lpcSize, stack);
      for (i=response_bound;i<st->subframeSize;i++)
         syn_resp[i]=VERY_SMALL;

      /* Compute zero response of A(z/g1) / ( A(z/g2) * A(z) ) */
      for (i=0;i<st->lpcSize;i++)
         mem[i]=SHL32(st->mem_sp[i],1);
      for (i=0;i<st->subframeSize;i++)
         ringing[i] = VERY_SMALL;
#ifdef SHORTCUTS2
      iir_mem16(ringing, interp_qlpc, ringing, response_bound, st->lpcSize, mem, stack);
      for (i=0;i<st->lpcSize;i++)
         mem[i]=SHL32(st->mem_sw[i],1);
      /* The weighting filters are the local bw_lpc1/bw_lpc2 computed above */
      filter_mem16(ringing, bw_lpc1, bw_lpc2, ringing, response_bound, st->lpcSize, mem, stack);
      SPEEX_MEMSET(&ringing[response_bound], 0, st->subframeSize-response_bound);
#else
      iir_mem16(ringing, interp_qlpc, ringing, st->subframeSize, st->lpcSize, mem, stack);
      for (i=0;i<st->lpcSize;i++)
         mem[i]=SHL32(st->mem_sw[i],1);
      filter_mem16(ringing, bw_lpc1, bw_lpc2, ringing, st->subframeSize, st->lpcSize, mem, stack);
#endif

      /* Compute weighted signal (on a copy of the filter memory) */
      for (i=0;i<st->lpcSize;i++)
         mem[i]=st->mem_sw[i];
      filter_mem16(sw, bw_lpc1, bw_lpc2, sw, st->subframeSize, st->lpcSize, mem, stack);

      /* At complexity 0 the weighted signal is not recomputed after synthesis,
         so the memory is committed here */
      if (st->complexity==0)
         for (i=0;i<st->lpcSize;i++)
            st->mem_sw[i]=mem[i];

      /* Compute target signal (saturation prevents overflows on clipped input speech) */
      for (i=0;i<st->subframeSize;i++)
         target[i]=EXTRACT16(SATURATE(SUB32(sw[i],PSHR32(ringing[i],1)),32767));

      /* Reset excitation */
      SPEEX_MEMSET(exc, 0, st->subframeSize);

      /* If we have a long-term predictor (otherwise, something's wrong) */
      speex_assert (SUBMODE(ltp_quant));
      {
         int pit_min, pit_max;
         /* Long-term prediction */
         if (SUBMODE(lbr_pitch) != -1)
         {
            /* Low bit-rate pitch handling */
            int margin;
            margin = SUBMODE(lbr_pitch);
            if (margin)
            {
               if (ol_pitch < st->min_pitch+margin-1)
                  ol_pitch=st->min_pitch+margin-1;
               if (ol_pitch > st->max_pitch-margin)
                  ol_pitch=st->max_pitch-margin;
               pit_min = ol_pitch-margin+1;
               pit_max = ol_pitch+margin;
            } else {
               pit_min=pit_max=ol_pitch;
            }
         } else {
            pit_min = st->min_pitch;
            pit_max = st->max_pitch;
         }

         /* Force pitch to use only the current frame if needed */
         if (st->bounded_pitch && pit_max>offset)
            pit_max=offset;

         /* Perform pitch search */
         pitch = SUBMODE(ltp_quant)(target, sw, interp_qlpc, bw_lpc1, bw_lpc2,
                                    exc32, SUBMODE(ltp_params), pit_min, pit_max, ol_pitch_coef,
                                    st->lpcSize, st->subframeSize, bits, stack,
                                    exc, syn_resp, st->complexity, 0, st->plc_tuning, &st->cumul_gain);

         st->pitch[sub]=pitch;
      }
      /* Quantization of innovation */
      SPEEX_MEMSET(innov, 0, st->subframeSize);

      /* FIXME: Make sure this is safe from overflows (so far so good) */
      for (i=0;i<st->subframeSize;i++)
         real_exc[i] = EXTRACT16(SUB32(EXTEND32(real_exc[i]), PSHR32(exc32[i],SIG_SHIFT-1)));

      ener = SHL32(EXTEND32(compute_rms16(real_exc, st->subframeSize)),SIG_SHIFT);

      /*FIXME: Should use DIV32_16 and make sure result fits in 16 bits */
#ifdef FIXED_POINT
      {
         spx_word32_t f = PDIV32(ener,PSHR32(ol_gain,SIG_SHIFT));
         if (f<=32767)
            fine_gain = f;
         else
            fine_gain = 32767;
      }
#else
      fine_gain = PDIV32_16(ener,PSHR32(ol_gain,SIG_SHIFT));
#endif
      /* Calculate gain correction for the sub-frame (if any) */
      if (SUBMODE(have_subframe_gain))
      {
         int qe;
         if (SUBMODE(have_subframe_gain)==3)
         {
            qe = scal_quant(fine_gain, exc_gain_quant_scal3_bound, 8);
            speex_bits_pack(bits, qe, 3);
            ener=MULT16_32_Q14(exc_gain_quant_scal3[qe],ol_gain);
         } else {
            qe = scal_quant(fine_gain, exc_gain_quant_scal1_bound, 2);
            speex_bits_pack(bits, qe, 1);
            ener=MULT16_32_Q14(exc_gain_quant_scal1[qe],ol_gain);
         }
      } else {
         ener=ol_gain;
      }

      /*printf ("%f %f\n", ener, ol_gain);*/

      /* Normalize innovation */
      signal_div(target, target, ener, st->subframeSize);

      /* Quantize innovation */
      speex_assert (SUBMODE(innovation_quant));
      {
         /* Codebook search */
         SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2,
                                   SUBMODE(innovation_params), st->lpcSize, st->subframeSize,
                                   innov, syn_resp, bits, stack, st->complexity, SUBMODE(double_codebook));

         /* De-normalize innovation and update excitation */
         signal_mul(innov, innov, ener, st->subframeSize);

         for (i=0;i<st->subframeSize;i++)
            exc[i] = EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767));

         /* In some (rare) modes, we do a second search (more bits) to reduce noise even more */
         if (SUBMODE(double_codebook)) {
            char *tmp_stack=stack;
            VARDECL(spx_sig_t *innov2);
            ALLOC(innov2, st->subframeSize, spx_sig_t);
            SPEEX_MEMSET(innov2, 0, st->subframeSize);
            for (i=0;i<st->subframeSize;i++)
               target[i]=MULT16_16_P13(QCONST16(2.2f,13), target[i]);
            SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2,
                                      SUBMODE(innovation_params), st->lpcSize, st->subframeSize,
                                      innov2, syn_resp, bits, stack, st->complexity, 0);
            signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545f,15),ener), st->subframeSize);
            for (i=0;i<st->subframeSize;i++)
               innov[i] = ADD32(innov[i],innov2[i]);
            /* Release innov2 from the scratch stack */
            stack = tmp_stack;
         }
         for (i=0;i<st->subframeSize;i++)
            exc[i] = EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767));
         if (st->innov_rms_save)
         {
            st->innov_rms_save[sub] = compute_rms(innov, st->subframeSize);
         }
      }

      /* Final signal synthesis from excitation */
      iir_mem16(exc, interp_qlpc, sw, st->subframeSize, st->lpcSize, st->mem_sp, stack);

      /* Compute weighted signal again, from synthesized speech (not sure it's the right thing) */
      if (st->complexity!=0)
         filter_mem16(sw, bw_lpc1, bw_lpc2, sw, st->subframeSize, st->lpcSize, st->mem_sw, stack);

   }

   /* Store the LSPs for interpolation in the next frame */
   if (st->submodeID>=1)
   {
      for (i=0;i<st->lpcSize;i++)
         st->old_lsp[i] = lsp[i];
      for (i=0;i<st->lpcSize;i++)
         st->old_qlsp[i] = qlsp[i];
   }

#ifdef VORBIS_PSYCHO
   if (st->submodeID>=1)
      SPEEX_COPY(st->old_curve, st->curve, 128);
#endif

   if (st->submodeID==1)
   {
#ifndef DISABLE_VBR
      if (st->dtx_count)
         speex_bits_pack(bits, 15, 4);
      else
#endif
         speex_bits_pack(bits, 0, 4);
   }

   /* The next frame will not be the first (Duh!) */
   st->first = 0;
   /* Keep the tail of this frame for the next analysis window */
   SPEEX_COPY(st->winBuf, in+2*st->frameSize-st->windowSize, st->windowSize-st->frameSize);

   if (SUBMODE(innovation_quant) == noise_codebook_quant || st->submodeID==0)
      st->bounded_pitch = 1;
   else
      st->bounded_pitch = 0;

   return 1;
}
923
nb_decoder_init(const SpeexMode * m)924 void *nb_decoder_init(const SpeexMode *m)
925 {
926 DecState *st;
927 const SpeexNBMode *mode;
928 int i;
929
930 mode=(const SpeexNBMode*)m->mode;
931 st = (DecState *)speex_alloc(sizeof(DecState));
932 if (!st)
933 return NULL;
934 #if defined(VAR_ARRAYS) || defined (USE_ALLOCA)
935 st->stack = NULL;
936 #else
937 st->stack = (char*)speex_alloc_scratch(NB_DEC_STACK);
938 #endif
939
940 st->mode=m;
941
942
943 st->encode_submode = 1;
944
945 st->first=1;
946 /* Codec parameters, should eventually have several "modes"*/
947 st->frameSize = mode->frameSize;
948 st->nbSubframes=mode->frameSize/mode->subframeSize;
949 st->subframeSize=mode->subframeSize;
950 st->lpcSize = mode->lpcSize;
951 st->min_pitch=mode->pitchStart;
952 st->max_pitch=mode->pitchEnd;
953
954 st->submodes=mode->submodes;
955 st->submodeID=mode->defaultSubmode;
956
957 st->lpc_enh_enabled=1;
958
959 st->excBuf = (spx_word16_t*)speex_alloc((st->frameSize + 2*st->max_pitch + st->subframeSize + 12)*sizeof(spx_word16_t));
960 st->exc = st->excBuf + 2*st->max_pitch + st->subframeSize + 6;
961 SPEEX_MEMSET(st->excBuf, 0, st->frameSize + st->max_pitch);
962
963 st->interp_qlpc = (spx_coef_t*)speex_alloc(st->lpcSize*sizeof(spx_coef_t));
964 st->old_qlsp = (spx_lsp_t*)speex_alloc(st->lpcSize*sizeof(spx_lsp_t));
965 st->mem_sp = (spx_mem_t*)speex_alloc(st->lpcSize*sizeof(spx_mem_t));
966 st->pi_gain = (spx_word32_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word32_t));
967 st->last_pitch = 40;
968 st->count_lost=0;
969 st->pitch_gain_buf[0] = st->pitch_gain_buf[1] = st->pitch_gain_buf[2] = 0;
970 st->pitch_gain_buf_idx = 0;
971 st->seed = 1000;
972
973 st->sampling_rate=8000;
974 st->last_ol_gain = 0;
975
976 st->user_callback.func = &speex_default_user_handler;
977 st->user_callback.data = NULL;
978 for (i=0;i<16;i++)
979 st->speex_callbacks[i].func = NULL;
980
981 st->voc_m1=st->voc_m2=st->voc_mean=0;
982 st->voc_offset=0;
983 st->dtx_enabled=0;
984 st->isWideband = 0;
985 st->highpass_enabled = 1;
986
987 #ifdef ENABLE_VALGRIND
988 VALGRIND_MAKE_READABLE(st, NB_DEC_STACK);
989 #endif
990 return st;
991 }
992
nb_decoder_destroy(void * state)993 void nb_decoder_destroy(void *state)
994 {
995 DecState *st;
996 st=(DecState*)state;
997
998 #if !(defined(VAR_ARRAYS) || defined (USE_ALLOCA))
999 speex_free_scratch(st->stack);
1000 #endif
1001
1002 speex_free (st->excBuf);
1003 speex_free (st->interp_qlpc);
1004 speex_free (st->old_qlsp);
1005 speex_free (st->mem_sp);
1006 speex_free (st->pi_gain);
1007
1008 speex_free(state);
1009 }
1010
/* Median of three values. Each argument may be evaluated more than once,
   so pass only side-effect-free expressions. */
#define median3(a, b, c)	((a) < (b) ? ((b) < (c) ? (b) : ((a) < (c) ? (c) : (a))) : ((c) < (b) ? (b) : ((c) < (a) ? (c) : (a))))
1012
1013 #ifdef FIXED_POINT
1014 const spx_word16_t attenuation[10] = {32767, 31483, 27923, 22861, 17278, 12055, 7764, 4616, 2533, 1283};
1015 #else
1016 const spx_word16_t attenuation[10] = {1., 0.961, 0.852, 0.698, 0.527, 0.368, 0.237, 0.141, 0.077, 0.039};
1017
1018 #endif
1019
/**
 * Synthesizes one frame of output when the corresponding packet was lost.
 *
 * The excitation is rebuilt by repeating the past excitation at a slightly
 * randomized pitch lag, scaled by an attenuated pitch gain, and adding
 * random noise; the result is run through the last synthesis filter.
 *
 * @param st    Decoder state (updated: excitation buffer, filter memories,
 *              pitch-gain history, count_lost, first)
 * @param out   Receives st->frameSize output samples
 * @param stack Scratch stack handed to the filter routine
 */
static void nb_decode_lost(DecState *st, spx_word16_t *out, char *stack)
{
   int n;
   int lag;
   spx_word16_t pitch_gain;
   spx_word16_t atten;
   spx_word16_t med_gain;
   spx_word16_t exc_rms;
   spx_word16_t noise_gain;

   /* The more consecutive frames are lost, the stronger the attenuation;
      from the 10th lost frame on, the factor is zero */
   atten = (st->count_lost<10) ? attenuation[st->count_lost] : 0;

   /* Never let the remembered pitch gain exceed the median of the last three */
   med_gain = median3(st->pitch_gain_buf[0], st->pitch_gain_buf[1], st->pitch_gain_buf[2]);
   if (med_gain < st->last_pitch_gain)
      st->last_pitch_gain = med_gain;

   /* Cap the pitch gain before applying the attenuation */
#ifdef FIXED_POINT
   pitch_gain = st->last_pitch_gain;
   if (pitch_gain>54)
      pitch_gain = 54;
   pitch_gain = SHL16(pitch_gain, 9);
#else
   pitch_gain = GAIN_SCALING_1*st->last_pitch_gain;
   if (pitch_gain>.85)
      pitch_gain=.85;
#endif
   pitch_gain = MULT16_16_Q15(atten,pitch_gain) + VERY_SMALL;

   /* FIXME: This was rms of innovation (not exc) */
   exc_rms = compute_rms16(st->exc, st->frameSize);
   noise_gain = MULT16_16_Q15(exc_rms, MULT16_16_Q15(atten, SUB16(Q15ONE,MULT16_16_Q15(pitch_gain,pitch_gain))));

   /* Slide the excitation history back by one frame */
   SPEEX_MOVE(st->excBuf, st->excBuf+st->frameSize, 2*st->max_pitch + st->subframeSize + 12);

   /* Jitter the last pitch lag a little, then keep it inside the legal range */
   lag = st->last_pitch + SHR32((spx_int32_t)speex_rand(1+st->count_lost, &st->seed),SIG_SHIFT);
   if (lag > st->max_pitch)
      lag = st->max_pitch;
   if (lag < st->min_pitch)
      lag = st->min_pitch;

   /* New excitation = attenuated pitch repetition + noise */
   for (n=0;n<st->frameSize;n++)
   {
      st->exc[n]= MULT16_16_Q15(pitch_gain, (st->exc[n-lag]+VERY_SMALL)) +
            speex_rand(noise_gain, &st->seed);
   }

   /* Apply bw_lpc with a .98 factor to the stored filter (in place), then synthesize */
   bw_lpc(QCONST16(.98,15), st->interp_qlpc, st->interp_qlpc, st->lpcSize);
   iir_mem16(&st->exc[-st->subframeSize], st->interp_qlpc, out, st->frameSize,
             st->lpcSize, st->mem_sp, stack);
   highpass(out, out, st->frameSize, HIGHPASS_NARROWBAND|HIGHPASS_OUTPUT, st->mem_hp);

   st->first = 0;
   st->count_lost++;

   /* Record the gain that was used in the 3-entry circular history */
   st->pitch_gain_buf[st->pitch_gain_buf_idx] = PSHR16(pitch_gain,9);
   st->pitch_gain_buf_idx++;
   if (st->pitch_gain_buf_idx > 2) /* rollover */
      st->pitch_gain_buf_idx = 0;
}
1079
/* Size lookup for embedded wideband blocks, indexed by wideband submode, so
   that the narrowband decoder can skip them without carrying the complete
   wideband mode information.
   NOTE(review): no entry is negative, so the "advance < 0" validity check in
   nb_decode() can never trigger with this table -- confirm whether the unused
   submodes were meant to be flagged with a negative value. */
static const int wb_skip_table[8] = {
   0, 36, 112, 192,
   352, 0, 0, 0
};
1082
/* Skips one embedded wideband block whose leading "wideband" bit has already
   been read: reads the wideband submode, looks up the block size in
   wb_skip_table and advances the bit-stream past the rest of the block.
   Returns 0 on success, -2 (after notifying) if the table marks the submode
   as invalid. */
static int nb_skip_wideband_block(SpeexBits *bits)
{
   int submode;
   int advance;

   submode = speex_bits_unpack_unsigned(bits, SB_SUBMODE_BITS);
   /* The table replaces a query of the wideband mode's bits per frame */
   advance = wb_skip_table[submode];
   if (advance < 0)
   {
      speex_notify("Invalid mode encountered. The stream is corrupted.");
      return -2;
   }
   /* The wideband bit and the submode bits have already been consumed */
   advance -= (SB_SUBMODE_BITS+1);
   speex_bits_advance(bits, advance);
   return 0;
}

/**
 * Decodes one narrowband frame.
 *
 * @param state Decoder state (a DecState)
 * @param bits  Bit-stream to decode from. NULL means the packet is missing:
 *              if DTX is active the frame is decoded as submode 0, otherwise
 *              the frame is concealed by nb_decode_lost().
 * @param vout  Output buffer (spx_word16_t samples) receiving st->frameSize
 *              samples
 * @return 0 on success; -1 if there are not enough bits left or a terminator
 *         was found; -2 if the stream is corrupted; or the non-zero value
 *         returned by an in-band request handler.
 */
int nb_decode(void *state, SpeexBits *bits, void *vout)
{
   DecState *st;
   int i, sub;
   int pitch;
   spx_word16_t pitch_gain[3];
   spx_word32_t ol_gain=0;
   int ol_pitch=0;
   spx_word16_t ol_pitch_coef=0;
   int best_pitch=40;
   spx_word16_t best_pitch_gain=0;
   int wideband;
   int m;
   char *stack;
   VARDECL(spx_sig_t *innov);
   VARDECL(spx_word32_t *exc32);
   VARDECL(spx_coef_t *ak);
   VARDECL(spx_lsp_t *qlsp);
   spx_word16_t pitch_average=0;

   spx_word16_t *out = (spx_word16_t*)vout;
   VARDECL(spx_lsp_t *interp_qlsp);

   st=(DecState*)state;
   stack=st->stack;

   /* Check if we're in DTX mode*/
   if (!bits && st->dtx_enabled)
   {
      st->submodeID=0;
   } else
   {
      /* If bits is NULL, consider the packet to be lost (what could we do anyway) */
      if (!bits)
      {
         nb_decode_lost(st, out, stack);
         return 0;
      }

      if (st->encode_submode)
      {

         /* Search for next narrowband block (handle requests, skip wideband blocks) */
         do {
            if (speex_bits_remaining(bits)<5)
               return -1;
            wideband = speex_bits_unpack_unsigned(bits, 1);
            if (wideband) /* Skip wideband block (for compatibility) */
            {
               if (nb_skip_wideband_block(bits) < 0)
                  return -2;

               if (speex_bits_remaining(bits)<5)
                  return -1;
               wideband = speex_bits_unpack_unsigned(bits, 1);
               if (wideband)
               {
                  /* Second wideband layer */
                  if (nb_skip_wideband_block(bits) < 0)
                     return -2;
                  wideband = speex_bits_unpack_unsigned(bits, 1);
                  if (wideband)
                  {
                     speex_notify("More than two wideband layers found. The stream is corrupted.");
                     return -2;
                  }

               }
            }
            if (speex_bits_remaining(bits)<4)
               return -1;
            /* FIXME: Check for overflow */
            m = speex_bits_unpack_unsigned(bits, 4);
            if (m==15) /* We found a terminator */
            {
               return -1;
            } else if (m==14) /* Speex in-band request */
            {
               int ret = speex_inband_handler(bits, st->speex_callbacks, state);
               if (ret)
                  return ret;
            } else if (m==13) /* User in-band request */
            {
               int ret = st->user_callback.func(bits, state, st->user_callback.data);
               if (ret)
                  return ret;
            } else if (m>8) /* Invalid mode */
            {
               speex_notify("Invalid mode encountered. The stream is corrupted.");
               return -2;
            }

            /* Values above 8 that get here were in-band requests: keep searching */
         } while (m>8);

         /* Get the sub-mode that was used */
         st->submodeID = m;
      }

   }

   /* Shift all buffers by one frame */
   SPEEX_MOVE(st->excBuf, st->excBuf+st->frameSize, 2*st->max_pitch + st->subframeSize + 12);

   /* If null mode (no transmission), just set a couple things to zero*/
   if (st->submodes[st->submodeID] == NULL)
   {
      VARDECL(spx_coef_t *lpc);
      ALLOC(lpc, st->lpcSize, spx_coef_t);
      bw_lpc(QCONST16(0.93f,15), st->interp_qlpc, lpc, st->lpcSize);
      {
         spx_word16_t innov_gain=0;
         /* FIXME: This was innov, not exc */
         innov_gain = compute_rms16(st->exc, st->frameSize);
         /* Replace the excitation by noise scaled by the RMS computed above */
         for (i=0;i<st->frameSize;i++)
            st->exc[i]=speex_rand(innov_gain, &st->seed);
      }


      st->first=1;

      /* Final signal synthesis from excitation */
      iir_mem16(st->exc, lpc, out, st->frameSize, st->lpcSize, st->mem_sp, stack);

      st->count_lost=0;
      return 0;
   }

   ALLOC(qlsp, st->lpcSize, spx_lsp_t);

   /* Unquantize LSPs */
   SUBMODE(lsp_unquant)(qlsp, st->lpcSize, bits);

   /*Damp memory if a frame was lost and the LSP changed too much*/
   if (st->count_lost)
   {
      spx_word16_t fact;
      spx_word32_t lsp_dist=0;
      for (i=0;i<st->lpcSize;i++)
         lsp_dist = ADD32(lsp_dist, EXTEND32(ABS(st->old_qlsp[i] - qlsp[i])));
#ifdef FIXED_POINT
      fact = SHR16(19661,SHR32(lsp_dist,LSP_SHIFT+2));
#else
      fact = .6*exp(-.2*lsp_dist);
#endif
      for (i=0;i<st->lpcSize;i++)
         st->mem_sp[i] = MULT16_32_Q15(fact,st->mem_sp[i]);
   }


   /* Handle first frame and lost-packet case */
   if (st->first || st->count_lost)
   {
      /* No usable previous LSPs: interpolate from the current ones */
      for (i=0;i<st->lpcSize;i++)
         st->old_qlsp[i] = qlsp[i];
   }

   /* Get open-loop pitch estimation for low bit-rate pitch coding */
   if (SUBMODE(lbr_pitch)!=-1)
   {
      ol_pitch = st->min_pitch+speex_bits_unpack_unsigned(bits, 7);
   }

   if (SUBMODE(forced_pitch_gain))
   {
      int quant;
      quant = speex_bits_unpack_unsigned(bits, 4);
      ol_pitch_coef=MULT16_16_P15(QCONST16(0.066667,15),SHL16(quant,GAIN_SHIFT));
   }

   /* Get global excitation gain */
   {
      int qe;
      qe = speex_bits_unpack_unsigned(bits, 5);
#ifdef FIXED_POINT
      /* FIXME: Perhaps we could slightly lower the gain here when the output is going to saturate? */
      ol_gain = MULT16_32_Q15(28406,ol_gain_table[qe]);
#else
      ol_gain = SIG_SCALING*exp(qe/3.5);
#endif
   }

   ALLOC(ak, st->lpcSize, spx_coef_t);
   ALLOC(innov, st->subframeSize, spx_sig_t);
   ALLOC(exc32, st->subframeSize, spx_word32_t);

   /* Submode 1 carries 4 extra bits; the value 15 switches DTX on */
   if (st->submodeID==1)
   {
      int extra;
      extra = speex_bits_unpack_unsigned(bits, 4);

      if (extra==15)
         st->dtx_enabled=1;
      else
         st->dtx_enabled=0;
   }
   if (st->submodeID>1)
      st->dtx_enabled=0;

   /*Loop on subframes */
   for (sub=0;sub<st->nbSubframes;sub++)
   {
      int offset;
      spx_word16_t *exc;
      spx_word16_t *innov_save = NULL;
      spx_word16_t tmp;

      /* Offset relative to start of frame */
      offset = st->subframeSize*sub;
      /* Excitation */
      exc=st->exc+offset;
      /* Optional caller-provided buffer receiving the innovation */
      if (st->innov_save)
         innov_save = st->innov_save+offset;


      /* Reset excitation */
      SPEEX_MEMSET(exc, 0, st->subframeSize);

      /*Adaptive codebook contribution*/
      speex_assert (SUBMODE(ltp_unquant));
      {
         int pit_min, pit_max;
         /* Handle pitch constraints if any */
         if (SUBMODE(lbr_pitch) != -1)
         {
            int margin;
            margin = SUBMODE(lbr_pitch);
            if (margin)
            {
               /* Search window around the open-loop pitch, clamped to the
                  legal pitch range */
               pit_min = ol_pitch-margin+1;
               if (pit_min < st->min_pitch)
                  pit_min = st->min_pitch;
               pit_max = ol_pitch+margin;
               if (pit_max > st->max_pitch)
                  pit_max = st->max_pitch;
            } else {
               pit_min = pit_max = ol_pitch;
            }
         } else {
            pit_min = st->min_pitch;
            pit_max = st->max_pitch;
         }



         SUBMODE(ltp_unquant)(exc, exc32, pit_min, pit_max, ol_pitch_coef, SUBMODE(ltp_params),
                 st->subframeSize, &pitch, &pitch_gain[0], bits, stack,
                 st->count_lost, offset, st->last_pitch_gain, 0);

         /* Ensuring that things aren't blowing up as would happen if e.g. an encoder is
         crafting packets to make us produce NaNs and slow down the decoder (vague DoS threat).
         We can probably be even more aggressive and limit to 15000 or so. */
         sanitize_values32(exc32, NEG32(QCONST32(32000,SIG_SHIFT-1)), QCONST32(32000,SIG_SHIFT-1), st->subframeSize);

         tmp = gain_3tap_to_1tap(pitch_gain);

         pitch_average += tmp;
         /* Track the "best" pitch of the frame: a higher gain wins, with
            relaxed/tightened gain thresholds when the candidate lag and the
            current best are close to a 2x, 3x or 4x multiple of each other */
         if ((tmp>best_pitch_gain&&ABS(2*best_pitch-pitch)>=3&&ABS(3*best_pitch-pitch)>=4&&ABS(4*best_pitch-pitch)>=5)
              || (tmp>MULT16_16_Q15(QCONST16(.6,15),best_pitch_gain)&&(ABS(best_pitch-2*pitch)<3||ABS(best_pitch-3*pitch)<4||ABS(best_pitch-4*pitch)<5))
              || (MULT16_16_Q15(QCONST16(.67,15),tmp)>best_pitch_gain&&(ABS(2*best_pitch-pitch)<3||ABS(3*best_pitch-pitch)<4||ABS(4*best_pitch-pitch)<5)) )
         {
            best_pitch = pitch;
            if (tmp > best_pitch_gain)
               best_pitch_gain = tmp;
         }
      }

      /* Unquantize the innovation */
      {
         int q_energy;
         spx_word32_t ener;

         SPEEX_MEMSET(innov, 0, st->subframeSize);

         /* Decode sub-frame gain correction */
         if (SUBMODE(have_subframe_gain)==3)
         {
            q_energy = speex_bits_unpack_unsigned(bits, 3);
            ener = MULT16_32_Q14(exc_gain_quant_scal3[q_energy],ol_gain);
         } else if (SUBMODE(have_subframe_gain)==1)
         {
            q_energy = speex_bits_unpack_unsigned(bits, 1);
            ener = MULT16_32_Q14(exc_gain_quant_scal1[q_energy],ol_gain);
         } else {
            ener = ol_gain;
         }

         speex_assert (SUBMODE(innovation_unquant));
         {
            /*Fixed codebook contribution*/
            SUBMODE(innovation_unquant)(innov, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed);
            /* De-normalize innovation and update excitation */

            signal_mul(innov, innov, ener, st->subframeSize);

            /* Decode second codebook (only for some modes) */
            if (SUBMODE(double_codebook))
            {
               /* Remember the stack position so innov2 is released afterwards */
               char *tmp_stack=stack;
               VARDECL(spx_sig_t *innov2);
               ALLOC(innov2, st->subframeSize, spx_sig_t);
               SPEEX_MEMSET(innov2, 0, st->subframeSize);
               SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed);
               signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545f,15),ener), st->subframeSize);
               for (i=0;i<st->subframeSize;i++)
                  innov[i] = ADD32(innov[i], innov2[i]);
               stack = tmp_stack;
            }
            /* Excitation = adaptive contribution + innovation, saturated to 16 bits */
            for (i=0;i<st->subframeSize;i++)
               exc[i]=EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767));
            /*print_vec(exc, 40, "innov");*/
            if (innov_save)
            {
               for (i=0;i<st->subframeSize;i++)
                  innov_save[i] = EXTRACT16(PSHR32(innov[i], SIG_SHIFT));
            }
         }

         /*Vocoder mode*/
         if (st->submodeID==1)
         {
            spx_word16_t g=ol_pitch_coef;
            g=MULT16_16_P14(QCONST16(1.5f,14),(g-QCONST16(.2f,6)));
            if (g<0)
               g=0;
            if (g>GAIN_SCALING)
               g=GAIN_SCALING;

            SPEEX_MEMSET(exc, 0, st->subframeSize);
            /* Place one pulse every ol_pitch samples; voc_offset carries the
               position of the next pulse across subframes */
            while (st->voc_offset<st->subframeSize)
            {
               /* exc[st->voc_offset]= g*sqrt(2*ol_pitch)*ol_gain;
                  Not quite sure why we need the factor of two in the sqrt */
               if (st->voc_offset>=0)
                  exc[st->voc_offset]=MULT16_16(spx_sqrt(MULT16_16_16(2,ol_pitch)),EXTRACT16(PSHR32(MULT16_16(g,PSHR32(ol_gain,SIG_SHIFT)),6)));
               st->voc_offset+=ol_pitch;
            }
            st->voc_offset -= st->subframeSize;

            /* Mix the pulse train with the innovation and remove the running mean */
            for (i=0;i<st->subframeSize;i++)
            {
               spx_word16_t exci=exc[i];
               exc[i]= ADD16(ADD16(MULT16_16_Q15(QCONST16(.7f,15),exc[i]) , MULT16_16_Q15(QCONST16(.3f,15),st->voc_m1)),
                             SUB16(MULT16_16_Q15(Q15_ONE-MULT16_16_16(QCONST16(.85f,9),g),EXTRACT16(PSHR32(innov[i],SIG_SHIFT))),
                                   MULT16_16_Q15(MULT16_16_16(QCONST16(.15f,9),g),EXTRACT16(PSHR32(st->voc_m2,SIG_SHIFT)))
                                  ));
               st->voc_m1 = exci;
               st->voc_m2=innov[i];
               st->voc_mean = EXTRACT16(PSHR32(ADD32(MULT16_16(QCONST16(.8f,15),st->voc_mean), MULT16_16(QCONST16(.2f,15),exc[i])), 15));
               exc[i]-=st->voc_mean;
            }
         }

      }
   }

   ALLOC(interp_qlsp, st->lpcSize, spx_lsp_t);

   /* Either enhance the (one-subframe delayed) excitation with multicomb(),
      or copy it to the output unchanged */
   if (st->lpc_enh_enabled && SUBMODE(comb_gain)>0 && !st->count_lost)
   {
      multicomb(st->exc-st->subframeSize, out, st->interp_qlpc, st->lpcSize, 2*st->subframeSize, best_pitch, 40, SUBMODE(comb_gain), stack);
      multicomb(st->exc+st->subframeSize, out+2*st->subframeSize, st->interp_qlpc, st->lpcSize, 2*st->subframeSize, best_pitch, 40, SUBMODE(comb_gain), stack);
   } else {
      SPEEX_COPY(out, &st->exc[-st->subframeSize], st->frameSize);
   }

   /* If the last packet was lost, re-scale the excitation to obtain the same energy as encoded in ol_gain */
   if (st->count_lost)
   {
      spx_word16_t exc_ener;
      spx_word32_t gain32;
      spx_word16_t gain;
      exc_ener = compute_rms16 (st->exc, st->frameSize);
      gain32 = PDIV32(ol_gain, ADD16(exc_ener,1));
#ifdef FIXED_POINT
      if (gain32 > 32767)
         gain32 = 32767;
      gain = EXTRACT16(gain32);
#else
      if (gain32 > 2)
         gain32=2;
      gain = gain32;
#endif
      for (i=0;i<st->frameSize;i++)
      {
         st->exc[i] = MULT16_16_Q14(gain, st->exc[i]);
         out[i]=st->exc[i-st->subframeSize];
      }
   }

   /*Loop on subframes */
   for (sub=0;sub<st->nbSubframes;sub++)
   {
      int offset;
      spx_word16_t *sp;
      /* Offset relative to start of frame */
      offset = st->subframeSize*sub;
      /* Output signal for this subframe (filtered in place below) */
      sp=out+offset;

      /* LSP interpolation (quantized and unquantized) */
      lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpcSize, sub, st->nbSubframes);

      /* Make sure the LSP's are stable */
      lsp_enforce_margin(interp_qlsp, st->lpcSize, LSP_MARGIN);

      /* Compute interpolated LPCs (unquantized) */
      lsp_to_lpc(interp_qlsp, ak, st->lpcSize, stack);

      /* Compute analysis filter at w=pi */
      {
         spx_word32_t pi_g=LPC_SCALING;
         for (i=0;i<st->lpcSize;i+=2)
         {
            /*pi_g += -st->interp_qlpc[i] +  st->interp_qlpc[i+1];*/
            pi_g = ADD32(pi_g, SUB32(EXTEND32(ak[i+1]),EXTEND32(ak[i])));
         }
         st->pi_gain[sub] = pi_g;
      }

      /* Synthesis uses the coefficients stored by the previous subframe;
         the new ones are saved for the next one */
      iir_mem16(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
                st->mem_sp, stack);

      for (i=0;i<st->lpcSize;i++)
         st->interp_qlpc[i] = ak[i];

   }

   if (st->highpass_enabled)
      highpass(out, out, st->frameSize, (st->isWideband?HIGHPASS_WIDEBAND:HIGHPASS_NARROWBAND)|HIGHPASS_OUTPUT, st->mem_hp);
   /*for (i=0;i<st->frameSize;i++)
     printf ("%d\n", (int)st->frame[i]);*/

   /* Tracking output level */
   st->level = 1+PSHR32(ol_gain,SIG_SHIFT);
   st->max_level = MAX16(MULT16_16_Q15(QCONST16(.99f,15), st->max_level), st->level);
   st->min_level = MIN16(ADD16(1,MULT16_16_Q14(QCONST16(1.01f,14), st->min_level)), st->level);
   if (st->max_level < st->min_level+1)
      st->max_level = st->min_level+1;
   /*printf ("%f %f %f %d\n", og, st->min_level, st->max_level, update);*/

   /* Store the LSPs for interpolation in the next frame */
   for (i=0;i<st->lpcSize;i++)
      st->old_qlsp[i] = qlsp[i];

   /* The next frame will not be the first (Duh!) */
   st->first = 0;
   st->count_lost=0;
   st->last_pitch = best_pitch;
   /* Average pitch gain (the sum is divided by 4) */
#ifdef FIXED_POINT
   st->last_pitch_gain = PSHR16(pitch_average,2);
#else
   st->last_pitch_gain = .25*pitch_average;
#endif
   st->pitch_gain_buf[st->pitch_gain_buf_idx++] = st->last_pitch_gain;
   if (st->pitch_gain_buf_idx > 2) /* rollover */
      st->pitch_gain_buf_idx = 0;

   st->last_ol_gain = ol_gain;

   return 0;
}
1575
nb_encoder_ctl(void * state,int request,void * ptr)1576 int nb_encoder_ctl(void *state, int request, void *ptr)
1577 {
1578 EncState *st;
1579 st=(EncState*)state;
1580 switch(request)
1581 {
1582 case SPEEX_GET_FRAME_SIZE:
1583 (*(spx_int32_t*)ptr) = st->frameSize;
1584 break;
1585 case SPEEX_SET_LOW_MODE:
1586 case SPEEX_SET_MODE:
1587 st->submodeSelect = st->submodeID = (*(spx_int32_t*)ptr);
1588 break;
1589 case SPEEX_GET_LOW_MODE:
1590 case SPEEX_GET_MODE:
1591 (*(spx_int32_t*)ptr) = st->submodeID;
1592 break;
1593 #ifndef DISABLE_VBR
1594 case SPEEX_SET_VBR:
1595 st->vbr_enabled = (*(spx_int32_t*)ptr);
1596 break;
1597 case SPEEX_GET_VBR:
1598 (*(spx_int32_t*)ptr) = st->vbr_enabled;
1599 break;
1600 case SPEEX_SET_VAD:
1601 st->vad_enabled = (*(spx_int32_t*)ptr);
1602 break;
1603 case SPEEX_GET_VAD:
1604 (*(spx_int32_t*)ptr) = st->vad_enabled;
1605 break;
1606 case SPEEX_SET_DTX:
1607 st->dtx_enabled = (*(spx_int32_t*)ptr);
1608 break;
1609 case SPEEX_GET_DTX:
1610 (*(spx_int32_t*)ptr) = st->dtx_enabled;
1611 break;
1612 case SPEEX_SET_ABR:
1613 st->abr_enabled = (*(spx_int32_t*)ptr);
1614 st->vbr_enabled = st->abr_enabled!=0;
1615 if (st->vbr_enabled)
1616 {
1617 spx_int32_t i=10;
1618 spx_int32_t rate, target;
1619 float vbr_qual;
1620 target = (*(spx_int32_t*)ptr);
1621 while (i>=0)
1622 {
1623 speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i);
1624 speex_encoder_ctl(st, SPEEX_GET_BITRATE, &rate);
1625 if (rate <= target)
1626 break;
1627 i--;
1628 }
1629 vbr_qual=i;
1630 if (vbr_qual<0)
1631 vbr_qual=0;
1632 speex_encoder_ctl(st, SPEEX_SET_VBR_QUALITY, &vbr_qual);
1633 st->abr_count=0;
1634 st->abr_drift=0;
1635 st->abr_drift2=0;
1636 }
1637
1638 break;
1639 case SPEEX_GET_ABR:
1640 (*(spx_int32_t*)ptr) = st->abr_enabled;
1641 break;
1642 #endif /* #ifndef DISABLE_VBR */
1643 #if !defined(DISABLE_VBR) && !defined(DISABLE_FLOAT_API)
1644 case SPEEX_SET_VBR_QUALITY:
1645 st->vbr_quality = (*(float*)ptr);
1646 break;
1647 case SPEEX_GET_VBR_QUALITY:
1648 (*(float*)ptr) = st->vbr_quality;
1649 break;
1650 #endif /* !defined(DISABLE_VBR) && !defined(DISABLE_FLOAT_API) */
1651 case SPEEX_SET_QUALITY:
1652 {
1653 int quality = (*(spx_int32_t*)ptr);
1654 if (quality < 0)
1655 quality = 0;
1656 if (quality > 10)
1657 quality = 10;
1658 st->submodeSelect = st->submodeID = ((const SpeexNBMode*)(st->mode->mode))->quality_map[quality];
1659 }
1660 break;
1661 case SPEEX_SET_COMPLEXITY:
1662 st->complexity = (*(spx_int32_t*)ptr);
1663 if (st->complexity<0)
1664 st->complexity=0;
1665 break;
1666 case SPEEX_GET_COMPLEXITY:
1667 (*(spx_int32_t*)ptr) = st->complexity;
1668 break;
1669 case SPEEX_SET_BITRATE:
1670 {
1671 spx_int32_t i=10;
1672 spx_int32_t rate, target;
1673 target = (*(spx_int32_t*)ptr);
1674 while (i>=0)
1675 {
1676 speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i);
1677 speex_encoder_ctl(st, SPEEX_GET_BITRATE, &rate);
1678 if (rate <= target)
1679 break;
1680 i--;
1681 }
1682 }
1683 break;
1684 case SPEEX_GET_BITRATE:
1685 if (st->submodes[st->submodeID])
1686 (*(spx_int32_t*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize;
1687 else
1688 (*(spx_int32_t*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize;
1689 break;
1690 case SPEEX_SET_SAMPLING_RATE:
1691 st->sampling_rate = (*(spx_int32_t*)ptr);
1692 break;
1693 case SPEEX_GET_SAMPLING_RATE:
1694 (*(spx_int32_t*)ptr)=st->sampling_rate;
1695 break;
1696 case SPEEX_RESET_STATE:
1697 {
1698 int i;
1699 st->bounded_pitch = 1;
1700 st->first = 1;
1701 for (i=0;i<st->lpcSize;i++)
1702 st->old_lsp[i]= DIV32(MULT16_16(QCONST16(3.1415927f, LSP_SHIFT), i+1), st->lpcSize+1);
1703 for (i=0;i<st->lpcSize;i++)
1704 st->mem_sw[i]=st->mem_sw_whole[i]=st->mem_sp[i]=st->mem_exc[i]=0;
1705 for (i=0;i<st->frameSize+st->max_pitch+1;i++)
1706 st->excBuf[i]=st->swBuf[i]=0;
1707 for (i=0;i<st->windowSize-st->frameSize;i++)
1708 st->winBuf[i]=0;
1709 }
1710 break;
1711 case SPEEX_SET_SUBMODE_ENCODING:
1712 st->encode_submode = (*(spx_int32_t*)ptr);
1713 break;
1714 case SPEEX_GET_SUBMODE_ENCODING:
1715 (*(spx_int32_t*)ptr) = st->encode_submode;
1716 break;
1717 case SPEEX_GET_LOOKAHEAD:
1718 (*(spx_int32_t*)ptr)=(st->windowSize-st->frameSize);
1719 break;
1720 case SPEEX_SET_PLC_TUNING:
1721 st->plc_tuning = (*(spx_int32_t*)ptr);
1722 if (st->plc_tuning>100)
1723 st->plc_tuning=100;
1724 break;
1725 case SPEEX_GET_PLC_TUNING:
1726 (*(spx_int32_t*)ptr)=(st->plc_tuning);
1727 break;
1728 #ifndef DISABLE_VBR
1729 case SPEEX_SET_VBR_MAX_BITRATE:
1730 st->vbr_max = (*(spx_int32_t*)ptr);
1731 break;
1732 case SPEEX_GET_VBR_MAX_BITRATE:
1733 (*(spx_int32_t*)ptr) = st->vbr_max;
1734 break;
1735 #endif /* #ifndef DISABLE_VBR */
1736 case SPEEX_SET_HIGHPASS:
1737 st->highpass_enabled = (*(spx_int32_t*)ptr);
1738 break;
1739 case SPEEX_GET_HIGHPASS:
1740 (*(spx_int32_t*)ptr) = st->highpass_enabled;
1741 break;
1742
1743 /* This is all internal stuff past this point */
1744 case SPEEX_GET_PI_GAIN:
1745 {
1746 int i;
1747 spx_word32_t *g = (spx_word32_t*)ptr;
1748 for (i=0;i<st->nbSubframes;i++)
1749 g[i]=st->pi_gain[i];
1750 }
1751 break;
1752 case SPEEX_GET_EXC:
1753 {
1754 int i;
1755 for (i=0;i<st->nbSubframes;i++)
1756 ((spx_word16_t*)ptr)[i] = compute_rms16(st->exc+i*st->subframeSize, st->subframeSize);
1757 }
1758 break;
1759 #ifndef DISABLE_VBR
1760 case SPEEX_GET_RELATIVE_QUALITY:
1761 (*(float*)ptr)=st->relative_quality;
1762 break;
1763 #endif /* #ifndef DISABLE_VBR */
1764 case SPEEX_SET_INNOVATION_SAVE:
1765 st->innov_rms_save = (spx_word16_t*)ptr;
1766 break;
1767 case SPEEX_SET_WIDEBAND:
1768 st->isWideband = *((spx_int32_t*)ptr);
1769 break;
1770 case SPEEX_GET_STACK:
1771 *((char**)ptr) = st->stack;
1772 break;
1773 default:
1774 speex_warning_int("Unknown nb_ctl request: ", request);
1775 return -1;
1776 }
1777 return 0;
1778 }
1779
nb_decoder_ctl(void * state,int request,void * ptr)1780 int nb_decoder_ctl(void *state, int request, void *ptr)
1781 {
1782 DecState *st;
1783 st=(DecState*)state;
1784 switch(request)
1785 {
1786 case SPEEX_SET_LOW_MODE:
1787 case SPEEX_SET_MODE:
1788 st->submodeID = (*(spx_int32_t*)ptr);
1789 break;
1790 case SPEEX_GET_LOW_MODE:
1791 case SPEEX_GET_MODE:
1792 (*(spx_int32_t*)ptr) = st->submodeID;
1793 break;
1794 case SPEEX_SET_ENH:
1795 st->lpc_enh_enabled = *((spx_int32_t*)ptr);
1796 break;
1797 case SPEEX_GET_ENH:
1798 *((spx_int32_t*)ptr) = st->lpc_enh_enabled;
1799 break;
1800 case SPEEX_GET_FRAME_SIZE:
1801 (*(spx_int32_t*)ptr) = st->frameSize;
1802 break;
1803 case SPEEX_GET_BITRATE:
1804 if (st->submodes[st->submodeID])
1805 (*(spx_int32_t*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize;
1806 else
1807 (*(spx_int32_t*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize;
1808 break;
1809 case SPEEX_SET_SAMPLING_RATE:
1810 st->sampling_rate = (*(spx_int32_t*)ptr);
1811 break;
1812 case SPEEX_GET_SAMPLING_RATE:
1813 (*(spx_int32_t*)ptr)=st->sampling_rate;
1814 break;
1815 case SPEEX_SET_HANDLER:
1816 {
1817 SpeexCallback *c = (SpeexCallback*)ptr;
1818 st->speex_callbacks[c->callback_id].func=c->func;
1819 st->speex_callbacks[c->callback_id].data=c->data;
1820 st->speex_callbacks[c->callback_id].callback_id=c->callback_id;
1821 }
1822 break;
1823 case SPEEX_SET_USER_HANDLER:
1824 {
1825 SpeexCallback *c = (SpeexCallback*)ptr;
1826 st->user_callback.func=c->func;
1827 st->user_callback.data=c->data;
1828 st->user_callback.callback_id=c->callback_id;
1829 }
1830 break;
1831 case SPEEX_RESET_STATE:
1832 {
1833 int i;
1834 for (i=0;i<st->lpcSize;i++)
1835 st->mem_sp[i]=0;
1836 for (i=0;i<st->frameSize + st->max_pitch + 1;i++)
1837 st->excBuf[i]=0;
1838 }
1839 break;
1840 case SPEEX_SET_SUBMODE_ENCODING:
1841 st->encode_submode = (*(spx_int32_t*)ptr);
1842 break;
1843 case SPEEX_GET_SUBMODE_ENCODING:
1844 (*(spx_int32_t*)ptr) = st->encode_submode;
1845 break;
1846 case SPEEX_GET_LOOKAHEAD:
1847 (*(spx_int32_t*)ptr)=st->subframeSize;
1848 break;
1849 case SPEEX_SET_HIGHPASS:
1850 st->highpass_enabled = (*(spx_int32_t*)ptr);
1851 break;
1852 case SPEEX_GET_HIGHPASS:
1853 (*(spx_int32_t*)ptr) = st->highpass_enabled;
1854 break;
1855 /* FIXME: Convert to fixed-point and re-enable even when float API is disabled */
1856 #ifndef DISABLE_FLOAT_API
1857 case SPEEX_GET_ACTIVITY:
1858 {
1859 float ret;
1860 ret = log(st->level/st->min_level)/log(st->max_level/st->min_level);
1861 if (ret>1)
1862 ret = 1;
1863 /* Done in a strange way to catch NaNs as well */
1864 if (!(ret > 0))
1865 ret = 0;
1866 /*printf ("%f %f %f %f\n", st->level, st->min_level, st->max_level, ret);*/
1867 (*(spx_int32_t*)ptr) = (int)(100*ret);
1868 }
1869 break;
1870 #endif
1871 case SPEEX_GET_PI_GAIN:
1872 {
1873 int i;
1874 spx_word32_t *g = (spx_word32_t*)ptr;
1875 for (i=0;i<st->nbSubframes;i++)
1876 g[i]=st->pi_gain[i];
1877 }
1878 break;
1879 case SPEEX_GET_EXC:
1880 {
1881 int i;
1882 for (i=0;i<st->nbSubframes;i++)
1883 ((spx_word16_t*)ptr)[i] = compute_rms16(st->exc+i*st->subframeSize, st->subframeSize);
1884 }
1885 break;
1886 case SPEEX_GET_DTX_STATUS:
1887 *((spx_int32_t*)ptr) = st->dtx_enabled;
1888 break;
1889 case SPEEX_SET_INNOVATION_SAVE:
1890 st->innov_save = (spx_word16_t*)ptr;
1891 break;
1892 case SPEEX_SET_WIDEBAND:
1893 st->isWideband = *((spx_int32_t*)ptr);
1894 break;
1895 case SPEEX_GET_STACK:
1896 *((char**)ptr) = st->stack;
1897 break;
1898 default:
1899 speex_warning_int("Unknown nb_ctl request: ", request);
1900 return -1;
1901 }
1902 return 0;
1903 }
1904