1 /* Copyright (C) 2002-2006 Jean-Marc Valin
2 File: filters.c
3 Various analysis/synthesis filters
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 - Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 - Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 - Neither the name of the Xiph.org Foundation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
24 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifdef HAVE_CONFIG_H
34 #include "config.h"
35 #endif
36
37 #include "filters.h"
38 #include "stack_alloc.h"
39 #include "arch.h"
40 #include "math_approx.h"
41 #include "ltp.h"
42 #include <math.h>
43
44 #ifdef _USE_SSE
45 #include "filters_sse.h"
46 #elif defined (ARM4_ASM) || defined(ARM5E_ASM)
47 #include "filters_arm4.h"
48 #elif defined (BFIN_ASM)
49 #include "filters_bfin.h"
50 #endif
51
52
53
bw_lpc(spx_word16_t gamma,const spx_coef_t * lpc_in,spx_coef_t * lpc_out,int order)54 void bw_lpc(spx_word16_t gamma, const spx_coef_t *lpc_in, spx_coef_t *lpc_out, int order)
55 {
56 int i;
57 spx_word16_t tmp=gamma;
58 for (i=0;i<order;i++)
59 {
60 lpc_out[i] = MULT16_16_P15(tmp,lpc_in[i]);
61 tmp = MULT16_16_P15(tmp, gamma);
62 }
63 }
64
sanitize_values32(spx_word32_t * vec,spx_word32_t min_val,spx_word32_t max_val,int len)65 void sanitize_values32(spx_word32_t *vec, spx_word32_t min_val, spx_word32_t max_val, int len)
66 {
67 int i;
68 for (i=0;i<len;i++)
69 {
70 /* It's important we do the test that way so we can catch NaNs, which are neither greater nor smaller */
71 if (!(vec[i]>=min_val && vec[i] <= max_val))
72 {
73 if (vec[i] < min_val)
74 vec[i] = min_val;
75 else if (vec[i] > max_val)
76 vec[i] = max_val;
77 else /* Has to be NaN */
78 vec[i] = 0;
79 }
80 }
81 }
82
highpass(const spx_word16_t * x,spx_word16_t * y,int len,int filtID,spx_mem_t * mem)83 void highpass(const spx_word16_t *x, spx_word16_t *y, int len, int filtID, spx_mem_t *mem)
84 {
85 int i;
86 #ifdef FIXED_POINT
87 const spx_word16_t Pcoef[5][3] = {{16384, -31313, 14991}, {16384, -31569, 15249}, {16384, -31677, 15328}, {16384, -32313, 15947}, {16384, -22446, 6537}};
88 const spx_word16_t Zcoef[5][3] = {{15672, -31344, 15672}, {15802, -31601, 15802}, {15847, -31694, 15847}, {16162, -32322, 16162}, {14418, -28836, 14418}};
89 #else
90 const spx_word16_t Pcoef[5][3] = {{1.00000f, -1.91120f, 0.91498f}, {1.00000f, -1.92683f, 0.93071f}, {1.00000f, -1.93338f, 0.93553f}, {1.00000f, -1.97226f, 0.97332f}, {1.00000f, -1.37000f, 0.39900f}};
91 const spx_word16_t Zcoef[5][3] = {{0.95654f, -1.91309f, 0.95654f}, {0.96446f, -1.92879f, 0.96446f}, {0.96723f, -1.93445f, 0.96723f}, {0.98645f, -1.97277f, 0.98645f}, {0.88000f, -1.76000f, 0.88000f}};
92 #endif
93 const spx_word16_t *den, *num;
94 if (filtID>4)
95 filtID=4;
96
97 den = Pcoef[filtID]; num = Zcoef[filtID];
98 /*return;*/
99 for (i=0;i<len;i++)
100 {
101 spx_word16_t yi;
102 spx_word32_t vout = ADD32(MULT16_16(num[0], x[i]),mem[0]);
103 yi = EXTRACT16(SATURATE(PSHR32(vout,14),32767));
104 mem[0] = ADD32(MAC16_16(mem[1], num[1],x[i]), SHL32(MULT16_32_Q15(-den[1],vout),1));
105 mem[1] = ADD32(MULT16_16(num[2],x[i]), SHL32(MULT16_32_Q15(-den[2],vout),1));
106 y[i] = yi;
107 }
108 }
109
110 #ifdef FIXED_POINT
111
112 /* FIXME: These functions are ugly and probably introduce too much error */
signal_mul(const spx_sig_t * x,spx_sig_t * y,spx_word32_t scale,int len)113 void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
114 {
115 int i;
116 for (i=0;i<len;i++)
117 {
118 y[i] = SHL32(MULT16_32_Q14(EXTRACT16(SHR32(x[i],7)),scale),7);
119 }
120 }
121
signal_div(const spx_word16_t * x,spx_word16_t * y,spx_word32_t scale,int len)122 void signal_div(const spx_word16_t *x, spx_word16_t *y, spx_word32_t scale, int len)
123 {
124 int i;
125 if (scale > SHL32(EXTEND32(SIG_SCALING), 8))
126 {
127 spx_word16_t scale_1;
128 scale = PSHR32(scale, SIG_SHIFT);
129 scale_1 = EXTRACT16(PDIV32_16(SHL32(EXTEND32(SIG_SCALING),7),scale));
130 for (i=0;i<len;i++)
131 {
132 y[i] = MULT16_16_P15(scale_1, x[i]);
133 }
134 } else if (scale > SHR32(EXTEND32(SIG_SCALING), 2)) {
135 spx_word16_t scale_1;
136 scale = PSHR32(scale, SIG_SHIFT-5);
137 scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),3),scale);
138 for (i=0;i<len;i++)
139 {
140 y[i] = PSHR32(MULT16_16(scale_1, SHL16(x[i],2)),8);
141 }
142 } else {
143 spx_word16_t scale_1;
144 scale = PSHR32(scale, SIG_SHIFT-7);
145 if (scale < 5)
146 scale = 5;
147 scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),3),scale);
148 for (i=0;i<len;i++)
149 {
150 y[i] = PSHR32(MULT16_16(scale_1, SHL16(x[i],2)),6);
151 }
152 }
153 }
154
155 #else
156
signal_mul(const spx_sig_t * x,spx_sig_t * y,spx_word32_t scale,int len)157 void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
158 {
159 int i;
160 for (i=0;i<len;i++)
161 y[i] = scale*x[i];
162 }
163
signal_div(const spx_sig_t * x,spx_sig_t * y,spx_word32_t scale,int len)164 void signal_div(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
165 {
166 int i;
167 float scale_1 = 1/scale;
168 for (i=0;i<len;i++)
169 y[i] = scale_1*x[i];
170 }
171 #endif
172
173
174
175 #ifdef FIXED_POINT
176
177
178
compute_rms(const spx_sig_t * x,int len)179 spx_word16_t compute_rms(const spx_sig_t *x, int len)
180 {
181 int i;
182 spx_word32_t sum=0;
183 spx_sig_t max_val=1;
184 int sig_shift;
185
186 for (i=0;i<len;i++)
187 {
188 spx_sig_t tmp = x[i];
189 if (tmp<0)
190 tmp = -tmp;
191 if (tmp > max_val)
192 max_val = tmp;
193 }
194
195 sig_shift=0;
196 while (max_val>16383)
197 {
198 sig_shift++;
199 max_val >>= 1;
200 }
201
202 for (i=0;i<len;i+=4)
203 {
204 spx_word32_t sum2=0;
205 spx_word16_t tmp;
206 tmp = EXTRACT16(SHR32(x[i],sig_shift));
207 sum2 = MAC16_16(sum2,tmp,tmp);
208 tmp = EXTRACT16(SHR32(x[i+1],sig_shift));
209 sum2 = MAC16_16(sum2,tmp,tmp);
210 tmp = EXTRACT16(SHR32(x[i+2],sig_shift));
211 sum2 = MAC16_16(sum2,tmp,tmp);
212 tmp = EXTRACT16(SHR32(x[i+3],sig_shift));
213 sum2 = MAC16_16(sum2,tmp,tmp);
214 sum = ADD32(sum,SHR32(sum2,6));
215 }
216
217 return EXTRACT16(PSHR32(SHL32(EXTEND32(spx_sqrt(DIV32(sum,len))),(sig_shift+3)),SIG_SHIFT));
218 }
219
compute_rms16(const spx_word16_t * x,int len)220 spx_word16_t compute_rms16(const spx_word16_t *x, int len)
221 {
222 int i;
223 spx_word16_t max_val=10;
224
225 for (i=0;i<len;i++)
226 {
227 spx_sig_t tmp = x[i];
228 if (tmp<0)
229 tmp = -tmp;
230 if (tmp > max_val)
231 max_val = tmp;
232 }
233 if (max_val>16383)
234 {
235 spx_word32_t sum=0;
236 for (i=0;i<len;i+=4)
237 {
238 spx_word32_t sum2=0;
239 sum2 = MAC16_16(sum2,SHR16(x[i],1),SHR16(x[i],1));
240 sum2 = MAC16_16(sum2,SHR16(x[i+1],1),SHR16(x[i+1],1));
241 sum2 = MAC16_16(sum2,SHR16(x[i+2],1),SHR16(x[i+2],1));
242 sum2 = MAC16_16(sum2,SHR16(x[i+3],1),SHR16(x[i+3],1));
243 sum = ADD32(sum,SHR32(sum2,6));
244 }
245 return SHL16(spx_sqrt(DIV32(sum,len)),4);
246 } else {
247 spx_word32_t sum=0;
248 int sig_shift=0;
249 if (max_val < 8192)
250 sig_shift=1;
251 if (max_val < 4096)
252 sig_shift=2;
253 if (max_val < 2048)
254 sig_shift=3;
255 for (i=0;i<len;i+=4)
256 {
257 spx_word32_t sum2=0;
258 sum2 = MAC16_16(sum2,SHL16(x[i],sig_shift),SHL16(x[i],sig_shift));
259 sum2 = MAC16_16(sum2,SHL16(x[i+1],sig_shift),SHL16(x[i+1],sig_shift));
260 sum2 = MAC16_16(sum2,SHL16(x[i+2],sig_shift),SHL16(x[i+2],sig_shift));
261 sum2 = MAC16_16(sum2,SHL16(x[i+3],sig_shift),SHL16(x[i+3],sig_shift));
262 sum = ADD32(sum,SHR32(sum2,6));
263 }
264 return SHL16(spx_sqrt(DIV32(sum,len)),3-sig_shift);
265 }
266 }
267
268 #ifndef OVERRIDE_NORMALIZE16
normalize16(const spx_sig_t * x,spx_word16_t * y,spx_sig_t max_scale,int len)269 int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len)
270 {
271 int i;
272 spx_sig_t max_val=1;
273 int sig_shift;
274
275 for (i=0;i<len;i++)
276 {
277 spx_sig_t tmp = x[i];
278 if (tmp<0)
279 tmp = NEG32(tmp);
280 if (tmp >= max_val)
281 max_val = tmp;
282 }
283
284 sig_shift=0;
285 while (max_val>max_scale)
286 {
287 sig_shift++;
288 max_val >>= 1;
289 }
290
291 for (i=0;i<len;i++)
292 y[i] = EXTRACT16(SHR32(x[i], sig_shift));
293
294 return sig_shift;
295 }
296 #endif
297
298 #else
299
compute_rms(const spx_sig_t * x,int len)300 spx_word16_t compute_rms(const spx_sig_t *x, int len)
301 {
302 int i;
303 float sum=0;
304 for (i=0;i<len;i++)
305 {
306 sum += x[i]*x[i];
307 }
308 return sqrt(.1+sum/len);
309 }
compute_rms16(const spx_word16_t * x,int len)310 spx_word16_t compute_rms16(const spx_word16_t *x, int len)
311 {
312 return compute_rms(x, len);
313 }
314 #endif
315
316
317
318 #ifndef OVERRIDE_FILTER_MEM16
filter_mem16(const spx_word16_t * x,const spx_coef_t * num,const spx_coef_t * den,spx_word16_t * y,int N,int ord,spx_mem_t * mem,char * stack)319 void filter_mem16(const spx_word16_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
320 {
321 int i,j;
322 spx_word16_t xi,yi,nyi;
323 for (i=0;i<N;i++)
324 {
325 xi= x[i];
326 yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
327 nyi = NEG16(yi);
328 for (j=0;j<ord-1;j++)
329 {
330 mem[j] = MAC16_16(MAC16_16(mem[j+1], num[j],xi), den[j],nyi);
331 }
332 mem[ord-1] = ADD32(MULT16_16(num[ord-1],xi), MULT16_16(den[ord-1],nyi));
333 y[i] = yi;
334 }
335 }
336 #endif
337
338 #ifndef OVERRIDE_IIR_MEM16
iir_mem16(const spx_word16_t * x,const spx_coef_t * den,spx_word16_t * y,int N,int ord,spx_mem_t * mem,char * stack)339 void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
340 {
341 int i,j;
342 spx_word16_t yi,nyi;
343
344 for (i=0;i<N;i++)
345 {
346 yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
347 nyi = NEG16(yi);
348 for (j=0;j<ord-1;j++)
349 {
350 mem[j] = MAC16_16(mem[j+1],den[j],nyi);
351 }
352 mem[ord-1] = MULT16_16(den[ord-1],nyi);
353 y[i] = yi;
354 }
355 }
356 #endif
357
358 #ifndef OVERRIDE_FIR_MEM16
fir_mem16(const spx_word16_t * x,const spx_coef_t * num,spx_word16_t * y,int N,int ord,spx_mem_t * mem,char * stack)359 void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
360 {
361 int i,j;
362 spx_word16_t xi,yi;
363
364 for (i=0;i<N;i++)
365 {
366 xi=x[i];
367 yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
368 for (j=0;j<ord-1;j++)
369 {
370 mem[j] = MAC16_16(mem[j+1], num[j],xi);
371 }
372 mem[ord-1] = MULT16_16(num[ord-1],xi);
373 y[i] = yi;
374 }
375 }
376 #endif
377
378
syn_percep_zero16(const spx_word16_t * xx,const spx_coef_t * ak,const spx_coef_t * awk1,const spx_coef_t * awk2,spx_word16_t * y,int N,int ord,char * stack)379 void syn_percep_zero16(const spx_word16_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
380 {
381 int i;
382 VARDECL(spx_mem_t *mem);
383 ALLOC(mem, ord, spx_mem_t);
384 for (i=0;i<ord;i++)
385 mem[i]=0;
386 iir_mem16(xx, ak, y, N, ord, mem, stack);
387 for (i=0;i<ord;i++)
388 mem[i]=0;
389 filter_mem16(y, awk1, awk2, y, N, ord, mem, stack);
390 }
residue_percep_zero16(const spx_word16_t * xx,const spx_coef_t * ak,const spx_coef_t * awk1,const spx_coef_t * awk2,spx_word16_t * y,int N,int ord,char * stack)391 void residue_percep_zero16(const spx_word16_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
392 {
393 int i;
394 VARDECL(spx_mem_t *mem);
395 ALLOC(mem, ord, spx_mem_t);
396 for (i=0;i<ord;i++)
397 mem[i]=0;
398 filter_mem16(xx, ak, awk1, y, N, ord, mem, stack);
399 for (i=0;i<ord;i++)
400 mem[i]=0;
401 fir_mem16(y, awk2, y, N, ord, mem, stack);
402 }
403
404
405 #ifndef OVERRIDE_COMPUTE_IMPULSE_RESPONSE
compute_impulse_response(const spx_coef_t * ak,const spx_coef_t * awk1,const spx_coef_t * awk2,spx_word16_t * y,int N,int ord,char * stack)406 void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
407 {
408 int i,j;
409 spx_word16_t y1, ny1i, ny2i;
410 VARDECL(spx_mem_t *mem1);
411 VARDECL(spx_mem_t *mem2);
412 ALLOC(mem1, ord, spx_mem_t);
413 ALLOC(mem2, ord, spx_mem_t);
414
415 y[0] = LPC_SCALING;
416 for (i=0;i<ord;i++)
417 y[i+1] = awk1[i];
418 i++;
419 for (;i<N;i++)
420 y[i] = VERY_SMALL;
421 for (i=0;i<ord;i++)
422 mem1[i] = mem2[i] = 0;
423 for (i=0;i<N;i++)
424 {
425 y1 = ADD16(y[i], EXTRACT16(PSHR32(mem1[0],LPC_SHIFT)));
426 ny1i = NEG16(y1);
427 y[i] = PSHR32(ADD32(SHL32(EXTEND32(y1),LPC_SHIFT+1),mem2[0]),LPC_SHIFT);
428 ny2i = NEG16(y[i]);
429 for (j=0;j<ord-1;j++)
430 {
431 mem1[j] = MAC16_16(mem1[j+1], awk2[j],ny1i);
432 mem2[j] = MAC16_16(mem2[j+1], ak[j],ny2i);
433 }
434 mem1[ord-1] = MULT16_16(awk2[ord-1],ny1i);
435 mem2[ord-1] = MULT16_16(ak[ord-1],ny2i);
436 }
437 }
438 #endif
439
440 /* Decomposes a signal into low-band and high-band using a QMF */
qmf_decomp(const spx_word16_t * xx,const spx_word16_t * aa,spx_word16_t * y1,spx_word16_t * y2,int N,int M,spx_word16_t * mem,char * stack)441 void qmf_decomp(const spx_word16_t *xx, const spx_word16_t *aa, spx_word16_t *y1, spx_word16_t *y2, int N, int M, spx_word16_t *mem, char *stack)
442 {
443 int i,j,k,M2;
444 VARDECL(spx_word16_t *a);
445 VARDECL(spx_word16_t *x);
446 spx_word16_t *x2;
447
448 ALLOC(a, M, spx_word16_t);
449 ALLOC(x, N+M-1, spx_word16_t);
450 x2=x+M-1;
451 M2=M>>1;
452 for (i=0;i<M;i++)
453 a[M-i-1]= aa[i];
454 for (i=0;i<M-1;i++)
455 x[i]=mem[M-i-2];
456 for (i=0;i<N;i++)
457 x[i+M-1]=SHR16(xx[i],1);
458 for (i=0;i<M-1;i++)
459 mem[i]=SHR16(xx[N-i-1],1);
460 for (i=0,k=0;i<N;i+=2,k++)
461 {
462 spx_word32_t y1k=0, y2k=0;
463 for (j=0;j<M2;j++)
464 {
465 y1k=ADD32(y1k,MULT16_16(a[j],ADD16(x[i+j],x2[i-j])));
466 y2k=SUB32(y2k,MULT16_16(a[j],SUB16(x[i+j],x2[i-j])));
467 j++;
468 y1k=ADD32(y1k,MULT16_16(a[j],ADD16(x[i+j],x2[i-j])));
469 y2k=ADD32(y2k,MULT16_16(a[j],SUB16(x[i+j],x2[i-j])));
470 }
471 y1[k] = EXTRACT16(SATURATE(PSHR32(y1k,15),32767));
472 y2[k] = EXTRACT16(SATURATE(PSHR32(y2k,15),32767));
473 }
474 }
475
476 /* Re-synthesised a signal from the QMF low-band and high-band signals */
qmf_synth(const spx_word16_t * x1,const spx_word16_t * x2,const spx_word16_t * a,spx_word16_t * y,int N,int M,spx_word16_t * mem1,spx_word16_t * mem2,char * stack)477 void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word16_t *mem1, spx_word16_t *mem2, char *stack)
478 /* assumptions:
479 all odd x[i] are zero -- well, actually they are left out of the array now
480 N and M are multiples of 4 */
481 {
482 int i, j;
483 int M2, N2;
484 VARDECL(spx_word16_t *xx1);
485 VARDECL(spx_word16_t *xx2);
486
487 M2 = M>>1;
488 N2 = N>>1;
489 ALLOC(xx1, M2+N2, spx_word16_t);
490 ALLOC(xx2, M2+N2, spx_word16_t);
491
492 for (i = 0; i < N2; i++)
493 xx1[i] = x1[N2-1-i];
494 for (i = 0; i < M2; i++)
495 xx1[N2+i] = mem1[2*i+1];
496 for (i = 0; i < N2; i++)
497 xx2[i] = x2[N2-1-i];
498 for (i = 0; i < M2; i++)
499 xx2[N2+i] = mem2[2*i+1];
500
501 for (i = 0; i < N2; i += 2) {
502 spx_sig_t y0, y1, y2, y3;
503 spx_word16_t x10, x20;
504
505 y0 = y1 = y2 = y3 = 0;
506 x10 = xx1[N2-2-i];
507 x20 = xx2[N2-2-i];
508
509 for (j = 0; j < M2; j += 2) {
510 spx_word16_t x11, x21;
511 spx_word16_t a0, a1;
512
513 a0 = a[2*j];
514 a1 = a[2*j+1];
515 x11 = xx1[N2-1+j-i];
516 x21 = xx2[N2-1+j-i];
517
518 #ifdef FIXED_POINT
519 /* We multiply twice by the same coef to avoid overflows */
520 y0 = MAC16_16(MAC16_16(y0, a0, x11), NEG16(a0), x21);
521 y1 = MAC16_16(MAC16_16(y1, a1, x11), a1, x21);
522 y2 = MAC16_16(MAC16_16(y2, a0, x10), NEG16(a0), x20);
523 y3 = MAC16_16(MAC16_16(y3, a1, x10), a1, x20);
524 #else
525 y0 = ADD32(y0,MULT16_16(a0, x11-x21));
526 y1 = ADD32(y1,MULT16_16(a1, x11+x21));
527 y2 = ADD32(y2,MULT16_16(a0, x10-x20));
528 y3 = ADD32(y3,MULT16_16(a1, x10+x20));
529 #endif
530 a0 = a[2*j+2];
531 a1 = a[2*j+3];
532 x10 = xx1[N2+j-i];
533 x20 = xx2[N2+j-i];
534
535 #ifdef FIXED_POINT
536 /* We multiply twice by the same coef to avoid overflows */
537 y0 = MAC16_16(MAC16_16(y0, a0, x10), NEG16(a0), x20);
538 y1 = MAC16_16(MAC16_16(y1, a1, x10), a1, x20);
539 y2 = MAC16_16(MAC16_16(y2, a0, x11), NEG16(a0), x21);
540 y3 = MAC16_16(MAC16_16(y3, a1, x11), a1, x21);
541 #else
542 y0 = ADD32(y0,MULT16_16(a0, x10-x20));
543 y1 = ADD32(y1,MULT16_16(a1, x10+x20));
544 y2 = ADD32(y2,MULT16_16(a0, x11-x21));
545 y3 = ADD32(y3,MULT16_16(a1, x11+x21));
546 #endif
547 }
548 #ifdef FIXED_POINT
549 y[2*i] = EXTRACT16(SATURATE32(PSHR32(y0,15),32767));
550 y[2*i+1] = EXTRACT16(SATURATE32(PSHR32(y1,15),32767));
551 y[2*i+2] = EXTRACT16(SATURATE32(PSHR32(y2,15),32767));
552 y[2*i+3] = EXTRACT16(SATURATE32(PSHR32(y3,15),32767));
553 #else
554 /* Normalize up explicitly if we're in float */
555 y[2*i] = 2.f*y0;
556 y[2*i+1] = 2.f*y1;
557 y[2*i+2] = 2.f*y2;
558 y[2*i+3] = 2.f*y3;
559 #endif
560 }
561
562 for (i = 0; i < M2; i++)
563 mem1[2*i+1] = xx1[i];
564 for (i = 0; i < M2; i++)
565 mem2[2*i+1] = xx2[i];
566 }
567
568 #ifdef FIXED_POINT
569 #if 0
570 const spx_word16_t shift_filt[3][7] = {{-33, 1043, -4551, 19959, 19959, -4551, 1043},
571 {-98, 1133, -4425, 29179, 8895, -2328, 444},
572 {444, -2328, 8895, 29179, -4425, 1133, -98}};
573 #else
574 const spx_word16_t shift_filt[3][7] = {{-390, 1540, -4993, 20123, 20123, -4993, 1540},
575 {-1064, 2817, -6694, 31589, 6837, -990, -209},
576 {-209, -990, 6837, 31589, -6694, 2817, -1064}};
577 #endif
578 #else
579 #if 0
580 const float shift_filt[3][7] = {{-9.9369e-04, 3.1831e-02, -1.3889e-01, 6.0910e-01, 6.0910e-01, -1.3889e-01, 3.1831e-02},
581 {-0.0029937, 0.0345613, -0.1350474, 0.8904793, 0.2714479, -0.0710304, 0.0135403},
582 {0.0135403, -0.0710304, 0.2714479, 0.8904793, -0.1350474, 0.0345613, -0.0029937}};
583 #else
584 const float shift_filt[3][7] = {{-0.011915f, 0.046995f, -0.152373f, 0.614108f, 0.614108f, -0.152373f, 0.046995f},
585 {-0.0324855f, 0.0859768f, -0.2042986f, 0.9640297f, 0.2086420f, -0.0302054f, -0.0063646f},
586 {-0.0063646f, -0.0302054f, 0.2086420f, 0.9640297f, -0.2042986f, 0.0859768f, -0.0324855f}};
587 #endif
588 #endif
589
interp_pitch(spx_word16_t * exc,spx_word16_t * interp,int pitch,int len)590 int interp_pitch(
591 spx_word16_t *exc, /*decoded excitation*/
592 spx_word16_t *interp, /*decoded excitation*/
593 int pitch, /*pitch period*/
594 int len
595 )
596 {
597 int i,j,k;
598 spx_word32_t corr[4][7];
599 spx_word32_t maxcorr;
600 int maxi, maxj;
601 for (i=0;i<7;i++)
602 {
603 corr[0][i] = inner_prod(exc, exc-pitch-3+i, len);
604 }
605 for (i=0;i<3;i++)
606 {
607 for (j=0;j<7;j++)
608 {
609 int i1, i2;
610 spx_word32_t tmp=0;
611 i1 = 3-j;
612 if (i1<0)
613 i1 = 0;
614 i2 = 10-j;
615 if (i2>7)
616 i2 = 7;
617 for (k=i1;k<i2;k++)
618 tmp += MULT16_32_Q15(shift_filt[i][k],corr[0][j+k-3]);
619 corr[i+1][j] = tmp;
620 }
621 }
622 maxi=maxj=0;
623 maxcorr = corr[0][0];
624 for (i=0;i<4;i++)
625 {
626 for (j=0;j<7;j++)
627 {
628 if (corr[i][j] > maxcorr)
629 {
630 maxcorr = corr[i][j];
631 maxi=i;
632 maxj=j;
633 }
634 }
635 }
636 for (i=0;i<len;i++)
637 {
638 spx_word32_t tmp = 0;
639 if (maxi>0)
640 {
641 for (k=0;k<7;k++)
642 {
643 tmp += MULT16_16(exc[i-(pitch-maxj+3)+k-3],shift_filt[maxi-1][k]);
644 }
645 } else {
646 tmp = SHL32(exc[i-(pitch-maxj+3)],15);
647 }
648 interp[i] = PSHR32(tmp,15);
649 }
650 return pitch-maxj+3;
651 }
652
multicomb(spx_word16_t * exc,spx_word16_t * new_exc,spx_coef_t * ak,int p,int nsf,int pitch,int max_pitch,spx_word16_t comb_gain,char * stack)653 void multicomb(
654 spx_word16_t *exc, /*decoded excitation*/
655 spx_word16_t *new_exc, /*enhanced excitation*/
656 spx_coef_t *ak, /*LPC filter coefs*/
657 int p, /*LPC order*/
658 int nsf, /*sub-frame size*/
659 int pitch, /*pitch period*/
660 int max_pitch,
661 spx_word16_t comb_gain, /*gain of comb filter*/
662 char *stack
663 )
664 {
665 int i;
666 VARDECL(spx_word16_t *iexc);
667 spx_word16_t old_ener, new_ener;
668 int corr_pitch;
669
670 spx_word16_t iexc0_mag, iexc1_mag, exc_mag;
671 spx_word32_t corr0, corr1;
672 spx_word16_t gain0, gain1;
673 spx_word16_t pgain1, pgain2;
674 spx_word16_t c1, c2;
675 spx_word16_t g1, g2;
676 spx_word16_t ngain;
677 spx_word16_t gg1, gg2;
678 #ifdef FIXED_POINT
679 int scaledown=0;
680 #endif
681 #if 0 /* Set to 1 to enable full pitch search */
682 int nol_pitch[6];
683 spx_word16_t nol_pitch_coef[6];
684 spx_word16_t ol_pitch_coef;
685 open_loop_nbest_pitch(exc, 20, 120, nsf,
686 nol_pitch, nol_pitch_coef, 6, stack);
687 corr_pitch=nol_pitch[0];
688 ol_pitch_coef = nol_pitch_coef[0];
689 /*Try to remove pitch multiples*/
690 for (i=1;i<6;i++)
691 {
692 #ifdef FIXED_POINT
693 if ((nol_pitch_coef[i]>MULT16_16_Q15(nol_pitch_coef[0],19661)) &&
694 #else
695 if ((nol_pitch_coef[i]>.6*nol_pitch_coef[0]) &&
696 #endif
697 (ABS(2*nol_pitch[i]-corr_pitch)<=2 || ABS(3*nol_pitch[i]-corr_pitch)<=3 ||
698 ABS(4*nol_pitch[i]-corr_pitch)<=4 || ABS(5*nol_pitch[i]-corr_pitch)<=5))
699 {
700 corr_pitch = nol_pitch[i];
701 }
702 }
703 #else
704 corr_pitch = pitch;
705 #endif
706
707 ALLOC(iexc, 2*nsf, spx_word16_t);
708
709 interp_pitch(exc, iexc, corr_pitch, 80);
710 if (corr_pitch>max_pitch)
711 interp_pitch(exc, iexc+nsf, 2*corr_pitch, 80);
712 else
713 interp_pitch(exc, iexc+nsf, -corr_pitch, 80);
714
715 #ifdef FIXED_POINT
716 for (i=0;i<nsf;i++)
717 {
718 if (ABS16(exc[i])>16383)
719 {
720 scaledown = 1;
721 break;
722 }
723 }
724 if (scaledown)
725 {
726 for (i=0;i<nsf;i++)
727 exc[i] = SHR16(exc[i],1);
728 for (i=0;i<2*nsf;i++)
729 iexc[i] = SHR16(iexc[i],1);
730 }
731 #endif
732 /*interp_pitch(exc, iexc+2*nsf, 2*corr_pitch, 80);*/
733
734 /*printf ("%d %d %f\n", pitch, corr_pitch, max_corr*ener_1);*/
735 iexc0_mag = spx_sqrt(1000+inner_prod(iexc,iexc,nsf));
736 iexc1_mag = spx_sqrt(1000+inner_prod(iexc+nsf,iexc+nsf,nsf));
737 exc_mag = spx_sqrt(1+inner_prod(exc,exc,nsf));
738 corr0 = inner_prod(iexc,exc,nsf);
739 if (corr0<0)
740 corr0=0;
741 corr1 = inner_prod(iexc+nsf,exc,nsf);
742 if (corr1<0)
743 corr1=0;
744 #ifdef FIXED_POINT
745 /* Doesn't cost much to limit the ratio and it makes the rest easier */
746 if (SHL32(EXTEND32(iexc0_mag),6) < EXTEND32(exc_mag))
747 iexc0_mag = ADD16(1,PSHR16(exc_mag,6));
748 if (SHL32(EXTEND32(iexc1_mag),6) < EXTEND32(exc_mag))
749 iexc1_mag = ADD16(1,PSHR16(exc_mag,6));
750 #endif
751 if (corr0 > MULT16_16(iexc0_mag,exc_mag))
752 pgain1 = QCONST16(1., 14);
753 else
754 pgain1 = PDIV32_16(SHL32(PDIV32(corr0, exc_mag),14),iexc0_mag);
755 if (corr1 > MULT16_16(iexc1_mag,exc_mag))
756 pgain2 = QCONST16(1., 14);
757 else
758 pgain2 = PDIV32_16(SHL32(PDIV32(corr1, exc_mag),14),iexc1_mag);
759 gg1 = PDIV32_16(SHL32(EXTEND32(exc_mag),8), iexc0_mag);
760 gg2 = PDIV32_16(SHL32(EXTEND32(exc_mag),8), iexc1_mag);
761 if (comb_gain>0)
762 {
763 #ifdef FIXED_POINT
764 c1 = (MULT16_16_Q15(QCONST16(.4,15),comb_gain)+QCONST16(.07,15));
765 c2 = QCONST16(.5,15)+MULT16_16_Q14(QCONST16(1.72,14),(c1-QCONST16(.07,15)));
766 #else
767 c1 = .4*comb_gain+.07;
768 c2 = .5+1.72*(c1-.07);
769 #endif
770 } else
771 {
772 c1=c2=0;
773 }
774 #ifdef FIXED_POINT
775 g1 = 32767 - MULT16_16_Q13(MULT16_16_Q15(c2, pgain1),pgain1);
776 g2 = 32767 - MULT16_16_Q13(MULT16_16_Q15(c2, pgain2),pgain2);
777 #else
778 g1 = 1-c2*pgain1*pgain1;
779 g2 = 1-c2*pgain2*pgain2;
780 #endif
781 if (g1<c1)
782 g1 = c1;
783 if (g2<c1)
784 g2 = c1;
785 g1 = (spx_word16_t)PDIV32_16(SHL32(EXTEND32(c1),14),(spx_word16_t)g1);
786 g2 = (spx_word16_t)PDIV32_16(SHL32(EXTEND32(c1),14),(spx_word16_t)g2);
787 if (corr_pitch>max_pitch)
788 {
789 gain0 = MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q14(g1,gg1));
790 gain1 = MULT16_16_Q15(QCONST16(.3,15),MULT16_16_Q14(g2,gg2));
791 } else {
792 gain0 = MULT16_16_Q15(QCONST16(.6,15),MULT16_16_Q14(g1,gg1));
793 gain1 = MULT16_16_Q15(QCONST16(.6,15),MULT16_16_Q14(g2,gg2));
794 }
795 for (i=0;i<nsf;i++)
796 new_exc[i] = ADD16(exc[i], EXTRACT16(PSHR32(ADD32(MULT16_16(gain0,iexc[i]), MULT16_16(gain1,iexc[i+nsf])),8)));
797 /* FIXME: compute_rms16 is currently not quite accurate enough (but close) */
798 new_ener = compute_rms16(new_exc, nsf);
799 old_ener = compute_rms16(exc, nsf);
800
801 if (old_ener < 1)
802 old_ener = 1;
803 if (new_ener < 1)
804 new_ener = 1;
805 if (old_ener > new_ener)
806 old_ener = new_ener;
807 ngain = PDIV32_16(SHL32(EXTEND32(old_ener),14),new_ener);
808
809 for (i=0;i<nsf;i++)
810 new_exc[i] = MULT16_16_Q14(ngain, new_exc[i]);
811 #ifdef FIXED_POINT
812 if (scaledown)
813 {
814 for (i=0;i<nsf;i++)
815 exc[i] = SHL16(exc[i],1);
816 for (i=0;i<nsf;i++)
817 new_exc[i] = SHL16(SATURATE16(new_exc[i],16383),1);
818 }
819 #endif
820 }
821
822