1 /* Copyright (C) 2002-2006 Jean-Marc Valin
2 File: cb_search.c
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 - Neither the name of the Xiph.org Foundation nor the names of its
16 contributors may be used to endorse or promote products derived from
17 this software without specific prior written permission.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
23 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include "cb_search.h"
37 #include "filters.h"
38 #include "stack_alloc.h"
39 #include "vq.h"
40 #include "arch.h"
41 #include "math_approx.h"
42 #include "os_support.h"
43
44 #ifdef _USE_SSE
45 #include "cb_search_sse.h"
46 #elif defined(ARM4_ASM) || defined(ARM5E_ASM)
47 #include "cb_search_arm4.h"
48 #elif defined(BFIN_ASM)
49 #include "cb_search_bfin.h"
50 #endif
51
52 #ifndef OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
compute_weighted_codebook(const signed char * shape_cb,const spx_word16_t * r,spx_word16_t * resp,spx_word16_t * resp2,spx_word32_t * E,int shape_cb_size,int subvect_size,char * stack)53 static void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t *r, spx_word16_t *resp, spx_word16_t *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
54 {
55 int i, j, k;
56 VARDECL(spx_word16_t *shape);
57 ALLOC(shape, subvect_size, spx_word16_t);
58 for (i=0;i<shape_cb_size;i++)
59 {
60 spx_word16_t *res;
61
62 res = resp+i*subvect_size;
63 for (k=0;k<subvect_size;k++)
64 shape[k] = (spx_word16_t)shape_cb[i*subvect_size+k];
65 E[i]=0;
66
67 /* Compute codeword response using convolution with impulse response */
68 for(j=0;j<subvect_size;j++)
69 {
70 spx_word32_t resj=0;
71 spx_word16_t res16;
72 for (k=0;k<=j;k++)
73 resj = MAC16_16(resj,shape[k],r[j-k]);
74 #ifdef FIXED_POINT
75 res16 = EXTRACT16(SHR32(resj, 13));
76 #else
77 res16 = 0.03125f*resj;
78 #endif
79 /* Compute codeword energy */
80 E[i]=MAC16_16(E[i],res16,res16);
81 res[j] = res16;
82 /*printf ("%d\n", (int)res[j]);*/
83 }
84 }
85
86 }
87 #endif
88
89 #ifndef OVERRIDE_TARGET_UPDATE
target_update(spx_word16_t * t,spx_word16_t g,spx_word16_t * r,int len)90 static inline void target_update(spx_word16_t *t, spx_word16_t g, spx_word16_t *r, int len)
91 {
92 int n;
93 for (n=0;n<len;n++)
94 t[n] = SUB16(t[n],PSHR32(MULT16_16(g,r[n]),13));
95 }
96 #endif
97
98
99
split_cb_search_shape_sign_N1(spx_word16_t target[],spx_coef_t ak[],spx_coef_t awk1[],spx_coef_t awk2[],const void * par,int p,int nsf,spx_sig_t * exc,spx_word16_t * r,SpeexBits * bits,char * stack,int update_target)100 static void split_cb_search_shape_sign_N1(
101 spx_word16_t target[], /* target vector */
102 spx_coef_t ak[], /* LPCs for this subframe */
103 spx_coef_t awk1[], /* Weighted LPCs for this subframe */
104 spx_coef_t awk2[], /* Weighted LPCs for this subframe */
105 const void *par, /* Codebook/search parameters*/
106 int p, /* number of LPC coeffs */
107 int nsf, /* number of samples in subframe */
108 spx_sig_t *exc,
109 spx_word16_t *r,
110 SpeexBits *bits,
111 char *stack,
112 int update_target
113 )
114 {
115 int i,j,m,q;
116 VARDECL(spx_word16_t *resp);
117 #ifdef _USE_SSE
118 VARDECL(__m128 *resp2);
119 VARDECL(__m128 *E);
120 #else
121 spx_word16_t *resp2;
122 VARDECL(spx_word32_t *E);
123 #endif
124 VARDECL(spx_word16_t *t);
125 VARDECL(spx_sig_t *e);
126 const signed char *shape_cb;
127 int shape_cb_size, subvect_size, nb_subvect;
128 const split_cb_params *params;
129 int best_index;
130 spx_word32_t best_dist;
131 int have_sign;
132
133 params = (const split_cb_params *) par;
134 subvect_size = params->subvect_size;
135 nb_subvect = params->nb_subvect;
136 shape_cb_size = 1<<params->shape_bits;
137 shape_cb = params->shape_cb;
138 have_sign = params->have_sign;
139 ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
140 #ifdef _USE_SSE
141 ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
142 ALLOC(E, shape_cb_size>>2, __m128);
143 #else
144 resp2 = resp;
145 ALLOC(E, shape_cb_size, spx_word32_t);
146 #endif
147 ALLOC(t, nsf, spx_word16_t);
148 ALLOC(e, nsf, spx_sig_t);
149
150 /* FIXME: Do we still need to copy the target? */
151 SPEEX_COPY(t, target, nsf);
152
153 compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
154
155 for (i=0;i<nb_subvect;i++)
156 {
157 spx_word16_t *x=t+subvect_size*i;
158 /*Find new n-best based on previous n-best j*/
159 if (have_sign)
160 vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);
161 else
162 vq_nbest(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);
163
164 speex_bits_pack(bits,best_index,params->shape_bits+have_sign);
165
166 {
167 int rind;
168 spx_word16_t *res;
169 spx_word16_t sign=1;
170 rind = best_index;
171 if (rind>=shape_cb_size)
172 {
173 sign=-1;
174 rind-=shape_cb_size;
175 }
176 res = resp+rind*subvect_size;
177 if (sign>0)
178 for (m=0;m<subvect_size;m++)
179 t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]);
180 else
181 for (m=0;m<subvect_size;m++)
182 t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]);
183
184 #ifdef FIXED_POINT
185 if (sign==1)
186 {
187 for (j=0;j<subvect_size;j++)
188 e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
189 } else {
190 for (j=0;j<subvect_size;j++)
191 e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
192 }
193 #else
194 for (j=0;j<subvect_size;j++)
195 e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
196 #endif
197
198 }
199
200 for (m=0;m<subvect_size;m++)
201 {
202 spx_word16_t g;
203 int rind;
204 spx_word16_t sign=1;
205 rind = best_index;
206 if (rind>=shape_cb_size)
207 {
208 sign=-1;
209 rind-=shape_cb_size;
210 }
211
212 q=subvect_size-m;
213 #ifdef FIXED_POINT
214 g=sign*shape_cb[rind*subvect_size+m];
215 #else
216 g=sign*0.03125*shape_cb[rind*subvect_size+m];
217 #endif
218 target_update(t+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
219 }
220 }
221
222 /* Update excitation */
223 /* FIXME: We could update the excitation directly above */
224 for (j=0;j<nsf;j++)
225 exc[j]=ADD32(exc[j],e[j]);
226
227 /* Update target: only update target if necessary */
228 if (update_target)
229 {
230 VARDECL(spx_word16_t *r2);
231 ALLOC(r2, nsf, spx_word16_t);
232 for (j=0;j<nsf;j++)
233 r2[j] = EXTRACT16(PSHR32(e[j] ,6));
234 syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack);
235 for (j=0;j<nsf;j++)
236 target[j]=SUB16(target[j],PSHR16(r2[j],2));
237 }
238 }
239
240
241
split_cb_search_shape_sign(spx_word16_t target[],spx_coef_t ak[],spx_coef_t awk1[],spx_coef_t awk2[],const void * par,int p,int nsf,spx_sig_t * exc,spx_word16_t * r,SpeexBits * bits,char * stack,int complexity,int update_target)242 void split_cb_search_shape_sign(
243 spx_word16_t target[], /* target vector */
244 spx_coef_t ak[], /* LPCs for this subframe */
245 spx_coef_t awk1[], /* Weighted LPCs for this subframe */
246 spx_coef_t awk2[], /* Weighted LPCs for this subframe */
247 const void *par, /* Codebook/search parameters*/
248 int p, /* number of LPC coeffs */
249 int nsf, /* number of samples in subframe */
250 spx_sig_t *exc,
251 spx_word16_t *r,
252 SpeexBits *bits,
253 char *stack,
254 int complexity,
255 int update_target
256 )
257 {
258 int i,j,k,m,n,q;
259 VARDECL(spx_word16_t *resp);
260 #ifdef _USE_SSE
261 VARDECL(__m128 *resp2);
262 VARDECL(__m128 *E);
263 #else
264 spx_word16_t *resp2;
265 VARDECL(spx_word32_t *E);
266 #endif
267 VARDECL(spx_word16_t *t);
268 VARDECL(spx_sig_t *e);
269 VARDECL(spx_word16_t *tmp);
270 VARDECL(spx_word32_t *ndist);
271 VARDECL(spx_word32_t *odist);
272 VARDECL(int *itmp);
273 VARDECL(spx_word16_t **ot2);
274 VARDECL(spx_word16_t **nt2);
275 spx_word16_t **ot, **nt;
276 VARDECL(int **nind);
277 VARDECL(int **oind);
278 VARDECL(int *ind);
279 const signed char *shape_cb;
280 int shape_cb_size, subvect_size, nb_subvect;
281 const split_cb_params *params;
282 int N=2;
283 VARDECL(int *best_index);
284 VARDECL(spx_word32_t *best_dist);
285 VARDECL(int *best_nind);
286 VARDECL(int *best_ntarget);
287 int have_sign;
288 N=complexity;
289 if (N>10)
290 N=10;
291 /* Complexity isn't as important for the codebooks as it is for the pitch */
292 N=(2*N)/3;
293 if (N<1)
294 N=1;
295 if (N==1)
296 {
297 split_cb_search_shape_sign_N1(target,ak,awk1,awk2,par,p,nsf,exc,r,bits,stack,update_target);
298 return;
299 }
300 ALLOC(ot2, N, spx_word16_t*);
301 ALLOC(nt2, N, spx_word16_t*);
302 ALLOC(oind, N, int*);
303 ALLOC(nind, N, int*);
304
305 params = (const split_cb_params *) par;
306 subvect_size = params->subvect_size;
307 nb_subvect = params->nb_subvect;
308 shape_cb_size = 1<<params->shape_bits;
309 shape_cb = params->shape_cb;
310 have_sign = params->have_sign;
311 ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
312 #ifdef _USE_SSE
313 ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
314 ALLOC(E, shape_cb_size>>2, __m128);
315 #else
316 resp2 = resp;
317 ALLOC(E, shape_cb_size, spx_word32_t);
318 #endif
319 ALLOC(t, nsf, spx_word16_t);
320 ALLOC(e, nsf, spx_sig_t);
321 ALLOC(ind, nb_subvect, int);
322
323 ALLOC(tmp, 2*N*nsf, spx_word16_t);
324 for (i=0;i<N;i++)
325 {
326 ot2[i]=tmp+2*i*nsf;
327 nt2[i]=tmp+(2*i+1)*nsf;
328 }
329 ot=ot2;
330 nt=nt2;
331 ALLOC(best_index, N, int);
332 ALLOC(best_dist, N, spx_word32_t);
333 ALLOC(best_nind, N, int);
334 ALLOC(best_ntarget, N, int);
335 ALLOC(ndist, N, spx_word32_t);
336 ALLOC(odist, N, spx_word32_t);
337
338 ALLOC(itmp, 2*N*nb_subvect, int);
339 for (i=0;i<N;i++)
340 {
341 nind[i]=itmp+2*i*nb_subvect;
342 oind[i]=itmp+(2*i+1)*nb_subvect;
343 }
344
345 SPEEX_COPY(t, target, nsf);
346
347 for (j=0;j<N;j++)
348 SPEEX_COPY(&ot[j][0], t, nsf);
349
350 /* Pre-compute codewords response and energy */
351 compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
352
353 for (j=0;j<N;j++)
354 odist[j]=0;
355
356 /*For all subvectors*/
357 for (i=0;i<nb_subvect;i++)
358 {
359 /*"erase" nbest list*/
360 for (j=0;j<N;j++)
361 ndist[j]=VERY_LARGE32;
362 /* This is not strictly necessary, but it provides an additonal safety
363 to prevent crashes in case something goes wrong in the previous
364 steps (e.g. NaNs) */
365 for (j=0;j<N;j++)
366 best_nind[j] = best_ntarget[j] = 0;
367 /*For all n-bests of previous subvector*/
368 for (j=0;j<N;j++)
369 {
370 spx_word16_t *x=ot[j]+subvect_size*i;
371 spx_word32_t tener = 0;
372 for (m=0;m<subvect_size;m++)
373 tener = MAC16_16(tener, x[m],x[m]);
374 #ifdef FIXED_POINT
375 tener = SHR32(tener,1);
376 #else
377 tener *= .5;
378 #endif
379 /*Find new n-best based on previous n-best j*/
380 if (have_sign)
381 vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
382 else
383 vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
384
385 /*For all new n-bests*/
386 for (k=0;k<N;k++)
387 {
388 /* Compute total distance (including previous sub-vectors */
389 spx_word32_t err = ADD32(ADD32(odist[j],best_dist[k]),tener);
390
391 /*update n-best list*/
392 if (err<ndist[N-1])
393 {
394 for (m=0;m<N;m++)
395 {
396 if (err < ndist[m])
397 {
398 for (n=N-1;n>m;n--)
399 {
400 ndist[n] = ndist[n-1];
401 best_nind[n] = best_nind[n-1];
402 best_ntarget[n] = best_ntarget[n-1];
403 }
404 /* n is equal to m here, so they're interchangeable */
405 ndist[m] = err;
406 best_nind[n] = best_index[k];
407 best_ntarget[n] = j;
408 break;
409 }
410 }
411 }
412 }
413 if (i==0)
414 break;
415 }
416 for (j=0;j<N;j++)
417 {
418 /*previous target (we don't care what happened before*/
419 for (m=(i+1)*subvect_size;m<nsf;m++)
420 nt[j][m]=ot[best_ntarget[j]][m];
421
422 /* New code: update the rest of the target only if it's worth it */
423 for (m=0;m<subvect_size;m++)
424 {
425 spx_word16_t g;
426 int rind;
427 spx_word16_t sign=1;
428 rind = best_nind[j];
429 if (rind>=shape_cb_size)
430 {
431 sign=-1;
432 rind-=shape_cb_size;
433 }
434
435 q=subvect_size-m;
436 #ifdef FIXED_POINT
437 g=sign*shape_cb[rind*subvect_size+m];
438 #else
439 g=sign*0.03125*shape_cb[rind*subvect_size+m];
440 #endif
441 target_update(nt[j]+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
442 }
443
444 for (q=0;q<nb_subvect;q++)
445 nind[j][q]=oind[best_ntarget[j]][q];
446 nind[j][i]=best_nind[j];
447 }
448
449 /*update old-new data*/
450 /* just swap pointers instead of a long copy */
451 {
452 spx_word16_t **tmp2;
453 tmp2=ot;
454 ot=nt;
455 nt=tmp2;
456 }
457 for (j=0;j<N;j++)
458 for (m=0;m<nb_subvect;m++)
459 oind[j][m]=nind[j][m];
460 for (j=0;j<N;j++)
461 odist[j]=ndist[j];
462 }
463
464 /*save indices*/
465 for (i=0;i<nb_subvect;i++)
466 {
467 ind[i]=nind[0][i];
468 speex_bits_pack(bits,ind[i],params->shape_bits+have_sign);
469 }
470
471 /* Put everything back together */
472 for (i=0;i<nb_subvect;i++)
473 {
474 int rind;
475 spx_word16_t sign=1;
476 rind = ind[i];
477 if (rind>=shape_cb_size)
478 {
479 sign=-1;
480 rind-=shape_cb_size;
481 }
482 #ifdef FIXED_POINT
483 if (sign==1)
484 {
485 for (j=0;j<subvect_size;j++)
486 e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
487 } else {
488 for (j=0;j<subvect_size;j++)
489 e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
490 }
491 #else
492 for (j=0;j<subvect_size;j++)
493 e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
494 #endif
495 }
496 /* Update excitation */
497 for (j=0;j<nsf;j++)
498 exc[j]=ADD32(exc[j],e[j]);
499
500 /* Update target: only update target if necessary */
501 if (update_target)
502 {
503 VARDECL(spx_word16_t *r2);
504 ALLOC(r2, nsf, spx_word16_t);
505 for (j=0;j<nsf;j++)
506 r2[j] = EXTRACT16(PSHR32(e[j] ,6));
507 syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack);
508 for (j=0;j<nsf;j++)
509 target[j]=SUB16(target[j],PSHR16(r2[j],2));
510 }
511 }
512
513
split_cb_shape_sign_unquant(spx_sig_t * exc,const void * par,int nsf,SpeexBits * bits,char * stack,spx_int32_t * seed)514 void split_cb_shape_sign_unquant(
515 spx_sig_t *exc,
516 const void *par, /* non-overlapping codebook */
517 int nsf, /* number of samples in subframe */
518 SpeexBits *bits,
519 char *stack,
520 spx_int32_t *seed
521 )
522 {
523 int i,j;
524 VARDECL(int *ind);
525 VARDECL(int *signs);
526 const signed char *shape_cb;
527 int shape_cb_size, subvect_size, nb_subvect;
528 const split_cb_params *params;
529 int have_sign;
530
531 params = (const split_cb_params *) par;
532 subvect_size = params->subvect_size;
533 nb_subvect = params->nb_subvect;
534 shape_cb_size = 1<<params->shape_bits;
535 shape_cb = params->shape_cb;
536 have_sign = params->have_sign;
537
538 ALLOC(ind, nb_subvect, int);
539 ALLOC(signs, nb_subvect, int);
540
541 /* Decode codewords and gains */
542 for (i=0;i<nb_subvect;i++)
543 {
544 if (have_sign)
545 signs[i] = speex_bits_unpack_unsigned(bits, 1);
546 else
547 signs[i] = 0;
548 ind[i] = speex_bits_unpack_unsigned(bits, params->shape_bits);
549 }
550 /* Compute decoded excitation */
551 for (i=0;i<nb_subvect;i++)
552 {
553 spx_word16_t s=1;
554 if (signs[i])
555 s=-1;
556 #ifdef FIXED_POINT
557 if (s==1)
558 {
559 for (j=0;j<subvect_size;j++)
560 exc[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5);
561 } else {
562 for (j=0;j<subvect_size;j++)
563 exc[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5));
564 }
565 #else
566 for (j=0;j<subvect_size;j++)
567 exc[subvect_size*i+j]+=s*0.03125*shape_cb[ind[i]*subvect_size+j];
568 #endif
569 }
570 }
571
noise_codebook_quant(spx_word16_t target[],spx_coef_t ak[],spx_coef_t awk1[],spx_coef_t awk2[],const void * par,int p,int nsf,spx_sig_t * exc,spx_word16_t * r,SpeexBits * bits,char * stack,int complexity,int update_target)572 void noise_codebook_quant(
573 spx_word16_t target[], /* target vector */
574 spx_coef_t ak[], /* LPCs for this subframe */
575 spx_coef_t awk1[], /* Weighted LPCs for this subframe */
576 spx_coef_t awk2[], /* Weighted LPCs for this subframe */
577 const void *par, /* Codebook/search parameters*/
578 int p, /* number of LPC coeffs */
579 int nsf, /* number of samples in subframe */
580 spx_sig_t *exc,
581 spx_word16_t *r,
582 SpeexBits *bits,
583 char *stack,
584 int complexity,
585 int update_target
586 )
587 {
588 int i;
589 VARDECL(spx_word16_t *tmp);
590 ALLOC(tmp, nsf, spx_word16_t);
591 residue_percep_zero16(target, ak, awk1, awk2, tmp, nsf, p, stack);
592
593 for (i=0;i<nsf;i++)
594 exc[i]+=SHL32(EXTEND32(tmp[i]),8);
595 SPEEX_MEMSET(target, 0, nsf);
596 }
597
598
noise_codebook_unquant(spx_sig_t * exc,const void * par,int nsf,SpeexBits * bits,char * stack,spx_int32_t * seed)599 void noise_codebook_unquant(
600 spx_sig_t *exc,
601 const void *par, /* non-overlapping codebook */
602 int nsf, /* number of samples in subframe */
603 SpeexBits *bits,
604 char *stack,
605 spx_int32_t *seed
606 )
607 {
608 int i;
609 /* FIXME: This is bad, but I don't think the function ever gets called anyway */
610 for (i=0;i<nsf;i++)
611 exc[i]=SHL32(EXTEND32(speex_rand(1, seed)),SIG_SHIFT);
612 }
613