1 /*
2 ** Copyright 2003-2010, VisualOn, Inc.
3 **
4 ** Licensed under the Apache License, Version 2.0 (the "License");
5 ** you may not use this file except in compliance with the License.
6 ** You may obtain a copy of the License at
7 **
8 ** http://www.apache.org/licenses/LICENSE-2.0
9 **
10 ** Unless required by applicable law or agreed to in writing, software
11 ** distributed under the License is distributed on an "AS IS" BASIS,
12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 ** See the License for the specific language governing permissions and
14 ** limitations under the License.
15 */
16
17 /***********************************************************************
18 * File: c4t64fx.c *
19 * *
20 * Description:Performs algebraic codebook search for higher modes *
21 * *
22 ************************************************************************/
23
24 /************************************************************************
25 * Function: ACELP_4t64_fx() *
26 * *
27 * 20, 36, 44, 52, 64, 72, 88 bits algebraic codebook. *
28 * 4 tracks x 16 positions per track = 64 samples. *
29 * *
30 * 20 bits --> 4 pulses in a frame of 64 samples. *
31 * 36 bits --> 8 pulses in a frame of 64 samples. *
32 * 44 bits --> 10 pulses in a frame of 64 samples. *
33 * 52 bits --> 12 pulses in a frame of 64 samples. *
34 * 64 bits --> 16 pulses in a frame of 64 samples. *
35 * 72 bits --> 18 pulses in a frame of 64 samples. *
36 * 88 bits --> 24 pulses in a frame of 64 samples. *
37 * *
38 * All pulses can have two (2) possible amplitudes: +1 or -1. *
39 * Each pulse can have sixteen (16) possible positions. *
40 *************************************************************************/
41
42 #include "typedef.h"
43 #include "basic_op.h"
44 #include "math_op.h"
45 #include "acelp.h"
46 #include "cnst.h"
47
48 #include "q_pulse.h"
49
50 #undef LOG_TAG
51 #define LOG_TAG "amrwbenc"
52 #include "log/log.h"
53
54 static Word16 tipos[36] = {
55 0, 1, 2, 3, /* starting point &ipos[0], 1st iter */
56 1, 2, 3, 0, /* starting point &ipos[4], 2nd iter */
57 2, 3, 0, 1, /* starting point &ipos[8], 3rd iter */
58 3, 0, 1, 2, /* starting point &ipos[12], 4th iter */
59 0, 1, 2, 3,
60 1, 2, 3, 0,
61 2, 3, 0, 1,
62 3, 0, 1, 2,
63 0, 1, 2, 3}; /* end point for 24 pulses &ipos[35], 4th iter */
64
65 #define NB_PULSE_MAX 24
66
67 #define L_SUBFR 64
68 #define NB_TRACK 4
69 #define STEP 4
70 #define NB_POS 16
71 #define MSIZE 256
72 #define NB_MAX 8
73 #define NPMAXPT ((NB_PULSE_MAX+NB_TRACK-1)/NB_TRACK)
74
75 /* Private functions */
76 void cor_h_vec_012(
77 Word16 h[], /* (i) scaled impulse response */
78 Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */
79 Word16 track, /* (i) track to use */
80 Word16 sign[], /* (i) sign vector */
81 Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */
82 Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */
83 Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */
84 );
85
86 void cor_h_vec_012_asm(
87 Word16 h[], /* (i) scaled impulse response */
88 Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */
89 Word16 track, /* (i) track to use */
90 Word16 sign[], /* (i) sign vector */
91 Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */
92 Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */
93 Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */
94 );
95
96 void cor_h_vec_30(
97 Word16 h[], /* (i) scaled impulse response */
98 Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */
99 Word16 track, /* (i) track to use */
100 Word16 sign[], /* (i) sign vector */
101 Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */
102 Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */
103 Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */
104 );
105
106 void search_ixiy(
107 Word16 nb_pos_ix, /* (i) nb of pos for pulse 1 (1..8) */
108 Word16 track_x, /* (i) track of pulse 1 */
109 Word16 track_y, /* (i) track of pulse 2 */
110 Word16 * ps, /* (i/o) correlation of all fixed pulses */
111 Word16 * alp, /* (i/o) energy of all fixed pulses */
112 Word16 * ix, /* (o) position of pulse 1 */
113 Word16 * iy, /* (o) position of pulse 2 */
114 Word16 dn[], /* (i) corr. between target and h[] */
115 Word16 dn2[], /* (i) vector of selected positions */
116 Word16 cor_x[], /* (i) corr. of pulse 1 with fixed pulses */
117 Word16 cor_y[], /* (i) corr. of pulse 2 with fixed pulses */
118 Word16 rrixiy[][MSIZE] /* (i) corr. of pulse 1 with pulse 2 */
119 );
120
121
/*
 * ACELP_4t64_fx - algebraic codebook search over 4 tracks x 16 positions.
 *
 * Steps performed, in order:
 *   1. pick the search configuration (pulse count, iteration count, number
 *      of candidate positions per stage) from nbbits;
 *   2. derive a sign for every position from a mix of cn[] and dn[], and
 *      flip dn[] in place so that it is non-negative where the sign is -1;
 *   3. pre-select NB_MAX positions per track;
 *   4. build the energy (rrixix) and cross-correlation (rrixiy) tables of
 *      the (optionally down-scaled) impulse response;
 *   5. run nbiter search iterations, each fixing pulses two at a time via
 *      search_ixiy(), and keep the best candidate;
 *   6. write code[], rescale y[] and pack the pulse indices into _index[].
 *
 * Notes:
 *   - dn[] is documented as an input but is modified in place (step 2).
 *   - y[] is only overwritten when a candidate beats the running best; it is
 *     always shifted right by 3 at the end.
 *   - An unsupported nbbits selects zero pulses and zero iterations; code[]
 *     is then cleared and _index[] is left untouched.
 */
void ACELP_4t64_fx(
        Word16 dn[],                          /* (i) <12b : correlation between target x[] and H[]      */
        Word16 cn[],                          /* (i) <12b : residual after long term prediction         */
        Word16 H[],                           /* (i) Q12: impulse response of weighted synthesis filter */
        Word16 code[],                        /* (o) Q9 : algebraic (fixed) codebook excitation         */
        Word16 y[],                           /* (o) Q9 : filtered fixed codebook excitation            */
        Word16 nbbits,                        /* (i) : 20, 36, 44, 52, 64, 72 or 88 bits                */
        Word16 ser_size,                      /* (i) : bit rate                                         */
        Word16 _index[]                       /* (o) : index (20): 5+5+5+5 = 20 bits.                   */
        /* (o) : index (36): 9+9+9+9 = 36 bits.                   */
        /* (o) : index (44): 13+9+13+9 = 44 bits.                 */
        /* (o) : index (52): 13+13+13+13 = 52 bits.               */
        /* (o) : index (64): 2+2+2+2+14+14+14+14 = 64 bits.       */
        /* (o) : index (72): 10+2+10+2+10+14+10+14 = 72 bits.     */
        /* (o) : index (88): 11+11+11+11+11+11+11+11 = 88 bits.   */
        )
{
    Word32 i, j, k;
    Word16 st, pos, index, track, nb_pulse, nbiter, j_temp;
    /*
     * search_ixiy() writes ix/iy only when some candidate improves its
     * criterion and reads them back afterwards.  In the 20-bit mode nothing
     * assigns them before the first call, so give them a defined value.
     */
    Word16 ix = 0, iy = 0;
    Word16 psk, ps, alpk, alp, val, k_cn, k_dn, exp;
    Word16 *p0, *p1, *p2, *p3, *psign;
    Word16 *h, *h_inv, *ptr_h1, *ptr_h2, *ptr_hf, h_shift;
    Word32 s, cor, L_tmp, L_index;
    Word16 dn2[L_SUBFR], sign[L_SUBFR], vec[L_SUBFR];
    Word16 ind[NPMAXPT * NB_TRACK];
    Word16 codvec[NB_PULSE_MAX], nbpos[10];
    Word16 cor_x[NB_POS], cor_y[NB_POS], pos_max[NB_TRACK];
    Word16 h_buf[4 * L_SUBFR];
    Word16 rrixix[NB_TRACK][NB_POS], rrixiy[NB_TRACK][MSIZE];
    Word16 ipos[NB_PULSE_MAX];

    switch (nbbits)
    {
        case 20:                               /* 20 bits, 4 pulses, 4 tracks */
            nbiter = 4;                          /* 4x16x16=1024 loop */
            alp = 8192;                          /* alp = 2.0 (Q12) */
            nb_pulse = 4;
            nbpos[0] = 4;
            nbpos[1] = 8;
            break;
        case 36:                               /* 36 bits, 8 pulses, 4 tracks */
            nbiter = 4;                          /* 4x20x16=1280 loop */
            alp = 4096;                          /* alp = 1.0 (Q12) */
            nb_pulse = 8;
            nbpos[0] = 4;
            nbpos[1] = 8;
            nbpos[2] = 8;
            break;
        case 44:                               /* 44 bits, 10 pulses, 4 tracks */
            nbiter = 4;                          /* 4x26x16=1664 loop */
            alp = 4096;                          /* alp = 1.0 (Q12) */
            nb_pulse = 10;
            nbpos[0] = 4;
            nbpos[1] = 6;
            nbpos[2] = 8;
            nbpos[3] = 8;
            break;
        case 52:                               /* 52 bits, 12 pulses, 4 tracks */
            nbiter = 4;                          /* 4x26x16=1664 loop */
            alp = 4096;                          /* alp = 1.0 (Q12) */
            nb_pulse = 12;
            nbpos[0] = 4;
            nbpos[1] = 6;
            nbpos[2] = 8;
            nbpos[3] = 8;
            break;
        case 64:                               /* 64 bits, 16 pulses, 4 tracks */
            nbiter = 3;                          /* 3x36x16=1728 loop */
            alp = 3277;                          /* alp = 0.8 (Q12) */
            nb_pulse = 16;
            nbpos[0] = 4;
            nbpos[1] = 4;
            nbpos[2] = 6;
            nbpos[3] = 6;
            nbpos[4] = 8;
            nbpos[5] = 8;
            break;
        case 72:                               /* 72 bits, 18 pulses, 4 tracks */
            nbiter = 3;                          /* 3x35x16=1680 loop */
            alp = 3072;                          /* alp = 0.75 (Q12) */
            nb_pulse = 18;
            nbpos[0] = 2;
            nbpos[1] = 3;
            nbpos[2] = 4;
            nbpos[3] = 5;
            nbpos[4] = 6;
            nbpos[5] = 7;
            nbpos[6] = 8;
            break;
        case 88:                               /* 88 bits, 24 pulses, 4 tracks */
            if(ser_size > 462)
                nbiter = 1;
            else
                nbiter = 2;                    /* 2x53x16=1696 loop */

            alp = 2048;                          /* alp = 0.5 (Q12) */
            nb_pulse = 24;
            nbpos[0] = 2;
            nbpos[1] = 2;
            nbpos[2] = 3;
            nbpos[3] = 4;
            nbpos[4] = 5;
            nbpos[5] = 6;
            nbpos[6] = 7;
            nbpos[7] = 8;
            nbpos[8] = 8;
            nbpos[9] = 8;
            break;
        default:
            /* unsupported size: no pulses, no search iterations */
            nbiter = 0;
            alp = 0;
            nb_pulse = 0;
    }

    /* default codevector, used if no search iteration ever wins */
    for (i = 0; i < nb_pulse; i++)
    {
        codvec[i] = i;
    }

    /*----------------------------------------------------------------*
     * Find sign for each pulse position.                             *
     *----------------------------------------------------------------*/
    /* calculate energy for normalization of cn[] and dn[] */
    /* set k_cn = 32..32767 (ener_cn = 2^30..256-0) */
#ifdef ASM_OPT                  /* asm optimization branch */
    s = Dot_product12_asm(cn, cn, L_SUBFR, &exp);
#else
    s = Dot_product12(cn, cn, L_SUBFR, &exp);
#endif

    Isqrt_n(&s, &exp);
    s = L_shl(s, (exp + 5));
    k_cn = extract_h(L_add(s, 0x8000));

    /* set k_dn = 32..512 (ener_dn = 2^30..2^22) */
#ifdef ASM_OPT                      /* asm optimization branch */
    s = Dot_product12_asm(dn, dn, L_SUBFR, &exp);
#else
    s = Dot_product12(dn, dn, L_SUBFR, &exp);
#endif

    Isqrt_n(&s, &exp);
    k_dn = voround(L_shl(s, (exp + 5 + 3)));    /* k_dn = 256..4096 */
    k_dn = vo_mult_r(alp, k_dn);                /* alp in Q12 */

    /* mix normalized cn[] and dn[] */
    p0 = cn;
    p1 = dn;
    p2 = dn2;

    for (i = 0; i < L_SUBFR/4; i++)
    {
        s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
        *p2++ = s >> 7;
        s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
        *p2++ = s >> 7;
        s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
        *p2++ = s >> 7;
        s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
        *p2++ = s >> 7;
    }

    /* set sign according to dn2[] = k_cn*cn[] + k_dn*dn[]    */
    /* vec[] temporarily holds the opposite sign; it is used  */
    /* below when the signs are folded into rrixiy[][]        */
    for(i = 0; i < L_SUBFR; i++)
    {
        val = dn[i];
        ps = dn2[i];
        if (ps >= 0)
        {
            sign[i] = 32767;             /* sign = +1 */
            vec[i] = -32768;
        } else
        {
            sign[i] = -32768;            /* sign = -1 */
            vec[i] = 32767;
            dn[i] = -val;                /* caller's dn[] is modified in place */
            dn2[i] = -ps;
        }
    }
    /*----------------------------------------------------------------*
     * Select NB_MAX position per track according to max of dn2[].    *
     *----------------------------------------------------------------*/
    pos = 0;
    for (i = 0; i < NB_TRACK; i++)
    {
        for (k = 0; k < NB_MAX; k++)
        {
            ps = -1;
            for (j = i; j < L_SUBFR; j += STEP)
            {
                if(dn2[j] > ps)
                {
                    ps = dn2[j];
                    pos = j;
                }
            }
            dn2[pos] = (k - NB_MAX);     /* dn2 < 0 when position is selected */
            if (k == 0)
            {
                pos_max[i] = pos;        /* best position of this track */
            }
        }
    }

    /*--------------------------------------------------------------*
     * Scale h[] to avoid overflow and to get maximum of precision  *
     * on correlation.                                              *
     *                                                              *
     * Maximum of h[] (h[0]) is fixed to 2048 (MAX16 / 16).         *
     *  ==> This allow addition of 16 pulses without saturation.    *
     *                                                              *
     * Energy worst case (on resonant impulse response),            *
     * - energy of h[] is approximately MAX/16.                     *
     * - During search, the energy is divided by 8 to avoid         *
     *   overflow on "alp". (energy of h[] = MAX/128).              *
     *  ==> "alp" worst case detected is 22854 on sinusoidal wave.  *
     *--------------------------------------------------------------*/

    /* impulse response buffer for fast computation */
    /* layout of h_buf: [64 zeros][h][64 zeros][-h]; after the loop below */
    /* h and h_inv point at the start of the h and -h quarters            */

    h = h_buf;
    h_inv = h_buf + (2 * L_SUBFR);
    L_tmp = 0;
    for (i = 0; i < L_SUBFR; i++)
    {
        *h++ = 0;
        *h_inv++ = 0;
        L_tmp = L_add(L_tmp, (H[i] * H[i]) << 1);
    }
    /* scale h[] down (/2) when energy of h[] is high with many pulses used */
    val = extract_h(L_tmp);
    h_shift = 0;

    if ((nb_pulse >= 12) && (val > 1024))
    {
        h_shift = 1;
    }
    p0 = H;
    p1 = h;
    p2 = h_inv;

    for (i = 0; i < L_SUBFR/4; i++)
    {
        *p1 = *p0++ >> h_shift;
        *p2++ = -(*p1++);
        *p1 = *p0++ >> h_shift;
        *p2++ = -(*p1++);
        *p1 = *p0++ >> h_shift;
        *p2++ = -(*p1++);
        *p1 = *p0++ >> h_shift;
        *p2++ = -(*p1++);
    }

    /*------------------------------------------------------------*
     * Compute rrixix[][] needed for the codebook search.         *
     * This algorithm compute impulse response energy of all      *
     * positions (16) in each track (4).       Total = 4x16 = 64. *
     *------------------------------------------------------------*/

    /* storage order --> i3i3, i2i2, i1i1, i0i0 */

    /* Init pointers to last position of rrixix[] */
    p0 = &rrixix[0][NB_POS - 1];
    p1 = &rrixix[1][NB_POS - 1];
    p2 = &rrixix[2][NB_POS - 1];
    p3 = &rrixix[3][NB_POS - 1];

    ptr_h1 = h;
    cor = 0x00008000L;                             /* for rounding */
    for (i = 0; i < NB_POS; i++)
    {
        cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
        ptr_h1++;
        *p3-- = extract_h(cor);
        cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
        ptr_h1++;
        *p2-- = extract_h(cor);
        cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
        ptr_h1++;
        *p1-- = extract_h(cor);
        cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
        ptr_h1++;
        *p0-- = extract_h(cor);
    }

    /*------------------------------------------------------------*
     * Compute rrixiy[][] needed for the codebook search.         *
     * This algorithm compute correlation between 2 pulses        *
     * (2 impulses responses) in 4 possible adjacent tracks.      *
     * (track 0-1, 1-2, 2-3 and 3-0).     Total = 4x16x16 = 1024. *
     *------------------------------------------------------------*/

    /* storage order --> i2i3, i1i2, i0i1, i3i0 */

    pos = MSIZE - 1;
    ptr_hf = h + 1;

    for (k = 0; k < NB_POS; k++)
    {
        p3 = &rrixiy[2][pos];
        p2 = &rrixiy[1][pos];
        p1 = &rrixiy[0][pos];
        p0 = &rrixiy[3][pos - NB_POS];

        cor = 0x00008000L;                   /* for rounding */
        ptr_h1 = h;
        ptr_h2 = ptr_hf;

        for (i = k + 1; i < NB_POS; i++)
        {
            cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
            ptr_h1++;
            ptr_h2++;
            *p3 = extract_h(cor);
            cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
            ptr_h1++;
            ptr_h2++;
            *p2 = extract_h(cor);
            cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
            ptr_h1++;
            ptr_h2++;
            *p1 = extract_h(cor);
            cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
            ptr_h1++;
            ptr_h2++;
            *p0 = extract_h(cor);

            /* step one row and one column back along the diagonal */
            p3 -= (NB_POS + 1);
            p2 -= (NB_POS + 1);
            p1 -= (NB_POS + 1);
            p0 -= (NB_POS + 1);
        }
        cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
        ptr_h1++;
        ptr_h2++;
        *p3 = extract_h(cor);
        cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
        ptr_h1++;
        ptr_h2++;
        *p2 = extract_h(cor);
        cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
        ptr_h1++;
        ptr_h2++;
        *p1 = extract_h(cor);

        pos -= NB_POS;
        ptr_hf += STEP;
    }

    /* storage order --> i3i0, i2i3, i1i2, i0i1 */

    pos = MSIZE - 1;
    ptr_hf = h + 3;

    for (k = 0; k < NB_POS; k++)
    {
        p3 = &rrixiy[3][pos];
        p2 = &rrixiy[2][pos - 1];
        p1 = &rrixiy[1][pos - 1];
        p0 = &rrixiy[0][pos - 1];

        cor = 0x00008000L;                              /* for rounding */
        ptr_h1 = h;
        ptr_h2 = ptr_hf;

        for (i = k + 1; i < NB_POS; i++)
        {
            cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
            ptr_h1++;
            ptr_h2++;
            *p3 = extract_h(cor);
            cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
            ptr_h1++;
            ptr_h2++;
            *p2 = extract_h(cor);
            cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
            ptr_h1++;
            ptr_h2++;
            *p1 = extract_h(cor);
            cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
            ptr_h1++;
            ptr_h2++;
            *p0 = extract_h(cor);

            p3 -= (NB_POS + 1);
            p2 -= (NB_POS + 1);
            p1 -= (NB_POS + 1);
            p0 -= (NB_POS + 1);
        }
        cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
        ptr_h1++;
        ptr_h2++;
        *p3 = extract_h(cor);

        pos--;
        ptr_hf += STEP;
    }

    /*------------------------------------------------------------*
     * Modification of rrixiy[][] to take signs into account.     *
     *------------------------------------------------------------*/

    p0 = &rrixiy[0][0];

    for (k = 0; k < NB_TRACK; k++)
    {
        j_temp = (k + 1)&0x03;               /* track paired with track k */
        for (i = k; i < L_SUBFR; i += STEP)
        {
            /* vec[] still holds the inverted signs at this point */
            psign = sign;
            if (psign[i] < 0)
            {
                psign = vec;
            }
            j = j_temp;
            for (; j < L_SUBFR; j += STEP)
            {
                *p0 = vo_mult(*p0, psign[j]);
                p0++;
            }
        }
    }

    /*-------------------------------------------------------------------*
     *                       Depth-first search                          *
     *-------------------------------------------------------------------*/

    psk = -1;
    alpk = 1;

    for (k = 0; k < nbiter; k++)
    {
        /* track order of this iteration: rotate the start by one track */
        j_temp = k<<2;
        for (i = 0; i < nb_pulse; i++)
            ipos[i] = tipos[j_temp + i];

        if(nbbits == 20)
        {
            /* no pulse is fixed in advance */
            pos = 0;
            ps = 0;
            alp = 0;
            for (i = 0; i < L_SUBFR; i++)
            {
                vec[i] = 0;
            }
        } else if ((nbbits == 36) || (nbbits == 44))
        {
            /* first stage: fix 2 pulses */
            pos = 2;

            ix = ind[0] = pos_max[ipos[0]];
            iy = ind[1] = pos_max[ipos[1]];
            ps = dn[ix] + dn[iy];
            i = ix >> 2;                /* ix / STEP */
            j = iy >> 2;                /* iy / STEP */
            /*
             * Scale by multiplication rather than "<<": rrixiy[][] entries
             * carry a sign, and left-shifting a negative value is undefined
             * behaviour in C.  The numeric result is the same.
             */
            s = rrixix[ipos[0]][i] * (1 << 13);
            s += rrixix[ipos[1]][j] * (1 << 13);
            i = (i << 4) + j;         /* (ix/STEP)*NB_POS + (iy/STEP) */
            s += rrixiy[ipos[0]][i] * (1 << 14);
            alp = (s + 0x8000) >> 16;
            if (sign[ix] < 0)
                p0 = h_inv - ix;
            else
                p0 = h - ix;
            if (sign[iy] < 0)
                p1 = h_inv - iy;
            else
                p1 = h - iy;

            for (i = 0; i < L_SUBFR; i++)
            {
                vec[i] = (*p0++) + (*p1++);
            }

            if(nbbits == 44)
            {
                ipos[8] = 0;
                ipos[9] = 1;
            }
        } else
        {
            /* first stage: fix 4 pulses */
            pos = 4;

            ix = ind[0] = pos_max[ipos[0]];
            iy = ind[1] = pos_max[ipos[1]];
            i = ind[2] = pos_max[ipos[2]];
            j = ind[3] = pos_max[ipos[3]];
            ps = add1(add1(add1(dn[ix], dn[iy]), dn[i]), dn[j]);

            if (sign[ix] < 0)
                p0 = h_inv - ix;
            else
                p0 = h - ix;

            if (sign[iy] < 0)
                p1 = h_inv - iy;
            else
                p1 = h - iy;

            if (sign[i] < 0)
                p2 = h_inv - i;
            else
                p2 = h - i;

            if (sign[j] < 0)
                p3 = h_inv - j;
            else
                p3 = h - j;

            /* energy of the 4 fixed pulses, accumulated with saturation */
            L_tmp = 0L;
            for(i = 0; i < L_SUBFR; i++)
            {
                Word32 vecSq2;
                vec[i]  = add1(add1(add1(*p0++, *p1++), *p2++), *p3++);
                vecSq2 = (vec[i] * vec[i]) << 1;
                if (vecSq2 > 0 && L_tmp > INT_MAX - vecSq2) {
                    L_tmp = INT_MAX;
                } else if (vecSq2 < 0 && L_tmp < INT_MIN - vecSq2) {
                    L_tmp = INT_MIN;
                } else {
                    L_tmp  += vecSq2;
                }
            }

            alp = ((L_tmp >> 3) + 0x8000) >> 16;

            if(nbbits == 72)
            {
                ipos[16] = 0;
                ipos[17] = 1;
            }
        }

        /* other stages of 2 pulses */

        for (j = pos, st = 0; j < nb_pulse; j += 2, st++)
        {
            /*--------------------------------------------------*
             * Calculate correlation of all possible positions  *
             * of the next 2 pulses with previous fixed pulses. *
             * Each pulse can have 16 possible positions.       *
             *--------------------------------------------------*/
            if(ipos[j] == 3)
            {
                cor_h_vec_30(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
            }
            else
            {
#ifdef ASM_OPT                 /* asm optimization branch */
                cor_h_vec_012_asm(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
#else
                cor_h_vec_012(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
#endif
            }
            /*--------------------------------------------------*
             * Find best positions of 2 pulses.                 *
             *--------------------------------------------------*/
            search_ixiy(nbpos[st], ipos[j], ipos[j + 1], &ps, &alp,
                    &ix, &iy, dn, dn2, cor_x, cor_y, rrixiy);

            ind[j] = ix;
            ind[j + 1] = iy;

            if (sign[ix] < 0)
                p0 = h_inv - ix;
            else
                p0 = h - ix;
            if (sign[iy] < 0)
                p1 = h_inv - iy;
            else
                p1 = h - iy;

            /* add the two new (signed, shifted) impulse responses to vec[] */
            for (i = 0; i < L_SUBFR; i+=4)
            {
                vec[i]   += add1((*p0++), (*p1++));
                vec[i+1] += add1((*p0++), (*p1++));
                vec[i+2] += add1((*p0++), (*p1++));
                vec[i+3] += add1((*p0++), (*p1++));
            }
        }
        /* memorise the best codevector */
        /* (ps^2 / alp > psk / alpk, compared by cross-multiplication) */
        ps = vo_mult(ps, ps);
        s = L_sub(vo_L_mult(alpk, ps), vo_L_mult(psk, alp));
        if (s > 0)
        {
            psk = ps;
            alpk = alp;
            for (i = 0; i < nb_pulse; i++)
            {
                codvec[i] = ind[i];
            }
            for (i = 0; i < L_SUBFR; i++)
            {
                y[i] = vec[i];
            }
        }
    }
    /*-------------------------------------------------------------------*
     * Build the codeword, the filtered codeword and index of codevector.*
     *-------------------------------------------------------------------*/
    for (i = 0; i < NPMAXPT * NB_TRACK; i++)
    {
        ind[i] = -1;                     /* -1 marks a free slot */
    }
    for (i = 0; i < L_SUBFR; i++)
    {
        code[i] = 0;
        y[i] = vo_shr_r(y[i], 3);               /* Q12 to Q9 */
    }
    val = (512 >> h_shift);               /* codeword in Q9 format */
    for (k = 0; k < nb_pulse; k++)
    {
        i = codvec[k];                       /* read pulse position */
        j = sign[i];                         /* read sign           */
        index = i >> 2;                 /* index = pos of pulse (0..15) */
        track = (Word16) (i & 0x03);         /* track = i % NB_TRACK (0..3)  */

        if (j > 0)
        {
            code[i] += val;
            codvec[k] += 128;            /* not read again in this function */
        } else
        {
            code[i] -= val;
            index += NB_POS;             /* negative pulses use 16..31 */
        }

        /* first slot of this track, then skip slots already taken */
        i = (Word16)((vo_L_mult(track, NPMAXPT) >> 1));

        while (i < NPMAXPT * NB_TRACK && ind[i] >= 0)
        {
            i += 1;
        }
        if (i < NPMAXPT * NB_TRACK) {
            ind[i] = index;
        } else {
            ALOGE("b/132647222, OOB access in ind array track=%d i=%d", track, i);
            android_errorWriteLog(0x534e4554, "132647222");
        }
    }

    k = 0;
    /* Build index of codevector */
    if(nbbits == 20)
    {
        for (track = 0; track < NB_TRACK; track++)
        {
            _index[track] = (Word16)(quant_1p_N1(ind[k], 4));
            k += NPMAXPT;
        }
    } else if(nbbits == 36)
    {
        for (track = 0; track < NB_TRACK; track++)
        {
            _index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4));
            k += NPMAXPT;
        }
    } else if(nbbits == 44)
    {
        for (track = 0; track < NB_TRACK - 2; track++)
        {
            _index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4));
            k += NPMAXPT;
        }
        for (track = 2; track < NB_TRACK; track++)
        {
            _index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4));
            k += NPMAXPT;
        }
    } else if(nbbits == 52)
    {
        for (track = 0; track < NB_TRACK; track++)
        {
            _index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4));
            k += NPMAXPT;
        }
    } else if(nbbits == 64)
    {
        for (track = 0; track < NB_TRACK; track++)
        {
            L_index = quant_4p_4N(&ind[k], 4);
            _index[track] = (Word16)((L_index >> 14) & 3);
            _index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF);
            k += NPMAXPT;
        }
    } else if(nbbits == 72)
    {
        for (track = 0; track < NB_TRACK - 2; track++)
        {
            L_index = quant_5p_5N(&ind[k], 4);
            _index[track] = (Word16)((L_index >> 10) & 0x03FF);
            _index[track + NB_TRACK] = (Word16)(L_index & 0x03FF);
            k += NPMAXPT;
        }
        for (track = 2; track < NB_TRACK; track++)
        {
            L_index = quant_4p_4N(&ind[k], 4);
            _index[track] = (Word16)((L_index >> 14) & 3);
            _index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF);
            k += NPMAXPT;
        }
    } else if(nbbits == 88)
    {
        for (track = 0; track < NB_TRACK; track++)
        {
            L_index = quant_6p_6N_2(&ind[k], 4);
            _index[track] = (Word16)((L_index >> 11) & 0x07FF);
            _index[track + NB_TRACK] = (Word16)(L_index & 0x07FF);
            k += NPMAXPT;
        }
    }
    return;
}
836
837
838 /*-------------------------------------------------------------------*
839 * Function cor_h_vec() *
840 * ~~~~~~~~~~~~~~~~~~~~~ *
841 * Compute correlations of h[] with vec[] for the specified track. *
842 *-------------------------------------------------------------------*/
/*
 * cor_h_vec_30 - correlations of h[] with vec[] for a pulse pair whose
 * second pulse lies on track 0.
 *
 * For each of the NB_POS positions pos = track, track + STEP, ... this
 * computes
 *   cor_1[k] = corr(h, vec starting at pos)     * sign[pos]     + rrixix[track][k]
 *   cor_2[k] = corr(h, vec starting at pos - 3) * sign[pos - 3] + rrixix[0][k]
 * Both correlations run up to the last sample of vec[].  Because pos - 3
 * is used as an index, track must be at least 3 (the caller in this file
 * only passes 3).
 */
void cor_h_vec_30(
        Word16 h[],                           /* (i) scaled impulse response                 */
        Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
        Word16 track,                         /* (i) track to use                            */
        Word16 sign[],                        /* (i) sign vector                             */
        Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]           */
        Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
        Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
        )
{
    Word32 k, n, pos, corr;
    Word32 L_sum1, L_sum2;
    Word16 *energy_x = rrixix[track];
    Word16 *energy_y = rrixix[0];

    pos = track;
    for (k = 0; k < NB_POS; k++)
    {
        /* number of samples from pos to the end of the subframe */
        Word32 span = L_SUBFR - pos;

        /* first pulse: h[0..] against vec[pos..] */
        L_sum1 = 0L;
        for (n = 0; n < span; n++)
        {
            L_sum1 = L_add(L_sum1, h[n] * vec[pos + n]);
        }

        /* second pulse starts 3 samples earlier, so it has 3 more terms */
        L_sum2 = 0L;
        for (n = 0; n < span + 3; n++)
        {
            L_sum2 = L_add(L_sum2, h[n] * vec[pos - 3 + n]);
        }

        L_sum1 = L_shl(L_sum1, 2);
        L_sum2 = L_shl(L_sum2, 2);

        corr = voround(L_sum1);
        cor_1[k] = mult(corr, sign[pos]) + energy_x[k];
        corr = voround(L_sum2);
        cor_2[k] = mult(corr, sign[pos-3]) + energy_y[k];

        pos += STEP;
    }
    return;
}
914
/*
 * cor_h_vec_012 - correlations of h[] with vec[] for a pulse pair on
 * tracks (track, track + 1).
 *
 * For each of the NB_POS positions pos = track, track + STEP, ... this
 * computes
 *   cor_1[k] = corr(h, vec starting at pos)     * sign[pos]     + rrixix[track][k]
 *   cor_2[k] = corr(h, vec starting at pos + 1) * sign[pos + 1] + rrixix[track + 1][k]
 * Both correlations run up to the last sample of vec[].  rrixix[track + 1]
 * is read, so the caller is expected to pass track in 0..2.
 */
void cor_h_vec_012(
        Word16 h[],                           /* (i) scaled impulse response                 */
        Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
        Word16 track,                         /* (i) track to use                            */
        Word16 sign[],                        /* (i) sign vector                             */
        Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]           */
        Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
        Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
        )
{
    Word32 k, n, pos, corr;
    Word32 L_sum1, L_sum2;
    Word16 *energy_x = rrixix[track];
    Word16 *energy_y = rrixix[track+1];

    pos = track;
    for (k = 0; k < NB_POS; k++)
    {
        /* index of the last h[] tap that still overlaps vec[] from pos */
        Word32 last = (L_SUBFR - 1) - pos;

        L_sum1 = 0L;
        L_sum2 = 0L;
        for (n = 0; n < last; n++)
        {
            L_sum1 = L_add(L_sum1, h[n] * vec[pos + n]);
            L_sum2 = L_add(L_sum2, h[n] * vec[pos + n + 1]);
        }
        /* the first pulse starts one sample earlier: one extra term */
        L_sum1 = L_add(L_sum1, h[last] * vec[L_SUBFR - 1]);

        L_sum1 = L_shl(L_sum1, 2);
        L_sum2 = L_shl(L_sum2, 2);

        corr = voround(L_sum1);
        cor_1[k] = vo_mult(corr, sign[pos]) + energy_x[k];
        corr = voround(L_sum2);
        cor_2[k] = vo_mult(corr, sign[pos + 1]) + energy_y[k];

        pos += STEP;
    }
    return;
}
974
975 /*-------------------------------------------------------------------*
976 * Function search_ixiy() *
977 * ~~~~~~~~~~~~~~~~~~~~~~~ *
978 * Find the best positions of 2 pulses in a subframe. *
979 *-------------------------------------------------------------------*/
980
/*
 * search_ixiy - find the best positions of 2 pulses in a subframe.
 *
 * Every position x of track_x whose dn2[x] is below (nb_pos_ix - NB_MAX)
 * is combined with every position y of track_y.  A pair replaces the
 * current best when sq / alp_16 exceeds sqk / alpk, tested without a
 * division as  alpk * sq - sqk * alp_16 > 0.
 *
 * On return *ps has dn[*ix] + dn[*iy] added to it and *alp holds the
 * energy term of the best pair (or 1 if no pair was accepted).
 *
 * NOTE: *ix and *iy are written only when some pair is accepted, yet they
 * are always read afterwards to update *ps.  Callers must therefore pass
 * pointers to initialised, in-range positions.
 */
void search_ixiy(
        Word16 nb_pos_ix,                     /* (i) nb of pos for pulse 1 (1..8)       */
        Word16 track_x,                       /* (i) track of pulse 1                   */
        Word16 track_y,                       /* (i) track of pulse 2                   */
        Word16 * ps,                          /* (i/o) correlation of all fixed pulses  */
        Word16 * alp,                         /* (i/o) energy of all fixed pulses       */
        Word16 * ix,                          /* (o) position of pulse 1                */
        Word16 * iy,                          /* (o) position of pulse 2                */
        Word16 dn[],                          /* (i) corr. between target and h[]       */
        Word16 dn2[],                         /* (i) vector of selected positions       */
        Word16 cor_x[],                       /* (i) corr. of pulse 1 with fixed pulses */
        Word16 cor_y[],                       /* (i) corr. of pulse 2 with fixed pulses */
        Word16 rrixiy[][MSIZE]                /* (i) corr. of pulse 1 with pulse 2      */
        )
{
    Word32 x, y, pos, thres_ix;
    Word16 ps1, ps2, sq, sqk;
    Word16 alp_16, alpk;
    Word16 *p0, *p1, *p2;
    Word32 s, alp0, alp1, alp2;

    p0 = cor_x;
    p1 = cor_y;
    p2 = rrixiy[track_x];

    /* selected positions carry dn2 values k - NB_MAX, best first */
    thres_ix = nb_pos_ix - NB_MAX;

    alp0 = L_deposit_h(*alp);
    alp0 = (alp0 + 0x00008000L);       /* for rounding */

    sqk = -1;
    alpk = 1;

    for (x = track_x; x < L_SUBFR; x += STEP)
    {
        ps1 = *ps + dn[x];
        /*
         * The correlation terms may be negative, and left-shifting a
         * negative value is undefined behaviour in C, so scale by
         * multiplication instead; the numeric result is the same.
         */
        alp1 = L_add(alp0, (*p0++) * (1 << 13));

        if (dn2[x] < thres_ix)
        {
            pos = -1;
            for (y = track_y; y < L_SUBFR; y += STEP)
            {
                ps2 = add1(ps1, dn[y]);

                alp2 = L_add(alp1, (*p1++) * (1 << 13));
                alp2 = L_add(alp2, (*p2++) * (1 << 14));
                alp_16 = extract_h(alp2);
                sq = vo_mult(ps2, ps2);
                s = L_sub(vo_L_mult(alpk, sq), L_mult(sqk, alp_16));

                if (s > 0)
                {
                    sqk = sq;
                    alpk = alp_16;
                    pos = y;
                }
            }
            p1 -= NB_POS;              /* rewind cor_y[] for the next x */

            if (pos >= 0)
            {
                *ix = x;
                *iy = pos;
            }
        } else
        {
            p2 += NB_POS;              /* skip this x's row of rrixiy[] */
        }
    }

    *ps = add1(*ps, add1(dn[*ix], dn[*iy]));
    *alp = alpk;

    return;
}
1057
1058
1059
1060
1061