• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/******************************************************************************
 *                                                                            *
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 */
20 #include <string.h>
21 
22 #include "ixheaacd_sbr_common.h"
23 #include <ixheaacd_type_def.h>
24 
25 #include "ixheaacd_constants.h"
26 #include "ixheaacd_basic_ops32.h"
27 #include "ixheaacd_basic_ops16.h"
28 #include "ixheaacd_basic_ops40.h"
29 #include "ixheaacd_basic_ops.h"
30 
31 #include "ixheaacd_intrinsics.h"
32 #include "ixheaacd_common_rom.h"
33 #include "ixheaacd_bitbuffer.h"
34 #include "ixheaacd_sbrdecsettings.h"
35 #include "ixheaacd_sbr_scale.h"
36 #include "ixheaacd_lpp_tran.h"
37 #include "ixheaacd_env_extr_part.h"
38 #include "ixheaacd_sbr_rom.h"
39 #include "ixheaacd_hybrid.h"
40 #include "ixheaacd_ps_dec.h"
41 #include "ixheaacd_env_extr.h"
42 #include "ixheaacd_qmf_dec.h"
43 
44 #include <ixheaacd_basic_op.h>
45 #include "ixheaacd_env_calc.h"
46 
47 #include "ixheaacd_interface.h"
48 
49 #include "ixheaacd_function_selector.h"
50 #include "ixheaacd_audioobjtypes.h"
51 #if !__ARM_NEON__
52 
/* Number of output words produced by ixheaacd_dct3_32(); also drives its
 * loop bounds. */
#define DCT3_LEN (32)
/* Length constant for the 64-point transform path (not referenced by the
 * functions in this part of the file). */
#define DCT2_LEN (64)

/* Right shift applied to every input word read by ixheaacd_dct3_32(). */
#define LP_SHIFT_VAL 7
#define HQ_SHIFT_64 4
/* Left shift applied to each twiddled output of ixheaacd_esbr_radix4bfly(). */
#define RADIXSHIFT 1
/* Right shift applied to the time-domain inputs in ixheaacd_fwd_modulation(). */
#define HQ_SHIFT_VAL 4
60 
ixheaacd_dct3_32(WORD32 * input,WORD32 * output,const WORD16 * main_twidle_fwd,const WORD16 * post_tbl,const WORD16 * w_16,const WORD32 * p_table)61 VOID ixheaacd_dct3_32(WORD32 *input, WORD32 *output,
62                       const WORD16 *main_twidle_fwd, const WORD16 *post_tbl,
63                       const WORD16 *w_16, const WORD32 *p_table) {
64   WORD32 n, k;
65 
66   WORD32 temp1[6];
67   WORD32 temp2[4];
68   WORD16 twid_re, twid_im;
69   WORD32 *ptr_reverse, *ptr_forward, *p_out, *ptr_out1;
70   const WORD16 *twidle_fwd, *twidle_rev;
71 
72   ptr_forward = &input[49];
73   ptr_reverse = &input[47];
74 
75   p_out = output;
76   twidle_fwd = main_twidle_fwd;
77   twidle_fwd += 4;
78 
79   *p_out++ = input[48] >> LP_SHIFT_VAL;
80   *p_out++ = 0;
81 
82   for (n = 1; n < DCT3_LEN / 2; n++) {
83     temp1[0] = *ptr_forward++;
84     temp1[1] = *ptr_reverse--;
85     temp1[0] = ixheaacd_add32_sat(ixheaacd_shr32(temp1[0], LP_SHIFT_VAL),
86                               ixheaacd_shr32(temp1[1], LP_SHIFT_VAL));
87 
88     temp1[2] = *(ptr_forward - 33);
89     temp1[3] = *(ptr_reverse - 31);
90     temp1[1] = ixheaacd_sub32_sat(ixheaacd_shr32(temp1[2], LP_SHIFT_VAL),
91                               ixheaacd_shr32(temp1[3], LP_SHIFT_VAL));
92     twid_re = *twidle_fwd++;
93 
94     twid_im = *twidle_fwd;
95     twidle_fwd += 3;
96 
97     *p_out++ = ixheaacd_mult32x16in32(temp1[0], twid_re) +
98                ixheaacd_mult32x16in32(temp1[1], twid_im);
99     *p_out++ = -ixheaacd_mult32x16in32(temp1[1], twid_re) +
100                ixheaacd_mult32x16in32(temp1[0], twid_im);
101   }
102 
103   twid_re = *twidle_fwd++;
104 
105   twid_im = *twidle_fwd;
106   twidle_fwd += 3;
107 
108   temp1[1] = *ptr_reverse--;
109   temp1[0] = *(ptr_reverse - 31);
110   temp1[1] = ixheaacd_sub32_sat(ixheaacd_shr32(temp1[1], LP_SHIFT_VAL),
111                             ixheaacd_shr32(temp1[0], LP_SHIFT_VAL));
112 
113   temp1[0] = temp1[1];
114 
115   temp2[2] = ixheaacd_mult32x16in32(temp1[0], twid_re) +
116              ixheaacd_mult32x16in32(temp1[1], twid_im);
117   temp2[3] = -ixheaacd_mult32x16in32(temp1[1], twid_re) +
118              ixheaacd_mult32x16in32(temp1[0], twid_im);
119 
120   ptr_forward = output;
121   ptr_reverse = &output[DCT3_LEN - 1];
122 
123   temp2[0] = *ptr_forward++;
124   temp2[1] = *ptr_forward--;
125 
126   temp1[0] = -temp2[1] - temp2[3];
127   temp1[1] = temp2[0] - temp2[2];
128   temp2[0] = (temp2[0] + temp2[2] + temp1[0]);
129   temp2[1] = (temp2[1] - temp2[3] + temp1[1]);
130 
131   temp2[0] >>= 1;
132   temp2[1] >>= 1;
133 
134   *ptr_forward++ = temp2[0];
135   *ptr_forward++ = temp2[1];
136 
137   twidle_fwd = post_tbl + 2;
138   twidle_rev = post_tbl + 14;
139 
140   for (n = 1; n < DCT3_LEN / 4; n++) {
141     temp2[0] = *ptr_forward++;
142     temp2[1] = *ptr_forward--;
143     temp2[3] = *ptr_reverse--;
144     temp2[2] = *ptr_reverse++;
145 
146     twid_re = *twidle_rev;
147     twidle_rev -= 2;
148     twid_im = *twidle_fwd;
149     twidle_fwd += 2;
150 
151     temp1[0] = temp2[0] - temp2[2];
152     temp1[1] = (temp2[0] + temp2[2]);
153 
154     temp1[2] = temp2[1] + temp2[3];
155     temp1[3] = (temp2[1] - temp2[3]);
156 
157     temp1[4] = ixheaacd_mult32x16in32(temp1[0], twid_re) +
158                ixheaacd_mult32x16in32(temp1[2], twid_im);
159     temp1[5] = -ixheaacd_mult32x16in32(temp1[2], twid_re) +
160                ixheaacd_mult32x16in32(temp1[0], twid_im);
161 
162     temp1[1] >>= 1;
163     temp1[3] >>= 1;
164 
165     *ptr_forward++ = temp1[1] - temp1[4];
166     *ptr_forward++ = temp1[3] + temp1[5];
167 
168     *ptr_reverse-- = -temp1[3] + temp1[5];
169     *ptr_reverse-- = temp1[1] + temp1[4];
170   }
171   temp2[0] = *ptr_forward++;
172   temp2[1] = *ptr_forward--;
173   temp2[3] = *ptr_reverse--;
174   temp2[2] = *ptr_reverse++;
175 
176   twid_re = -*twidle_rev;
177   twidle_rev -= 2;
178   twid_im = *twidle_fwd;
179   twidle_fwd += 2;
180 
181   temp1[0] = temp2[0] - temp2[2];
182   temp1[1] = (temp2[0] + temp2[2]);
183 
184   temp1[2] = temp2[1] + temp2[3];
185   temp1[3] = (temp2[1] - temp2[3]);
186 
187   temp1[4] = ixheaacd_mult32x16in32(temp1[0], twid_re) -
188              ixheaacd_mult32x16in32(temp1[2], twid_im);
189   temp1[5] = ixheaacd_mult32x16in32(temp1[2], twid_re) +
190              ixheaacd_mult32x16in32(temp1[0], twid_im);
191 
192   temp1[1] >>= 1;
193   temp1[3] >>= 1;
194   *ptr_forward++ = temp1[1] + temp1[4];
195   *ptr_forward++ = temp1[3] + temp1[5];
196 
197   ixheaacd_radix4bfly(w_16, output, 1, 4);
198   ixheaacd_postradixcompute4(input, output, p_table, 16);
199 
200   output[0] = input[0];
201   output[2] = input[1];
202 
203   p_out = input + 2;
204   ptr_forward = output + 1;
205   ptr_reverse = output + 30;
206   ptr_out1 = input + 18;
207 
208   for (k = (DCT3_LEN / 4) - 1; k != 0; k--) {
209     WORD32 tempre, tempim;
210 
211     tempre = *p_out++;
212     tempim = *p_out++;
213     *ptr_forward = (tempim);
214     ptr_forward += 2;
215     *ptr_forward = (tempre);
216     ptr_forward += 2;
217 
218     tempre = *ptr_out1++;
219     tempim = *ptr_out1++;
220     *ptr_reverse = (tempim);
221     ptr_reverse -= 2;
222     *ptr_reverse = (tempre);
223     ptr_reverse -= 2;
224   }
225 
226   {
227     WORD32 tempre, tempim;
228     tempre = *p_out++;
229     tempim = *p_out++;
230     *ptr_forward = (tempim);
231     ptr_forward += 2;
232     *ptr_forward = (tempre);
233     ptr_forward += 2;
234   }
235 
236   return;
237 }
238 
ixheaacd_dct2_64(WORD32 * x,WORD32 * X,ia_qmf_dec_tables_struct * qmf_dec_tables_ptr,WORD16 * filter_states)239 VOID ixheaacd_dct2_64(WORD32 *x, WORD32 *X,
240                       ia_qmf_dec_tables_struct *qmf_dec_tables_ptr,
241                       WORD16 *filter_states) {
242   ixheaacd_pretwdct2(x, X);
243 
244   ixheaacd_radix4bfly(qmf_dec_tables_ptr->w_32, X, 1, 8);
245 
246   ixheaacd_radix4bfly(qmf_dec_tables_ptr->w_32 + 48, X, 4, 2);
247 
248   ixheaacd_postradixcompute2(x, X, qmf_dec_tables_ptr->dig_rev_table2_32, 32);
249 
250   ixheaacd_fftposttw(x, qmf_dec_tables_ptr);
251 
252   ixheaacd_posttwdct2(x, filter_states, qmf_dec_tables_ptr);
253 
254   return;
255 }
256 
ixheaacd_cos_sin_mod(WORD32 * subband,ia_sbr_qmf_filter_bank_struct * qmf_bank,WORD16 * p_twiddle,WORD32 * p_dig_rev_tbl)257 VOID ixheaacd_cos_sin_mod(WORD32 *subband,
258                           ia_sbr_qmf_filter_bank_struct *qmf_bank,
259                           WORD16 *p_twiddle, WORD32 *p_dig_rev_tbl) {
260   WORD32 re2, re3;
261   WORD16 wim, wre;
262 
263   WORD32 i, M_2;
264   WORD32 M = ixheaacd_shr32(qmf_bank->no_channels, 1);
265 
266   const WORD16 *p_sin;
267   const WORD16 *p_sin_cos = &qmf_bank->cos_twiddle[0];
268   WORD32 subband_tmp[128];
269   WORD32 re;
270   WORD32 im;
271   WORD32 *psubband, *psubband1;
272   WORD32 *psubband_t, *psubband1_t;
273   WORD32 *psubband2, *psubband12;
274   WORD32 *psubband_t2, *psubband1_t2;
275 
276   M_2 = ixheaacd_shr32(M, 1);
277 
278   psubband = &subband[0];
279   psubband1 = &subband[2 * M - 1];
280   psubband_t = subband_tmp;
281   psubband1_t = &subband_tmp[2 * M - 1];
282 
283   psubband2 = &subband[64];
284   psubband12 = &subband[2 * M - 1 + 64];
285   psubband_t2 = &subband_tmp[64];
286   psubband1_t2 = &subband_tmp[2 * M - 1 + 64];
287 
288   for (i = (M_2 >> 1) - 1; i >= 0; i--) {
289     re = *psubband++;
290     im = *psubband1--;
291 
292     wim = *p_sin_cos++;
293     wre = *p_sin_cos++;
294 
295     *psubband_t++ = ixheaacd_add32_sat(ixheaacd_mult32x16in32(re, wre),
296                                    ixheaacd_mult32x16in32(im, wim));
297     *psubband_t++ = ixheaacd_sub32_sat(ixheaacd_mult32x16in32(im, wre),
298                                        ixheaacd_mult32x16in32(re, wim));
299 
300     re = *psubband2++;
301     im = *psubband12--;
302 
303     *psubband_t2++ = ixheaacd_sub32_sat(ixheaacd_mult32x16in32(im, wim),
304                                         ixheaacd_mult32x16in32(re, wre));
305     *psubband_t2++ = ixheaacd_add32_sat(ixheaacd_mult32x16in32(re, wim),
306                                     ixheaacd_mult32x16in32(im, wre));
307 
308     re = *psubband1--;
309     im = *psubband++;
310 
311     wim = *p_sin_cos++;
312     wre = *p_sin_cos++;
313 
314     *psubband1_t-- = ixheaacd_sub32_sat(ixheaacd_mult32x16in32(im, wre),
315                                         ixheaacd_mult32x16in32(re, wim));
316     *psubband1_t-- = ixheaacd_add32_sat(ixheaacd_mult32x16in32(re, wre),
317                                     ixheaacd_mult32x16in32(im, wim));
318 
319     re = *psubband12--;
320     im = *psubband2++;
321 
322     *psubband1_t2-- = ixheaacd_add32_sat(ixheaacd_mult32x16in32(re, wim),
323                                      ixheaacd_mult32x16in32(im, wre));
324     *psubband1_t2-- = ixheaacd_sub32_sat(ixheaacd_mult32x16in32(im, wim),
325                                          ixheaacd_mult32x16in32(re, wre));
326 
327     re = *psubband++;
328     im = *psubband1--;
329 
330     wim = *p_sin_cos++;
331     wre = *p_sin_cos++;
332 
333     *psubband_t++ = ixheaacd_add32_sat(ixheaacd_mult32x16in32(re, wre),
334                                    ixheaacd_mult32x16in32(im, wim));
335     *psubband_t++ = ixheaacd_sub32_sat(ixheaacd_mult32x16in32(im, wre),
336                                        ixheaacd_mult32x16in32(re, wim));
337 
338     re = *psubband2++;
339     im = *psubband12--;
340 
341     *psubband_t2++ = ixheaacd_sub32_sat(ixheaacd_mult32x16in32(im, wim),
342                                         ixheaacd_mult32x16in32(re, wre));
343     *psubband_t2++ = ixheaacd_add32_sat(ixheaacd_mult32x16in32(re, wim),
344                                     ixheaacd_mult32x16in32(im, wre));
345 
346     re = *psubband1--;
347     im = *psubband++;
348 
349     wim = *p_sin_cos++;
350     wre = *p_sin_cos++;
351 
352     *psubband1_t-- = ixheaacd_sub32_sat(ixheaacd_mult32x16in32(im, wre),
353                                         ixheaacd_mult32x16in32(re, wim));
354     *psubband1_t-- = ixheaacd_add32_sat(ixheaacd_mult32x16in32(re, wre),
355                                     ixheaacd_mult32x16in32(im, wim));
356 
357     re = *psubband12--;
358     im = *psubband2++;
359     ;
360 
361     *psubband1_t2-- = ixheaacd_add32_sat(ixheaacd_mult32x16in32(re, wim),
362                                      ixheaacd_mult32x16in32(im, wre));
363     *psubband1_t2-- = ixheaacd_sub32_sat(ixheaacd_mult32x16in32(im, wim),
364                                          ixheaacd_mult32x16in32(re, wre));
365   }
366 
367   if (M == 32) {
368     ixheaacd_radix4bfly(p_twiddle, subband_tmp, 1, 8);
369     ixheaacd_radix4bfly(p_twiddle + 48, subband_tmp, 4, 2);
370     ixheaacd_postradixcompute2(subband, subband_tmp, p_dig_rev_tbl, 32);
371 
372     ixheaacd_radix4bfly(p_twiddle, &subband_tmp[64], 1, 8);
373     ixheaacd_radix4bfly(p_twiddle + 48, &subband_tmp[64], 4, 2);
374     ixheaacd_postradixcompute2(&subband[64], &subband_tmp[64], p_dig_rev_tbl,
375                                32);
376 
377   } else {
378     ixheaacd_radix4bfly(p_twiddle, subband_tmp, 1, 4);
379     ixheaacd_postradixcompute4(subband, subband_tmp, p_dig_rev_tbl, 16);
380 
381     ixheaacd_radix4bfly(p_twiddle, &subband_tmp[64], 1, 4);
382     ixheaacd_postradixcompute4(&subband[64], &subband_tmp[64], p_dig_rev_tbl,
383                                16);
384   }
385 
386   psubband = &subband[0];
387   psubband1 = &subband[2 * M - 1];
388 
389   re = *psubband1;
390 
391   *psubband = *psubband >> 1;
392   psubband++;
393   *psubband1 = ixheaacd_negate32(*psubband >> 1);
394   psubband1--;
395 
396   p_sin = &qmf_bank->alt_sin_twiddle[0];
397   wim = *p_sin++;
398   wre = *p_sin++;
399 
400   im = *psubband1;
401 
402   *psubband1-- = ixheaacd_add32_sat(ixheaacd_mult32x16in32(re, wre),
403                                 ixheaacd_mult32x16in32(im, wim));
404   *psubband++ = ixheaacd_sub32_sat(ixheaacd_mult32x16in32(im, wre),
405                                    ixheaacd_mult32x16in32(re, wim));
406 
407   psubband2 = &subband[64];
408   psubband12 = &subband[2 * M - 1 + 64];
409 
410   re = *psubband12;
411 
412   *psubband12-- = ixheaacd_negate32_sat(*psubband2 >> 1);
413 
414   *psubband2 = psubband2[1] >> 1;
415 
416   psubband2++;
417 
418   im = *psubband12;
419 
420   *psubband2++ = ixheaacd_negate32_sat(ixheaacd_add32_sat(
421       ixheaacd_mult32x16in32(re, wre), ixheaacd_mult32x16in32(im, wim)));
422   *psubband12-- = ixheaacd_sub32_sat(ixheaacd_mult32x16in32(re, wim),
423                                      ixheaacd_mult32x16in32(im, wre));
424 
425   for (i = (M_2 - 2); i >= 0; i--) {
426     im = psubband[0];
427 
428     re = psubband[1];
429 
430     re2 = *psubband1;
431 
432     *psubband++ = ixheaacd_add32_sat(ixheaacd_mult32x16in32(re, wim),
433                                  ixheaacd_mult32x16in32(im, wre));
434     *psubband1-- = ixheaacd_sub32_sat(ixheaacd_mult32x16in32(im, wim),
435                                       ixheaacd_mult32x16in32(re, wre));
436 
437     im = psubband2[0];
438 
439     re = psubband2[1];
440 
441     re3 = *psubband12;
442 
443     *psubband12-- = ixheaacd_negate32_sat(ixheaacd_add32_sat(
444         ixheaacd_mult32x16in32(re, wim), ixheaacd_mult32x16in32(im, wre)));
445     *psubband2++ = ixheaacd_sub32_sat(ixheaacd_mult32x16in32(re, wre),
446                                       ixheaacd_mult32x16in32(im, wim));
447 
448     wim = *p_sin++;
449     wre = *p_sin++;
450     im = psubband1[0];
451 
452     *psubband1-- = ixheaacd_add32_sat(ixheaacd_mult32x16in32(re2, wre),
453                                   ixheaacd_mult32x16in32(im, wim));
454     *psubband++ = ixheaacd_sub32_sat(ixheaacd_mult32x16in32(im, wre),
455                                      ixheaacd_mult32x16in32(re2, wim));
456 
457     im = psubband12[0];
458 
459     *psubband2++ = ixheaacd_negate32_sat(ixheaacd_add32_sat(
460         ixheaacd_mult32x16in32(re3, wre), ixheaacd_mult32x16in32(im, wim)));
461     *psubband12-- = ixheaacd_sub32_sat(ixheaacd_mult32x16in32(re3, wim),
462                                        ixheaacd_mult32x16in32(im, wre));
463   }
464 }
465 
ixheaacd_fwd_modulation(const WORD32 * p_time_in1,WORD32 * real_subband,WORD32 * imag_subband,ia_sbr_qmf_filter_bank_struct * qmf_bank,ia_qmf_dec_tables_struct * qmf_dec_tables_ptr)466 VOID ixheaacd_fwd_modulation(const WORD32 *p_time_in1, WORD32 *real_subband,
467                              WORD32 *imag_subband,
468                              ia_sbr_qmf_filter_bank_struct *qmf_bank,
469                              ia_qmf_dec_tables_struct *qmf_dec_tables_ptr) {
470   WORD32 i;
471   const WORD32 *p_time_in2 = &p_time_in1[2 * NO_ANALYSIS_CHANNELS - 1];
472   WORD32 temp1, temp2;
473   WORD32 *t_real_subband = real_subband;
474   WORD32 *t_imag_subband = imag_subband;
475   const WORD16 *tcos;
476 
477   for (i = NO_ANALYSIS_CHANNELS - 1; i >= 0; i--) {
478     temp1 = ixheaacd_shr32(*p_time_in1++, HQ_SHIFT_VAL);
479     temp2 = ixheaacd_shr32(*p_time_in2--, HQ_SHIFT_VAL);
480 
481     *t_real_subband++ = ixheaacd_sub32_sat(temp1, temp2);
482 
483     *t_imag_subband++ = ixheaacd_add32_sat(temp1, temp2);
484   }
485 
486   ixheaacd_cos_sin_mod(real_subband, qmf_bank, qmf_dec_tables_ptr->w_16,
487                        qmf_dec_tables_ptr->dig_rev_table4_16);
488 
489   tcos = qmf_bank->t_cos;
490 
491   for (i = (qmf_bank->usb - qmf_bank->lsb - 1); i >= 0; i--) {
492     WORD16 cosh, sinh;
493     WORD32 re, im;
494 
495     re = *real_subband;
496     im = *imag_subband;
497     cosh = *tcos++;
498     sinh = *tcos++;
499     *real_subband++ = ixheaacd_add32_sat(ixheaacd_mult32x16in32_shl(re, cosh),
500                                      ixheaacd_mult32x16in32_shl(im, sinh));
501     *imag_subband++ = ixheaacd_sub32_sat(ixheaacd_mult32x16in32_shl(im, cosh),
502                                          ixheaacd_mult32x16in32_shl(re, sinh));
503   }
504 }
505 
ixheaacd_sbr_qmfanal32_winadd(WORD16 * inp1,WORD16 * inp2,WORD16 * p_qmf1,WORD16 * p_qmf2,WORD32 * p_out)506 VOID ixheaacd_sbr_qmfanal32_winadd(WORD16 *inp1, WORD16 *inp2, WORD16 *p_qmf1,
507                                    WORD16 *p_qmf2, WORD32 *p_out) {
508   WORD32 n;
509 
510   for (n = 0; n < 32; n += 2) {
511     WORD32 accu;
512 
513     accu = ixheaacd_mult16x16in32(inp1[n + 0], p_qmf1[2 * (n + 0)]);
514     accu = ixheaacd_add32_sat(
515         accu, ixheaacd_mult16x16in32(inp1[n + 64], p_qmf1[2 * (n + 64)]));
516     accu = ixheaacd_add32_sat(
517         accu, ixheaacd_mult16x16in32(inp1[n + 128], p_qmf1[2 * (n + 128)]));
518     accu = ixheaacd_add32_sat(
519         accu, ixheaacd_mult16x16in32(inp1[n + 192], p_qmf1[2 * (n + 192)]));
520     accu = ixheaacd_add32_sat(
521         accu, ixheaacd_mult16x16in32(inp1[n + 256], p_qmf1[2 * (n + 256)]));
522     p_out[n] = accu;
523 
524     accu = ixheaacd_mult16x16in32(inp1[n + 1 + 0], p_qmf1[2 * (n + 1 + 0)]);
525     accu = ixheaacd_add32_sat(
526         accu,
527         ixheaacd_mult16x16in32(inp1[n + 1 + 64], p_qmf1[2 * (n + 1 + 64)]));
528     accu = ixheaacd_add32_sat(
529         accu,
530         ixheaacd_mult16x16in32(inp1[n + 1 + 128], p_qmf1[2 * (n + 1 + 128)]));
531     accu = ixheaacd_add32_sat(
532         accu,
533         ixheaacd_mult16x16in32(inp1[n + 1 + 192], p_qmf1[2 * (n + 1 + 192)]));
534     accu = ixheaacd_add32_sat(
535         accu,
536         ixheaacd_mult16x16in32(inp1[n + 1 + 256], p_qmf1[2 * (n + 1 + 256)]));
537     p_out[n + 1] = accu;
538 
539     accu = ixheaacd_mult16x16in32(inp2[n + 0], p_qmf2[2 * (n + 0)]);
540     accu = ixheaacd_add32_sat(
541         accu, ixheaacd_mult16x16in32(inp2[n + 64], p_qmf2[2 * (n + 64)]));
542     accu = ixheaacd_add32_sat(
543         accu, ixheaacd_mult16x16in32(inp2[n + 128], p_qmf2[2 * (n + 128)]));
544     accu = ixheaacd_add32_sat(
545         accu, ixheaacd_mult16x16in32(inp2[n + 192], p_qmf2[2 * (n + 192)]));
546     accu = ixheaacd_add32_sat(
547         accu, ixheaacd_mult16x16in32(inp2[n + 256], p_qmf2[2 * (n + 256)]));
548     p_out[n + 32] = accu;
549 
550     accu = ixheaacd_mult16x16in32(inp2[n + 1 + 0], p_qmf2[2 * (n + 1 + 0)]);
551     accu = ixheaacd_add32_sat(
552         accu,
553         ixheaacd_mult16x16in32(inp2[n + 1 + 64], p_qmf2[2 * (n + 1 + 64)]));
554     accu = ixheaacd_add32_sat(
555         accu,
556         ixheaacd_mult16x16in32(inp2[n + 1 + 128], p_qmf2[2 * (n + 1 + 128)]));
557     accu = ixheaacd_add32_sat(
558         accu,
559         ixheaacd_mult16x16in32(inp2[n + 1 + 192], p_qmf2[2 * (n + 1 + 192)]));
560     accu = ixheaacd_add32_sat(
561         accu,
562         ixheaacd_mult16x16in32(inp2[n + 1 + 256], p_qmf2[2 * (n + 1 + 256)]));
563     p_out[n + 1 + 32] = accu;
564   }
565 }
566 
ixheaacd_cplx_anal_qmffilt(const WORD16 * time_sample_buf,ia_sbr_scale_fact_struct * sbr_scale_factor,WORD32 ** qmf_real,WORD32 ** qmf_imag,ia_sbr_qmf_filter_bank_struct * qmf_bank,ia_qmf_dec_tables_struct * qmf_dec_tables_ptr,WORD32 ch_fac,WORD32 low_pow_flag,WORD audio_object_type)567 VOID ixheaacd_cplx_anal_qmffilt(const WORD16 *time_sample_buf,
568                                 ia_sbr_scale_fact_struct *sbr_scale_factor,
569                                 WORD32 **qmf_real, WORD32 **qmf_imag,
570                                 ia_sbr_qmf_filter_bank_struct *qmf_bank,
571                                 ia_qmf_dec_tables_struct *qmf_dec_tables_ptr,
572                                 WORD32 ch_fac, WORD32 low_pow_flag,
573                                 WORD audio_object_type) {
574   WORD32 i, k;
575   WORD32 num_time_slots = qmf_bank->num_time_slots;
576 
577   WORD32 analysis_buffer[4 * NO_ANALYSIS_CHANNELS];
578   WORD16 *filter_states = qmf_bank->core_samples_buffer;
579 
580   WORD16 *fp1, *fp2, *tmp;
581 
582   WORD16 *filter_1;
583   WORD16 *filter_2;
584   WORD16 *filt_ptr;
585   if (audio_object_type != AOT_ER_AAC_ELD &&
586       audio_object_type != AOT_ER_AAC_LD) {
587     qmf_bank->filter_pos +=
588         (qmf_dec_tables_ptr->qmf_c - qmf_bank->analy_win_coeff);
589     qmf_bank->analy_win_coeff = qmf_dec_tables_ptr->qmf_c;
590   } else {
591     qmf_bank->filter_pos +=
592         (qmf_dec_tables_ptr->qmf_c_eld3 - qmf_bank->analy_win_coeff);
593     qmf_bank->analy_win_coeff = qmf_dec_tables_ptr->qmf_c_eld3;
594   }
595 
596   filter_1 = qmf_bank->filter_pos;
597 
598   if (audio_object_type != AOT_ER_AAC_ELD &&
599       audio_object_type != AOT_ER_AAC_LD) {
600     filter_2 = filter_1 + 64;
601   } else {
602     filter_2 = filter_1 + 32;
603   }
604 
605   sbr_scale_factor->st_lb_scale = 0;
606   sbr_scale_factor->lb_scale = -10;
607   if (!low_pow_flag) {
608     if (audio_object_type != AOT_ER_AAC_ELD &&
609         audio_object_type != AOT_ER_AAC_LD) {
610       sbr_scale_factor->lb_scale = -8;
611     } else {
612       sbr_scale_factor->lb_scale = -9;
613     }
614     qmf_bank->cos_twiddle =
615         (WORD16 *)qmf_dec_tables_ptr->sbr_sin_cos_twiddle_l32;
616     qmf_bank->alt_sin_twiddle =
617         (WORD16 *)qmf_dec_tables_ptr->sbr_alt_sin_twiddle_l32;
618     if (audio_object_type != AOT_ER_AAC_ELD &&
619         audio_object_type != AOT_ER_AAC_LD) {
620       qmf_bank->t_cos = (WORD16 *)qmf_dec_tables_ptr->sbr_t_cos_sin_l32;
621     } else {
622       qmf_bank->t_cos =
623           (WORD16 *)qmf_dec_tables_ptr->ixheaacd_sbr_t_cos_sin_l32_eld;
624     }
625   }
626 
627   fp1 = qmf_bank->anal_filter_states;
628   fp2 = qmf_bank->anal_filter_states + NO_ANALYSIS_CHANNELS;
629 
630   if (audio_object_type == AOT_ER_AAC_ELD ||
631       audio_object_type == AOT_ER_AAC_LD) {
632     filter_2 = qmf_bank->filter_2;
633     fp1 = qmf_bank->fp1_anal;
634     fp2 = qmf_bank->fp2_anal;
635   }
636 
637   for (i = 0; i < num_time_slots; i++) {
638     for (k = 0; k < NO_ANALYSIS_CHANNELS; k++)
639       filter_states[NO_ANALYSIS_CHANNELS - 1 - k] = time_sample_buf[ch_fac * k];
640 
641     if (audio_object_type != AOT_ER_AAC_ELD &&
642         audio_object_type != AOT_ER_AAC_LD) {
643       ixheaacd_sbr_qmfanal32_winadd(fp1, fp2, filter_1, filter_2,
644                                     analysis_buffer);
645     } else {
646       ixheaacd_sbr_qmfanal32_winadd_eld(fp1, fp2, filter_1, filter_2,
647                                         analysis_buffer);
648     }
649 
650     time_sample_buf += NO_ANALYSIS_CHANNELS * ch_fac;
651 
652     filter_states -= NO_ANALYSIS_CHANNELS;
653     if (filter_states < qmf_bank->anal_filter_states) {
654       filter_states = qmf_bank->anal_filter_states + 288;
655     }
656 
657     tmp = fp1;
658     fp1 = fp2;
659     fp2 = tmp;
660     if (audio_object_type != AOT_ER_AAC_ELD &&
661         audio_object_type != AOT_ER_AAC_LD) {
662       filter_1 += 64;
663       filter_2 += 64;
664     } else {
665       filter_1 += 32;
666       filter_2 += 32;
667     }
668 
669     filt_ptr = filter_1;
670     filter_1 = filter_2;
671     filter_2 = filt_ptr;
672     if (audio_object_type != AOT_ER_AAC_ELD &&
673         audio_object_type != AOT_ER_AAC_LD) {
674       if (filter_2 > (qmf_bank->analy_win_coeff + 640)) {
675         filter_1 = (WORD16 *)qmf_bank->analy_win_coeff;
676         filter_2 = (WORD16 *)qmf_bank->analy_win_coeff + 64;
677       }
678     } else {
679       if (filter_2 > (qmf_bank->analy_win_coeff + 320)) {
680         filter_1 = (WORD16 *)qmf_bank->analy_win_coeff;
681         filter_2 = (WORD16 *)qmf_bank->analy_win_coeff + 32;
682       }
683     }
684 
685     if (!low_pow_flag) {
686       ixheaacd_fwd_modulation(analysis_buffer, qmf_real[i], qmf_imag[i],
687                               qmf_bank, qmf_dec_tables_ptr);
688     } else {
689       ixheaacd_dct3_32(
690           (WORD32 *)analysis_buffer, qmf_real[i], qmf_dec_tables_ptr->dct23_tw,
691           qmf_dec_tables_ptr->post_fft_tbl, qmf_dec_tables_ptr->w_16,
692           qmf_dec_tables_ptr->dig_rev_table4_16);
693     }
694   }
695 
696   qmf_bank->filter_pos = filter_1;
697   qmf_bank->core_samples_buffer = filter_states;
698 
699   if (audio_object_type == AOT_ER_AAC_ELD || audio_object_type == AOT_ER_AAC_LD)
700 
701   {
702     qmf_bank->fp1_anal = fp1;
703     qmf_bank->fp2_anal = fp2;
704     qmf_bank->filter_2 = filter_2;
705   }
706 }
707 
ixheaacd_inv_modulation_lp(WORD32 * qmf_real,WORD16 * filter_states,ia_sbr_qmf_filter_bank_struct * syn_qmf,ia_qmf_dec_tables_struct * qmf_dec_tables_ptr)708 VOID ixheaacd_inv_modulation_lp(WORD32 *qmf_real, WORD16 *filter_states,
709                                 ia_sbr_qmf_filter_bank_struct *syn_qmf,
710                                 ia_qmf_dec_tables_struct *qmf_dec_tables_ptr) {
711   WORD32 L = syn_qmf->no_channels;
712   const WORD32 M = (L >> 1);
713   WORD32 *dct_in = qmf_real;
714   WORD32 time_out[2 * NO_SYNTHESIS_CHANNELS];
715 
716   WORD32 *ptime_out = &time_out[0];
717 
718   if (L == 64)
719     ixheaacd_dct2_64(dct_in, ptime_out, qmf_dec_tables_ptr, filter_states + M);
720   else
721     ixheaacd_dct2_32(dct_in, time_out, qmf_dec_tables_ptr, filter_states);
722 
723   filter_states[3 * M] = 0;
724 }
725 
ixheaacd_inv_emodulation(WORD32 * qmf_real,ia_sbr_qmf_filter_bank_struct * syn_qmf,ia_qmf_dec_tables_struct * qmf_dec_tables_ptr)726 VOID ixheaacd_inv_emodulation(WORD32 *qmf_real,
727                               ia_sbr_qmf_filter_bank_struct *syn_qmf,
728                               ia_qmf_dec_tables_struct *qmf_dec_tables_ptr) {
729   if (syn_qmf->no_channels == 64)
730     ixheaacd_cos_sin_mod(qmf_real, syn_qmf, qmf_dec_tables_ptr->w_32,
731                          qmf_dec_tables_ptr->dig_rev_table2_32);
732   else
733     ixheaacd_cos_sin_mod(qmf_real, syn_qmf, qmf_dec_tables_ptr->w_16,
734                          qmf_dec_tables_ptr->dig_rev_table4_16);
735 }
736 
ixheaacd_esbr_radix4bfly(const WORD32 * w,WORD32 * x,WORD32 index1,WORD32 index)737 VOID ixheaacd_esbr_radix4bfly(const WORD32 *w, WORD32 *x, WORD32 index1,
738                               WORD32 index) {
739   int i;
740   WORD32 l1, l2, h2, fft_jmp;
741   WORD32 xt0_0, yt0_0, xt1_0, yt1_0, xt2_0, yt2_0;
742   WORD32 xh0_0, xh1_0, xh20_0, xh21_0, xl0_0, xl1_0, xl20_0, xl21_0;
743   WORD32 x_0, x_1, x_l1_0, x_l1_1, x_l2_0, x_l2_1;
744   WORD32 x_h2_0, x_h2_1;
745   WORD32 si10, si20, si30, co10, co20, co30;
746 
747   WORD64 mul_1, mul_2, mul_3, mul_4, mul_5, mul_6;
748   WORD64 mul_7, mul_8, mul_9, mul_10, mul_11, mul_12;
749   WORD32 *x_l1;
750   WORD32 *x_l2;
751   WORD32 *x_h2;
752   const WORD32 *w_ptr = w;
753   WORD32 i1;
754 
755   h2 = index << 1;
756   l1 = index << 2;
757   l2 = (index << 2) + (index << 1);
758 
759   x_l1 = &(x[l1]);
760   x_l2 = &(x[l2]);
761   x_h2 = &(x[h2]);
762 
763   fft_jmp = 6 * (index);
764 
765   for (i1 = 0; i1 < index1; i1++) {
766     for (i = 0; i < index; i++) {
767       si10 = (*w_ptr++);
768       co10 = (*w_ptr++);
769       si20 = (*w_ptr++);
770       co20 = (*w_ptr++);
771       si30 = (*w_ptr++);
772       co30 = (*w_ptr++);
773 
774       x_0 = x[0];
775       x_h2_0 = x[h2];
776       x_l1_0 = x[l1];
777       x_l2_0 = x[l2];
778 
779       xh0_0 = ixheaacd_add32_sat(x_0, x_l1_0);
780       xl0_0 = ixheaacd_sub32_sat(x_0, x_l1_0);
781 
782       xh20_0 = ixheaacd_add32_sat(x_h2_0, x_l2_0);
783       xl20_0 = ixheaacd_sub32_sat(x_h2_0, x_l2_0);
784 
785       x[0] = ixheaacd_add32_sat(xh0_0, xh20_0);
786       xt0_0 = ixheaacd_sub32_sat(xh0_0, xh20_0);
787 
788       x_1 = x[1];
789       x_h2_1 = x[h2 + 1];
790       x_l1_1 = x[l1 + 1];
791       x_l2_1 = x[l2 + 1];
792 
793       xh1_0 = ixheaacd_add32_sat(x_1, x_l1_1);
794       xl1_0 = ixheaacd_sub32_sat(x_1, x_l1_1);
795 
796       xh21_0 = ixheaacd_add32_sat(x_h2_1, x_l2_1);
797       xl21_0 = ixheaacd_sub32_sat(x_h2_1, x_l2_1);
798 
799       x[1] = ixheaacd_add32_sat(xh1_0, xh21_0);
800       yt0_0 = ixheaacd_sub32_sat(xh1_0, xh21_0);
801 
802       xt1_0 = ixheaacd_add32_sat(xl0_0, xl21_0);
803       xt2_0 = ixheaacd_sub32_sat(xl0_0, xl21_0);
804 
805       yt2_0 = ixheaacd_add32_sat(xl1_0, xl20_0);
806       yt1_0 = ixheaacd_sub32_sat(xl1_0, xl20_0);
807 
808       mul_11 = ixheaacd_mult64(xt2_0, co30);
809       mul_3 = ixheaacd_mult64(yt2_0, si30);
810       x[l2] = (WORD32)((mul_3 + mul_11) >> 32) << RADIXSHIFT;
811 
812       mul_5 = ixheaacd_mult64(xt2_0, si30);
813       mul_9 = ixheaacd_mult64(yt2_0, co30);
814       x[l2 + 1] = (WORD32)((mul_9 - mul_5) >> 32) << RADIXSHIFT;
815 
816       mul_12 = ixheaacd_mult64(xt0_0, co20);
817       mul_2 = ixheaacd_mult64(yt0_0, si20);
818       x[l1] = (WORD32)((mul_2 + mul_12) >> 32) << RADIXSHIFT;
819 
820       mul_6 = ixheaacd_mult64(xt0_0, si20);
821       mul_8 = ixheaacd_mult64(yt0_0, co20);
822       x[l1 + 1] = (WORD32)((mul_8 - mul_6) >> 32) << RADIXSHIFT;
823 
824       mul_4 = ixheaacd_mult64(xt1_0, co10);
825       mul_1 = ixheaacd_mult64(yt1_0, si10);
826       x[h2] = (WORD32)((mul_1 + mul_4) >> 32) << RADIXSHIFT;
827 
828       mul_10 = ixheaacd_mult64(xt1_0, si10);
829       mul_7 = ixheaacd_mult64(yt1_0, co10);
830       x[h2 + 1] = (WORD32)((mul_7 - mul_10) >> 32) << RADIXSHIFT;
831 
832       x += 2;
833     }
834     x += fft_jmp;
835     w_ptr = w_ptr - fft_jmp;
836   }
837 }
838 
ixheaacd_esbr_postradixcompute2(WORD32 * ptr_y,WORD32 * ptr_x,const WORD32 * pdig_rev_tbl,WORD32 npoints)839 VOID ixheaacd_esbr_postradixcompute2(WORD32 *ptr_y, WORD32 *ptr_x,
840                                      const WORD32 *pdig_rev_tbl,
841                                      WORD32 npoints) {
842   WORD32 i, k;
843   WORD32 h2;
844   WORD32 x_0, x_1, x_2, x_3;
845   WORD32 x_4, x_5, x_6, x_7;
846   WORD32 x_8, x_9, x_a, x_b, x_c, x_d, x_e, x_f;
847   WORD32 n00, n10, n20, n30, n01, n11, n21, n31;
848   WORD32 n02, n12, n22, n32, n03, n13, n23, n33;
849   WORD32 n0, j0;
850   WORD32 *x2, *x0;
851   WORD32 *y0, *y1, *y2, *y3;
852 
853   y0 = ptr_y;
854   y2 = ptr_y + (WORD32)npoints;
855   x0 = ptr_x;
856   x2 = ptr_x + (WORD32)(npoints >> 1);
857 
858   y1 = y0 + (WORD32)(npoints >> 2);
859   y3 = y2 + (WORD32)(npoints >> 2);
860   j0 = 8;
861   n0 = npoints >> 1;
862 
863   for (k = 0; k < 2; k++) {
864     for (i = 0; i<npoints>> 1; i += 8) {
865       h2 = *pdig_rev_tbl++ >> 2;
866 
867       x_0 = *x0++;
868       x_1 = *x0++;
869       x_2 = *x0++;
870       x_3 = *x0++;
871       x_4 = *x0++;
872       x_5 = *x0++;
873       x_6 = *x0++;
874       x_7 = *x0++;
875 
876       n00 = ixheaacd_add32_sat(x_0, x_2);
877       n01 = ixheaacd_add32_sat(x_1, x_3);
878       n20 = ixheaacd_sub32_sat(x_0, x_2);
879       n21 = ixheaacd_sub32_sat(x_1, x_3);
880       n10 = ixheaacd_add32_sat(x_4, x_6);
881       n11 = ixheaacd_add32_sat(x_5, x_7);
882       n30 = ixheaacd_sub32_sat(x_4, x_6);
883       n31 = ixheaacd_sub32_sat(x_5, x_7);
884 
885       y0[h2] = n00;
886       y0[h2 + 1] = n01;
887       y1[h2] = n10;
888       y1[h2 + 1] = n11;
889       y2[h2] = n20;
890       y2[h2 + 1] = n21;
891       y3[h2] = n30;
892       y3[h2 + 1] = n31;
893 
894       x_8 = *x2++;
895       x_9 = *x2++;
896       x_a = *x2++;
897       x_b = *x2++;
898       x_c = *x2++;
899       x_d = *x2++;
900       x_e = *x2++;
901       x_f = *x2++;
902 
903       n02 = ixheaacd_add32_sat(x_8, x_a);
904       n03 = ixheaacd_add32_sat(x_9, x_b);
905       n22 = ixheaacd_sub32_sat(x_8, x_a);
906       n23 = ixheaacd_sub32_sat(x_9, x_b);
907       n12 = ixheaacd_add32_sat(x_c, x_e);
908       n13 = ixheaacd_add32_sat(x_d, x_f);
909       n32 = ixheaacd_sub32_sat(x_c, x_e);
910       n33 = ixheaacd_sub32_sat(x_d, x_f);
911 
912       y0[h2 + 2] = n02;
913       y0[h2 + 3] = n03;
914       y1[h2 + 2] = n12;
915       y1[h2 + 3] = n13;
916       y2[h2 + 2] = n22;
917       y2[h2 + 3] = n23;
918       y3[h2 + 2] = n32;
919       y3[h2 + 3] = n33;
920     }
921     x0 += (WORD32)npoints >> 1;
922     x2 += (WORD32)npoints >> 1;
923   }
924 }
925 
ixheaacd_esbr_postradixcompute4(WORD32 * ptr_y,WORD32 * ptr_x,const WORD32 * p_dig_rev_tbl,WORD32 npoints)926 VOID ixheaacd_esbr_postradixcompute4(WORD32 *ptr_y, WORD32 *ptr_x,
927                                      const WORD32 *p_dig_rev_tbl,
928                                      WORD32 npoints) {
929   WORD32 i, k;
930   WORD32 h2;
931   WORD32 xh0_0, xh1_0, xl0_0, xl1_0;
932   WORD32 xh0_1, xh1_1, xl0_1, xl1_1;
933   WORD32 x_0, x_1, x_2, x_3;
934   WORD32 xh0_2, xh1_2, xl0_2, xl1_2, xh0_3, xh1_3, xl0_3, xl1_3;
935   WORD32 x_4, x_5, x_6, x_7;
936   WORD32 x_8, x_9, x_a, x_b, x_c, x_d, x_e, x_f;
937   WORD32 n00, n10, n20, n30, n01, n11, n21, n31;
938   WORD32 n02, n12, n22, n32, n03, n13, n23, n33;
939   WORD32 n0, j0;
940   WORD32 *x2, *x0;
941   WORD32 *y0, *y1, *y2, *y3;
942 
943   y0 = ptr_y;
944   y2 = ptr_y + (WORD32)npoints;
945   x0 = ptr_x;
946   x2 = ptr_x + (WORD32)(npoints >> 1);
947 
948   y1 = y0 + (WORD32)(npoints >> 1);
949   y3 = y2 + (WORD32)(npoints >> 1);
950 
951   j0 = 4;
952   n0 = npoints >> 2;
953 
954   for (k = 0; k < 2; k++) {
955     for (i = 0; i<npoints>> 1; i += 8) {
956       h2 = *p_dig_rev_tbl++ >> 2;
957       x_0 = *x0++;
958       x_1 = *x0++;
959       x_2 = *x0++;
960       x_3 = *x0++;
961       x_4 = *x0++;
962       x_5 = *x0++;
963       x_6 = *x0++;
964       x_7 = *x0++;
965 
966       xh0_0 = ixheaacd_add32_sat(x_0, x_4);
967       xh1_0 = ixheaacd_add32_sat(x_1, x_5);
968       xl0_0 = ixheaacd_sub32_sat(x_0, x_4);
969       xl1_0 = ixheaacd_sub32_sat(x_1, x_5);
970       xh0_1 = ixheaacd_add32_sat(x_2, x_6);
971       xh1_1 = ixheaacd_add32_sat(x_3, x_7);
972       xl0_1 = ixheaacd_sub32_sat(x_2, x_6);
973       xl1_1 = ixheaacd_sub32_sat(x_3, x_7);
974 
975       n00 = ixheaacd_add32_sat(xh0_0, xh0_1);
976       n01 = ixheaacd_add32_sat(xh1_0, xh1_1);
977       n10 = ixheaacd_add32_sat(xl0_0, xl1_1);
978       n11 = ixheaacd_sub32_sat(xl1_0, xl0_1);
979       n20 = ixheaacd_sub32_sat(xh0_0, xh0_1);
980       n21 = ixheaacd_sub32_sat(xh1_0, xh1_1);
981       n30 = ixheaacd_sub32_sat(xl0_0, xl1_1);
982       n31 = ixheaacd_add32_sat(xl1_0, xl0_1);
983 
984       y0[h2] = n00;
985       y0[h2 + 1] = n01;
986       y1[h2] = n10;
987       y1[h2 + 1] = n11;
988       y2[h2] = n20;
989       y2[h2 + 1] = n21;
990       y3[h2] = n30;
991       y3[h2 + 1] = n31;
992 
993       x_8 = *x2++;
994       x_9 = *x2++;
995       x_a = *x2++;
996       x_b = *x2++;
997       x_c = *x2++;
998       x_d = *x2++;
999       x_e = *x2++;
1000       x_f = *x2++;
1001 
1002       xh0_2 = ixheaacd_add32_sat(x_8, x_c);
1003       xh1_2 = ixheaacd_add32_sat(x_9, x_d);
1004       xl0_2 = ixheaacd_sub32_sat(x_8, x_c);
1005       xl1_2 = ixheaacd_sub32_sat(x_9, x_d);
1006       xh0_3 = ixheaacd_add32_sat(x_a, x_e);
1007       xh1_3 = ixheaacd_add32_sat(x_b, x_f);
1008       xl0_3 = ixheaacd_sub32_sat(x_a, x_e);
1009       xl1_3 = ixheaacd_sub32_sat(x_b, x_f);
1010 
1011       n02 = ixheaacd_add32_sat(xh0_2, xh0_3);
1012       n03 = ixheaacd_add32_sat(xh1_2, xh1_3);
1013       n12 = ixheaacd_add32_sat(xl0_2, xl1_3);
1014       n13 = ixheaacd_sub32_sat(xl1_2, xl0_3);
1015       n22 = ixheaacd_sub32_sat(xh0_2, xh0_3);
1016       n23 = ixheaacd_sub32_sat(xh1_2, xh1_3);
1017       n32 = ixheaacd_sub32_sat(xl0_2, xl1_3);
1018       n33 = ixheaacd_add32_sat(xl1_2, xl0_3);
1019 
1020       y0[h2 + 2] = n02;
1021       y0[h2 + 3] = n03;
1022       y1[h2 + 2] = n12;
1023       y1[h2 + 3] = n13;
1024       y2[h2 + 2] = n22;
1025       y2[h2 + 3] = n23;
1026       y3[h2 + 2] = n32;
1027       y3[h2 + 3] = n33;
1028     }
1029     x0 += (WORD32)npoints >> 1;
1030     x2 += (WORD32)npoints >> 1;
1031   }
1032 }
1033 
ixheaacd_esbr_cos_sin_mod(WORD32 * subband,ia_sbr_qmf_filter_bank_struct * qmf_bank,WORD32 * p_twiddle,WORD32 * p_dig_rev_tbl)1034 VOID ixheaacd_esbr_cos_sin_mod(WORD32 *subband,
1035                                ia_sbr_qmf_filter_bank_struct *qmf_bank,
1036                                WORD32 *p_twiddle, WORD32 *p_dig_rev_tbl) {
1037   WORD32 z;
1038   WORD32 temp[128];
1039   WORD32 scaleshift = 0;
1040 
1041   WORD32 re2, re3;
1042   WORD32 wim, wre;
1043 
1044   WORD32 i, M_2;
1045   WORD32 M = ixheaacd_shr32(qmf_bank->no_channels, 1);
1046 
1047   const WORD32 *p_sin;
1048   const WORD32 *p_sin_cos;
1049 
1050   WORD32 subband_tmp[128];
1051   WORD32 re;
1052   WORD32 im;
1053   WORD32 *psubband, *psubband1;
1054   WORD32 *psubband_t, *psubband1_t;
1055   WORD32 *psubband2, *psubband12;
1056   WORD32 *psubband_t2, *psubband1_t2;
1057 
1058   M_2 = ixheaacd_shr32(M, 1);
1059 
1060   p_sin_cos = qmf_bank->esbr_cos_twiddle;
1061 
1062   psubband = &subband[0];
1063   psubband1 = &subband[2 * M - 1];
1064   psubband_t = subband_tmp;
1065   psubband1_t = &subband_tmp[2 * M - 1];
1066 
1067   psubband2 = &subband[64];
1068   psubband12 = &subband[2 * M - 1 + 64];
1069   psubband_t2 = &subband_tmp[64];
1070   psubband1_t2 = &subband_tmp[2 * M - 1 + 64];
1071 
1072   for (i = (M_2 >> 1) - 1; i >= 0; i--) {
1073     re = *psubband++;
1074     im = *psubband1--;
1075 
1076     wim = *p_sin_cos++;
1077     wre = *p_sin_cos++;
1078 
1079     *psubband_t++ = (WORD32)(
1080         (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
1081         32);
1082     *psubband_t++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
1083                                                  ixheaacd_mult64(re, wim))) >>
1084                              32);
1085 
1086     re = *psubband2++;
1087     im = *psubband12--;
1088 
1089     *psubband_t2++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
1090                                                   ixheaacd_mult64(re, wre))) >>
1091                               32);
1092     *psubband_t2++ = (WORD32)(
1093         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
1094         32);
1095 
1096     re = *psubband1--;
1097     im = *psubband++;
1098 
1099     wim = *p_sin_cos++;
1100     wre = *p_sin_cos++;
1101 
1102     *psubband1_t-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
1103                                                   ixheaacd_mult64(re, wim))) >>
1104                               32);
1105     *psubband1_t-- = (WORD32)(
1106         (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
1107         32);
1108 
1109     re = *psubband12--;
1110     im = *psubband2++;
1111 
1112     *psubband1_t2-- = (WORD32)(
1113         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
1114         32);
1115     *psubband1_t2-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
1116                                                    ixheaacd_mult64(re, wre))) >>
1117                                32);
1118 
1119     re = *psubband++;
1120     im = *psubband1--;
1121 
1122     wim = *p_sin_cos++;
1123     wre = *p_sin_cos++;
1124 
1125     *psubband_t++ = (WORD32)(
1126         (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
1127         32);
1128     *psubband_t++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
1129                                                  ixheaacd_mult64(re, wim))) >>
1130                              32);
1131 
1132     re = *psubband2++;
1133     im = *psubband12--;
1134 
1135     *psubband_t2++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
1136                                                   ixheaacd_mult64(re, wre))) >>
1137                               32);
1138     *psubband_t2++ = (WORD32)(
1139         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
1140         32);
1141 
1142     re = *psubband1--;
1143     im = *psubband++;
1144 
1145     wim = *p_sin_cos++;
1146     wre = *p_sin_cos++;
1147 
1148     *psubband1_t-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
1149                                                   ixheaacd_mult64(re, wim))) >>
1150                               32);
1151     *psubband1_t-- = (WORD32)(
1152         (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
1153         32);
1154 
1155     re = *psubband12--;
1156     im = *psubband2++;
1157 
1158     *psubband1_t2-- = (WORD32)(
1159         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
1160         32);
1161     *psubband1_t2-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
1162                                                    ixheaacd_mult64(re, wre))) >>
1163                                32);
1164   }
1165 
1166   if (M == 32) {
1167     ixheaacd_esbr_radix4bfly(p_twiddle, subband_tmp, 1, 8);
1168     ixheaacd_esbr_radix4bfly(p_twiddle + 48, subband_tmp, 4, 2);
1169     ixheaacd_esbr_postradixcompute2(subband, subband_tmp, p_dig_rev_tbl, 32);
1170 
1171     ixheaacd_esbr_radix4bfly(p_twiddle, &subband_tmp[64], 1, 8);
1172     ixheaacd_esbr_radix4bfly(p_twiddle + 48, &subband_tmp[64], 4, 2);
1173     ixheaacd_esbr_postradixcompute2(&subband[64], &subband_tmp[64],
1174                                     p_dig_rev_tbl, 32);
1175 
1176   }
1177 
1178   else if (M == 16) {
1179     ixheaacd_esbr_radix4bfly(p_twiddle, subband_tmp, 1, 4);
1180     ixheaacd_esbr_postradixcompute4(subband, subband_tmp, p_dig_rev_tbl, 16);
1181 
1182     ixheaacd_esbr_radix4bfly(p_twiddle, &subband_tmp[64], 1, 4);
1183     ixheaacd_esbr_postradixcompute4(&subband[64], &subband_tmp[64],
1184                                     p_dig_rev_tbl, 16);
1185 
1186   }
1187 
1188   else if (M == 12) {
1189     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
1190       temp[z] = subband_tmp[2 * z];
1191       temp[12 + z] = subband_tmp[2 * z + 1];
1192     }
1193 
1194     ixheaacd_complex_fft_p3(temp, &temp[12], 12, -1, &scaleshift);
1195 
1196     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
1197       subband[2 * z] = temp[z];
1198       subband[2 * z + 1] = temp[z + 12];
1199     }
1200     scaleshift = 0;
1201     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
1202       temp[z] = subband_tmp[64 + 2 * z];
1203       temp[12 + z] = subband_tmp[64 + 2 * z + 1];
1204     }
1205 
1206     ixheaacd_complex_fft_p3(temp, &temp[12], 12, -1, &scaleshift);
1207 
1208     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
1209       subband[64 + 2 * z] = temp[z];
1210       subband[64 + 2 * z + 1] = temp[z + 12];
1211     }
1212 
1213   }
1214 
1215   else {
1216     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
1217       temp[z] = subband_tmp[2 * z];
1218       temp[8 + z] = subband_tmp[2 * z + 1];
1219     }
1220 
1221     (*ixheaacd_complex_fft_p2)(temp, &temp[8], 8, -1, &scaleshift);
1222 
1223     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
1224       subband[2 * z] = temp[z] << scaleshift;
1225       subband[2 * z + 1] = temp[z + 8] << scaleshift;
1226     }
1227     scaleshift = 0;
1228     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
1229       temp[z] = subband_tmp[64 + 2 * z];
1230       temp[8 + z] = subband_tmp[64 + 2 * z + 1];
1231     }
1232 
1233     (*ixheaacd_complex_fft_p2)(temp, &temp[8], 8, -1, &scaleshift);
1234 
1235     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
1236       subband[64 + 2 * z] = temp[z] << scaleshift;
1237       subband[64 + 2 * z + 1] = temp[8 + z] << scaleshift;
1238     }
1239   }
1240 
1241   psubband = &subband[0];
1242   psubband1 = &subband[2 * M - 1];
1243 
1244   re = *psubband1;
1245 
1246   *psubband = *psubband >> 1;
1247   psubband++;
1248   *psubband1 = ixheaacd_negate32(*psubband >> 1);
1249   psubband1--;
1250 
1251   p_sin = qmf_bank->esbr_alt_sin_twiddle;
1252 
1253   wim = *p_sin++;
1254   wre = *p_sin++;
1255 
1256   im = *psubband1;
1257 
1258   *psubband1-- = (WORD32)(
1259       (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
1260       32);
1261   *psubband++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
1262                                              ixheaacd_mult64(re, wim))) >>
1263                          32);
1264 
1265   psubband2 = &subband[64];
1266   psubband12 = &subband[2 * M - 1 + 64];
1267 
1268   re = *psubband12;
1269 
1270   *psubband12-- = ixheaacd_negate32_sat(*psubband2 >> 1);
1271 
1272   *psubband2 = psubband2[1] >> 1;
1273 
1274   psubband2++;
1275 
1276   im = *psubband12;
1277 
1278   *psubband2++ = ixheaacd_negate32_sat((WORD32)(
1279       (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
1280       32));
1281   *psubband12-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(re, wim),
1282                                                ixheaacd_mult64(im, wre))) >>
1283                            32);
1284 
1285   for (i = (M_2 - 2); i >= 0; i--) {
1286     im = psubband[0];
1287 
1288     re = psubband[1];
1289 
1290     re2 = *psubband1;
1291 
1292     *psubband++ = (WORD32)(
1293         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
1294         32);
1295     *psubband1-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
1296                                                 ixheaacd_mult64(re, wre))) >>
1297                             32);
1298 
1299     im = psubband2[0];
1300 
1301     re = psubband2[1];
1302 
1303     re3 = *psubband12;
1304 
1305     *psubband12-- = ixheaacd_negate32_sat((WORD32)(
1306         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
1307         32));
1308     *psubband2++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(re, wre),
1309                                                 ixheaacd_mult64(im, wim))) >>
1310                             32);
1311 
1312     wim = *p_sin++;
1313     wre = *p_sin++;
1314     im = psubband1[0];
1315 
1316     *psubband1-- = (WORD32)(
1317         (ixheaacd_add64(ixheaacd_mult64(re2, wre), ixheaacd_mult64(im, wim))) >>
1318         32);
1319     *psubband++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
1320                                                ixheaacd_mult64(re2, wim))) >>
1321                            32);
1322 
1323     im = psubband12[0];
1324 
1325     *psubband2++ = ixheaacd_negate32_sat((WORD32)(
1326         (ixheaacd_add64(ixheaacd_mult64(re3, wre), ixheaacd_mult64(im, wim))) >>
1327         32));
1328     *psubband12-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(re3, wim),
1329                                                  ixheaacd_mult64(im, wre))) >>
1330                              32);
1331   }
1332 }
1333 
ixheaacd_esbr_fwd_modulation(const WORD32 * time_sample_buf,WORD32 * real_subband,WORD32 * imag_subband,ia_sbr_qmf_filter_bank_struct * qmf_bank,ia_qmf_dec_tables_struct * qmf_dec_tables_ptr)1334 VOID ixheaacd_esbr_fwd_modulation(
1335     const WORD32 *time_sample_buf, WORD32 *real_subband, WORD32 *imag_subband,
1336     ia_sbr_qmf_filter_bank_struct *qmf_bank,
1337     ia_qmf_dec_tables_struct *qmf_dec_tables_ptr) {
1338   WORD32 i;
1339   const WORD32 *time_sample_buf1 =
1340       &time_sample_buf[2 * qmf_bank->no_channels - 1];
1341   WORD32 temp1, temp2;
1342   WORD32 *t_real_subband = real_subband;
1343   WORD32 *t_imag_subband = imag_subband;
1344   const WORD32 *tcos;
1345 
1346   for (i = qmf_bank->no_channels - 1; i >= 0; i--) {
1347     temp1 = ixheaacd_shr32(*time_sample_buf++, HQ_SHIFT_64);
1348     temp2 = ixheaacd_shr32(*time_sample_buf1--, HQ_SHIFT_64);
1349 
1350     *t_real_subband++ = ixheaacd_sub32_sat(temp1, temp2);
1351 
1352     *t_imag_subband++ = ixheaacd_add32_sat(temp1, temp2);
1353   }
1354 
1355   ixheaacd_esbr_cos_sin_mod(real_subband, qmf_bank,
1356                             qmf_dec_tables_ptr->esbr_w_16,
1357                             qmf_dec_tables_ptr->dig_rev_table4_16);
1358 
1359   tcos = qmf_bank->esbr_t_cos;
1360 
1361   for (i = (qmf_bank->usb - qmf_bank->lsb - 1); i >= 0; i--) {
1362     WORD32 cosh, sinh;
1363     WORD32 re, im;
1364 
1365     re = *real_subband;
1366     im = *imag_subband;
1367     cosh = *tcos++;
1368     sinh = *tcos++;
1369     *real_subband++ = (WORD32)((ixheaacd_add64(ixheaacd_mult64(re, cosh),
1370                                                ixheaacd_mult64(im, sinh))) >>
1371                                31);
1372     *imag_subband++ =
1373         (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, cosh),
1374                                      ixheaacd_mult64(re, sinh))) >>
1375                  31);
1376   }
1377 }
1378 
ixheaacd_sbr_qmfsyn64_winadd(WORD16 * tmp1,WORD16 * tmp2,WORD16 * inp1,WORD16 * sample_buffer,FLAG shift,WORD32 ch_fac)1379 VOID ixheaacd_sbr_qmfsyn64_winadd(WORD16 *tmp1, WORD16 *tmp2, WORD16 *inp1,
1380                                   WORD16 *sample_buffer, FLAG shift,
1381                                   WORD32 ch_fac) {
1382   WORD32 k;
1383   WORD32 rounding_fac = 0x8000;
1384   rounding_fac = rounding_fac >> shift;
1385 
1386   for (k = 0; k < 64; k++) {
1387     WORD32 syn_out = rounding_fac;
1388 
1389     syn_out = ixheaacd_add32_sat(syn_out,
1390                              ixheaacd_mult16x16in32(tmp1[0 + k], inp1[k + 0]));
1391     syn_out = ixheaacd_add32_sat(
1392         syn_out, ixheaacd_mult16x16in32(tmp1[256 + k], inp1[k + 128]));
1393     syn_out = ixheaacd_add32_sat(
1394         syn_out, ixheaacd_mult16x16in32(tmp1[512 + k], inp1[k + 256]));
1395     syn_out = ixheaacd_add32_sat(
1396         syn_out, ixheaacd_mult16x16in32(tmp1[768 + k], inp1[k + 384]));
1397     syn_out = ixheaacd_add32_sat(
1398         syn_out, ixheaacd_mult16x16in32(tmp1[1024 + k], inp1[k + 512]));
1399 
1400     syn_out = ixheaacd_add32_sat(
1401         syn_out, ixheaacd_mult16x16in32(tmp2[128 + k], inp1[k + 64]));
1402     syn_out = ixheaacd_add32_sat(
1403         syn_out, ixheaacd_mult16x16in32(tmp2[384 + k], inp1[k + 192]));
1404     syn_out = ixheaacd_add32_sat(
1405         syn_out, ixheaacd_mult16x16in32(tmp2[640 + k], inp1[k + 320]));
1406     syn_out = ixheaacd_add32_sat(
1407         syn_out, ixheaacd_mult16x16in32(tmp2[896 + k], inp1[k + 448]));
1408     syn_out = ixheaacd_add32_sat(
1409         syn_out, ixheaacd_mult16x16in32(tmp2[1152 + k], inp1[k + 576]));
1410 
1411     sample_buffer[ch_fac * k] = (ixheaacd_shl32_sat(syn_out, shift) >> 16);
1412   }
1413 }
1414 
ixheaacd_esbr_qmfsyn64_winadd(WORD32 * tmp1,WORD32 * tmp2,WORD32 * inp1,WORD32 * sample_buffer,WORD32 ch_fac)1415 VOID ixheaacd_esbr_qmfsyn64_winadd(WORD32 *tmp1, WORD32 *tmp2, WORD32 *inp1,
1416                                    WORD32 *sample_buffer, WORD32 ch_fac) {
1417   WORD32 k;
1418 
1419   for (k = 0; k < 64; k++) {
1420     WORD64 syn_out = 0;
1421 
1422     syn_out =
1423         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[0 + k], inp1[k + 0]));
1424     syn_out =
1425         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[256 + k], inp1[k + 128]));
1426     syn_out =
1427         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[512 + k], inp1[k + 256]));
1428     syn_out =
1429         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[768 + k], inp1[k + 384]));
1430     syn_out =
1431         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[1024 + k], inp1[k + 512]));
1432 
1433     syn_out =
1434         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[128 + k], inp1[k + 64]));
1435     syn_out =
1436         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[384 + k], inp1[k + 192]));
1437     syn_out =
1438         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[640 + k], inp1[k + 320]));
1439     syn_out =
1440         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[896 + k], inp1[k + 448]));
1441     syn_out =
1442         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[1152 + k], inp1[k + 576]));
1443 
1444     sample_buffer[ch_fac * k] = (WORD32)(syn_out >> 31);
1445   }
1446 }
1447 
ixheaacd_shiftrountine(WORD32 * qmf_real,WORD32 * qmf_imag,WORD32 len,WORD32 common_shift)1448 VOID ixheaacd_shiftrountine(WORD32 *qmf_real, WORD32 *qmf_imag, WORD32 len,
1449                             WORD32 common_shift) {
1450   WORD32 treal, timag;
1451   WORD32 j;
1452 
1453   if (common_shift < 0) {
1454     WORD32 cshift = -common_shift;
1455     cshift = ixheaacd_min32(cshift, 31);
1456     for (j = len - 1; j >= 0; j--) {
1457       treal = *qmf_real;
1458       timag = *qmf_imag;
1459 
1460       treal = (ixheaacd_shr32(treal, cshift));
1461       timag = (ixheaacd_shr32(timag, cshift));
1462 
1463       *qmf_real++ = treal;
1464       *qmf_imag++ = timag;
1465     }
1466   } else {
1467     for (j = len - 1; j >= 0; j--) {
1468       treal = (ixheaacd_shl32_sat(*qmf_real, common_shift));
1469       timag = (ixheaacd_shl32_sat(*qmf_imag, common_shift));
1470       *qmf_real++ = treal;
1471       *qmf_imag++ = timag;
1472     }
1473   }
1474 }
1475 
ixheaacd_shiftrountine_with_rnd(WORD32 * qmf_real,WORD32 * qmf_imag,WORD16 * filter_states,WORD32 len,WORD32 shift)1476 VOID ixheaacd_shiftrountine_with_rnd(WORD32 *qmf_real, WORD32 *qmf_imag,
1477                                      WORD16 *filter_states, WORD32 len,
1478                                      WORD32 shift) {
1479   WORD16 *filter_states_rev = filter_states + len;
1480   WORD32 treal, timag;
1481   WORD32 j;
1482 
1483   for (j = (len - 1); j >= 0; j -= 2) {
1484     WORD32 r1, r2, i1, i2;
1485     i2 = qmf_imag[j];
1486     r2 = qmf_real[j];
1487 
1488     r1 = *qmf_real++;
1489     i1 = *qmf_imag++;
1490 
1491     timag = ixheaacd_add32_sat(i1, r1);
1492     timag = (ixheaacd_shl32_sat(timag, shift));
1493     filter_states_rev[j] = ixheaacd_round16(timag);
1494 
1495     treal = ixheaacd_sub32_sat(i2, r2);
1496     treal = (ixheaacd_shl32_sat(treal, shift));
1497     filter_states[j] = ixheaacd_round16(treal);
1498 
1499     treal = ixheaacd_sub32_sat(i1, r1);
1500     treal = (ixheaacd_shl32_sat(treal, shift));
1501     *filter_states++ = ixheaacd_round16(treal);
1502 
1503     timag = ixheaacd_add32_sat(i2, r2);
1504     ;
1505     timag = (ixheaacd_shl32_sat(timag, shift));
1506     *filter_states_rev++ = ixheaacd_round16(timag);
1507   }
1508 }
1509 
ixheaacd_shiftrountine_with_rnd_eld(WORD32 * qmf_real,WORD32 * qmf_imag,WORD16 * filter_states,WORD32 len,WORD32 shift)1510 VOID ixheaacd_shiftrountine_with_rnd_eld(WORD32 *qmf_real, WORD32 *qmf_imag,
1511                                          WORD16 *filter_states, WORD32 len,
1512                                          WORD32 shift) {
1513   WORD16 *filter_states_rev = filter_states + len;
1514   WORD32 treal, timag;
1515   WORD32 j;
1516 
1517   for (j = (len - 1); j >= 0; j -= 2) {
1518     WORD32 r1, r2, i1, i2;
1519     i2 = qmf_imag[j];
1520     r2 = qmf_real[j];
1521     r1 = *qmf_real++;
1522     i1 = *qmf_imag++;
1523 
1524     timag = ixheaacd_negate32(ixheaacd_add32_sat(i1, r1));
1525     timag = (ixheaacd_shl32_sat(timag, shift));
1526     filter_states_rev[j] = ixheaacd_round16(timag);
1527 
1528     treal = ixheaacd_sub32_sat(r2, i2);
1529     treal = (ixheaacd_shl32_sat(treal, shift));
1530     filter_states[j] = ixheaacd_round16(treal);
1531 
1532     treal = ixheaacd_sub32_sat(r1, i1);
1533     treal = (ixheaacd_shl32_sat(treal, shift));
1534     *filter_states++ = ixheaacd_round16(treal);
1535 
1536     timag = ixheaacd_negate32(ixheaacd_add32_sat(i2, r2));
1537     timag = (ixheaacd_shl32_sat(timag, shift));
1538     *filter_states_rev++ = ixheaacd_round16(timag);
1539   }
1540 }
1541 
ixheaacd_shiftrountine_with_rnd_hq(WORD32 * qmf_real,WORD32 * qmf_imag,WORD32 * filter_states,WORD32 len,WORD32 shift)1542 VOID ixheaacd_shiftrountine_with_rnd_hq(WORD32 *qmf_real, WORD32 *qmf_imag,
1543                                         WORD32 *filter_states, WORD32 len,
1544                                         WORD32 shift) {
1545   WORD32 *filter_states_rev = filter_states + len;
1546   WORD32 treal, timag;
1547   WORD32 j;
1548 
1549   for (j = (len - 1); j >= 0; j -= 2) {
1550     WORD32 r1, r2, i1, i2;
1551     i2 = qmf_imag[j];
1552     r2 = qmf_real[j];
1553     r1 = *qmf_real++;
1554     i1 = *qmf_imag++;
1555 
1556     timag = ixheaacd_add32_sat(i1, r1);
1557     timag = (ixheaacd_shl32_sat(timag, shift));
1558     filter_states_rev[j] = timag;
1559 
1560     treal = ixheaacd_sub32_sat(i2, r2);
1561     treal = (ixheaacd_shl32_sat(treal, shift));
1562     filter_states[j] = treal;
1563 
1564     treal = ixheaacd_sub32_sat(i1, r1);
1565     treal = (ixheaacd_shl32_sat(treal, shift));
1566     *filter_states++ = treal;
1567 
1568     timag = ixheaacd_add32_sat(i2, r2);
1569     timag = (ixheaacd_shl32_sat(timag, shift));
1570     *filter_states_rev++ = timag;
1571   }
1572 }
1573 
ixheaacd_radix4bfly(const WORD16 * w,WORD32 * x,WORD32 index1,WORD32 index)1574 VOID ixheaacd_radix4bfly(const WORD16 *w, WORD32 *x, WORD32 index1,
1575                          WORD32 index) {
1576   int i;
1577   WORD32 l1, l2, h2, fft_jmp;
1578   WORD32 xt0_0, yt0_0, xt1_0, yt1_0, xt2_0, yt2_0;
1579   WORD32 xh0_0, xh1_0, xh20_0, xh21_0, xl0_0, xl1_0, xl20_0, xl21_0;
1580   WORD32 x_0, x_1, x_l1_0, x_l1_1, x_l2_0, x_l2_1;
1581   WORD32 x_h2_0, x_h2_1;
1582   WORD16 si10, si20, si30, co10, co20, co30;
1583 
1584   WORD32 mul_1, mul_2, mul_3, mul_4, mul_5, mul_6;
1585   WORD32 mul_7, mul_8, mul_9, mul_10, mul_11, mul_12;
1586   WORD32 *x_l1;
1587   WORD32 *x_l2;
1588   WORD32 *x_h2;
1589   const WORD16 *w_ptr = w;
1590   WORD32 i1;
1591 
1592   h2 = index << 1;
1593   l1 = index << 2;
1594   l2 = (index << 2) + (index << 1);
1595 
1596   x_l1 = &(x[l1]);
1597   x_l2 = &(x[l2]);
1598   x_h2 = &(x[h2]);
1599 
1600   fft_jmp = 6 * (index);
1601 
1602   for (i1 = 0; i1 < index1; i1++) {
1603     for (i = 0; i < index; i++) {
1604       si10 = (*w_ptr++);
1605       co10 = (*w_ptr++);
1606       si20 = (*w_ptr++);
1607       co20 = (*w_ptr++);
1608       si30 = (*w_ptr++);
1609       co30 = (*w_ptr++);
1610 
1611       x_0 = x[0];
1612       x_h2_0 = x[h2];
1613       x_l1_0 = x[l1];
1614       x_l2_0 = x[l2];
1615 
1616       xh0_0 = ixheaacd_add32_sat(x_0, x_l1_0);
1617       xl0_0 = ixheaacd_sub32_sat(x_0, x_l1_0);
1618 
1619       xh20_0 = ixheaacd_add32_sat(x_h2_0, x_l2_0);
1620       xl20_0 = ixheaacd_sub32_sat(x_h2_0, x_l2_0);
1621 
1622       x[0] = ixheaacd_add32_sat(xh0_0, xh20_0);
1623       xt0_0 = ixheaacd_sub32_sat(xh0_0, xh20_0);
1624 
1625       x_1 = x[1];
1626       x_h2_1 = x[h2 + 1];
1627       x_l1_1 = x[l1 + 1];
1628       x_l2_1 = x[l2 + 1];
1629 
1630       xh1_0 = ixheaacd_add32_sat(x_1, x_l1_1);
1631       xl1_0 = ixheaacd_sub32_sat(x_1, x_l1_1);
1632 
1633       xh21_0 = ixheaacd_add32_sat(x_h2_1, x_l2_1);
1634       xl21_0 = ixheaacd_sub32_sat(x_h2_1, x_l2_1);
1635 
1636       x[1] = ixheaacd_add32_sat(xh1_0, xh21_0);
1637       yt0_0 = ixheaacd_sub32_sat(xh1_0, xh21_0);
1638 
1639       xt1_0 = ixheaacd_add32_sat(xl0_0, xl21_0);
1640       xt2_0 = ixheaacd_sub32_sat(xl0_0, xl21_0);
1641 
1642       yt2_0 = ixheaacd_add32_sat(xl1_0, xl20_0);
1643       yt1_0 = ixheaacd_sub32_sat(xl1_0, xl20_0);
1644 
1645       mul_11 = ixheaacd_mult32x16in32(xt2_0, co30);
1646       mul_3 = ixheaacd_mult32x16in32(yt2_0, si30);
1647       x[l2] = (mul_3 + mul_11) << RADIXSHIFT;
1648 
1649       mul_5 = ixheaacd_mult32x16in32(xt2_0, si30);
1650       mul_9 = ixheaacd_mult32x16in32(yt2_0, co30);
1651       x[l2 + 1] = (mul_9 - mul_5) << RADIXSHIFT;
1652 
1653       mul_12 = ixheaacd_mult32x16in32(xt0_0, co20);
1654       mul_2 = ixheaacd_mult32x16in32(yt0_0, si20);
1655       x[l1] = (mul_2 + mul_12) << RADIXSHIFT;
1656 
1657       mul_6 = ixheaacd_mult32x16in32(xt0_0, si20);
1658       mul_8 = ixheaacd_mult32x16in32(yt0_0, co20);
1659       x[l1 + 1] = (mul_8 - mul_6) << RADIXSHIFT;
1660 
1661       mul_4 = ixheaacd_mult32x16in32(xt1_0, co10);
1662       mul_1 = ixheaacd_mult32x16in32(yt1_0, si10);
1663       x[h2] = (mul_1 + mul_4) << RADIXSHIFT;
1664 
1665       mul_10 = ixheaacd_mult32x16in32(xt1_0, si10);
1666       mul_7 = ixheaacd_mult32x16in32(yt1_0, co10);
1667       x[h2 + 1] = (mul_7 - mul_10) << RADIXSHIFT;
1668 
1669       x += 2;
1670     }
1671     x += fft_jmp;
1672     w_ptr = w_ptr - fft_jmp;
1673   }
1674 }
1675 
ixheaacd_postradixcompute4(WORD32 * ptr_y,WORD32 * ptr_x,const WORD32 * p_dig_rev_tbl,WORD32 npoints)1676 VOID ixheaacd_postradixcompute4(WORD32 *ptr_y, WORD32 *ptr_x,
1677                                 const WORD32 *p_dig_rev_tbl, WORD32 npoints) {
1678   WORD32 i, k;
1679   WORD32 h2;
1680   WORD32 xh0_0, xh1_0, xl0_0, xl1_0;
1681   WORD32 xh0_1, xh1_1, xl0_1, xl1_1;
1682   WORD32 x_0, x_1, x_2, x_3;
1683   WORD32 xh0_2, xh1_2, xl0_2, xl1_2, xh0_3, xh1_3, xl0_3, xl1_3;
1684   WORD32 x_4, x_5, x_6, x_7;
1685   WORD32 x_8, x_9, x_a, x_b, x_c, x_d, x_e, x_f;
1686   WORD32 n00, n10, n20, n30, n01, n11, n21, n31;
1687   WORD32 n02, n12, n22, n32, n03, n13, n23, n33;
1688   WORD32 n0, j0;
1689   WORD32 *x2, *x0;
1690   WORD32 *y0, *y1, *y2, *y3;
1691 
1692   y0 = ptr_y;
1693   y2 = ptr_y + (WORD32)npoints;
1694   x0 = ptr_x;
1695   x2 = ptr_x + (WORD32)(npoints >> 1);
1696 
1697   y1 = y0 + (WORD32)(npoints >> 1);
1698   y3 = y2 + (WORD32)(npoints >> 1);
1699 
1700   j0 = 4;
1701   n0 = npoints >> 2;
1702 
1703   for (k = 0; k < 2; k++) {
1704     for (i = 0; i<npoints>> 1; i += 8) {
1705       h2 = *p_dig_rev_tbl++ >> 2;
1706       x_0 = *x0++;
1707       x_1 = *x0++;
1708       x_2 = *x0++;
1709       x_3 = *x0++;
1710       x_4 = *x0++;
1711       x_5 = *x0++;
1712       x_6 = *x0++;
1713       x_7 = *x0++;
1714 
1715       xh0_0 = ixheaacd_add32_sat(x_0, x_4);
1716       xh1_0 = ixheaacd_add32_sat(x_1, x_5);
1717       xl0_0 = ixheaacd_sub32_sat(x_0, x_4);
1718       xl1_0 = ixheaacd_sub32_sat(x_1, x_5);
1719       xh0_1 = ixheaacd_add32_sat(x_2, x_6);
1720       xh1_1 = ixheaacd_add32_sat(x_3, x_7);
1721       xl0_1 = ixheaacd_sub32_sat(x_2, x_6);
1722       xl1_1 = ixheaacd_sub32_sat(x_3, x_7);
1723 
1724       n00 = ixheaacd_add32_sat(xh0_0, xh0_1);
1725       n01 = ixheaacd_add32_sat(xh1_0, xh1_1);
1726       n10 = ixheaacd_add32_sat(xl0_0, xl1_1);
1727       n11 = ixheaacd_sub32_sat(xl1_0, xl0_1);
1728       n20 = ixheaacd_sub32_sat(xh0_0, xh0_1);
1729       n21 = ixheaacd_sub32_sat(xh1_0, xh1_1);
1730       n30 = ixheaacd_sub32_sat(xl0_0, xl1_1);
1731       n31 = ixheaacd_add32_sat(xl1_0, xl0_1);
1732 
1733       y0[h2] = n00;
1734       y0[h2 + 1] = n01;
1735       y1[h2] = n10;
1736       y1[h2 + 1] = n11;
1737       y2[h2] = n20;
1738       y2[h2 + 1] = n21;
1739       y3[h2] = n30;
1740       y3[h2 + 1] = n31;
1741 
1742       x_8 = *x2++;
1743       x_9 = *x2++;
1744       x_a = *x2++;
1745       x_b = *x2++;
1746       x_c = *x2++;
1747       x_d = *x2++;
1748       x_e = *x2++;
1749       x_f = *x2++;
1750 
1751       xh0_2 = ixheaacd_add32_sat(x_8, x_c);
1752       xh1_2 = ixheaacd_add32_sat(x_9, x_d);
1753       xl0_2 = ixheaacd_sub32_sat(x_8, x_c);
1754       xl1_2 = ixheaacd_sub32_sat(x_9, x_d);
1755       xh0_3 = ixheaacd_add32_sat(x_a, x_e);
1756       xh1_3 = ixheaacd_add32_sat(x_b, x_f);
1757       xl0_3 = ixheaacd_sub32_sat(x_a, x_e);
1758       xl1_3 = ixheaacd_sub32_sat(x_b, x_f);
1759 
1760       n02 = ixheaacd_add32_sat(xh0_2, xh0_3);
1761       n03 = ixheaacd_add32_sat(xh1_2, xh1_3);
1762       n12 = ixheaacd_add32_sat(xl0_2, xl1_3);
1763       n13 = ixheaacd_sub32_sat(xl1_2, xl0_3);
1764       n22 = ixheaacd_sub32_sat(xh0_2, xh0_3);
1765       n23 = ixheaacd_sub32_sat(xh1_2, xh1_3);
1766       n32 = ixheaacd_sub32_sat(xl0_2, xl1_3);
1767       n33 = ixheaacd_add32_sat(xl1_2, xl0_3);
1768 
1769       y0[h2 + 2] = n02;
1770       y0[h2 + 3] = n03;
1771       y1[h2 + 2] = n12;
1772       y1[h2 + 3] = n13;
1773       y2[h2 + 2] = n22;
1774       y2[h2 + 3] = n23;
1775       y3[h2 + 2] = n32;
1776       y3[h2 + 3] = n33;
1777     }
1778     x0 += (WORD32)npoints >> 1;
1779     x2 += (WORD32)npoints >> 1;
1780   }
1781 }
1782 
ixheaacd_postradixcompute2(WORD32 * ptr_y,WORD32 * ptr_x,const WORD32 * pdig_rev_tbl,WORD32 npoints)1783 VOID ixheaacd_postradixcompute2(WORD32 *ptr_y, WORD32 *ptr_x,
1784                                 const WORD32 *pdig_rev_tbl, WORD32 npoints) {
1785   WORD32 i, k;
1786   WORD32 h2;
1787   WORD32 x_0, x_1, x_2, x_3;
1788   WORD32 x_4, x_5, x_6, x_7;
1789   WORD32 x_8, x_9, x_a, x_b, x_c, x_d, x_e, x_f;
1790   WORD32 n00, n10, n20, n30, n01, n11, n21, n31;
1791   WORD32 n02, n12, n22, n32, n03, n13, n23, n33;
1792   WORD32 n0, j0;
1793   WORD32 *x2, *x0;
1794   WORD32 *y0, *y1, *y2, *y3;
1795 
1796   y0 = ptr_y;
1797   y2 = ptr_y + (WORD32)npoints;
1798   x0 = ptr_x;
1799   x2 = ptr_x + (WORD32)(npoints >> 1);
1800 
1801   y1 = y0 + (WORD32)(npoints >> 2);
1802   y3 = y2 + (WORD32)(npoints >> 2);
1803   j0 = 8;
1804   n0 = npoints >> 1;
1805 
1806   for (k = 0; k < 2; k++) {
1807     for (i = 0; i<npoints>> 1; i += 8) {
1808       h2 = *pdig_rev_tbl++ >> 2;
1809 
1810       x_0 = *x0++;
1811       x_1 = *x0++;
1812       x_2 = *x0++;
1813       x_3 = *x0++;
1814       x_4 = *x0++;
1815       x_5 = *x0++;
1816       x_6 = *x0++;
1817       x_7 = *x0++;
1818 
1819       n00 = ixheaacd_add32_sat(x_0, x_2);
1820       n01 = ixheaacd_add32_sat(x_1, x_3);
1821       n20 = ixheaacd_sub32_sat(x_0, x_2);
1822       n21 = ixheaacd_sub32_sat(x_1, x_3);
1823       n10 = ixheaacd_add32_sat(x_4, x_6);
1824       n11 = ixheaacd_add32_sat(x_5, x_7);
1825       n30 = ixheaacd_sub32_sat(x_4, x_6);
1826       n31 = ixheaacd_sub32_sat(x_5, x_7);
1827 
1828       y0[h2] = n00;
1829       y0[h2 + 1] = n01;
1830       y1[h2] = n10;
1831       y1[h2 + 1] = n11;
1832       y2[h2] = n20;
1833       y2[h2 + 1] = n21;
1834       y3[h2] = n30;
1835       y3[h2 + 1] = n31;
1836 
1837       x_8 = *x2++;
1838       x_9 = *x2++;
1839       x_a = *x2++;
1840       x_b = *x2++;
1841       x_c = *x2++;
1842       x_d = *x2++;
1843       x_e = *x2++;
1844       x_f = *x2++;
1845 
1846       n02 = ixheaacd_add32_sat(x_8, x_a);
1847       n03 = ixheaacd_add32_sat(x_9, x_b);
1848       n22 = ixheaacd_sub32_sat(x_8, x_a);
1849       n23 = ixheaacd_sub32_sat(x_9, x_b);
1850       n12 = ixheaacd_add32_sat(x_c, x_e);
1851       n13 = ixheaacd_add32_sat(x_d, x_f);
1852       n32 = ixheaacd_sub32_sat(x_c, x_e);
1853       n33 = ixheaacd_sub32_sat(x_d, x_f);
1854 
1855       y0[h2 + 2] = n02;
1856       y0[h2 + 3] = n03;
1857       y1[h2 + 2] = n12;
1858       y1[h2 + 3] = n13;
1859       y2[h2 + 2] = n22;
1860       y2[h2 + 3] = n23;
1861       y3[h2 + 2] = n32;
1862       y3[h2 + 3] = n33;
1863     }
1864     x0 += (WORD32)npoints >> 1;
1865     x2 += (WORD32)npoints >> 1;
1866   }
1867 }
1868 #endif
1869