• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2  *                                                                            *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 #include <string.h>
21 #include "ixheaacd_sbr_common.h"
22 #include <ixheaacd_type_def.h>
23 
24 #include "ixheaacd_constants.h"
25 #include "ixheaacd_basic_ops32.h"
26 #include "ixheaacd_basic_ops16.h"
27 #include "ixheaacd_basic_ops40.h"
28 #include "ixheaacd_basic_ops.h"
29 
30 #include "ixheaacd_intrinsics.h"
31 #include "ixheaacd_common_rom.h"
32 #include "ixheaacd_bitbuffer.h"
33 #include "ixheaacd_sbrdecsettings.h"
34 #include "ixheaacd_sbr_scale.h"
35 #include "ixheaacd_lpp_tran.h"
36 #include "ixheaacd_env_extr_part.h"
37 #include "ixheaacd_sbr_rom.h"
38 #include "ixheaacd_hybrid.h"
39 #include "ixheaacd_ps_dec.h"
40 #include "ixheaacd_env_extr.h"
41 #include "ixheaacd_qmf_dec.h"
42 
43 #include <ixheaacd_basic_op.h>
44 #include "ixheaacd_env_calc.h"
45 
46 #include "ixheaacd_interface.h"
47 #include "ixheaacd_function_selector.h"
48 #include "ixheaacd_audioobjtypes.h"
49 
/* Short local aliases for the project's fixed-point multiply and
 * multiply-accumulate helpers. None of these aliases is referenced in the
 * visible part of this file. */
#define mult16x16_16(a, b) ixheaacd_mult16((a), (b))
#define mac16x16(a, b, c) ixheaacd_mac16x16in32((a), (b), (c))
#define mpy_32x16(a, b) fixmuldiv2_32x16b((a), (b))
#define mpy_16x16(a, b) ixheaacd_mult16x16in32((a), (b))
#define mpy_32x32(a, b) ixheaacd_mult32((a), (b))
#define mpy_32x16H_n(a, b) ixheaacd_mult32x16hin32((a), (b))
#define msu16x16(a, b, c) msu16x16in32((a), (b), (c))

/* DCT3_LEN supplies the loop bounds in ixheaacd_dct3_32; DCT2_LEN is not
 * referenced in the visible part of this file. */
#define DCT3_LEN (32)
#define DCT2_LEN (64)

/* LP_SHIFT_VAL: right shift applied to every input word read by
 * ixheaacd_dct3_32. */
#define LP_SHIFT_VAL 7
#define HQ_SHIFT_64 4
/* RADIXSHIFT: left shift applied to the twiddled butterfly outputs in
 * ixheaacd_esbr_radix4bfly. */
#define RADIXSHIFT 1
#define ROUNDING_SPECTRA 1
/* HQ_SHIFT_VAL: right shift applied to the time-domain inputs in
 * ixheaacd_fwd_modulation. */
#define HQ_SHIFT_VAL 4
66 
/**
 * ixheaacd_dct3_32
 *
 * Transform used by ixheaacd_cplx_anal_qmffilt on its low-power path.
 * The routine works in four phases:
 *   1. pre-twiddle: combine pairs of input words (each scaled down by
 *      LP_SHIFT_VAL) with main_twidle_fwd into 16 interleaved pairs in output;
 *   2. post-table pass: combine mirrored pairs of output in place using
 *      post_tbl;
 *   3. ixheaacd_radix4bfly on output followed by ixheaacd_postradixcompute4,
 *      which is handed input as its first argument;
 *   4. copy the pairs found in input back into output in a permuted order.
 *
 * input           - source words; indices 0..63 are read in phase 1. The
 *                   buffer is reused after phase 3, so its original contents
 *                   are not preserved.
 * output          - receives DCT3_LEN (32) result words; also used as work
 *                   space in phases 1-3.
 * main_twidle_fwd - pre-twiddle table, read from element 4 onwards with a
 *                   stride of 4 (two consecutive words used per step).
 * post_tbl        - post table, read upwards from element 2 and downwards
 *                   from element 14, both with stride 2.
 * w_16            - twiddle table forwarded to ixheaacd_radix4bfly.
 * p_table         - table forwarded to ixheaacd_postradixcompute4.
 */
VOID ixheaacd_dct3_32(WORD32 *input, WORD32 *output,
                      const WORD16 *main_twidle_fwd, const WORD16 *post_tbl,
                      const WORD16 *w_16, const WORD32 *p_table) {
  WORD32 n, k;

  WORD32 temp1[6];
  WORD32 temp2[4];
  WORD16 twid_re, twid_im;
  WORD32 *ptr_reverse, *ptr_forward, *p_out, *ptr_out1;
  const WORD16 *twidle_fwd, *twidle_rev;

  /* Phase 1 walks outwards from input[48]: forward from 49, backward from 47.
   * The same pointers, offset by -33 / -31, address the lower half. */
  ptr_forward = &input[49];
  ptr_reverse = &input[47];

  p_out = output;
  twidle_fwd = main_twidle_fwd;
  twidle_fwd += 4;

  /* First pair needs no twiddle: scaled centre word and a zero. */
  *p_out++ = input[48] >> LP_SHIFT_VAL;
  *p_out++ = 0;

  for (n = 1; n < DCT3_LEN / 2; n++) {
    /* Sum of the upper-half pair. */
    temp1[0] = *ptr_forward++;
    temp1[1] = *ptr_reverse--;
    temp1[0] = ixheaacd_add32(ixheaacd_shr32(temp1[0], LP_SHIFT_VAL),
                              ixheaacd_shr32(temp1[1], LP_SHIFT_VAL));

    /* Difference of the matching lower-half pair (pointers were already
     * stepped above, hence the 33 / 31 offsets). */
    temp1[2] = *(ptr_forward - 33);
    temp1[3] = *(ptr_reverse - 31);
    temp1[1] = ixheaacd_sub32(ixheaacd_shr32(temp1[2], LP_SHIFT_VAL),
                              ixheaacd_shr32(temp1[3], LP_SHIFT_VAL));
    twid_re = *twidle_fwd++;

    twid_im = *twidle_fwd;
    twidle_fwd += 3;
    *p_out++ = mac32x16in32_dual(temp1[0], twid_re, temp1[1], twid_im);
    *p_out++ = msu32x16in32_dual(temp1[0], twid_im, temp1[1], twid_re);
  }
  /* Last pre-twiddle term: built from input[32] and input[0] only and kept in
   * temp2[2..3] rather than stored to output. */
  twid_re = *twidle_fwd++;

  twid_im = *twidle_fwd;
  twidle_fwd += 3;

  temp1[1] = *ptr_reverse--;
  temp1[0] = *(ptr_reverse - 31);
  temp1[1] = ixheaacd_sub32(ixheaacd_shr32(temp1[1], LP_SHIFT_VAL),
                            ixheaacd_shr32(temp1[0], LP_SHIFT_VAL));

  temp1[0] = temp1[1];

  temp2[2] = mac32x16in32_dual(temp1[0], twid_re, temp1[1], twid_im);
  temp2[3] = msu32x16in32_dual(temp1[0], twid_im, temp1[1], twid_re);

  /* Phase 2: in-place pass over output, pairing entries from both ends. */
  ptr_forward = output;
  ptr_reverse = &output[DCT3_LEN - 1];
  temp2[0] = *ptr_forward++;
  temp2[1] = *ptr_forward--;

  /* First pair is combined with the term held in temp2[2..3]. */
  temp1[0] = -temp2[1] - temp2[3];
  temp1[1] = temp2[0] - temp2[2];
  temp2[0] = (temp2[0] + temp2[2] + temp1[0]);
  temp2[1] = (temp2[1] - temp2[3] + temp1[1]);

  temp2[0] >>= 1;
  temp2[1] >>= 1;

  *ptr_forward++ = temp2[0];
  *ptr_forward++ = temp2[1];

  twidle_fwd = post_tbl + 2;
  twidle_rev = post_tbl + 14;

  for (n = 1; n < DCT3_LEN / 4; n++) {
    /* Peek at the forward pair and the mirrored reverse pair without
     * moving the pointers (each ++ / -- is undone by the next access). */
    temp2[0] = *ptr_forward++;
    temp2[1] = *ptr_forward--;
    temp2[3] = *ptr_reverse--;
    temp2[2] = *ptr_reverse++;

    twid_re = *twidle_rev;
    twidle_rev -= 2;
    twid_im = *twidle_fwd;
    twidle_fwd += 2;

    temp1[0] = temp2[0] - temp2[2];
    temp1[1] = (temp2[0] + temp2[2]);

    temp1[2] = temp2[1] + temp2[3];
    temp1[3] = (temp2[1] - temp2[3]);
    temp1[4] = mac32x16in32_dual(temp1[0], twid_re, temp1[2], twid_im);
    temp1[5] = msu32x16in32_dual(temp1[0], twid_im, temp1[2], twid_re);

    temp1[1] >>= 1;
    temp1[3] >>= 1;

    *ptr_forward++ = temp1[1] - temp1[4];
    *ptr_forward++ = temp1[3] + temp1[5];

    *ptr_reverse-- = -temp1[3] + temp1[5];
    *ptr_reverse-- = temp1[1] + temp1[4];
  }
  /* Middle pair: same computation, but only the forward pair is stored and
   * the mac term is negated. */
  temp2[0] = *ptr_forward++;
  temp2[1] = *ptr_forward--;
  temp2[3] = *ptr_reverse--;
  temp2[2] = *ptr_reverse++;

  twid_re = *twidle_rev;
  twidle_rev -= 2;
  twid_im = *twidle_fwd;
  twidle_fwd += 2;

  temp1[0] = temp2[0] - temp2[2];
  temp1[1] = (temp2[0] + temp2[2]);

  temp1[2] = temp2[1] + temp2[3];
  temp1[3] = (temp2[1] - temp2[3]);

  temp1[4] = -mac32x16in32_dual(temp1[0], twid_re, temp1[2], twid_im);
  temp1[5] = msu32x16in32_dual(temp1[0], twid_im, temp1[2], twid_re);

  temp1[1] >>= 1;
  temp1[3] >>= 1;
  *ptr_forward++ = temp1[1] + temp1[4];
  *ptr_forward++ = temp1[3] + temp1[5];

  /* Phase 3: butterfly on output, then the post-radix step with input as its
   * first argument (the code below reads the result back from input). */
  ixheaacd_radix4bfly(w_16, output, 1, 4);
  ixheaacd_postradixcompute4(input, output, p_table, 16);

  /* Phase 4: permute the pairs in input back into output. */
  output[0] = input[0];
  output[2] = input[1];

  p_out = input + 2;
  ptr_forward = output + 1;
  ptr_reverse = output + 30;
  ptr_out1 = input + 18;

  for (k = (DCT3_LEN / 4) - 1; k != 0; k--) {
    WORD32 tempre, tempim;

    /* Pairs from input[2..] fill odd output slots upwards, second word
     * first. */
    tempre = *p_out++;
    tempim = *p_out++;
    *ptr_forward = (tempim);
    ptr_forward += 2;
    *ptr_forward = (tempre);
    ptr_forward += 2;

    /* Pairs from input[18..] fill even output slots downwards from
     * output[30]. */
    tempre = *ptr_out1++;
    tempim = *ptr_out1++;
    *ptr_reverse = (tempim);
    ptr_reverse -= 2;
    *ptr_reverse = (tempre);
    ptr_reverse -= 2;
  }

  {
    /* One remaining forward pair. */
    WORD32 tempre, tempim;
    tempre = *p_out++;
    tempim = *p_out++;
    *ptr_forward = (tempim);
    ptr_forward += 2;
    *ptr_forward = (tempre);
    ptr_forward += 2;
  }

  return;
}
ixheaacd_dct2_64(WORD32 * x,WORD32 * X,ia_qmf_dec_tables_struct * qmf_dec_tables_ptr,WORD16 * filter_states)232 VOID ixheaacd_dct2_64(WORD32 *x, WORD32 *X,
233                       ia_qmf_dec_tables_struct *qmf_dec_tables_ptr,
234                       WORD16 *filter_states) {
235   ixheaacd_pretwdct2(x, X);
236 
237   ixheaacd_sbr_imdct_using_fft(qmf_dec_tables_ptr->w1024, 32, X, x,
238                                qmf_dec_tables_ptr->dig_rev_table2_128,
239                                qmf_dec_tables_ptr->dig_rev_table2_128,
240                                qmf_dec_tables_ptr->dig_rev_table2_128,
241                                qmf_dec_tables_ptr->dig_rev_table2_128);
242 
243   ixheaacd_fftposttw(x, qmf_dec_tables_ptr);
244 
245   ixheaacd_posttwdct2(x, filter_states, qmf_dec_tables_ptr);
246 
247   return;
248 }
249 
ixheaacd_cos_sin_mod(WORD32 * subband,ia_sbr_qmf_filter_bank_struct * qmf_bank,WORD16 * p_twiddle,WORD32 * p_dig_rev_tbl)250 VOID ixheaacd_cos_sin_mod(WORD32 *subband,
251                           ia_sbr_qmf_filter_bank_struct *qmf_bank,
252                           WORD16 *p_twiddle, WORD32 *p_dig_rev_tbl) {
253   WORD32 M = ixheaacd_shr32(qmf_bank->no_channels, 1);
254 
255   const WORD16 *p_sin;
256   const WORD16 *p_sin_cos = &qmf_bank->cos_twiddle[0];
257   WORD32 subband_tmp[128];
258 
259   ixheaacd_cos_sin_mod_loop1(subband, M, p_sin_cos, subband_tmp);
260 
261   if (M == 32) {
262     ixheaacd_sbr_imdct_using_fft(
263         (const WORD32 *)p_twiddle, 32, subband_tmp, subband,
264         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl,
265         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl);
266 
267     ixheaacd_sbr_imdct_using_fft(
268         (const WORD32 *)p_twiddle, 32, &subband_tmp[64], &subband[64],
269         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl,
270         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl);
271 
272   } else {
273     ixheaacd_sbr_imdct_using_fft(
274         (const WORD32 *)p_twiddle, 16, subband_tmp, subband,
275         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl,
276         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl);
277 
278     ixheaacd_sbr_imdct_using_fft(
279         (const WORD32 *)p_twiddle, 16, &subband_tmp[64], &subband[64],
280         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl,
281         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl);
282   }
283 
284   p_sin = &qmf_bank->alt_sin_twiddle[0];
285   ixheaacd_cos_sin_mod_loop2(subband, p_sin, M);
286 }
287 
ixheaacd_fwd_modulation(const WORD32 * p_time_in1,WORD32 * real_subband,WORD32 * imag_subband,ia_sbr_qmf_filter_bank_struct * qmf_bank,ia_qmf_dec_tables_struct * qmf_dec_tables_ptr)288 VOID ixheaacd_fwd_modulation(const WORD32 *p_time_in1, WORD32 *real_subband,
289                              WORD32 *imag_subband,
290                              ia_sbr_qmf_filter_bank_struct *qmf_bank,
291                              ia_qmf_dec_tables_struct *qmf_dec_tables_ptr) {
292   WORD32 i;
293   const WORD32 *p_time_in2 = &p_time_in1[2 * NO_ANALYSIS_CHANNELS - 1];
294   WORD32 temp1, temp2;
295   WORD32 *t_real_subband = real_subband;
296   WORD32 *t_imag_subband = imag_subband;
297   const WORD16 *tcos;
298 
299   for (i = NO_ANALYSIS_CHANNELS - 1; i >= 0; i--) {
300     temp1 = ixheaacd_shr32(*p_time_in1++, HQ_SHIFT_VAL);
301     temp2 = ixheaacd_shr32(*p_time_in2--, HQ_SHIFT_VAL);
302 
303     *t_real_subband++ = ixheaacd_sub32_sat(temp1, temp2);
304     ;
305     *t_imag_subband++ = ixheaacd_add32(temp1, temp2);
306     ;
307   }
308 
309   ixheaacd_cos_sin_mod(real_subband, qmf_bank,
310                        (WORD16 *)qmf_dec_tables_ptr->w1024,
311                        (WORD32 *)qmf_dec_tables_ptr->dig_rev_table2_128);
312 
313   tcos = qmf_bank->t_cos;
314 
315   for (i = (qmf_bank->usb - qmf_bank->lsb - 1); i >= 0; i--) {
316     WORD16 cosh, sinh;
317     WORD32 re, im;
318 
319     re = *real_subband;
320     im = *imag_subband;
321     cosh = *tcos++;
322     sinh = *tcos++;
323     *real_subband++ = ixheaacd_add32(ixheaacd_mult32x16in32_shl(re, cosh),
324                                      ixheaacd_mult32x16in32_shl(im, sinh));
325     *imag_subband++ = ixheaacd_sub32_sat(ixheaacd_mult32x16in32_shl(im, cosh),
326                                          ixheaacd_mult32x16in32_shl(re, sinh));
327   }
328 }
329 
ixheaacd_cplx_anal_qmffilt(const WORD16 * time_sample_buf,ia_sbr_scale_fact_struct * sbr_scale_factor,WORD32 ** qmf_real,WORD32 ** qmf_imag,ia_sbr_qmf_filter_bank_struct * qmf_bank,ia_qmf_dec_tables_struct * qmf_dec_tables_ptr,WORD32 ch_fac,WORD32 low_pow_flag,WORD audio_object_type)330 VOID ixheaacd_cplx_anal_qmffilt(const WORD16 *time_sample_buf,
331                                 ia_sbr_scale_fact_struct *sbr_scale_factor,
332                                 WORD32 **qmf_real, WORD32 **qmf_imag,
333                                 ia_sbr_qmf_filter_bank_struct *qmf_bank,
334                                 ia_qmf_dec_tables_struct *qmf_dec_tables_ptr,
335                                 WORD32 ch_fac, WORD32 low_pow_flag,
336                                 WORD audio_object_type) {
337   WORD32 i, k;
338   WORD32 num_time_slots = qmf_bank->num_time_slots;
339 
340   WORD32 analysis_buffer[4 * NO_ANALYSIS_CHANNELS];
341   WORD16 *filter_states = qmf_bank->core_samples_buffer;
342 
343   WORD16 *fp1, *fp2, *tmp;
344 
345   WORD16 *filter_1;
346   WORD16 *filter_2;
347   WORD16 *filt_ptr;
348   if (audio_object_type != AOT_ER_AAC_ELD &&
349       audio_object_type != AOT_ER_AAC_LD) {
350     qmf_bank->filter_pos +=
351         (qmf_dec_tables_ptr->qmf_c - qmf_bank->analy_win_coeff);
352     qmf_bank->analy_win_coeff = qmf_dec_tables_ptr->qmf_c;
353   } else {
354     qmf_bank->filter_pos +=
355         (qmf_dec_tables_ptr->qmf_c_eld3 - qmf_bank->analy_win_coeff);
356     qmf_bank->analy_win_coeff = qmf_dec_tables_ptr->qmf_c_eld3;
357   }
358 
359   filter_1 = qmf_bank->filter_pos;
360 
361   if (audio_object_type != AOT_ER_AAC_ELD &&
362       audio_object_type != AOT_ER_AAC_LD) {
363     filter_2 = filter_1 + 64;
364   } else {
365     filter_2 = filter_1 + 32;
366   }
367 
368   sbr_scale_factor->st_lb_scale = 0;
369   sbr_scale_factor->lb_scale = -10;
370   if (!low_pow_flag) {
371     if (audio_object_type != AOT_ER_AAC_ELD &&
372         audio_object_type != AOT_ER_AAC_LD) {
373       sbr_scale_factor->lb_scale = -8;
374     } else {
375       sbr_scale_factor->lb_scale = -9;
376     }
377     qmf_bank->cos_twiddle =
378         (WORD16 *)qmf_dec_tables_ptr->sbr_sin_cos_twiddle_l32;
379     qmf_bank->alt_sin_twiddle =
380         (WORD16 *)qmf_dec_tables_ptr->sbr_alt_sin_twiddle_l32;
381     if (audio_object_type != AOT_ER_AAC_ELD &&
382         audio_object_type != AOT_ER_AAC_LD) {
383       qmf_bank->t_cos = (WORD16 *)qmf_dec_tables_ptr->sbr_t_cos_sin_l32;
384     } else {
385       qmf_bank->t_cos =
386           (WORD16 *)qmf_dec_tables_ptr->ixheaacd_sbr_t_cos_sin_l32_eld;
387     }
388   }
389 
390   fp1 = qmf_bank->anal_filter_states;
391   fp2 = qmf_bank->anal_filter_states + NO_ANALYSIS_CHANNELS;
392 
393   if (audio_object_type == AOT_ER_AAC_ELD ||
394       audio_object_type == AOT_ER_AAC_LD) {
395     filter_2 = qmf_bank->filter_2;
396     fp1 = qmf_bank->fp1_anal;
397     fp2 = qmf_bank->fp2_anal;
398   }
399 
400   for (i = 0; i < num_time_slots; i++) {
401     for (k = 0; k < NO_ANALYSIS_CHANNELS; k++)
402       filter_states[NO_ANALYSIS_CHANNELS - 1 - k] = time_sample_buf[ch_fac * k];
403 
404     if (audio_object_type != AOT_ER_AAC_ELD &&
405         audio_object_type != AOT_ER_AAC_LD) {
406       ixheaacd_sbr_qmfanal32_winadds(fp1, fp2, filter_1, filter_2,
407                                      analysis_buffer, filter_states,
408                                      time_sample_buf, ch_fac);
409     }
410 
411     else {
412       ixheaacd_sbr_qmfanal32_winadd_eld(fp1, fp2, filter_1, filter_2,
413                                         analysis_buffer);
414     }
415 
416     time_sample_buf += NO_ANALYSIS_CHANNELS * ch_fac;
417 
418     filter_states -= NO_ANALYSIS_CHANNELS;
419     if (filter_states < qmf_bank->anal_filter_states) {
420       filter_states = qmf_bank->anal_filter_states + 288;
421     }
422 
423     tmp = fp1;
424     fp1 = fp2;
425     fp2 = tmp;
426     if (audio_object_type != AOT_ER_AAC_ELD &&
427         audio_object_type != AOT_ER_AAC_LD) {
428       filter_1 += 64;
429       filter_2 += 64;
430     } else {
431       filter_1 += 32;
432       filter_2 += 32;
433     }
434 
435     filt_ptr = filter_1;
436     filter_1 = filter_2;
437     filter_2 = filt_ptr;
438     if (audio_object_type != AOT_ER_AAC_ELD &&
439         audio_object_type != AOT_ER_AAC_LD) {
440       if (filter_2 > (qmf_bank->analy_win_coeff + 640)) {
441         filter_1 = (WORD16 *)qmf_bank->analy_win_coeff;
442         filter_2 = (WORD16 *)qmf_bank->analy_win_coeff + 64;
443       }
444     } else {
445       if (filter_2 > (qmf_bank->analy_win_coeff + 320)) {
446         filter_1 = (WORD16 *)qmf_bank->analy_win_coeff;
447         filter_2 = (WORD16 *)qmf_bank->analy_win_coeff + 32;
448       }
449     }
450 
451     if (!low_pow_flag) {
452       ixheaacd_fwd_modulation(analysis_buffer, qmf_real[i], qmf_imag[i],
453                               qmf_bank, qmf_dec_tables_ptr);
454     } else {
455       ixheaacd_dct3_32(
456           (WORD32 *)analysis_buffer, qmf_real[i], qmf_dec_tables_ptr->dct23_tw,
457           qmf_dec_tables_ptr->post_fft_tbl, qmf_dec_tables_ptr->w_16,
458           qmf_dec_tables_ptr->dig_rev_table4_16);
459     }
460   }
461 
462   qmf_bank->filter_pos = filter_1;
463   qmf_bank->core_samples_buffer = filter_states;
464 
465   if (audio_object_type == AOT_ER_AAC_ELD || audio_object_type == AOT_ER_AAC_LD)
466 
467   {
468     qmf_bank->fp1_anal = fp1;
469     qmf_bank->fp2_anal = fp2;
470     qmf_bank->filter_2 = filter_2;
471   }
472 }
473 
ixheaacd_inv_modulation_lp(WORD32 * qmf_real,WORD16 * filter_states,ia_sbr_qmf_filter_bank_struct * syn_qmf,ia_qmf_dec_tables_struct * qmf_dec_tables_ptr)474 VOID ixheaacd_inv_modulation_lp(WORD32 *qmf_real, WORD16 *filter_states,
475                                 ia_sbr_qmf_filter_bank_struct *syn_qmf,
476                                 ia_qmf_dec_tables_struct *qmf_dec_tables_ptr) {
477   WORD32 L = syn_qmf->no_channels;
478   const WORD32 M = (L >> 1);
479   WORD32 *dct_in = qmf_real;
480   WORD32 time_out[2 * NO_SYNTHESIS_CHANNELS];
481 
482   WORD32 ui_rem = ((WORD64)(&time_out[0]) % 8);
483   WORD32 *ptime_out = (pVOID)((WORD8 *)&time_out[0] + 8 - ui_rem);
484 
485   if (L == 64)
486     ixheaacd_dct2_64(dct_in, ptime_out, qmf_dec_tables_ptr, filter_states + M);
487   else
488     ixheaacd_dct2_32(dct_in, time_out, qmf_dec_tables_ptr, filter_states);
489 
490   filter_states[3 * M] = 0;
491 }
492 
ixheaacd_inv_emodulation(WORD32 * qmf_real,ia_sbr_qmf_filter_bank_struct * syn_qmf,ia_qmf_dec_tables_struct * qmf_dec_tables_ptr)493 VOID ixheaacd_inv_emodulation(WORD32 *qmf_real,
494                               ia_sbr_qmf_filter_bank_struct *syn_qmf,
495                               ia_qmf_dec_tables_struct *qmf_dec_tables_ptr) {
496   ixheaacd_cos_sin_mod(qmf_real, syn_qmf, (WORD16 *)qmf_dec_tables_ptr->w1024,
497                        (WORD32 *)qmf_dec_tables_ptr->dig_rev_table2_128);
498 }
499 
ixheaacd_esbr_radix4bfly(const WORD32 * w,WORD32 * x,WORD32 index1,WORD32 index)500 VOID ixheaacd_esbr_radix4bfly(const WORD32 *w, WORD32 *x, WORD32 index1,
501                               WORD32 index) {
502   int i;
503   WORD32 l1, l2, h2, fft_jmp;
504   WORD64 xt0_0, yt0_0, xt1_0, yt1_0, xt2_0, yt2_0;
505   WORD64 xh0_0, xh1_0, xh20_0, xh21_0, xl0_0, xl1_0, xl20_0, xl21_0;
506   WORD32 x_0, x_1, x_l1_0, x_l1_1, x_l2_0, x_l2_1;
507   WORD32 x_h2_0, x_h2_1;
508   WORD32 si10, si20, si30, co10, co20, co30;
509 
510   WORD64 mul_1, mul_2, mul_3, mul_4, mul_5, mul_6;
511   WORD64 mul_7, mul_8, mul_9, mul_10, mul_11, mul_12;
512   WORD32 *x_l1;
513   WORD32 *x_l2;
514   WORD32 *x_h2;
515   const WORD32 *w_ptr = w;
516   WORD32 i1;
517 
518   h2 = index << 1;
519   l1 = index << 2;
520   l2 = (index << 2) + (index << 1);
521 
522   x_l1 = &(x[l1]);
523   x_l2 = &(x[l2]);
524   x_h2 = &(x[h2]);
525 
526   fft_jmp = 6 * (index);
527 
528   for (i1 = 0; i1 < index1; i1++) {
529     for (i = 0; i < index; i++) {
530       si10 = (*w_ptr++);
531       co10 = (*w_ptr++);
532       si20 = (*w_ptr++);
533       co20 = (*w_ptr++);
534       si30 = (*w_ptr++);
535       co30 = (*w_ptr++);
536 
537       x_0 = x[0];
538       x_h2_0 = x[h2];
539       x_l1_0 = x[l1];
540       x_l2_0 = x[l2];
541 
542       xh0_0 = (WORD64)x_0 + (WORD64)x_l1_0;
543       xl0_0 = (WORD64)x_0 - (WORD64)x_l1_0;
544 
545       xh20_0 = (WORD64)x_h2_0 + (WORD64)x_l2_0;
546       xl20_0 = (WORD64)x_h2_0 - (WORD64)x_l2_0;
547 
548       x[0] = (WORD32)ixheaacd_add64_sat(xh0_0, xh20_0);
549       xt0_0 = (WORD64)xh0_0 - (WORD64)xh20_0;
550 
551       x_1 = x[1];
552       x_h2_1 = x[h2 + 1];
553       x_l1_1 = x[l1 + 1];
554       x_l2_1 = x[l2 + 1];
555 
556       xh1_0 = (WORD64)x_1 + (WORD64)x_l1_1;
557       xl1_0 = (WORD64)x_1 - (WORD64)x_l1_1;
558 
559       xh21_0 = (WORD64)x_h2_1 + (WORD64)x_l2_1;
560       xl21_0 = (WORD64)x_h2_1 - (WORD64)x_l2_1;
561 
562       x[1] = (WORD32)ixheaacd_add64_sat(xh1_0, xh21_0);
563       yt0_0 = (WORD64)xh1_0 - (WORD64)xh21_0;
564 
565       xt1_0 = (WORD64)xl0_0 + (WORD64)xl21_0;
566       xt2_0 = (WORD64)xl0_0 - (WORD64)xl21_0;
567 
568       yt2_0 = (WORD64)xl1_0 + (WORD64)xl20_0;
569       yt1_0 = (WORD64)xl1_0 - (WORD64)xl20_0;
570 
571       mul_11 = ixheaacd_mult64(xt2_0, co30);
572       mul_3 = ixheaacd_mult64(yt2_0, si30);
573       x[l2] = (WORD32)((mul_3 + mul_11) >> 32) << RADIXSHIFT;
574 
575       mul_5 = ixheaacd_mult64(xt2_0, si30);
576       mul_9 = ixheaacd_mult64(yt2_0, co30);
577       x[l2 + 1] = (WORD32)((mul_9 - mul_5) >> 32) << RADIXSHIFT;
578 
579       mul_12 = ixheaacd_mult64(xt0_0, co20);
580       mul_2 = ixheaacd_mult64(yt0_0, si20);
581       x[l1] = (WORD32)((mul_2 + mul_12) >> 32) << RADIXSHIFT;
582 
583       mul_6 = ixheaacd_mult64(xt0_0, si20);
584       mul_8 = ixheaacd_mult64(yt0_0, co20);
585       x[l1 + 1] = (WORD32)((mul_8 - mul_6) >> 32) << RADIXSHIFT;
586 
587       mul_4 = ixheaacd_mult64(xt1_0, co10);
588       mul_1 = ixheaacd_mult64(yt1_0, si10);
589       x[h2] = (WORD32)((mul_1 + mul_4) >> 32) << RADIXSHIFT;
590 
591       mul_10 = ixheaacd_mult64(xt1_0, si10);
592       mul_7 = ixheaacd_mult64(yt1_0, co10);
593       x[h2 + 1] = (WORD32)((mul_7 - mul_10) >> 32) << RADIXSHIFT;
594 
595       x += 2;
596     }
597     x += fft_jmp;
598     w_ptr = w_ptr - fft_jmp;
599   }
600 }
601 
ixheaacd_esbr_postradixcompute2(WORD32 * ptr_y,WORD32 * ptr_x,const WORD32 * pdig_rev_tbl,WORD32 npoints)602 VOID ixheaacd_esbr_postradixcompute2(WORD32 *ptr_y, WORD32 *ptr_x,
603                                      const WORD32 *pdig_rev_tbl,
604                                      WORD32 npoints) {
605   WORD32 i, k;
606   WORD32 h2;
607   WORD32 x_0, x_1, x_2, x_3;
608   WORD32 x_4, x_5, x_6, x_7;
609   WORD32 x_8, x_9, x_a, x_b, x_c, x_d, x_e, x_f;
610   WORD32 n0, j0;
611   WORD32 *x2, *x0;
612   WORD32 *y0, *y1, *y2, *y3;
613 
614   y0 = ptr_y;
615   y2 = ptr_y + (WORD32)npoints;
616   x0 = ptr_x;
617   x2 = ptr_x + (WORD32)(npoints >> 1);
618 
619   y1 = y0 + (WORD32)(npoints >> 2);
620   y3 = y2 + (WORD32)(npoints >> 2);
621   j0 = 8;
622   n0 = npoints >> 1;
623 
624   for (k = 0; k < 2; k++) {
625     for (i = 0; i<npoints>> 1; i += 8) {
626       h2 = *pdig_rev_tbl++ >> 2;
627 
628       x_0 = *x0++;
629       x_1 = *x0++;
630       x_2 = *x0++;
631       x_3 = *x0++;
632       x_4 = *x0++;
633       x_5 = *x0++;
634       x_6 = *x0++;
635       x_7 = *x0++;
636 
637       y0[h2] = ixheaacd_add32_sat(x_0, x_2);
638       y0[h2 + 1] = ixheaacd_add32_sat(x_1, x_3);
639       y1[h2] = ixheaacd_add32_sat(x_4, x_6);
640       y1[h2 + 1] = ixheaacd_add32_sat(x_5, x_7);
641       y2[h2] = ixheaacd_sub32_sat(x_0, x_2);
642       y2[h2 + 1] = ixheaacd_sub32_sat(x_1, x_3);
643       y3[h2] = ixheaacd_sub32_sat(x_4, x_6);
644       y3[h2 + 1] = ixheaacd_sub32_sat(x_5, x_7);
645 
646       x_8 = *x2++;
647       x_9 = *x2++;
648       x_a = *x2++;
649       x_b = *x2++;
650       x_c = *x2++;
651       x_d = *x2++;
652       x_e = *x2++;
653       x_f = *x2++;
654 
655       y0[h2 + 2] = ixheaacd_add32_sat(x_8, x_a);
656       y0[h2 + 3] = ixheaacd_add32_sat(x_9, x_b);
657       y1[h2 + 2] = ixheaacd_add32_sat(x_c, x_e);
658       y1[h2 + 3] = ixheaacd_add32_sat(x_d, x_f);
659       y2[h2 + 2] = ixheaacd_sub32_sat(x_8, x_a);
660       y2[h2 + 3] = ixheaacd_sub32_sat(x_9, x_b);
661       y3[h2 + 2] = ixheaacd_sub32_sat(x_c, x_e);
662       y3[h2 + 3] = ixheaacd_sub32_sat(x_d, x_f);
663     }
664     x0 += (WORD32)npoints >> 1;
665     x2 += (WORD32)npoints >> 1;
666   }
667 }
668 
ixheaacd_esbr_postradixcompute4(WORD32 * ptr_y,WORD32 * ptr_x,const WORD32 * p_dig_rev_tbl,WORD32 npoints)669 VOID ixheaacd_esbr_postradixcompute4(WORD32 *ptr_y, WORD32 *ptr_x,
670                                      const WORD32 *p_dig_rev_tbl,
671                                      WORD32 npoints) {
672   WORD32 i, k;
673   WORD32 h2;
674   WORD32 xh0_0, xh1_0, xl0_0, xl1_0;
675   WORD32 xh0_1, xh1_1, xl0_1, xl1_1;
676   WORD32 x_0, x_1, x_2, x_3;
677   WORD32 xh0_2, xh1_2, xl0_2, xl1_2, xh0_3, xh1_3, xl0_3, xl1_3;
678   WORD32 x_4, x_5, x_6, x_7;
679   WORD32 x_8, x_9, x_a, x_b, x_c, x_d, x_e, x_f;
680   WORD32 n00, n10, n20, n30, n01, n11, n21, n31;
681   WORD32 n02, n12, n22, n32, n03, n13, n23, n33;
682   WORD32 n0, j0;
683   WORD32 *x2, *x0;
684   WORD32 *y0, *y1, *y2, *y3;
685 
686   y0 = ptr_y;
687   y2 = ptr_y + (WORD32)npoints;
688   x0 = ptr_x;
689   x2 = ptr_x + (WORD32)(npoints >> 1);
690 
691   y1 = y0 + (WORD32)(npoints >> 1);
692   y3 = y2 + (WORD32)(npoints >> 1);
693 
694   j0 = 4;
695   n0 = npoints >> 2;
696 
697   for (k = 0; k < 2; k++) {
698     for (i = 0; i<npoints>> 1; i += 8) {
699       h2 = *p_dig_rev_tbl++ >> 2;
700       x_0 = *x0++;
701       x_1 = *x0++;
702       x_2 = *x0++;
703       x_3 = *x0++;
704       x_4 = *x0++;
705       x_5 = *x0++;
706       x_6 = *x0++;
707       x_7 = *x0++;
708 
709       xh0_0 = x_0 + x_4;
710       xh1_0 = x_1 + x_5;
711       xl0_0 = x_0 - x_4;
712       xl1_0 = x_1 - x_5;
713       xh0_1 = x_2 + x_6;
714       xh1_1 = x_3 + x_7;
715       xl0_1 = x_2 - x_6;
716       xl1_1 = x_3 - x_7;
717 
718       n00 = xh0_0 + xh0_1;
719       n01 = xh1_0 + xh1_1;
720       n10 = xl0_0 + xl1_1;
721       n11 = xl1_0 - xl0_1;
722       n20 = xh0_0 - xh0_1;
723       n21 = xh1_0 - xh1_1;
724       n30 = xl0_0 - xl1_1;
725       n31 = xl1_0 + xl0_1;
726 
727       y0[h2] = n00;
728       y0[h2 + 1] = n01;
729       y1[h2] = n10;
730       y1[h2 + 1] = n11;
731       y2[h2] = n20;
732       y2[h2 + 1] = n21;
733       y3[h2] = n30;
734       y3[h2 + 1] = n31;
735 
736       x_8 = *x2++;
737       x_9 = *x2++;
738       x_a = *x2++;
739       x_b = *x2++;
740       x_c = *x2++;
741       x_d = *x2++;
742       x_e = *x2++;
743       x_f = *x2++;
744 
745       xh0_2 = x_8 + x_c;
746       xh1_2 = x_9 + x_d;
747       xl0_2 = x_8 - x_c;
748       xl1_2 = x_9 - x_d;
749       xh0_3 = x_a + x_e;
750       xh1_3 = x_b + x_f;
751       xl0_3 = x_a - x_e;
752       xl1_3 = x_b - x_f;
753 
754       n02 = xh0_2 + xh0_3;
755       n03 = xh1_2 + xh1_3;
756       n12 = xl0_2 + xl1_3;
757       n13 = xl1_2 - xl0_3;
758       n22 = xh0_2 - xh0_3;
759       n23 = xh1_2 - xh1_3;
760       n32 = xl0_2 - xl1_3;
761       n33 = xl1_2 + xl0_3;
762 
763       y0[h2 + 2] = n02;
764       y0[h2 + 3] = n03;
765       y1[h2 + 2] = n12;
766       y1[h2 + 3] = n13;
767       y2[h2 + 2] = n22;
768       y2[h2 + 3] = n23;
769       y3[h2 + 2] = n32;
770       y3[h2 + 3] = n33;
771     }
772     x0 += (WORD32)npoints >> 1;
773     x2 += (WORD32)npoints >> 1;
774   }
775 }
776 
ixheaacd_esbr_cos_sin_mod(WORD32 * subband,ia_sbr_qmf_filter_bank_struct * qmf_bank,WORD32 * p_twiddle,WORD32 * p_dig_rev_tbl)777 VOID ixheaacd_esbr_cos_sin_mod(WORD32 *subband,
778                                ia_sbr_qmf_filter_bank_struct *qmf_bank,
779                                WORD32 *p_twiddle, WORD32 *p_dig_rev_tbl) {
780   WORD32 z;
781   WORD32 temp[128];
782   WORD32 scaleshift = 0;
783 
784   WORD32 re2, re3;
785   WORD32 wim, wre;
786 
787   WORD32 i, M_2;
788   WORD32 M = ixheaacd_shr32(qmf_bank->no_channels, 1);
789 
790   const WORD32 *p_sin;
791   const WORD32 *p_sin_cos;
792 
793   WORD32 subband_tmp[128];
794   WORD32 re;
795   WORD32 im;
796   WORD32 *psubband, *psubband1;
797   WORD32 *psubband_t, *psubband1_t;
798   WORD32 *psubband2, *psubband12;
799   WORD32 *psubband_t2, *psubband1_t2;
800 
801   M_2 = ixheaacd_shr32(M, 1);
802 
803   p_sin_cos = qmf_bank->esbr_cos_twiddle;
804 
805   psubband = &subband[0];
806   psubband1 = &subband[2 * M - 1];
807   psubband_t = subband_tmp;
808   psubband1_t = &subband_tmp[2 * M - 1];
809 
810   psubband2 = &subband[64];
811   psubband12 = &subband[2 * M - 1 + 64];
812   psubband_t2 = &subband_tmp[64];
813   psubband1_t2 = &subband_tmp[2 * M - 1 + 64];
814 
815   for (i = (M_2 >> 1) - 1; i >= 0; i--) {
816     re = *psubband++;
817     im = *psubband1--;
818 
819     wim = *p_sin_cos++;
820     wre = *p_sin_cos++;
821 
822     *psubband_t++ = (WORD32)(
823         (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
824         32);
825     *psubband_t++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
826                                                  ixheaacd_mult64(re, wim))) >>
827                              32);
828 
829     re = *psubband2++;
830     im = *psubband12--;
831 
832     *psubband_t2++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
833                                                   ixheaacd_mult64(re, wre))) >>
834                               32);
835     *psubband_t2++ = (WORD32)(
836         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
837         32);
838 
839     re = *psubband1--;
840     im = *psubband++;
841 
842     wim = *p_sin_cos++;
843     wre = *p_sin_cos++;
844 
845     *psubband1_t-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
846                                                   ixheaacd_mult64(re, wim))) >>
847                               32);
848     *psubband1_t-- = (WORD32)(
849         (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
850         32);
851 
852     re = *psubband12--;
853     im = *psubband2++;
854 
855     *psubband1_t2-- = (WORD32)(
856         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
857         32);
858     *psubband1_t2-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
859                                                    ixheaacd_mult64(re, wre))) >>
860                                32);
861 
862     re = *psubband++;
863     im = *psubband1--;
864 
865     wim = *p_sin_cos++;
866     wre = *p_sin_cos++;
867 
868     *psubband_t++ = (WORD32)(
869         (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
870         32);
871     *psubband_t++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
872                                                  ixheaacd_mult64(re, wim))) >>
873                              32);
874 
875     re = *psubband2++;
876     im = *psubband12--;
877 
878     *psubband_t2++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
879                                                   ixheaacd_mult64(re, wre))) >>
880                               32);
881     *psubband_t2++ = (WORD32)(
882         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
883         32);
884 
885     re = *psubband1--;
886     im = *psubband++;
887     ;
888 
889     wim = *p_sin_cos++;
890     wre = *p_sin_cos++;
891 
892     *psubband1_t-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
893                                                   ixheaacd_mult64(re, wim))) >>
894                               32);
895     *psubband1_t-- = (WORD32)(
896         (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
897         32);
898 
899     re = *psubband12--;
900     im = *psubband2++;
901     ;
902 
903     *psubband1_t2-- = (WORD32)(
904         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
905         32);
906     *psubband1_t2-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
907                                                    ixheaacd_mult64(re, wre))) >>
908                                32);
909   }
910 
911   if (M == 32) {
912     ixheaacd_esbr_radix4bfly(p_twiddle, subband_tmp, 1, 8);
913     ixheaacd_esbr_radix4bfly(p_twiddle + 48, subband_tmp, 4, 2);
914     ixheaacd_esbr_postradixcompute2(subband, subband_tmp, p_dig_rev_tbl, 32);
915 
916     ixheaacd_esbr_radix4bfly(p_twiddle, &subband_tmp[64], 1, 8);
917     ixheaacd_esbr_radix4bfly(p_twiddle + 48, &subband_tmp[64], 4, 2);
918     ixheaacd_esbr_postradixcompute2(&subband[64], &subband_tmp[64],
919                                     p_dig_rev_tbl, 32);
920 
921   }
922 
923   else if (M == 16) {
924     ixheaacd_esbr_radix4bfly(p_twiddle, subband_tmp, 1, 4);
925     ixheaacd_esbr_postradixcompute4(subband, subband_tmp, p_dig_rev_tbl, 16);
926 
927     ixheaacd_esbr_radix4bfly(p_twiddle, &subband_tmp[64], 1, 4);
928     ixheaacd_esbr_postradixcompute4(&subband[64], &subband_tmp[64],
929                                     p_dig_rev_tbl, 16);
930 
931   }
932 
933   else if (M == 12) {
934     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
935       temp[z] = subband_tmp[2 * z];
936       temp[12 + z] = subband_tmp[2 * z + 1];
937     }
938 
939     ixheaacd_complex_fft_p3(temp, &temp[12], 12, -1, &scaleshift);
940 
941     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
942       subband[2 * z] = temp[z];
943       subband[2 * z + 1] = temp[z + 12];
944     }
945     scaleshift = 0;
946     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
947       temp[z] = subband_tmp[64 + 2 * z];
948       temp[12 + z] = subband_tmp[64 + 2 * z + 1];
949     }
950 
951     ixheaacd_complex_fft_p3(temp, &temp[12], 12, -1, &scaleshift);
952 
953     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
954       subband[64 + 2 * z] = temp[z];
955       subband[64 + 2 * z + 1] = temp[z + 12];
956     }
957 
958   }
959 
960   else {
961     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
962       temp[z] = subband_tmp[2 * z];
963       temp[8 + z] = subband_tmp[2 * z + 1];
964     }
965 
966     (*ixheaacd_complex_fft_p2)(temp, &temp[8], 8, -1, &scaleshift);
967 
968     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
969       subband[2 * z] = temp[z] << scaleshift;
970       subband[2 * z + 1] = temp[z + 8] << scaleshift;
971     }
972     scaleshift = 0;
973     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
974       temp[z] = subband_tmp[64 + 2 * z];
975       temp[8 + z] = subband_tmp[64 + 2 * z + 1];
976     }
977 
978     (*ixheaacd_complex_fft_p2)(temp, &temp[8], 8, -1, &scaleshift);
979 
980     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
981       subband[64 + 2 * z] = temp[z] << scaleshift;
982       subband[64 + 2 * z + 1] = temp[8 + z] << scaleshift;
983     }
984   }
985 
986   psubband = &subband[0];
987   psubband1 = &subband[2 * M - 1];
988 
989   re = *psubband1;
990 
991   *psubband = *psubband >> 1;
992   psubband++;
993   *psubband1 = ixheaacd_negate32(*psubband >> 1);
994   psubband1--;
995 
996   p_sin = qmf_bank->esbr_alt_sin_twiddle;
997 
998   wim = *p_sin++;
999   wre = *p_sin++;
1000 
1001   im = *psubband1;
1002   ;
1003 
1004   *psubband1-- = (WORD32)(
1005       (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
1006       32);
1007   *psubband++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
1008                                              ixheaacd_mult64(re, wim))) >>
1009                          32);
1010 
1011   psubband2 = &subband[64];
1012   psubband12 = &subband[2 * M - 1 + 64];
1013 
1014   re = *psubband12;
1015   ;
1016 
1017   *psubband12-- = ixheaacd_negate32_sat(*psubband2 >> 1);
1018   ;
1019   *psubband2 = psubband2[1] >> 1;
1020   ;
1021   psubband2++;
1022 
1023   im = *psubband12;
1024   ;
1025 
1026   *psubband2++ = ixheaacd_negate32_sat((WORD32)(
1027       (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
1028       32));
1029   *psubband12-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(re, wim),
1030                                                ixheaacd_mult64(im, wre))) >>
1031                            32);
1032 
1033   for (i = (M_2 - 2); i >= 0; i--) {
1034     im = psubband[0];
1035     ;
1036     re = psubband[1];
1037     ;
1038     re2 = *psubband1;
1039     ;
1040 
1041     *psubband++ = (WORD32)(
1042         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
1043         32);
1044     *psubband1-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
1045                                                 ixheaacd_mult64(re, wre))) >>
1046                             32);
1047 
1048     im = psubband2[0];
1049     ;
1050     re = psubband2[1];
1051     ;
1052     re3 = *psubband12;
1053     ;
1054 
1055     *psubband12-- = ixheaacd_negate32_sat((WORD32)(
1056         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
1057         32));
1058     *psubband2++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(re, wre),
1059                                                 ixheaacd_mult64(im, wim))) >>
1060                             32);
1061 
1062     wim = *p_sin++;
1063     wre = *p_sin++;
1064     im = psubband1[0];
1065     ;
1066 
1067     *psubband1-- = (WORD32)(
1068         (ixheaacd_add64(ixheaacd_mult64(re2, wre), ixheaacd_mult64(im, wim))) >>
1069         32);
1070     *psubband++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
1071                                                ixheaacd_mult64(re2, wim))) >>
1072                            32);
1073 
1074     im = psubband12[0];
1075     ;
1076 
1077     *psubband2++ = ixheaacd_negate32_sat((WORD32)(
1078         (ixheaacd_add64(ixheaacd_mult64(re3, wre), ixheaacd_mult64(im, wim))) >>
1079         32));
1080     *psubband12-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(re3, wim),
1081                                                  ixheaacd_mult64(im, wre))) >>
1082                              32);
1083   }
1084 }
1085 
ixheaacd_esbr_fwd_modulation(const WORD32 * time_sample_buf,WORD32 * real_subband,WORD32 * imag_subband,ia_sbr_qmf_filter_bank_struct * qmf_bank,ia_qmf_dec_tables_struct * qmf_dec_tables_ptr)1086 VOID ixheaacd_esbr_fwd_modulation(
1087     const WORD32 *time_sample_buf, WORD32 *real_subband, WORD32 *imag_subband,
1088     ia_sbr_qmf_filter_bank_struct *qmf_bank,
1089     ia_qmf_dec_tables_struct *qmf_dec_tables_ptr) {
1090   WORD32 i;
1091   const WORD32 *time_sample_buf1 =
1092       &time_sample_buf[2 * qmf_bank->no_channels - 1];
1093   WORD32 temp1, temp2;
1094   WORD32 *t_real_subband = real_subband;
1095   WORD32 *t_imag_subband = imag_subband;
1096   const WORD32 *tcos;
1097 
1098   for (i = qmf_bank->no_channels - 1; i >= 0; i--) {
1099     temp1 = ixheaacd_shr32(*time_sample_buf++, HQ_SHIFT_64);
1100     temp2 = ixheaacd_shr32(*time_sample_buf1--, HQ_SHIFT_64);
1101 
1102     *t_real_subband++ = ixheaacd_sub32_sat(temp1, temp2);
1103     ;
1104     *t_imag_subband++ = ixheaacd_add32(temp1, temp2);
1105     ;
1106   }
1107 
1108   ixheaacd_esbr_cos_sin_mod(real_subband, qmf_bank,
1109                             qmf_dec_tables_ptr->esbr_w_16,
1110                             qmf_dec_tables_ptr->dig_rev_table4_16);
1111 
1112   tcos = qmf_bank->esbr_t_cos;
1113 
1114   for (i = (qmf_bank->usb - qmf_bank->lsb - 1); i >= 0; i--) {
1115     WORD32 cosh, sinh;
1116     WORD32 re, im;
1117 
1118     re = *real_subband;
1119     im = *imag_subband;
1120     cosh = *tcos++;
1121     sinh = *tcos++;
1122     *real_subband++ = (WORD32)((ixheaacd_add64(ixheaacd_mult64(re, cosh),
1123                                                ixheaacd_mult64(im, sinh))) >>
1124                                31);
1125     *imag_subband++ =
1126         (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, cosh),
1127                                      ixheaacd_mult64(re, sinh))) >>
1128                  31);
1129   }
1130 }
1131 
ixheaacd_esbr_qmfsyn64_winadd(WORD32 * tmp1,WORD32 * tmp2,WORD32 * inp1,WORD32 * sample_buffer,WORD32 ch_fac)1132 VOID ixheaacd_esbr_qmfsyn64_winadd(WORD32 *tmp1, WORD32 *tmp2, WORD32 *inp1,
1133                                    WORD32 *sample_buffer, WORD32 ch_fac) {
1134   WORD32 k;
1135 
1136   for (k = 0; k < 64; k++) {
1137     WORD64 syn_out = 0;
1138 
1139     syn_out =
1140         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[0 + k], inp1[k + 0]));
1141     syn_out =
1142         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[256 + k], inp1[k + 128]));
1143     syn_out =
1144         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[512 + k], inp1[k + 256]));
1145     syn_out =
1146         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[768 + k], inp1[k + 384]));
1147     syn_out =
1148         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[1024 + k], inp1[k + 512]));
1149 
1150     syn_out =
1151         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[128 + k], inp1[k + 64]));
1152     syn_out =
1153         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[384 + k], inp1[k + 192]));
1154     syn_out =
1155         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[640 + k], inp1[k + 320]));
1156     syn_out =
1157         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[896 + k], inp1[k + 448]));
1158     syn_out =
1159         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[1152 + k], inp1[k + 576]));
1160 
1161     sample_buffer[ch_fac * k] = (WORD32)(syn_out >> 31);
1162   }
1163 }
1164 
ixheaacd_shiftrountine(WORD32 * qmf_real,WORD32 * qmf_imag,WORD32 len,WORD32 common_shift)1165 VOID ixheaacd_shiftrountine(WORD32 *qmf_real, WORD32 *qmf_imag, WORD32 len,
1166                             WORD32 common_shift) {
1167   WORD32 treal, timag;
1168   WORD32 j;
1169 
1170   if (common_shift < 0) {
1171     WORD32 cshift = -common_shift;
1172     cshift = ixheaacd_min32(cshift, 31);
1173     for (j = len - 1; j >= 0; j--) {
1174       treal = *qmf_real;
1175       timag = *qmf_imag;
1176 
1177       treal = (ixheaacd_shr32(treal, cshift));
1178       timag = (ixheaacd_shr32(timag, cshift));
1179 
1180       *qmf_real++ = treal;
1181       *qmf_imag++ = timag;
1182     }
1183   } else {
1184     for (j = len - 1; j >= 0; j--) {
1185       treal = (ixheaacd_shl32_sat(*qmf_real, common_shift));
1186       timag = (ixheaacd_shl32_sat(*qmf_imag, common_shift));
1187       *qmf_real++ = treal;
1188       *qmf_imag++ = timag;
1189     }
1190   }
1191 }
1192 
ixheaacd_shiftrountine_with_rnd_hq(WORD32 * qmf_real,WORD32 * qmf_imag,WORD32 * filter_states,WORD32 len,WORD32 shift)1193 VOID ixheaacd_shiftrountine_with_rnd_hq(WORD32 *qmf_real, WORD32 *qmf_imag,
1194                                         WORD32 *filter_states, WORD32 len,
1195                                         WORD32 shift) {
1196   WORD32 *filter_states_rev = filter_states + len;
1197   WORD32 treal, timag;
1198   WORD32 j;
1199 
1200   for (j = (len - 1); j >= 0; j -= 2) {
1201     WORD32 r1, r2, i1, i2;
1202     i2 = qmf_imag[j];
1203     r2 = qmf_real[j];
1204     r1 = *qmf_real++;
1205     i1 = *qmf_imag++;
1206 
1207     timag = ixheaacd_add32_sat(i1, r1);
1208     timag = (ixheaacd_shl32_sat(timag, shift));
1209     filter_states_rev[j] = timag;
1210 
1211     treal = ixheaacd_sub32_sat(i2, r2);
1212     treal = (ixheaacd_shl32_sat(treal, shift));
1213     filter_states[j] = treal;
1214 
1215     treal = ixheaacd_sub32_sat(i1, r1);
1216     treal = (ixheaacd_shl32_sat(treal, shift));
1217     *filter_states++ = treal;
1218 
1219     timag = ixheaacd_add32_sat(i2, r2);
1220     timag = (ixheaacd_shl32_sat(timag, shift));
1221     *filter_states_rev++ = timag;
1222   }
1223 }
1224 
ixheaacd_radix4bfly(const WORD16 * w,WORD32 * x,WORD32 index1,WORD32 index)1225 VOID ixheaacd_radix4bfly(const WORD16 *w, WORD32 *x, WORD32 index1,
1226                          WORD32 index) {
1227   int i;
1228   WORD32 l1, l2, h2, fft_jmp;
1229   WORD32 xt0_0, yt0_0, xt1_0, yt1_0, xt2_0, yt2_0;
1230   WORD32 xh0_0, xh1_0, xh20_0, xh21_0, xl0_0, xl1_0, xl20_0, xl21_0;
1231   WORD32 x_0, x_1, x_l1_0, x_l1_1, x_l2_0, x_l2_1;
1232   WORD32 x_h2_0, x_h2_1;
1233   WORD16 si10, si20, si30, co10, co20, co30;
1234 
1235   WORD32 mul_1, mul_2, mul_3, mul_4, mul_5, mul_6;
1236   WORD32 mul_7, mul_8, mul_9, mul_10, mul_11, mul_12;
1237   WORD32 *x_l1;
1238   WORD32 *x_l2;
1239   WORD32 *x_h2;
1240   const WORD16 *w_ptr = w;
1241   WORD32 i1;
1242 
1243   h2 = index << 1;
1244   l1 = index << 2;
1245   l2 = (index << 2) + (index << 1);
1246 
1247   x_l1 = &(x[l1]);
1248   x_l2 = &(x[l2]);
1249   x_h2 = &(x[h2]);
1250 
1251   fft_jmp = 6 * (index);
1252 
1253   for (i1 = 0; i1 < index1; i1++) {
1254     for (i = 0; i < index; i++) {
1255       si10 = (*w_ptr++);
1256       co10 = (*w_ptr++);
1257       si20 = (*w_ptr++);
1258       co20 = (*w_ptr++);
1259       si30 = (*w_ptr++);
1260       co30 = (*w_ptr++);
1261 
1262       x_0 = x[0];
1263       x_h2_0 = x[h2];
1264       x_l1_0 = x[l1];
1265       x_l2_0 = x[l2];
1266 
1267       xh0_0 = x_0 + x_l1_0;
1268       xl0_0 = x_0 - x_l1_0;
1269 
1270       xh20_0 = x_h2_0 + x_l2_0;
1271       xl20_0 = x_h2_0 - x_l2_0;
1272 
1273       x[0] = xh0_0 + xh20_0;
1274       xt0_0 = xh0_0 - xh20_0;
1275 
1276       x_1 = x[1];
1277       x_h2_1 = x[h2 + 1];
1278       x_l1_1 = x[l1 + 1];
1279       x_l2_1 = x[l2 + 1];
1280 
1281       xh1_0 = x_1 + x_l1_1;
1282       xl1_0 = x_1 - x_l1_1;
1283 
1284       xh21_0 = x_h2_1 + x_l2_1;
1285       xl21_0 = x_h2_1 - x_l2_1;
1286 
1287       x[1] = xh1_0 + xh21_0;
1288       yt0_0 = xh1_0 - xh21_0;
1289 
1290       xt1_0 = xl0_0 + xl21_0;
1291       xt2_0 = xl0_0 - xl21_0;
1292 
1293       yt2_0 = xl1_0 + xl20_0;
1294       yt1_0 = xl1_0 - xl20_0;
1295 
1296       mul_11 = ixheaacd_mult32x16in32(xt2_0, co30);
1297       mul_3 = ixheaacd_mult32x16in32(yt2_0, si30);
1298       x[l2] = (mul_3 + mul_11) << RADIXSHIFT;
1299 
1300       mul_5 = ixheaacd_mult32x16in32(xt2_0, si30);
1301       mul_9 = ixheaacd_mult32x16in32(yt2_0, co30);
1302       x[l2 + 1] = (mul_9 - mul_5) << RADIXSHIFT;
1303 
1304       mul_12 = ixheaacd_mult32x16in32(xt0_0, co20);
1305       mul_2 = ixheaacd_mult32x16in32(yt0_0, si20);
1306       x[l1] = (mul_2 + mul_12) << RADIXSHIFT;
1307 
1308       mul_6 = ixheaacd_mult32x16in32(xt0_0, si20);
1309       mul_8 = ixheaacd_mult32x16in32(yt0_0, co20);
1310       x[l1 + 1] = (mul_8 - mul_6) << RADIXSHIFT;
1311 
1312       mul_4 = ixheaacd_mult32x16in32(xt1_0, co10);
1313       mul_1 = ixheaacd_mult32x16in32(yt1_0, si10);
1314       x[h2] = (mul_1 + mul_4) << RADIXSHIFT;
1315 
1316       mul_10 = ixheaacd_mult32x16in32(xt1_0, si10);
1317       mul_7 = ixheaacd_mult32x16in32(yt1_0, co10);
1318       x[h2 + 1] = (mul_7 - mul_10) << RADIXSHIFT;
1319 
1320       x += 2;
1321     }
1322     x += fft_jmp;
1323     w_ptr = w_ptr - fft_jmp;
1324   }
1325 }
1326