• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2  *                                                                            *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 #include "ixheaacd_sbr_common.h"
21 #include <ixheaacd_type_def.h>
22 
23 #include "ixheaacd_constants.h"
24 #include <ixheaacd_basic_ops32.h>
25 #include <ixheaacd_basic_ops16.h>
26 #include <ixheaacd_basic_ops40.h>
27 #include "ixheaacd_basic_ops.h"
28 
29 #include "ixheaacd_defines.h"
30 #include "ixheaacd_common_rom.h"
31 #include "ixheaacd_basic_funcs.h"
32 #include <ixheaacd_aac_rom.h>
33 #include "ixheaacd_aac_imdct.h"
34 #include "ixheaacd_intrinsics.h"
35 
36 #include <ixheaacd_basic_op.h>
37 #include "ixheaacd_function_selector.h"
38 
39 #include "ixheaacd_audioobjtypes.h"
40 #include "ixheaacd_tns.h"
41 
/*
 * DIG_REV(i, m, j)
 *
 * Reverses the order of the sixteen 2-bit groups of the 32-bit value `i`
 * (adjacent bit pairs are swapped first, then nibbles, bytes and finally
 * half-words), shifts the reversed word right by `m` bits and stores the
 * result in the lvalue `j`.
 *
 * `i`, `m` and `j` are each evaluated exactly once.
 */
#define DIG_REV(i, m, j)                                           \
  do {                                                             \
    unsigned dig_rev_tmp_ = (i);                                   \
    /* swap neighbouring 2-bit groups */                           \
    dig_rev_tmp_ = ((dig_rev_tmp_ & 0x33333333u) << 2) |           \
                   ((dig_rev_tmp_ & 0xCCCCCCCCu) >> 2);            \
    /* swap neighbouring nibbles */                                \
    dig_rev_tmp_ = ((dig_rev_tmp_ & 0x0F0F0F0Fu) << 4) |           \
                   ((dig_rev_tmp_ & 0xF0F0F0F0u) >> 4);            \
    /* swap neighbouring bytes */                                  \
    dig_rev_tmp_ = ((dig_rev_tmp_ & 0x00FF00FFu) << 8) |           \
                   ((dig_rev_tmp_ & 0xFF00FF00u) >> 8);            \
    /* swap the two half-words */                                  \
    dig_rev_tmp_ = ((dig_rev_tmp_ & 0x0000FFFFu) << 16) |          \
                   ((dig_rev_tmp_ & 0xFFFF0000u) >> 16);           \
    (j) = dig_rev_tmp_ >> (m);                                     \
  } while (0)
51 
/*
 * MPYHIRC(x, y)
 *
 * Multiplies the signed upper 16 bits of `x` by the 32-bit value `y`, split
 * into two partial products:
 *   - upper(x) * unsigned lower 16 bits of y, with 0x4000 added before an
 *     arithmetic right shift by 15, and
 *   - upper(x) * signed upper 16 bits of y, shifted left by 1.
 * The two partial results are summed.
 *
 * Every use of a parameter is parenthesized so that arguments containing
 * low-precedence operators (|, &, ^, ?:) expand correctly.
 * Both `x` and `y` are evaluated twice; do not pass expressions with side
 * effects.
 */
#define MPYHIRC(x, y)                                                      \
  (((WORD32)((short)((x) >> 16) * (unsigned short)((y) & 0x0000FFFF) +     \
             0x4000) >>                                                    \
    15) +                                                                  \
   ((WORD32)((short)((x) >> 16) * (short)((y) >> 16)) << 1))
57 
/*
 * MPYLUHS(x, y)
 *
 * Product of the unsigned lower 16 bits of `x` and the signed upper 16 bits
 * of `y`, as a WORD32.
 *
 * Each parameter is parenthesized at its point of use so that arguments
 * built from low-precedence operators (|, &, ^, ?:) expand as intended.
 */
#define MPYLUHS(x, y) \
  ((WORD32)((unsigned short)((x) & 0x0000FFFF) * (short)((y) >> 16)))
60 
/* MDCT length constants: MDCT_LEN_BY2 is half of MDCT_LEN. */
#define MDCT_LEN 480
#define MDCT_LEN_BY2 240

/* FFT size constants. */
#define FFT5 5
#define FFT15 15
#define FFT16 16

/* Doubled FFT sizes: FFT15X2 == 2 * FFT15, FFT16X2 == 2 * FFT16. */
#define FFT15X2 30
#define FFT16X2 32
68 
69 WORD32 ixheaacd_fft5out[FFT15X2];
70 
ixheaacd_shr32_drc(WORD32 a,WORD32 b)71 static PLATFORM_INLINE WORD32 ixheaacd_shr32_drc(WORD32 a, WORD32 b) {
72   WORD32 out_val;
73 
74   b = ((UWORD32)(b << 24) >> 24);
75   if (b >= 31) {
76     if (a < 0)
77       out_val = -1;
78     else
79       out_val = 0;
80   } else {
81     a = ixheaacd_add32_sat(a, (1 << (b - 1)));
82     out_val = (WORD32)a >> b;
83   }
84 
85   return out_val;
86 }
87 
ixheaacd_mult32x16hin32_drc(WORD32 a,WORD32 b)88 static PLATFORM_INLINE WORD32 ixheaacd_mult32x16hin32_drc(WORD32 a, WORD32 b) {
89   WORD32 result;
90   WORD64 temp_result;
91   temp_result = (WORD64)a * (WORD64)(b >> 16);
92   result = (WORD32)(temp_result >> 16);
93   return (result);
94 }
95 
ixheaacd_mult32x16lin32(WORD32 a,WORD32 b)96 static PLATFORM_INLINE WORD32 ixheaacd_mult32x16lin32(WORD32 a, WORD32 b) {
97   WORD32 result;
98   WORD64 temp_result;
99   temp_result = (WORD64)a * (WORD64)(((b & 0xFFFF) << 16) >> 16);
100   result = (WORD32)(temp_result >> 16);
101   return (result);
102 }
103 
ixheaacd_mac32x16lin32(WORD32 a,WORD32 b,WORD32 c)104 static PLATFORM_INLINE WORD32 ixheaacd_mac32x16lin32(WORD32 a, WORD32 b,
105                                                      WORD32 c) {
106   WORD32 result;
107   result = a + ixheaacd_mult32x16lin32(b, c);
108   return (result);
109 }
110 
ixheaacd_mult32x16lin32_drc(WORD32 a,WORD32 b)111 static PLATFORM_INLINE WORD32 ixheaacd_mult32x16lin32_drc(WORD32 a, WORD32 b) {
112   WORD32 result;
113   WORD64 temp_result;
114   temp_result = (WORD64)a * (WORD64)(((b & 0xFFFF) << 16) >> 16);
115   if (temp_result < (WORD64)MIN_32)
116     result = MIN_32;
117   else if (temp_result > (WORD64)MAX_32)
118     result = MAX_32;
119   else
120     result = (WORD32)(temp_result);
121   return (result);
122 }
123 
/* Returns `neg_expo` increased by 2 (converted back to WORD16). */
WORD16 ixheaacd_neg_expo_inc_dec(WORD16 neg_expo) {
  const WORD16 increment = 2;

  return neg_expo + increment;
}
125 
/* Returns `neg_expo` increased by 3 (converted back to WORD16). */
WORD16 ixheaacd_neg_expo_inc_arm(WORD16 neg_expo) {
  const WORD16 increment = 3;

  return neg_expo + increment;
}
127 
ixheaacd_pretwiddle_compute_dec(WORD32 * spec_data1,WORD32 * spec_data2,WORD32 * out_ptr,ia_aac_dec_imdct_tables_struct * ptr_imdct_tables,WORD npoints4,WORD32 neg_expo)128 VOID ixheaacd_pretwiddle_compute_dec(
129     WORD32 *spec_data1, WORD32 *spec_data2, WORD32 *out_ptr,
130     ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints4,
131     WORD32 neg_expo) {
132   WORD32 i;
133   WORD32 tempr, tempi;
134   WORD32 tempr1, tempi1;
135   WORD32 npoints2 = npoints4 * 2;
136   WORD32 *out_ptr1 = out_ptr + (npoints2 << 1) - 1;
137   const WORD16 *cos_sin_ptr = ptr_imdct_tables->cosine_array_2048_256;
138 
139   WORD16 cos = 0, cos1 = 0, sin = 0, sin1 = 0;
140   if (neg_expo < 0) {
141     neg_expo = -neg_expo;
142     if (npoints4 == 256) {
143       cos = *cos_sin_ptr++;
144       sin = *cos_sin_ptr++;
145     } else if (npoints4 == 32) {
146       cos = *cos_sin_ptr++;
147       sin = *cos_sin_ptr;
148       cos_sin_ptr += 15;
149     }
150     tempr = *spec_data1++;
151     tempi = *spec_data2--;
152 
153     *out_ptr =
154         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos), tempi, sin);
155 
156     *out_ptr = ixheaacd_shl32(*out_ptr, neg_expo);
157     out_ptr++;
158 
159     *out_ptr = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi, cos),
160                               ixheaacd_mult32x16in32(tempr, sin));
161 
162     *out_ptr = ixheaacd_shl32(*out_ptr, neg_expo);
163     out_ptr++;
164 
165     for (i = 0; i < npoints4 - 1; i++) {
166       if (npoints4 == 256) {
167         sin = *cos_sin_ptr++;
168         cos = *cos_sin_ptr++;
169       } else if (npoints4 == 32) {
170         sin = *cos_sin_ptr++;
171         cos = *cos_sin_ptr;
172         cos_sin_ptr += 15;
173       }
174 
175       tempi1 = *spec_data1++;
176       tempr = *spec_data1++;
177       tempr1 = *spec_data2--;
178       tempi = *spec_data2--;
179 
180       *out_ptr1 = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi1, cos),
181                                  ixheaacd_mult32x16in32(tempr1, sin));
182 
183       *out_ptr1 = ixheaacd_shl32(*out_ptr1, neg_expo);
184       out_ptr1--;
185 
186       *out_ptr1 = ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr1, cos),
187                                         tempi1, sin);
188       *out_ptr1 = ixheaacd_shl32(*out_ptr1, neg_expo);
189       out_ptr1--;
190 
191       *out_ptr =
192           ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, sin), tempi, cos);
193       *out_ptr = ixheaacd_shl32(*out_ptr, neg_expo);
194       out_ptr++;
195 
196       *out_ptr = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi, sin),
197                                 ixheaacd_mult32x16in32(tempr, cos));
198       *out_ptr = ixheaacd_shl32(*out_ptr, neg_expo);
199       out_ptr++;
200     }
201     cos1 = *cos_sin_ptr++;
202     sin1 = *cos_sin_ptr;
203 
204     tempr1 = *spec_data2;
205     tempi1 = *spec_data1;
206 
207     *out_ptr1 = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi1, cos1),
208                                ixheaacd_mult32x16in32(tempr1, sin1));
209     *out_ptr1 = ixheaacd_shl32(*out_ptr1, neg_expo);
210     out_ptr1--;
211 
212     *out_ptr1 = ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr1, cos1),
213                                       tempi1, sin1);
214     *out_ptr1 = ixheaacd_shl32(*out_ptr1, neg_expo);
215     out_ptr1--;
216 
217   } else {
218     if (npoints4 == 256) {
219       cos = *cos_sin_ptr++;
220       sin = *cos_sin_ptr++;
221 
222     } else if (npoints4 == 32) {
223       cos = *cos_sin_ptr++;
224       sin = *cos_sin_ptr;
225       cos_sin_ptr += 15;
226     }
227     tempr = *spec_data1++;
228     tempi = *spec_data2--;
229 
230     *out_ptr =
231         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos), tempi, sin);
232     *out_ptr = ixheaacd_shr32(*out_ptr, neg_expo);
233     out_ptr++;
234 
235     *out_ptr = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi, cos),
236                               ixheaacd_mult32x16in32(tempr, sin));
237 
238     *out_ptr = ixheaacd_shr32(*out_ptr, neg_expo);
239     out_ptr++;
240 
241     for (i = 0; i < npoints4 - 1; i++) {
242       if (npoints4 == 256) {
243         sin = *cos_sin_ptr++;
244         cos = *cos_sin_ptr++;
245       } else if (npoints4 == 32) {
246         sin = *cos_sin_ptr++;
247         cos = *cos_sin_ptr;
248         cos_sin_ptr += 15;
249       }
250 
251       tempi1 = *spec_data1++;
252       tempr = *spec_data1++;
253       tempr1 = *spec_data2--;
254       tempi = *spec_data2--;
255 
256       *out_ptr1 = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi1, cos),
257                                  ixheaacd_mult32x16in32(tempr1, sin));
258       *out_ptr1 = ixheaacd_shr32(*out_ptr1, neg_expo);
259       out_ptr1--;
260 
261       *out_ptr1 = ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr1, cos),
262                                         tempi1, sin);
263       *out_ptr1 = ixheaacd_shr32(*out_ptr1, neg_expo);
264       out_ptr1--;
265 
266       *out_ptr =
267           ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, sin), tempi, cos);
268       *out_ptr = ixheaacd_shr32(*out_ptr, neg_expo);
269       out_ptr++;
270 
271       *out_ptr = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi, sin),
272                                 ixheaacd_mult32x16in32(tempr, cos));
273       *out_ptr = ixheaacd_shr32(*out_ptr, neg_expo);
274       out_ptr++;
275     }
276     cos1 = *cos_sin_ptr++;
277     sin1 = *cos_sin_ptr;
278 
279     tempr1 = *spec_data2;
280     tempi1 = *spec_data1;
281 
282     *out_ptr1 = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi1, cos1),
283                                ixheaacd_mult32x16in32(tempr1, sin1));
284     *out_ptr1 = ixheaacd_shr32(*out_ptr1, neg_expo);
285     out_ptr1--;
286 
287     *out_ptr1 = ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr1, cos1),
288                                       tempi1, sin1);
289     *out_ptr1 = ixheaacd_shr32(*out_ptr1, neg_expo);
290     out_ptr1--;
291   }
292 }
293 
ixheaacd_post_twiddle_dec(WORD32 out_ptr[],WORD32 spec_data[],ia_aac_dec_imdct_tables_struct * ptr_imdct_tables,WORD npoints)294 VOID ixheaacd_post_twiddle_dec(WORD32 out_ptr[], WORD32 spec_data[],
295                                ia_aac_dec_imdct_tables_struct *ptr_imdct_tables,
296                                WORD npoints) {
297   WORD i;
298   WORD16 cos, cos1, sin, sin1;
299   WORD32 *spec_data1 = spec_data + npoints - 1;
300   WORD32 *out_ptr1 = out_ptr + npoints - 1;
301   WORD16 adjust = 50, adjust1 = -50;
302   const WORD16 *cos_sin_ptr = ptr_imdct_tables->cosine_array_2048_256;
303 
304   if (npoints == 1024) {
305     WORD32 tempr, tempi, outi, outr, temp1, temp2;
306     tempr = *spec_data++;
307     tempi = *spec_data++;
308 
309     cos = *cos_sin_ptr;
310     cos_sin_ptr++;
311     sin = *cos_sin_ptr;
312     cos_sin_ptr++;
313 
314     outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
315                           ixheaacd_mult32x16in32(tempi, cos));
316     outr =
317         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos), tempi, sin);
318 
319     temp1 = ixheaacd_mult32x16in32(outi, adjust1);
320     temp2 = ixheaacd_mult32x16in32(outr, adjust);
321 
322     outr = outr + temp1;
323     outi = outi + temp2;
324     *out_ptr1-- = outi;
325     *out_ptr++ = outr;
326 
327     for (i = 0; i < (npoints / 2 - 2); i++) {
328       sin = *cos_sin_ptr++;
329       cos = *cos_sin_ptr++;
330 
331       tempi = *spec_data1--;
332       tempr = *spec_data1--;
333 
334       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
335                             ixheaacd_mult32x16in32(tempi, cos));
336       outr =
337           ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos), tempi, sin);
338 
339       temp1 = ixheaacd_mult32x16in32(outi, adjust1);
340       temp2 = ixheaacd_mult32x16in32(outr, adjust);
341 
342       outr = outr + temp1;
343       outi = outi + temp2;
344 
345       *out_ptr++ = outi;
346       *out_ptr1-- = outr;
347 
348       i++;
349       tempr = *spec_data++;
350       tempi = *spec_data++;
351 
352       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, cos),
353                             ixheaacd_mult32x16in32(tempi, sin));
354       outr =
355           ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, sin), tempi, cos);
356 
357       temp1 = ixheaacd_mult32x16in32(outi, adjust1);
358       temp2 = ixheaacd_mult32x16in32(outr, adjust);
359 
360       outr = outr + temp1;
361       outi = outi + temp2;
362 
363       *out_ptr1-- = outi;
364       *out_ptr++ = outr;
365     }
366     cos1 = *cos_sin_ptr++;
367     sin1 = *cos_sin_ptr;
368 
369     tempi = *spec_data1--;
370     tempr = *spec_data1--;
371 
372     outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin1),
373                           ixheaacd_mult32x16in32(tempi, cos1));
374     outr =
375         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos1), tempi, sin1);
376 
377     temp1 = ixheaacd_mult32x16in32(outi, adjust1);
378     temp2 = ixheaacd_mult32x16in32(outr, adjust);
379 
380     outr = outr + temp1;
381     outi = outi + temp2;
382 
383     *out_ptr++ = outi;
384     *out_ptr1-- = outr;
385   } else if (npoints == 128) {
386     WORD32 tempr, tempi, outi, outr, temp1, temp2;
387     tempr = *spec_data++;
388     tempi = *spec_data++;
389 
390     cos = *cos_sin_ptr++;
391     sin = *cos_sin_ptr;
392     cos_sin_ptr += 15;
393 
394     outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
395                           ixheaacd_mult32x16in32(tempi, cos));
396     outr =
397         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos), tempi, sin);
398 
399     temp1 = ixheaacd_mult32x16in32(outi, -(201 << 1));
400     temp2 = ixheaacd_mult32x16in32(outr, 201 << 1);
401 
402     outr = outr + temp1;
403     outi = outi + temp2;
404     *out_ptr1-- = outi;
405     *out_ptr++ = outr;
406 
407     for (i = 0; i < (npoints / 2 - 2); i++) {
408       sin = *cos_sin_ptr++;
409       cos = *cos_sin_ptr;
410       cos_sin_ptr += 15;
411 
412       tempi = *spec_data1--;
413       tempr = *spec_data1--;
414 
415       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
416                             ixheaacd_mult32x16in32(tempi, cos));
417       outr =
418           ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos), tempi, sin);
419 
420       temp1 = ixheaacd_mult32x16in32(outi, -(201 << 1));
421       temp2 = ixheaacd_mult32x16in32(outr, 201 << 1);
422 
423       outr = outr + temp1;
424       outi = outi + temp2;
425 
426       *out_ptr++ = outi;
427       *out_ptr1-- = outr;
428 
429       i++;
430       tempr = *spec_data++;
431       tempi = *spec_data++;
432 
433       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, cos),
434                             ixheaacd_mult32x16in32(tempi, sin));
435       outr =
436           ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, sin), tempi, cos);
437 
438       temp1 = ixheaacd_mult32x16in32(outi, -(201 << 1));
439       temp2 = ixheaacd_mult32x16in32(outr, 201 << 1);
440 
441       outr = outr + temp1;
442       outi = outi + temp2;
443 
444       *out_ptr1-- = outi;
445       *out_ptr++ = outr;
446     }
447     cos1 = *cos_sin_ptr++;
448     sin1 = *cos_sin_ptr;
449 
450     tempi = *spec_data1--;
451     tempr = *spec_data1--;
452 
453     outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin1),
454                           ixheaacd_mult32x16in32(tempi, cos1));
455     outr =
456         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos1), tempi, sin1);
457 
458     temp1 = ixheaacd_mult32x16in32(outi, -(201 << 1));
459     temp2 = ixheaacd_mult32x16in32(outr, 201 << 1);
460 
461     outr = outr + temp1;
462     outi = outi + temp2;
463 
464     *out_ptr++ = outi;
465     *out_ptr1-- = outr;
466   }
467 }
468 
ixheaacd_post_twid_overlap_add_dec(WORD16 pcm_out[],WORD32 spec_data[],ia_aac_dec_imdct_tables_struct * ptr_imdct_tables,WORD npoints,WORD32 * ptr_overlap_buf,WORD16 q_shift,const WORD16 * window,WORD16 ch_fac)469 VOID ixheaacd_post_twid_overlap_add_dec(
470     WORD16 pcm_out[], WORD32 spec_data[],
471     ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints,
472     WORD32 *ptr_overlap_buf, WORD16 q_shift, const WORD16 *window,
473     WORD16 ch_fac) {
474   WORD i;
475   WORD16 cos, cos1, sin, sin1;
476   WORD32 size = npoints / 2;
477   WORD16 *pcmout1 = pcm_out + (ch_fac * size);
478   const WORD16 *cos_sin_ptr = ptr_imdct_tables->cosine_array_2048_256;
479 
480   pcm_out = pcmout1 - ch_fac;
481   spec_data += size;
482 
483   if (q_shift > 0) {
484     WORD32 tempr, tempi, outr, outi, win1, accu, temp1, temp2;
485     WORD16 adjust, adjust1;
486     WORD32 overlap_data;
487 
488     tempr = *(spec_data - size);
489     tempi = *(spec_data - size + 1);
490     adjust = 50;
491     adjust1 = -50;
492     cos = *cos_sin_ptr++;
493     sin = *cos_sin_ptr++;
494     outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
495                           ixheaacd_mult32x16in32(tempi, cos));
496     outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, cos),
497                           ixheaacd_mult32x16in32(tempi, sin));
498 
499     overlap_data = *ptr_overlap_buf;
500 
501     temp1 = ixheaacd_mult32x16in32(outi, adjust1);
502     temp2 = ixheaacd_mult32x16in32(outr, adjust);
503 
504     outr = outr + temp1;
505     outi = outi + temp2;
506 
507     *ptr_overlap_buf++ = ixheaacd_shr32_drc(outr, 16 - q_shift);
508 
509     win1 = *((WORD32 *)window + size - 1);
510     accu = ixheaacd_sub32_sat(
511         ixheaacd_shl32_sat(ixheaacd_mult32x16lin32(outi, win1), q_shift),
512         ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1 >> 16)));
513 
514     *pcm_out = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
515 
516     pcm_out -= ch_fac;
517     accu = ixheaacd_sub32_sat(
518         ixheaacd_shl32_sat(
519             ixheaacd_mult32x16hin32_drc(ixheaacd_negate32(outi), win1),
520             q_shift),
521         ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1)));
522 
523     *pcmout1 = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
524 
525     pcmout1 += ch_fac;
526 
527     for (i = size - 2; i != 0;) {
528       sin = *cos_sin_ptr++;
529       cos = *cos_sin_ptr++;
530 
531       tempr = *(spec_data + i);
532       tempi = *(spec_data + i + 1);
533 
534       outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, cos),
535                             ixheaacd_mult32x16in32(tempi, sin));
536       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
537                             ixheaacd_mult32x16in32(tempi, cos));
538 
539       temp1 = ixheaacd_mult32x16in32(outi, adjust1);
540       temp2 = ixheaacd_mult32x16in32(outr, adjust);
541 
542       outr = outr + temp1;
543       outi = outi + temp2;
544 
545       overlap_data = *ptr_overlap_buf;
546 
547       *ptr_overlap_buf++ = ixheaacd_shr32_drc(outi, 16 - q_shift);
548 
549       win1 = *((WORD32 *)window + i);
550       accu = ixheaacd_sub32_sat(
551           ixheaacd_shl32_sat(ixheaacd_mult32x16lin32(outr, win1), q_shift),
552           ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1 >> 16)));
553       *pcm_out = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
554       pcm_out -= ch_fac;
555       accu = ixheaacd_sub32_sat(
556           ixheaacd_shl32_sat(
557               ixheaacd_mult32x16hin32_drc(ixheaacd_negate32(outr), win1),
558               q_shift),
559           ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)win1));
560       *pcmout1 = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
561       pcmout1 += ch_fac;
562 
563       tempr = *(spec_data - i);
564       tempi = *(spec_data - i + 1);
565 
566       i -= 2;
567 
568       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, cos),
569                             ixheaacd_mult32x16in32(tempi, sin));
570       outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, sin),
571                             ixheaacd_mult32x16in32(tempi, cos));
572 
573       overlap_data = *ptr_overlap_buf;
574 
575       temp1 = ixheaacd_mult32x16in32(outi, adjust1);
576 
577       temp2 = ixheaacd_mult32x16in32(outr, adjust);
578 
579       outr = outr + temp1;
580       outi = outi + temp2;
581 
582       *ptr_overlap_buf++ = ixheaacd_shr32_drc(outr, 16 - q_shift);
583 
584       win1 = *((WORD32 *)window + i + 1);
585       accu = ixheaacd_sub32_sat(
586           ixheaacd_shl32_sat(ixheaacd_mult32x16lin32(outi, win1), q_shift),
587           ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1 >> 16)));
588       *pcm_out = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
589       pcm_out -= ch_fac;
590       accu = ixheaacd_sub32_sat(
591           ixheaacd_shl32_sat(
592               ixheaacd_mult32x16hin32_drc(ixheaacd_negate32(outi), win1),
593               q_shift),
594           ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1)));
595       *pcmout1 = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
596       pcmout1 += ch_fac;
597     }
598     cos1 = *cos_sin_ptr++;
599     sin1 = *cos_sin_ptr;
600 
601     tempr = *(spec_data + i);
602     tempi = *(spec_data + i + 1);
603 
604     outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, cos1),
605                           ixheaacd_mult32x16in32(tempi, sin1));
606     outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin1),
607                           ixheaacd_mult32x16in32(tempi, cos1));
608 
609     temp1 = ixheaacd_mult32x16in32(outi, adjust1);
610 
611     temp2 = ixheaacd_mult32x16in32(outr, adjust);
612 
613     outr = outr + temp1;
614     outi = outi + temp2;
615 
616     overlap_data = *ptr_overlap_buf;
617 
618     *ptr_overlap_buf++ = ixheaacd_shr32_drc(outi, 16 - q_shift);
619     win1 = *((WORD32 *)window + i);
620     accu = ixheaacd_sub32_sat(
621         ixheaacd_shl32_sat(ixheaacd_mult32x16lin32(outr, win1), q_shift),
622         ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1 >> 16)));
623     *pcm_out = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
624     pcm_out -= ch_fac;
625     accu = ixheaacd_sub32_sat(
626         ixheaacd_shl32_sat(
627             ixheaacd_mult32x16hin32_drc(ixheaacd_negate32(outr), win1),
628             q_shift),
629         ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)win1));
630     *pcmout1 = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
631     pcmout1 += ch_fac;
632   } else {
633     q_shift = -q_shift;
634     {
635       WORD32 tempr, tempi, temp1, temp2, outr, outi, win1, accu;
636       WORD16 adjust, adjust1;
637       WORD16 overlap_data;
638       tempr = *(spec_data - size);
639       tempi = *(spec_data - size + 1);
640 
641       adjust = 50;
642       adjust1 = -50;
643       cos = *cos_sin_ptr++;
644       sin = *cos_sin_ptr++;
645 
646       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
647                             ixheaacd_mult32x16in32(tempi, cos));
648       outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, cos),
649                             ixheaacd_mult32x16in32(tempi, sin));
650 
651       overlap_data = *ptr_overlap_buf;
652 
653       temp1 = ixheaacd_mult32x16in32(outi, adjust1);
654       temp2 = ixheaacd_mult32x16in32(outr, adjust);
655 
656       outr = outr + temp1;
657       outi = outi + temp2;
658 
659       *ptr_overlap_buf++ = ixheaacd_shr32_drc(outr, 16 + q_shift);
660 
661       win1 = *((WORD32 *)window + size - 1);
662       accu = ixheaacd_sub32_sat(
663           ixheaacd_shr32(ixheaacd_mult32x16lin32(outi, win1), q_shift),
664           ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1 >> 16)));
665 
666       *pcm_out = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
667 
668       pcm_out -= ch_fac;
669       accu = ixheaacd_sub32_sat(
670           ixheaacd_shr32(
671               ixheaacd_mult32x16hin32_drc(ixheaacd_negate32(outi), win1),
672               q_shift),
673           ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1)));
674 
675       *pcmout1 = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
676       pcmout1 += ch_fac;
677 
678       for (i = size - 2; i != 0;) {
679         sin = *cos_sin_ptr++;
680         cos = *cos_sin_ptr++;
681 
682         tempr = *(spec_data + i);
683         tempi = *(spec_data + i + 1);
684 
685         outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, cos),
686                               ixheaacd_mult32x16in32(tempi, sin));
687         outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
688                               ixheaacd_mult32x16in32(tempi, cos));
689 
690         overlap_data = *ptr_overlap_buf;
691 
692         temp1 = ixheaacd_mult32x16in32(outi, adjust1);
693 
694         temp2 = ixheaacd_mult32x16in32(outr, adjust);
695         outr = outr + temp1;
696         outi = outi + temp2;
697         *ptr_overlap_buf++ = ixheaacd_shr32_drc(outi, 16 + q_shift);
698 
699         win1 = *((WORD32 *)window + i);
700         accu = ixheaacd_sub32_sat(
701             ixheaacd_shr32(ixheaacd_mult32x16lin32(outr, win1), q_shift),
702             ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1 >> 16)));
703 
704         *pcm_out = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
705         pcm_out -= ch_fac;
706         accu = ixheaacd_sub32_sat(
707             ixheaacd_shr32(
708                 ixheaacd_mult32x16hin32_drc(ixheaacd_negate32(outr), win1),
709                 q_shift),
710             ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)win1));
711         *pcmout1 = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
712         pcmout1 += ch_fac;
713 
714         tempr = *(spec_data - i);
715         tempi = *(spec_data - i + 1);
716         i -= 2;
717 
718         outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, cos),
719                               ixheaacd_mult32x16in32(tempi, sin));
720         outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, sin),
721                               ixheaacd_mult32x16in32(tempi, cos));
722 
723         overlap_data = *ptr_overlap_buf;
724 
725         temp1 = ixheaacd_mult32x16in32(outi, adjust1);
726         temp2 = ixheaacd_mult32x16in32(outr, adjust);
727 
728         outr = outr + temp1;
729         outi = outi + temp2;
730 
731         *ptr_overlap_buf++ = ixheaacd_shr32_drc(outr, 16 + q_shift);
732 
733         win1 = *((WORD32 *)window + i + 1);
734         accu = ixheaacd_sub32_sat(
735             ixheaacd_shr32(ixheaacd_mult32x16lin32(outi, win1), q_shift),
736             ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1 >> 16)));
737 
738         *pcm_out = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
739         pcm_out -= ch_fac;
740         accu = ixheaacd_sub32_sat(
741             ixheaacd_shr32(
742                 ixheaacd_mult32x16hin32_drc(ixheaacd_negate32(outi), win1),
743                 q_shift),
744             ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1)));
745 
746         *pcmout1 = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
747         pcmout1 += ch_fac;
748       }
749       cos1 = *cos_sin_ptr++;
750       sin1 = *cos_sin_ptr++;
751 
752       tempr = *(spec_data + i);
753       tempi = *(spec_data + i + 1);
754 
755       outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, cos1),
756                             ixheaacd_mult32x16in32(tempi, sin1));
757       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin1),
758                             ixheaacd_mult32x16in32(tempi, cos1));
759 
760       overlap_data = *ptr_overlap_buf;
761 
762       temp1 = ixheaacd_mult32x16in32(outi, adjust1);
763 
764       temp2 = ixheaacd_mult32x16in32(outr, adjust);
765 
766       outr = outr + temp1;
767       outi = outi + temp2;
768 
769       *ptr_overlap_buf++ = ixheaacd_shr32_drc(outi, 16 + q_shift);
770 
771       win1 = *((WORD32 *)window + i);
772       accu = ixheaacd_sub32_sat(
773           ixheaacd_shr32(ixheaacd_mult32x16lin32(outr, win1), q_shift),
774           ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1 >> 16)));
775 
776       *pcm_out = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
777       pcm_out -= ch_fac;
778       accu = ixheaacd_sub32_sat(
779           ixheaacd_shr32(
780               ixheaacd_mult32x16hin32_drc(ixheaacd_negate32(outr), win1),
781               q_shift),
782           ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)win1));
783       *pcmout1 = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
784       pcmout1 += ch_fac;
785     }
786   }
787 }
788 
ixheaacd_imdct_using_fft_dec(ia_aac_dec_imdct_tables_struct * ptr_imdct_tables,WORD32 npoints,WORD32 * ptr_x,WORD32 * ptr_y)789 VOID ixheaacd_imdct_using_fft_dec(
790     ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD32 npoints,
791     WORD32 *ptr_x, WORD32 *ptr_y)
792 
793 {
794   WORD32 i, j, k, k1, n_stages;
795   WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i,
796       x7r, x7i;
797   WORD32 del, nodespacing, in_loop_cnt, tmp, twiddle_val, *ptr_tmp;
798   const WORD32 *ptr_twiddle;
799   WORD8 *ptr_dig_rev_table;
800   n_stages = ixheaacd_norm32(npoints);
801 
802   n_stages = (30 - n_stages) / 3;
803 
804   ptr_tmp = ptr_y;
805 
806   ptr_twiddle = ptr_imdct_tables->fft_twiddle;
807   ptr_dig_rev_table = ((npoints << 1) == 1024)
808                           ? ptr_imdct_tables->dig_rev_table8_long
809                           : ptr_imdct_tables->dig_rev_table8_short;
810 
811   for (i = npoints; i != 0; i -= 8) {
812     WORD32 *data = ptr_x;
813     data = data + (*ptr_dig_rev_table++ << 1);
814 
815     x0r = *data;
816     x0i = *(data + 1);
817     data += (npoints >> 1);
818 
819     x2r = *data;
820     x2i = *(data + 1);
821     data += (npoints >> 1);
822 
823     x4r = *data;
824     x4i = *(data + 1);
825     data += (npoints >> 1);
826 
827     x6r = *data;
828     x6i = *(data + 1);
829     data -= 5 * (npoints >> 2);
830 
831     x0r = x0r + x4r;
832     x0i = x0i + x4i;
833     x4r = x0r - (x4r << 1);
834     x4i = x0i - (x4i << 1);
835 
836     x2r = x2r + x6r;
837     x2i = x2i + x6i;
838     x6r = x2r - (x6r << 1);
839     x6i = x2i - (x6i << 1);
840 
841     x0r = x0r + x2r;
842     x0i = x0i + x2i;
843     x2r = x0r - (x2r << 1);
844     x2i = x0i - (x2i << 1);
845 
846     x4r = x4r + x6i;
847     x4i = x4i - x6r;
848     tmp = x6r;
849     x6r = x4r - (x6i << 1);
850     x6i = x4i + (tmp << 1);
851 
852     x1r = *data;
853     x1i = *(data + 1);
854     data += (npoints >> 1);
855 
856     x3r = *data;
857     x3i = *(data + 1);
858     data += (npoints >> 1);
859 
860     x5r = *data;
861     x5i = *(data + 1);
862     data += (npoints >> 1);
863 
864     x7r = *data;
865     x7i = *(data + 1);
866     data -= 7 * (npoints >> 2);
867 
868     x1r = x1r + x5r;
869     x1i = x1i + x5i;
870     x5r = x1r - (x5r << 1);
871     x5i = x1i - (x5i << 1);
872 
873     x3r = x3r + x7r;
874     x3i = x3i + x7i;
875     x7r = x3r - (x7r << 1);
876     x7i = x3i - (x7i << 1);
877 
878     x1r = x1r + x3r;
879     x1i = x1i + x3i;
880     x3r = x1r - (x3r << 1);
881     x3i = x1i - (x3i << 1);
882 
883     x5r = x5r + x5i;
884     x5i = x5r - (x5i << 1);
885 
886     x7r = x7r + x7i;
887     x7i = x7r - (x7i << 1);
888 
889     x7i = x5r - x7i;
890     x5r = x7i - (x5r << 1);
891 
892     x5i = x7r - x5i;
893     x7r = x5i - (x7r << 1);
894 
895     x7i = x7i << 1;
896     x5r = x5r << 1;
897     x5i = x5i << 1;
898     x7r = x7r << 1;
899 
900     x0r = x0r + x1r;
901     x0i = x0i + x1i;
902     x1r = x0r - (x1r << 1);
903     x1i = x0i - (x1i << 1);
904 
905     x2r = x2r + x3i;
906     tmp = x2r - (x3i << 1);
907     x2i = x2i - x3r;
908     x3i = x2i + (x3r << 1);
909 
910     *ptr_tmp = x0r;
911     *(ptr_tmp + 1) = x0i;
912     ptr_tmp += 4;
913 
914     *ptr_tmp = x2r;
915     *(ptr_tmp + 1) = x2i;
916     ptr_tmp += 4;
917 
918     *ptr_tmp = x1r;
919     *(ptr_tmp + 1) = x1i;
920     ptr_tmp += 4;
921 
922     *ptr_tmp = tmp;
923     *(ptr_tmp + 1) = x3i;
924     ptr_tmp -= 10;
925 
926     tmp = 0x5A82;
927 
928     x7i = x4r + (ixheaacd_mult32x16lin32(x7i, tmp));
929     x4r = x7i - (x4r << 1);
930 
931     x7r = x4i + (ixheaacd_mult32x16lin32(x7r, tmp));
932     x4i = x7r - (x4i << 1);
933 
934     x5i = x6r + (ixheaacd_mult32x16lin32(x5i, tmp));
935     x6r = x5i - (x6r << 1);
936 
937     x5r = x6i + (ixheaacd_mult32x16lin32(x5r, tmp));
938     x6i = x5r - (x6i << 1);
939 
940     *ptr_tmp = x7i;
941     *(ptr_tmp + 1) = x7r;
942     ptr_tmp += 4;
943 
944     *ptr_tmp = x5i;
945     *(ptr_tmp + 1) = x5r;
946     ptr_tmp += 4;
947 
948     *ptr_tmp = -x4r;
949     *(ptr_tmp + 1) = -x4i;
950     ptr_tmp += 4;
951 
952     *ptr_tmp = -x6r;
953     *(ptr_tmp + 1) = -x6i;
954     ptr_tmp += 2;
955   }
956 
957   del = 8;
958 
959   nodespacing = 64;
960   in_loop_cnt = npoints >> 6;
961 
962   for (k1 = n_stages - 2; k1 > 0; k1--) {
963     WORD32 *data = ptr_y;
964     const WORD32 *twiddles;
965 
966     for (i = 0; i != npoints; i += 8 * del) {
967       data = ptr_y + (i << 1);
968       x0r = *data;
969       x0i = *(data + 1);
970       data += (del << 2);
971 
972       x2r = *data;
973       x2i = *(data + 1);
974       data += (del << 2);
975 
976       x4r = *data;
977       x4i = *(data + 1);
978       data += (del << 2);
979 
980       x6r = *data;
981       x6i = *(data + 1);
982       data -= 5 * (del << 1);
983 
984       x0r = x0r + x4r;
985       x0i = x0i + x4i;
986       x4r = x0r - (x4r << 1);
987       x4i = x0i - (x4i << 1);
988 
989       x2r = x2r + x6r;
990       x2i = x2i + x6i;
991       x6r = x2r - (x6r << 1);
992       x6i = x2i - (x6i << 1);
993 
994       x0r = x0r + x2r;
995       x0i = x0i + x2i;
996       x2r = x0r - (x2r << 1);
997       x2i = x0i - (x2i << 1);
998 
999       x4r = x4r + x6i;
1000       x4i = x4i - x6r;
1001       tmp = x6r;
1002       x6r = x4r - (x6i << 1);
1003       x6i = x4i + (tmp << 1);
1004 
1005       x1r = *data;
1006       x1i = *(data + 1);
1007       data += (del << 2);
1008 
1009       x3r = *data;
1010       x3i = *(data + 1);
1011       data += (del << 2);
1012 
1013       x5r = *data;
1014       x5i = *(data + 1);
1015       data += (del << 2);
1016 
1017       x7r = *data;
1018       x7i = *(data + 1);
1019       data -= 7 * (del << 1);
1020 
1021       x1r = x1r + x5r;
1022       x1i = x1i + x5i;
1023       x5r = x1r - (x5r << 1);
1024       x5i = x1i - (x5i << 1);
1025 
1026       x3r = x3r + x7r;
1027       x3i = x3i + x7i;
1028       x7r = x3r - (x7r << 1);
1029       x7i = x3i - (x7i << 1);
1030 
1031       x1r = x1r + x3r;
1032       x1i = x1i + x3i;
1033       x3r = x1r - (x3r << 1);
1034       x3i = x1i - (x3i << 1);
1035 
1036       x5r = x5r + x5i;
1037       x5i = x5r - (x5i << 1);
1038 
1039       x7r = x7r + x7i;
1040       x7i = x7r - (x7i << 1);
1041 
1042       x7i = x5r - x7i;
1043       x5r = x7i - (x5r << 1);
1044 
1045       x5i = x7r - x5i;
1046       x7r = x5i - (x7r << 1);
1047 
1048       x7i = x7i << 1;
1049       x5r = x5r << 1;
1050       x5i = x5i << 1;
1051       x7r = x7r << 1;
1052 
1053       x0r = x0r + x1r;
1054       x0i = x0i + x1i;
1055       x1r = x0r - (x1r << 1);
1056       x1i = x0i - (x1i << 1);
1057 
1058       x2r = x2r + x3i;
1059       tmp = x2r - (x3i << 1);
1060       x2i = x2i - x3r;
1061       x3i = x2i + (x3r << 1);
1062 
1063       *data = x0r;
1064       *(data + 1) = x0i;
1065       data += (del << 2);
1066 
1067       *data = x2r;
1068       *(data + 1) = x2i;
1069       data += (del << 2);
1070 
1071       *data = x1r;
1072       *(data + 1) = x1i;
1073       data += (del << 2);
1074 
1075       *data = tmp;
1076       *(data + 1) = x3i;
1077       data -= 5 * (del << 1);
1078 
1079       tmp = 0x5A82;
1080 
1081       x7i = x4r + (ixheaacd_mult32x16lin32(x7i, tmp));
1082       x4r = x7i - (x4r << 1);
1083       x7r = x4i + (ixheaacd_mult32x16lin32(x7r, tmp));
1084       x4i = x7r - (x4i << 1);
1085 
1086       x5i = x6r + (ixheaacd_mult32x16lin32(x5i, tmp));
1087       x6r = x5i - (x6r << 1);
1088       x5r = x6i + (ixheaacd_mult32x16lin32(x5r, tmp));
1089       x6i = x5r - (x6i << 1);
1090 
1091       *data = x7i;
1092       *(data + 1) = x7r;
1093       data += (del << 2);
1094 
1095       *data = x5i;
1096       *(data + 1) = x5r;
1097       data += (del << 2);
1098 
1099       *data = -x4r;
1100       *(data + 1) = -x4i;
1101       data += (del << 2);
1102 
1103       *data = -x6r;
1104       *(data + 1) = -x6i;
1105 
1106       data -= 7 * (del << 1);
1107     }
1108 
1109     twiddles = ptr_twiddle;
1110     data = ptr_y;
1111 
1112     for (j = nodespacing; j < nodespacing * del; j += nodespacing) {
1113       data = data + 2;
1114 
1115       for (k = in_loop_cnt; k != 0; k--) {
1116         data += (del << 2);
1117         x2r = *data;
1118         x2i = *(data + 1);
1119 
1120         data += (del << 2);
1121         x4r = *data;
1122         x4i = *(data + 1);
1123 
1124         data += (del << 2);
1125         x6r = *data;
1126         x6i = *(data + 1);
1127 
1128         data -= 6 * (del << 1);
1129 
1130         twiddles += (j >> 2);
1131 
1132         twiddle_val = *(twiddles);
1133 
1134         tmp = (ixheaacd_mult32x16lin32(x2r, twiddle_val) -
1135                ixheaacd_mult32x16hin32_drc(x2i, twiddle_val));
1136         x2i = (ixheaacd_mac32x16lin32(
1137                   ixheaacd_mult32x16hin32_drc(x2r, twiddle_val), x2i,
1138                   twiddle_val))
1139               << 1;
1140         x2r = tmp << 1;
1141 
1142         twiddles += (j >> 2);
1143         twiddle_val = *(twiddles);
1144 
1145         tmp = (ixheaacd_mult32x16lin32(x4r, twiddle_val) -
1146                ixheaacd_mult32x16hin32_drc(x4i, twiddle_val));
1147         x4i = (ixheaacd_mac32x16lin32(
1148                   ixheaacd_mult32x16hin32_drc(x4r, twiddle_val), x4i,
1149                   twiddle_val))
1150               << 1;
1151         x4r = tmp << 1;
1152 
1153         twiddles += (j >> 2);
1154         twiddle_val = *(twiddles);
1155 
1156         tmp = (ixheaacd_mult32x16lin32(x6r, twiddle_val) -
1157                ixheaacd_mult32x16hin32_drc(x6i, twiddle_val));
1158         x6i = (ixheaacd_mac32x16lin32(
1159                   ixheaacd_mult32x16hin32_drc(x6r, twiddle_val), x6i,
1160                   twiddle_val))
1161               << 1;
1162         x6r = tmp << 1;
1163 
1164         x0r = *data;
1165         x0i = *(data + 1);
1166         data += (del << 1);
1167 
1168         x0r = x0r + x4r;
1169         x0i = x0i + x4i;
1170         x4r = x0r - (x4r << 1);
1171         x4i = x0i - (x4i << 1);
1172 
1173         x2r = x2r + x6r;
1174         x2i = x2i + x6i;
1175         x6r = x2r - (x6r << 1);
1176         x6i = x2i - (x6i << 1);
1177 
1178         x0r = x0r + x2r;
1179         x0i = x0i + x2i;
1180         x2r = x0r - (x2r << 1);
1181         x2i = x0i - (x2i << 1);
1182 
1183         x4r = x4r + x6i;
1184         x4i = x4i - x6r;
1185         tmp = x6r;
1186         x6r = x4r - (x6i << 1);
1187         x6i = x4i + (tmp << 1);
1188 
1189         x1r = *data;
1190         x1i = *(data + 1);
1191         data += (del << 2);
1192 
1193         twiddles -= 5 * (j >> 3);
1194         twiddle_val = *(twiddles);
1195 
1196         tmp = (ixheaacd_mult32x16lin32(x1r, twiddle_val) -
1197                ixheaacd_mult32x16hin32_drc(x1i, twiddle_val));
1198         x1i = (ixheaacd_mac32x16lin32(
1199                   ixheaacd_mult32x16hin32_drc(x1r, twiddle_val), x1i,
1200                   twiddle_val))
1201               << 1;
1202         x1r = tmp << 1;
1203 
1204         x3r = *data;
1205         x3i = *(data + 1);
1206         data += (del << 2);
1207 
1208         twiddles += (j >> 2);
1209         twiddle_val = *(twiddles);
1210 
1211         tmp = (ixheaacd_mult32x16lin32(x3r, twiddle_val) -
1212                ixheaacd_mult32x16hin32_drc(x3i, twiddle_val));
1213         x3i = (ixheaacd_mac32x16lin32(
1214             ixheaacd_mult32x16hin32_drc(x3r, twiddle_val), x3i, twiddle_val));
1215         x3r = tmp;
1216 
1217         x5r = *data;
1218         x5i = *(data + 1);
1219         data += (del << 2);
1220 
1221         twiddles += (j >> 2);
1222         twiddle_val = *(twiddles);
1223 
1224         tmp = (ixheaacd_mult32x16lin32(x5r, twiddle_val) -
1225                ixheaacd_mult32x16hin32_drc(x5i, twiddle_val));
1226         x5i = (ixheaacd_mac32x16lin32(
1227             ixheaacd_mult32x16hin32_drc(x5r, twiddle_val), x5i, twiddle_val));
1228         x5r = tmp;
1229 
1230         x7r = *data;
1231         x7i = *(data + 1);
1232         data -= 7 * (del << 1);
1233 
1234         twiddles += (j >> 2);
1235         twiddle_val = *(twiddles);
1236         twiddles -= 7 * (j >> 3);
1237 
1238         tmp = (ixheaacd_mult32x16lin32(x7r, twiddle_val) -
1239                ixheaacd_mult32x16hin32_drc(x7i, twiddle_val));
1240         x7i = (ixheaacd_mac32x16lin32(
1241             ixheaacd_mult32x16hin32_drc(x7r, twiddle_val), x7i, twiddle_val));
1242         x7r = tmp;
1243 
1244         x1r = x1r + (x5r << 1);
1245         x1i = x1i + (x5i << 1);
1246         x5r = x1r - (x5r << 2);
1247         x5i = x1i - (x5i << 2);
1248 
1249         x3r = x3r + x7r;
1250         x3i = x3i + x7i;
1251         x7r = x3r - (x7r << 1);
1252         x7i = x3i - (x7i << 1);
1253 
1254         x1r = x1r + (x3r << 1);
1255         x1i = x1i + (x3i << 1);
1256         x3r = x1r - (x3r << 2);
1257         x3i = x1i - (x3i << 2);
1258 
1259         x5r = x5r + x5i;
1260         x5i = x5r - (x5i << 1);
1261 
1262         x7r = x7r + x7i;
1263         x7i = x7r - (x7i << 1);
1264 
1265         x7i = x5r - (x7i << 1);
1266         x5r = x7i - (x5r << 1);
1267 
1268         x5i = (x7r << 1) - x5i;
1269         x7r = x5i - (x7r << 2);
1270 
1271         x7i = x7i << 1;
1272         x5r = x5r << 1;
1273         x5i = x5i << 1;
1274         x7r = x7r << 1;
1275 
1276         x0r = x0r + x1r;
1277         x0i = x0i + x1i;
1278         x1r = x0r - (x1r << 1);
1279         x1i = x0i - (x1i << 1);
1280 
1281         x2r = x2r + x3i;
1282         tmp = x2r - (x3i << 1);
1283         x2i = x2i - x3r;
1284         x3i = x2i + (x3r << 1);
1285 
1286         *data = x0r;
1287         *(data + 1) = x0i;
1288         data += (del << 2);
1289 
1290         *data = x2r;
1291         *(data + 1) = x2i;
1292         data += (del << 2);
1293 
1294         *data = x1r;
1295         *(data + 1) = x1i;
1296         data += (del << 2);
1297 
1298         *data = tmp;
1299         *(data + 1) = x3i;
1300         data -= 5 * (del << 1);
1301 
1302         tmp = 0x5A82;
1303 
1304         x7i = x4r + (ixheaacd_mult32x16lin32(x7i, tmp));
1305         x4r = x7i - (x4r << 1);
1306 
1307         x7r = x4i + (ixheaacd_mult32x16lin32(x7r, tmp));
1308         x4i = x7r - (x4i << 1);
1309 
1310         x5i = x6r + (ixheaacd_mult32x16lin32(x5i, tmp));
1311         x6r = x5i - (x6r << 1);
1312 
1313         x5r = x6i + (ixheaacd_mult32x16lin32(x5r, tmp));
1314         x6i = x5r - (x6i << 1);
1315 
1316         *data = x7i;
1317         *(data + 1) = x7r;
1318         data += (del << 2);
1319 
1320         *data = x5i;
1321         *(data + 1) = x5r;
1322         data += (del << 2);
1323 
1324         *data = -x4r;
1325         *(data + 1) = -x4i;
1326         data += (del << 2);
1327 
1328         *data = -x6r;
1329         *(data + 1) = -x6i;
1330 
1331         data -= 7 * (del << 1);
1332         data += (del << 4);
1333       }
1334       data -= npoints << 1;
1335     }
1336     nodespacing >>= 3;
1337     del <<= 3;
1338     in_loop_cnt >>= 3;
1339   }
1340 
1341   {
1342     WORD32 *data = ptr_y;
1343     const WORD32 *twiddles;
1344     twiddles = ptr_twiddle;
1345     data = ptr_y;
1346     data = data - 2;
1347 
1348     for (j = 0; j < nodespacing * del; j += nodespacing) {
1349       data = data + 2;
1350 
1351       {
1352         data += (del << 2);
1353         x2r = *data;
1354         x2i = *(data + 1);
1355 
1356         data += (del << 2);
1357         x4r = *data;
1358         x4i = *(data + 1);
1359 
1360         data += (del << 2);
1361         x6r = *data;
1362         x6i = *(data + 1);
1363 
1364         data -= 6 * (del << 1);
1365 
1366         twiddles += (j >> 2);
1367 
1368         twiddle_val = *(twiddles);
1369 
1370         tmp = (ixheaacd_mult32x16lin32(x2r, twiddle_val) -
1371                ixheaacd_mult32x16hin32_drc(x2i, twiddle_val));
1372         x2i = (ixheaacd_mac32x16lin32(
1373                   ixheaacd_mult32x16hin32_drc(x2r, twiddle_val), x2i,
1374                   twiddle_val))
1375               << 1;
1376         x2r = tmp << 1;
1377 
1378         twiddles += (j >> 2);
1379         twiddle_val = *(twiddles);
1380 
1381         tmp = (ixheaacd_mult32x16lin32(x4r, twiddle_val) -
1382                ixheaacd_mult32x16hin32_drc(x4i, twiddle_val));
1383         x4i = (ixheaacd_mac32x16lin32(
1384                   ixheaacd_mult32x16hin32_drc(x4r, twiddle_val), x4i,
1385                   twiddle_val))
1386               << 1;
1387         x4r = tmp << 1;
1388 
1389         twiddles += (j >> 2);
1390         twiddle_val = *(twiddles);
1391 
1392         tmp = (ixheaacd_mult32x16lin32(x6r, twiddle_val) -
1393                ixheaacd_mult32x16hin32_drc(x6i, twiddle_val));
1394         x6i = (ixheaacd_mac32x16lin32(
1395                   ixheaacd_mult32x16hin32_drc(x6r, twiddle_val), x6i,
1396                   twiddle_val))
1397               << 1;
1398         x6r = tmp << 1;
1399 
1400         x0r = *data;
1401         x0i = *(data + 1);
1402         data += (del << 1);
1403 
1404         x0r = x0r + x4r;
1405         x0i = x0i + x4i;
1406         x4r = x0r - (x4r << 1);
1407         x4i = x0i - (x4i << 1);
1408 
1409         x2r = x2r + x6r;
1410         x2i = x2i + x6i;
1411         x6r = x2r - (x6r << 1);
1412         x6i = x2i - (x6i << 1);
1413 
1414         x0r = x0r + x2r;
1415         x0i = x0i + x2i;
1416         x2r = x0r - (x2r << 1);
1417         x2i = x0i - (x2i << 1);
1418 
1419         x4r = x4r + x6i;
1420         x4i = x4i - x6r;
1421         tmp = x6r;
1422         x6r = x4r - (x6i << 1);
1423         x6i = x4i + (tmp << 1);
1424 
1425         x1r = *data;
1426         x1i = *(data + 1);
1427         data += (del << 2);
1428 
1429         twiddles -= 5 * (j >> 3);
1430         twiddle_val = *(twiddles);
1431 
1432         tmp = (ixheaacd_mult32x16lin32(x1r, twiddle_val) -
1433                ixheaacd_mult32x16hin32_drc(x1i, twiddle_val));
1434         x1i = (ixheaacd_mac32x16lin32(
1435                   ixheaacd_mult32x16hin32_drc(x1r, twiddle_val), x1i,
1436                   twiddle_val))
1437               << 1;
1438         x1r = tmp << 1;
1439 
1440         x3r = *data;
1441         x3i = *(data + 1);
1442         data += (del << 2);
1443 
1444         twiddles += (j >> 2);
1445         twiddle_val = *(twiddles);
1446 
1447         tmp = (ixheaacd_mult32x16lin32(x3r, twiddle_val) -
1448                ixheaacd_mult32x16hin32_drc(x3i, twiddle_val));
1449         x3i = (ixheaacd_mac32x16lin32(
1450             ixheaacd_mult32x16hin32_drc(x3r, twiddle_val), x3i, twiddle_val));
1451         x3r = tmp;
1452 
1453         x5r = *data;
1454         x5i = *(data + 1);
1455         data += (del << 2);
1456 
1457         twiddles += (j >> 2);
1458         twiddle_val = *(twiddles);
1459 
1460         tmp = (ixheaacd_mult32x16lin32(x5r, twiddle_val) -
1461                ixheaacd_mult32x16hin32_drc(x5i, twiddle_val));
1462         x5i = (ixheaacd_mac32x16lin32(
1463             ixheaacd_mult32x16hin32_drc(x5r, twiddle_val), x5i, twiddle_val));
1464         x5r = tmp;
1465 
1466         x7r = *data;
1467         x7i = *(data + 1);
1468         data -= 7 * (del << 1);
1469 
1470         twiddles += (j >> 2);
1471         twiddle_val = *(twiddles);
1472         twiddles -= 7 * (j >> 3);
1473 
1474         tmp = (ixheaacd_mult32x16lin32(x7r, twiddle_val) -
1475                ixheaacd_mult32x16hin32_drc(x7i, twiddle_val));
1476         x7i = (ixheaacd_mac32x16lin32(
1477             ixheaacd_mult32x16hin32_drc(x7r, twiddle_val), x7i, twiddle_val));
1478         x7r = tmp;
1479 
1480         x1r = x1r + (x5r << 1);
1481         x1i = x1i + (x5i << 1);
1482         x5r = x1r - (x5r << 2);
1483         x5i = x1i - (x5i << 2);
1484 
1485         x3r = x3r + x7r;
1486         x3i = x3i + x7i;
1487         x7r = x3r - (x7r << 1);
1488         x7i = x3i - (x7i << 1);
1489 
1490         x1r = x1r + (x3r << 1);
1491         x1i = x1i + (x3i << 1);
1492         x3r = x1r - (x3r << 2);
1493         x3i = x1i - (x3i << 2);
1494 
1495         x5r = x5r + x5i;
1496         x5i = x5r - (x5i << 1);
1497 
1498         x7r = x7r + x7i;
1499         x7i = x7r - (x7i << 1);
1500 
1501         x7i = x5r - (x7i << 1);
1502         x5r = x7i - (x5r << 1);
1503 
1504         x5i = (x7r << 1) - x5i;
1505         x7r = x5i - (x7r << 2);
1506 
1507         x7i = x7i << 1;
1508         x5r = x5r << 1;
1509         x5i = x5i << 1;
1510         x7r = x7r << 1;
1511 
1512         x0r = x0r + x1r;
1513         x0i = x0i + x1i;
1514         x1r = x0r - (x1r << 1);
1515         x1i = x0i - (x1i << 1);
1516 
1517         x2r = x2r + x3i;
1518         tmp = x2r - (x3i << 1);
1519         x2i = x2i - x3r;
1520         x3i = x2i + (x3r << 1);
1521 
1522         *data = x0r;
1523         *(data + 1) = x0i;
1524         data += (del << 2);
1525 
1526         *data = x2r;
1527         *(data + 1) = x2i;
1528         data += (del << 2);
1529 
1530         *data = x1r;
1531         *(data + 1) = x1i;
1532         data += (del << 2);
1533 
1534         *data = tmp;
1535         *(data + 1) = x3i;
1536         data -= 5 * (del << 1);
1537 
1538         tmp = 0x5A82;
1539 
1540         x7i = x4r + (ixheaacd_mult32x16lin32(x7i, tmp));
1541         x4r = x7i - (x4r << 1);
1542 
1543         x7r = x4i + (ixheaacd_mult32x16lin32(x7r, tmp));
1544         x4i = x7r - (x4i << 1);
1545 
1546         x5i = x6r + (ixheaacd_mult32x16lin32(x5i, tmp));
1547         x6r = x5i - (x6r << 1);
1548 
1549         x5r = x6i + (ixheaacd_mult32x16lin32(x5r, tmp));
1550         x6i = x5r - (x6i << 1);
1551 
1552         *data = x7i;
1553         *(data + 1) = x7r;
1554         data += (del << 2);
1555 
1556         *data = x5i;
1557         *(data + 1) = x5r;
1558         data += (del << 2);
1559 
1560         *data = -x4r;
1561         *(data + 1) = -x4i;
1562         data += (del << 2);
1563 
1564         *data = -x6r;
1565         *(data + 1) = -x6i;
1566 
1567         data -= 7 * (del << 1);
1568         data += (del << 4);
1569       }
1570       data -= npoints << 1;
1571     }
1572 
1573     nodespacing >>= 3;
1574     del <<= 3;
1575     in_loop_cnt >>= 3;
1576   }
1577 }
1578 
/*
 * ixheaacd_inverse_transform
 *
 * Runs the two stages of the inverse transform through the function pointers
 * ixheaacd_pretwiddle_compute and ixheaacd_imdct_using_fft, in that order.
 *
 *   spec_data         spectral buffer. The pre-twiddle stage receives both
 *                     spec_data and a pointer to element (npoints - 1); the
 *                     FFT stage receives spec_data as its last argument.
 *   scratch           work buffer handed to both stages.
 *   ptr_imdct_tables  table set forwarded unchanged to both stages.
 *   expo              incoming exponent, forwarded to the pre-twiddle stage.
 *   npoints           transform length; the pre-twiddle stage is given
 *                     npoints / 4 and the FFT stage npoints / 2 (by shifts).
 *
 * Returns expo + 2.
 */
WORD32 ixheaacd_inverse_transform(
    WORD32 spec_data[], WORD32 scratch[],
    ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD32 expo,
    WORD32 npoints) {
  WORD32 *spec_last = spec_data + npoints - 1;
  WORD32 quarter_len = (npoints >> 2);
  WORD32 half_len = (npoints >> 1);

  (*ixheaacd_pretwiddle_compute)(spec_data, spec_last, scratch,
                                 ptr_imdct_tables, quarter_len, expo);

  (*ixheaacd_imdct_using_fft)(ptr_imdct_tables, half_len, scratch, spec_data);

  return expo + 2;
}
1593 
ixheaacd_mdct_480_ld(WORD32 * inp,WORD32 * scratch,WORD32 * mdct_scale,WORD32 mdct_flag,ia_aac_dec_imdct_tables_struct * imdct_tables_ptr,WORD32 object_type)1594 VOID ixheaacd_mdct_480_ld(WORD32 *inp, WORD32 *scratch, WORD32 *mdct_scale,
1595                           WORD32 mdct_flag,
1596                           ia_aac_dec_imdct_tables_struct *imdct_tables_ptr,
1597                           WORD32 object_type) {
1598   WORD32 expo, neg_expo = 0, k;
1599 
1600   WORD32 const_mltfac = 1145324612;
1601 
1602   expo = (*ixheaacd_calc_max_spectral_line)(inp, MDCT_LEN) - 1;
1603   ;
1604 
1605   memcpy(scratch, inp, sizeof(WORD32) * MDCT_LEN);
1606 
1607   neg_expo = 7 - expo;
1608 
1609   ixheaacd_pre_twiddle(inp, scratch, 480, imdct_tables_ptr->cosine_array_960,
1610                        neg_expo);
1611 
1612   ixheaacd_fft_480_ld(inp, scratch, imdct_tables_ptr);
1613 
1614   if (object_type == AOT_ER_AAC_LD) {
1615     ixheaacd_post_twiddle_ld(inp, scratch, imdct_tables_ptr->cosine_array_960,
1616                              480);
1617   } else if (object_type == AOT_ER_AAC_ELD) {
1618     ixheaacd_post_twiddle_eld(inp + (480), scratch,
1619                               imdct_tables_ptr->cosine_array_960, 480);
1620   }
1621 
1622   if (0 == mdct_flag) {
1623     WORD32 *data = inp;
1624 
1625     if (object_type != AOT_ER_AAC_ELD) {
1626       for (k = MDCT_LEN - 1; k >= 0; k -= 2) {
1627         *data = ixheaacd_mult32_shl(*data, const_mltfac);
1628         data++;
1629         *data = ixheaacd_mult32_shl(*data, const_mltfac);
1630         data++;
1631       }
1632       neg_expo += 1;
1633     } else {
1634       data = inp + 480;
1635       for (k = (MDCT_LEN << 1) - 1; k >= 0; k -= 2) {
1636         *data = ixheaacd_mult32_shl(*data, const_mltfac);
1637         data++;
1638         *data = ixheaacd_mult32_shl(*data, const_mltfac);
1639         data++;
1640       }
1641       neg_expo += 1;
1642     }
1643   }
1644 
1645   *mdct_scale = neg_expo + 3;
1646 }
1647 
ixheaacd_inverse_transform_512(WORD32 data[],WORD32 temp[],WORD32 * imdct_scale,WORD32 * cos_sin_ptr,ia_aac_dec_imdct_tables_struct * imdct_tables_ptr,WORD32 object_type)1648 VOID ixheaacd_inverse_transform_512(
1649     WORD32 data[], WORD32 temp[], WORD32 *imdct_scale, WORD32 *cos_sin_ptr,
1650     ia_aac_dec_imdct_tables_struct *imdct_tables_ptr, WORD32 object_type) {
1651   WORD32 n;
1652   WORD32 npoints_2;
1653   WORD16 expo, neg_expo;
1654 
1655   n = 512;
1656 
1657   npoints_2 = n >> 1;
1658 
1659   expo = (*ixheaacd_calc_max_spectral_line)(data, n) - 1;
1660 
1661   memcpy(temp, data, sizeof(WORD32) * n);
1662 
1663   neg_expo = 7 - expo;
1664 
1665   ixheaacd_pre_twiddle(data, temp, n, cos_sin_ptr, neg_expo);
1666 
1667   (*ixheaacd_fft32x32_ld)(imdct_tables_ptr, npoints_2, data, temp);
1668 
1669   neg_expo = (*ixheaacd_neg_expo_inc)(neg_expo);
1670 
1671   *imdct_scale = neg_expo + 1;
1672 
1673   if (object_type == AOT_ER_AAC_ELD)
1674     ixheaacd_post_twiddle_eld((data + n), temp, cos_sin_ptr, n);
1675   else
1676     ixheaacd_post_twiddle_ld((data), temp, cos_sin_ptr, n);
1677 }
1678 
ixheaacd_fft_480_ld(WORD32 * inp,WORD32 * op,ia_aac_dec_imdct_tables_struct * imdct_tables_ptr)1679 VOID ixheaacd_fft_480_ld(WORD32 *inp, WORD32 *op,
1680                          ia_aac_dec_imdct_tables_struct *imdct_tables_ptr) {
1681   WORD32 i;
1682   WORD32 *buf1, *buf2;
1683   UWORD8 *re_arr_tab_sml_240_ptr;
1684 
1685   (*ixheaacd_aac_ld_dec_rearrange)(inp, op, MDCT_LEN_BY2,
1686                                    imdct_tables_ptr->re_arr_tab_16);
1687 
1688   buf1 = op;
1689   buf2 = inp;
1690 
1691   for (i = 0; i < FFT15; i++) {
1692     (*ixheaacd_fft32x32_ld2)(imdct_tables_ptr, 16, buf1, buf2);
1693 
1694     buf1 += (FFT16X2);
1695     buf2 += (FFT16X2);
1696   }
1697   re_arr_tab_sml_240_ptr = imdct_tables_ptr->re_arr_tab_sml_240;
1698   buf1 = inp;
1699 
1700   for (i = 0; i < FFT16; i++) {
1701     (*ixheaacd_fft_15_ld)(buf1, op, ixheaacd_fft5out, re_arr_tab_sml_240_ptr);
1702     re_arr_tab_sml_240_ptr += FFT15;
1703     buf1 += 2;
1704   }
1705 }
1706 
ixheaacd_pre_twiddle(WORD32 * xptr,WORD32 * data,WORD32 n,WORD32 * cos_sin_ptr,WORD32 neg_expo)1707 VOID ixheaacd_pre_twiddle(WORD32 *xptr, WORD32 *data, WORD32 n,
1708                           WORD32 *cos_sin_ptr, WORD32 neg_expo) {
1709   WORD npoints_4, i;
1710   WORD32 tempr, tempi, temp;
1711   WORD32 c, c1, s, s1;
1712   WORD32 *in_ptr1, *in_ptr2;
1713 
1714   npoints_4 = n >> 2;
1715 
1716   in_ptr1 = data;
1717   in_ptr2 = data + n - 1;
1718 
1719   if (neg_expo >= 0) {
1720     for (i = npoints_4 - 1; i >= 0; i--) {
1721       c = *cos_sin_ptr++;
1722       c1 = *cos_sin_ptr++;
1723       s = *cos_sin_ptr++;
1724       s1 = *cos_sin_ptr++;
1725 
1726       tempr = *in_ptr1;
1727       tempi = *in_ptr2;
1728 
1729       in_ptr1 += 2;
1730       in_ptr2 -= 2;
1731 
1732       temp =
1733           -ixheaacd_add32(ixheaacd_mult32(tempr, c), ixheaacd_mult32(tempi, s));
1734       *xptr++ = ixheaacd_shr32(temp, neg_expo);
1735 
1736       temp =
1737           ixheaacd_sub32(ixheaacd_mult32(tempr, s), ixheaacd_mult32(tempi, c));
1738       *xptr++ = ixheaacd_shr32(temp, neg_expo);
1739 
1740       tempr = *in_ptr1;
1741       tempi = *in_ptr2;
1742 
1743       in_ptr1 += 2;
1744       in_ptr2 -= 2;
1745 
1746       temp = -ixheaacd_add32(ixheaacd_mult32(tempr, c1),
1747                              ixheaacd_mult32(tempi, s1));
1748       *xptr++ = ixheaacd_shr32(temp, neg_expo);
1749 
1750       temp = ixheaacd_sub32(ixheaacd_mult32(tempr, s1),
1751                             ixheaacd_mult32(tempi, c1));
1752       *xptr++ = ixheaacd_shr32(temp, neg_expo);
1753     }
1754   } else {
1755     neg_expo = -neg_expo;
1756 
1757     for (i = npoints_4 - 1; i >= 0; i--) {
1758       c = *cos_sin_ptr++;
1759       c1 = *cos_sin_ptr++;
1760       s = *cos_sin_ptr++;
1761       s1 = *cos_sin_ptr++;
1762 
1763       tempr = *in_ptr1;
1764       tempi = *in_ptr2;
1765 
1766       in_ptr1 += 2;
1767       in_ptr2 -= 2;
1768 
1769       temp =
1770           -ixheaacd_add32(ixheaacd_mult32(tempr, c), ixheaacd_mult32(tempi, s));
1771       *xptr++ = ixheaacd_shl32(temp, neg_expo);
1772 
1773       temp =
1774           ixheaacd_sub32(ixheaacd_mult32(tempr, s), ixheaacd_mult32(tempi, c));
1775       *xptr++ = ixheaacd_shl32(temp, neg_expo);
1776 
1777       tempr = *in_ptr1;
1778       tempi = *in_ptr2;
1779 
1780       in_ptr1 += 2;
1781       in_ptr2 -= 2;
1782 
1783       temp = -ixheaacd_add32(ixheaacd_mult32(tempr, c1),
1784                              ixheaacd_mult32(tempi, s1));
1785       *xptr++ = ixheaacd_shl32(temp, neg_expo);
1786 
1787       temp = ixheaacd_sub32(ixheaacd_mult32(tempr, s1),
1788                             ixheaacd_mult32(tempi, c1));
1789       *xptr++ = ixheaacd_shl32(temp, neg_expo);
1790     }
1791   }
1792 }
1793 
ixheaacd_post_twiddle_ld(WORD32 out[],WORD32 x[],const WORD32 * cos_sin_ptr,WORD m)1794 VOID ixheaacd_post_twiddle_ld(WORD32 out[], WORD32 x[],
1795                               const WORD32 *cos_sin_ptr, WORD m) {
1796   WORD i;
1797 
1798   WORD32 *ptr_x = &x[0];
1799   WORD32 *ptr_out, *ptr_out1;
1800 
1801   ptr_out = &out[0];
1802   ptr_out1 = &out[m - 1];
1803 
1804   for (i = (m >> 2) - 1; i >= 0; i--) {
1805     WORD32 c, c1, s, s1;
1806     WORD32 re, im;
1807 
1808     c = *cos_sin_ptr++;
1809     c1 = *cos_sin_ptr++;
1810     s = *cos_sin_ptr++;
1811     s1 = *cos_sin_ptr++;
1812 
1813     re = *ptr_x++;
1814     im = *ptr_x++;
1815 
1816     *ptr_out1 = ixheaacd_sub32(ixheaacd_mult32(im, c), ixheaacd_mult32(re, s));
1817 
1818     *ptr_out = -ixheaacd_add32(ixheaacd_mult32(re, c), ixheaacd_mult32(im, s));
1819 
1820     ptr_out += 2;
1821     ptr_out1 -= 2;
1822 
1823     re = *ptr_x++;
1824     im = *ptr_x++;
1825 
1826     *ptr_out1 =
1827         ixheaacd_sub32(ixheaacd_mult32(im, c1), ixheaacd_mult32(re, s1));
1828     *ptr_out =
1829         -ixheaacd_add32(ixheaacd_mult32(re, c1), ixheaacd_mult32(im, s1));
1830 
1831     ptr_out += 2;
1832     ptr_out1 -= 2;
1833   }
1834 }
1835 
ixheaacd_post_twiddle_eld(WORD32 out[],WORD32 x[],const WORD32 * cos_sin_ptr,WORD m)1836 VOID ixheaacd_post_twiddle_eld(WORD32 out[], WORD32 x[],
1837                                const WORD32 *cos_sin_ptr, WORD m) {
1838   WORD i = 0;
1839 
1840   WORD32 *ptr_x = &x[0];
1841   WORD32 *ptr_out_767, *ptr_out_256;
1842   WORD32 *ptr_out_768, *ptr_out_255;
1843   WORD32 *ptr_out_0, *ptr_out_1279;
1844   WORD32 tempr, tempi;
1845 
1846   ptr_out_767 = &out[m + (m >> 1) - 1 - 2 * i];
1847   ptr_out_256 = &out[(m >> 1) + 2 * i];
1848 
1849   ptr_out_768 = &out[m + (m >> 1) + 2 * i];
1850   ptr_out_255 = &out[(m >> 1) - 1 - 2 * i];
1851 
1852   for (i = 0; i < (m >> 3); i++) {
1853     WORD32 c, c1, s, s1;
1854     WORD32 re, im;
1855 
1856     c = *cos_sin_ptr++;
1857     c1 = *cos_sin_ptr++;
1858     s = *cos_sin_ptr++;
1859     s1 = *cos_sin_ptr++;
1860 
1861     re = *ptr_x++;
1862     im = *ptr_x++;
1863 
1864     tempi = ixheaacd_sub32(ixheaacd_mult32(im, c), ixheaacd_mult32(re, s));
1865     tempr = -ixheaacd_add32(ixheaacd_mult32(re, c), ixheaacd_mult32(im, s));
1866 
1867     *ptr_out_767 = tempr;
1868     *ptr_out_256 = tempi;
1869 
1870     *ptr_out_768 = *ptr_out_767;
1871     *ptr_out_255 = -*ptr_out_256;
1872 
1873     ptr_out_256 += 2;
1874     ptr_out_767 -= 2;
1875     ptr_out_768 += 2;
1876     ptr_out_255 -= 2;
1877 
1878     re = *ptr_x++;
1879     im = *ptr_x++;
1880 
1881     tempi = ixheaacd_sub32(ixheaacd_mult32(im, c1), ixheaacd_mult32(re, s1));
1882     tempr = -ixheaacd_add32(ixheaacd_mult32(re, c1), ixheaacd_mult32(im, s1));
1883 
1884     *ptr_out_767 = tempr;
1885     *ptr_out_256 = tempi;
1886 
1887     *ptr_out_768 = *ptr_out_767;
1888     *ptr_out_255 = -*ptr_out_256;
1889 
1890     ptr_out_256 += 2;
1891     ptr_out_767 -= 2;
1892     ptr_out_768 += 2;
1893     ptr_out_255 -= 2;
1894   }
1895 
1896   ptr_out_0 = &out[2 * 2 * i - (m >> 1)];
1897   ptr_out_1279 = &out[m + m + (m >> 1) - 1 - 2 * 2 * i];
1898 
1899   for (; i < (m >> 2); i++) {
1900     WORD32 c, c1, s, s1;
1901     WORD32 re, im;
1902 
1903     c = *cos_sin_ptr++;
1904     c1 = *cos_sin_ptr++;
1905     s = *cos_sin_ptr++;
1906     s1 = *cos_sin_ptr++;
1907 
1908     re = *ptr_x++;
1909     im = *ptr_x++;
1910 
1911     tempi = ixheaacd_sub32(ixheaacd_mult32(im, c), ixheaacd_mult32(re, s));
1912     tempr = -ixheaacd_add32(ixheaacd_mult32(re, c), ixheaacd_mult32(im, s));
1913 
1914     *ptr_out_767 = tempr;
1915     *ptr_out_256 = tempi;
1916 
1917     *ptr_out_0 = -*ptr_out_767;
1918     *ptr_out_1279 = *ptr_out_256;
1919 
1920     ptr_out_256 += 2;
1921     ptr_out_767 -= 2;
1922     ptr_out_0 += 2;
1923     ptr_out_1279 -= 2;
1924 
1925     re = *ptr_x++;
1926     im = *ptr_x++;
1927 
1928     tempi = ixheaacd_sub32(ixheaacd_mult32(im, c1), ixheaacd_mult32(re, s1));
1929     tempr = -ixheaacd_add32(ixheaacd_mult32(re, c1), ixheaacd_mult32(im, s1));
1930 
1931     *ptr_out_767 = tempr;
1932     *ptr_out_256 = tempi;
1933 
1934     *ptr_out_0 = -*ptr_out_767;
1935     *ptr_out_1279 = *ptr_out_256;
1936 
1937     ptr_out_256 += 2;
1938     ptr_out_767 -= 2;
1939     ptr_out_0 += 2;
1940     ptr_out_1279 -= 2;
1941   }
1942 }
1943 
ixheaacd_fft32x32_ld_dec(ia_aac_dec_imdct_tables_struct * imdct_tables_ptr,WORD32 npoints,WORD32 * ptr_x,WORD32 * ptr_y)1944 VOID ixheaacd_fft32x32_ld_dec(ia_aac_dec_imdct_tables_struct *imdct_tables_ptr,
1945                               WORD32 npoints, WORD32 *ptr_x, WORD32 *ptr_y) {
1946   WORD32 i, j, l1, l2, h2, predj, tw_offset, stride, fft_jmp;
1947   WORD32 xt0_0, yt0_0, xt1_0, yt1_0, xt2_0, yt2_0;
1948   WORD32 xh0_0, xh1_0, xh20_0, xh21_0, xl0_0, xl1_0, xl20_0, xl21_0;
1949   WORD32 xh0_1, xh1_1, xl0_1, xl1_1;
1950   WORD32 x_0, x_1, x_2, x_3, x_l1_0, x_l1_1, x_l2_0, x_l2_1;
1951   WORD32 xh0_2, xh1_2, xl0_2, xl1_2, xh0_3, xh1_3, xl0_3, xl1_3;
1952   WORD32 x_4, x_5, x_6, x_7, x_h2_0, x_h2_1;
1953   WORD32 x_8, x_9, x_a, x_b, x_c, x_d, x_e, x_f;
1954   WORD32 si10, si20, si30, co10, co20, co30;
1955   WORD32 *w;
1956   WORD32 *x, *x2, *x0;
1957   WORD32 *y0, *y1, *y2, *y3;
1958   WORD32 n00, n10, n20, n30, n01, n11, n21, n31;
1959   WORD32 n02, n12, n22, n32, n03, n13, n23, n33;
1960   WORD32 n0, j0;
1961   WORD32 radix;
1962   WORD32 norm;
1963   WORD32 m;
1964   WORD32 *ptr_w;
1965 
1966   if (npoints == 256)
1967     ptr_w = imdct_tables_ptr->w_256;
1968   else
1969     ptr_w = imdct_tables_ptr->w_16;
1970 
1971   for (i = 31, m = 1; (npoints & (1 << i)) == 0; i--, m++)
1972     ;
1973   radix = m & 1 ? 2 : 4;
1974   norm = m - 2;
1975 
1976   stride = npoints;
1977   tw_offset = 0;
1978   fft_jmp = 6 * stride;
1979 
1980   while (stride > radix) {
1981     j = 0;
1982     fft_jmp >>= 2;
1983 
1984     h2 = stride >> 1;
1985     l1 = stride;
1986     l2 = stride + (stride >> 1);
1987 
1988     x = ptr_x;
1989     w = ptr_w + tw_offset;
1990     tw_offset += fft_jmp;
1991 
1992     stride >>= 2;
1993 
1994     for (i = 0; i < npoints; i += 4) {
1995       co10 = w[j + 1];
1996       si10 = w[j + 0];
1997       co20 = w[j + 3];
1998       si20 = w[j + 2];
1999       co30 = w[j + 5];
2000       si30 = w[j + 4];
2001 
2002       x_0 = x[0];
2003       x_1 = x[1];
2004       x_l1_0 = x[l1];
2005       x_l1_1 = x[l1 + 1];
2006       x_l2_0 = x[l2];
2007       x_l2_1 = x[l2 + 1];
2008       x_h2_0 = x[h2];
2009       x_h2_1 = x[h2 + 1];
2010 
2011       xh0_0 = x_0 + x_l1_0;
2012       xh1_0 = x_1 + x_l1_1;
2013       xl0_0 = x_0 - x_l1_0;
2014       xl1_0 = x_1 - x_l1_1;
2015       xh20_0 = x_h2_0 + x_l2_0;
2016       xh21_0 = x_h2_1 + x_l2_1;
2017       xl20_0 = x_h2_0 - x_l2_0;
2018       xl21_0 = x_h2_1 - x_l2_1;
2019 
2020       x0 = x;
2021       x2 = x0;
2022 
2023       j += 6;
2024       x += 2;
2025       predj = (j - fft_jmp);
2026       if (!predj) x += fft_jmp;
2027       if (!predj) j = 0;
2028 
2029       x0[0] = xh0_0 + xh20_0;
2030       x0[1] = xh1_0 + xh21_0;
2031       xt0_0 = xh0_0 - xh20_0;
2032       yt0_0 = xh1_0 - xh21_0;
2033       xt1_0 = xl0_0 + xl21_0;
2034       yt2_0 = xl1_0 + xl20_0;
2035       xt2_0 = xl0_0 - xl21_0;
2036       yt1_0 = xl1_0 - xl20_0;
2037 
2038       x2[h2] =
2039           MPYHIRC(si10, yt1_0) + MPYHIRC(co10, xt1_0) +
2040           (((MPYLUHS(si10, yt1_0) + MPYLUHS(co10, xt1_0) + 0x8000) >> 16) << 1);
2041 
2042       x2[h2 + 1] =
2043           MPYHIRC(co10, yt1_0) - MPYHIRC(si10, xt1_0) +
2044           (((MPYLUHS(co10, yt1_0) - MPYLUHS(si10, xt1_0) + 0x8000) >> 16) << 1);
2045 
2046       x2[l1] =
2047           MPYHIRC(si20, yt0_0) + MPYHIRC(co20, xt0_0) +
2048           (((MPYLUHS(si20, yt0_0) + MPYLUHS(co20, xt0_0) + 0x8000) >> 16) << 1);
2049 
2050       x2[l1 + 1] =
2051           MPYHIRC(co20, yt0_0) - MPYHIRC(si20, xt0_0) +
2052           (((MPYLUHS(co20, yt0_0) - MPYLUHS(si20, xt0_0) + 0x8000) >> 16) << 1);
2053 
2054       x2[l2] =
2055           MPYHIRC(si30, yt2_0) + MPYHIRC(co30, xt2_0) +
2056           (((MPYLUHS(si30, yt2_0) + MPYLUHS(co30, xt2_0) + 0x8000) >> 16) << 1);
2057 
2058       x2[l2 + 1] =
2059           MPYHIRC(co30, yt2_0) - MPYHIRC(si30, xt2_0) +
2060           (((MPYLUHS(co30, yt2_0) - MPYLUHS(si30, xt2_0) + 0x8000) >> 16) << 1);
2061     }
2062   }
2063 
2064   y0 = ptr_y;
2065   y2 = ptr_y + (WORD32)npoints;
2066   x0 = ptr_x;
2067   x2 = ptr_x + (WORD32)(npoints >> 1);
2068 
2069   if (radix == 2) {
2070     y1 = y0 + (WORD32)(npoints >> 2);
2071     y3 = y2 + (WORD32)(npoints >> 2);
2072     l1 = norm + 1;
2073     j0 = 8;
2074     n0 = npoints >> 1;
2075   } else {
2076     y1 = y0 + (WORD32)(npoints >> 1);
2077     y3 = y2 + (WORD32)(npoints >> 1);
2078     l1 = norm + 2;
2079     j0 = 4;
2080     n0 = npoints >> 2;
2081   }
2082 
2083   j = 0;
2084 
2085   for (i = 0; i < npoints; i += 8) {
2086     DIG_REV(j, l1, h2);
2087 
2088     x_0 = x0[0];
2089     x_1 = x0[1];
2090     x_2 = x0[2];
2091     x_3 = x0[3];
2092     x_4 = x0[4];
2093     x_5 = x0[5];
2094     x_6 = x0[6];
2095     x_7 = x0[7];
2096     x0 += 8;
2097 
2098     xh0_0 = x_0 + x_4;
2099     xh1_0 = x_1 + x_5;
2100     xl0_0 = x_0 - x_4;
2101     xl1_0 = x_1 - x_5;
2102     xh0_1 = x_2 + x_6;
2103     xh1_1 = x_3 + x_7;
2104     xl0_1 = x_2 - x_6;
2105     xl1_1 = x_3 - x_7;
2106 
2107     n00 = xh0_0 + xh0_1;
2108     n01 = xh1_0 + xh1_1;
2109     n10 = xl0_0 + xl1_1;
2110     n11 = xl1_0 - xl0_1;
2111     n20 = xh0_0 - xh0_1;
2112     n21 = xh1_0 - xh1_1;
2113     n30 = xl0_0 - xl1_1;
2114     n31 = xl1_0 + xl0_1;
2115 
2116     if (radix == 2) {
2117       n00 = x_0 + x_2;
2118       n01 = x_1 + x_3;
2119       n20 = x_0 - x_2;
2120       n21 = x_1 - x_3;
2121       n10 = x_4 + x_6;
2122       n11 = x_5 + x_7;
2123       n30 = x_4 - x_6;
2124       n31 = x_5 - x_7;
2125     }
2126 
2127     y0[2 * h2] = n00;
2128     y0[2 * h2 + 1] = n01;
2129     y1[2 * h2] = n10;
2130     y1[2 * h2 + 1] = n11;
2131     y2[2 * h2] = n20;
2132     y2[2 * h2 + 1] = n21;
2133     y3[2 * h2] = n30;
2134     y3[2 * h2 + 1] = n31;
2135 
2136     x_8 = x2[0];
2137     x_9 = x2[1];
2138     x_a = x2[2];
2139     x_b = x2[3];
2140     x_c = x2[4];
2141     x_d = x2[5];
2142     x_e = x2[6];
2143     x_f = x2[7];
2144     x2 += 8;
2145 
2146     xh0_2 = x_8 + x_c;
2147     xh1_2 = x_9 + x_d;
2148     xl0_2 = x_8 - x_c;
2149     xl1_2 = x_9 - x_d;
2150     xh0_3 = x_a + x_e;
2151     xh1_3 = x_b + x_f;
2152     xl0_3 = x_a - x_e;
2153     xl1_3 = x_b - x_f;
2154 
2155     n02 = xh0_2 + xh0_3;
2156     n03 = xh1_2 + xh1_3;
2157     n12 = xl0_2 + xl1_3;
2158     n13 = xl1_2 - xl0_3;
2159     n22 = xh0_2 - xh0_3;
2160     n23 = xh1_2 - xh1_3;
2161     n32 = xl0_2 - xl1_3;
2162     n33 = xl1_2 + xl0_3;
2163 
2164     if (radix == 2) {
2165       n02 = x_8 + x_a;
2166       n03 = x_9 + x_b;
2167       n22 = x_8 - x_a;
2168       n23 = x_9 - x_b;
2169       n12 = x_c + x_e;
2170       n13 = x_d + x_f;
2171       n32 = x_c - x_e;
2172       n33 = x_d - x_f;
2173     }
2174 
2175     y0[2 * h2 + 2] = n02;
2176     y0[2 * h2 + 3] = n03;
2177     y1[2 * h2 + 2] = n12;
2178     y1[2 * h2 + 3] = n13;
2179     y2[2 * h2 + 2] = n22;
2180     y2[2 * h2 + 3] = n23;
2181     y3[2 * h2 + 2] = n32;
2182     y3[2 * h2 + 3] = n33;
2183 
2184     j += j0;
2185 
2186     if (j == n0) {
2187       j += n0;
2188       x0 += (WORD32)npoints >> 1;
2189       x2 += (WORD32)npoints >> 1;
2190     }
2191   }
2192 }
2193 
ixheaacd_rearrange_dec(WORD32 * ip,WORD32 * op,WORD32 mdct_len_2,UWORD8 * re_arr_tab)2194 VOID ixheaacd_rearrange_dec(WORD32 *ip, WORD32 *op, WORD32 mdct_len_2,
2195                             UWORD8 *re_arr_tab) {
2196   WORD32 n, i = 0;
2197 
2198   for (n = 0; n < mdct_len_2; n++) {
2199     WORD32 idx = re_arr_tab[n] << 1;
2200 
2201     op[i++] = ip[idx];
2202     op[i++] = ip[idx + 1];
2203   }
2204 }
2205 
ixheaacd_fft_15_ld_dec(WORD32 * inp,WORD32 * op,WORD32 * fft3out,UWORD8 * re_arr_tab_sml_240_ptr)2206 VOID ixheaacd_fft_15_ld_dec(WORD32 *inp, WORD32 *op, WORD32 *fft3out,
2207                             UWORD8 *re_arr_tab_sml_240_ptr) {
2208   WORD32 i, n, idx;
2209   WORD32 *buf1, *buf2, *buf1a;
2210   WORD32 add_r, sub_r;
2211   WORD32 add_i, sub_i;
2212   WORD32 x01_real, x_01_imag, temp;
2213   WORD32 p1, p2, p3, p4;
2214 
2215   WORD32 sinmu = 1859775393;
2216   WORD32 cos_51 = 2042378317;
2217   WORD32 cos_52 = -1652318768;
2218   WORD32 cos_53 = -780119100;
2219   WORD32 cos_54 = 1200479854;
2220   WORD32 cos_55 = -1342177280;
2221 
2222   WORD32 r1, r2, r3, r4;
2223   WORD32 s1, s2, s3, s4, t, temp1, temp2;
2224   WORD32 *fft3outptr = fft3out;
2225 
2226   WORD32 xr_0, xr_1, xr_2;
2227   WORD32 xi_0, xi_1, xi_2;
2228 
2229   buf2 = fft3out;
2230   buf1 = buf1a = fft3out;
2231   n = 0;
2232 
2233   {
2234     *buf1++ = inp[0];
2235     *buf1++ = inp[1];
2236 
2237     *buf1++ = inp[96];
2238     *buf1++ = inp[97];
2239 
2240     *buf1++ = inp[192];
2241     *buf1++ = inp[193];
2242 
2243     *buf1++ = inp[288];
2244     *buf1++ = inp[289];
2245 
2246     *buf1++ = inp[384];
2247     *buf1++ = inp[385];
2248 
2249     r1 = ixheaacd_add32_sat(buf1a[2], buf1a[8]);
2250     r4 = ixheaacd_sub32_sat(buf1a[2], buf1a[8]);
2251     r3 = ixheaacd_add32_sat(buf1a[4], buf1a[6]);
2252     r2 = ixheaacd_sub32_sat(buf1a[4], buf1a[6]);
2253 
2254     t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(r1, r3), cos_54);
2255 
2256     r1 = ixheaacd_add32_sat(r1, r3);
2257 
2258     temp1 = ixheaacd_add32_sat(buf1a[0], r1);
2259 
2260     r1 = ixheaacd_add32_sat(
2261         temp1, ixheaacd_shl32_sat((ixheaacd_mult32_shl(r1, cos_55)), 1));
2262 
2263     r3 = ixheaacd_sub32_sat(r1, t);
2264     r1 = ixheaacd_add32_sat(r1, t);
2265 
2266     t = ixheaacd_mult32_shl(ixheaacd_add32_sat(r4, r2), cos_51);
2267     r4 = ixheaacd_add32_sat(
2268         t, ixheaacd_shl32_sat(ixheaacd_mult32_shl(r4, cos_52), 1));
2269     r2 = ixheaacd_add32_sat(t, ixheaacd_mult32_shl(r2, cos_53));
2270 
2271     s1 = ixheaacd_add32_sat(buf1a[3], buf1a[9]);
2272     s4 = ixheaacd_sub32_sat(buf1a[3], buf1a[9]);
2273     s3 = ixheaacd_add32_sat(buf1a[5], buf1a[7]);
2274     s2 = ixheaacd_sub32_sat(buf1a[5], buf1a[7]);
2275 
2276     t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(s1, s3), cos_54);
2277     s1 = ixheaacd_add32_sat(s1, s3);
2278 
2279     temp2 = ixheaacd_add32_sat(buf1a[1], s1);
2280 
2281     s1 = ixheaacd_add32_sat(
2282         temp2, ixheaacd_shl32_sat((ixheaacd_mult32_shl(s1, cos_55)), 1));
2283 
2284     s3 = ixheaacd_sub32_sat(s1, t);
2285     s1 = ixheaacd_add32_sat(s1, t);
2286 
2287     t = ixheaacd_mult32_shl(ixheaacd_add32_sat(s4, s2), cos_51);
2288     s4 = ixheaacd_add32_sat(
2289         t, ixheaacd_shl32_sat((ixheaacd_mult32_shl(s4, cos_52)), 1));
2290     s2 = ixheaacd_add32_sat(t, (ixheaacd_mult32_shl(s2, cos_53)));
2291 
2292     *buf2++ = temp1;
2293     *buf2++ = temp2;
2294     *buf2++ = ixheaacd_add32_sat(r1, s2);
2295     *buf2++ = ixheaacd_sub32_sat(s1, r2);
2296     *buf2++ = ixheaacd_sub32_sat(r3, s4);
2297     *buf2++ = ixheaacd_add32_sat(s3, r4);
2298     *buf2++ = ixheaacd_add32_sat(r3, s4);
2299     *buf2++ = ixheaacd_sub32_sat(s3, r4);
2300     *buf2++ = ixheaacd_sub32_sat(r1, s2);
2301     *buf2++ = ixheaacd_add32_sat(s1, r2);
2302     buf1a = buf1;
2303 
2304     *buf1++ = inp[160];
2305     *buf1++ = inp[161];
2306 
2307     *buf1++ = inp[256];
2308     *buf1++ = inp[257];
2309 
2310     *buf1++ = inp[352];
2311     *buf1++ = inp[353];
2312 
2313     *buf1++ = inp[448];
2314     *buf1++ = inp[449];
2315 
2316     *buf1++ = inp[64];
2317     *buf1++ = inp[65];
2318 
2319     r1 = ixheaacd_add32_sat(buf1a[2], buf1a[8]);
2320     r4 = ixheaacd_sub32_sat(buf1a[2], buf1a[8]);
2321     r3 = ixheaacd_add32_sat(buf1a[4], buf1a[6]);
2322     r2 = ixheaacd_sub32_sat(buf1a[4], buf1a[6]);
2323 
2324     t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(r1, r3), cos_54);
2325 
2326     r1 = ixheaacd_add32_sat(r1, r3);
2327 
2328     temp1 = ixheaacd_add32_sat(buf1a[0], r1);
2329 
2330     r1 = ixheaacd_add32_sat(
2331         temp1, ixheaacd_shl32_sat((ixheaacd_mult32_shl(r1, cos_55)), 1));
2332 
2333     r3 = ixheaacd_sub32_sat(r1, t);
2334     r1 = ixheaacd_add32_sat(r1, t);
2335 
2336     t = ixheaacd_mult32_shl(ixheaacd_add32_sat(r4, r2), cos_51);
2337     r4 = ixheaacd_add32_sat(
2338         t, ixheaacd_shl32_sat(ixheaacd_mult32_shl(r4, cos_52), 1));
2339     r2 = ixheaacd_add32_sat(t, ixheaacd_mult32_shl(r2, cos_53));
2340 
2341     s1 = ixheaacd_add32_sat(buf1a[3], buf1a[9]);
2342     s4 = ixheaacd_sub32_sat(buf1a[3], buf1a[9]);
2343     s3 = ixheaacd_add32_sat(buf1a[5], buf1a[7]);
2344     s2 = ixheaacd_sub32_sat(buf1a[5], buf1a[7]);
2345 
2346     t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(s1, s3), cos_54);
2347 
2348     s1 = ixheaacd_add32_sat(s1, s3);
2349 
2350     temp2 = ixheaacd_add32_sat(buf1a[1], s1);
2351 
2352     s1 = ixheaacd_add32_sat(
2353         temp2, ixheaacd_shl32_sat((ixheaacd_mult32_shl(s1, cos_55)), 1));
2354 
2355     s3 = ixheaacd_sub32_sat(s1, t);
2356     s1 = ixheaacd_add32_sat(s1, t);
2357 
2358     t = ixheaacd_mult32_shl(ixheaacd_add32_sat(s4, s2), cos_51);
2359     s4 = ixheaacd_add32_sat(
2360         t, ixheaacd_shl32_sat((ixheaacd_mult32_shl(s4, cos_52)), 1));
2361     s2 = ixheaacd_add32_sat(t, (ixheaacd_mult32_shl(s2, cos_53)));
2362 
2363     *buf2++ = temp1;
2364     *buf2++ = temp2;
2365     *buf2++ = ixheaacd_add32_sat(r1, s2);
2366     *buf2++ = ixheaacd_sub32_sat(s1, r2);
2367     *buf2++ = ixheaacd_sub32_sat(r3, s4);
2368     *buf2++ = ixheaacd_add32_sat(s3, r4);
2369     *buf2++ = ixheaacd_add32_sat(r3, s4);
2370     *buf2++ = ixheaacd_sub32_sat(s3, r4);
2371     *buf2++ = ixheaacd_sub32_sat(r1, s2);
2372     *buf2++ = ixheaacd_add32_sat(s1, r2);
2373     buf1a = buf1;
2374     ;
2375 
2376     *buf1++ = inp[320];
2377     *buf1++ = inp[321];
2378 
2379     *buf1++ = inp[416];
2380     *buf1++ = inp[417];
2381 
2382     *buf1++ = inp[32];
2383     *buf1++ = inp[33];
2384 
2385     *buf1++ = inp[128];
2386     *buf1++ = inp[129];
2387 
2388     *buf1++ = inp[224];
2389     *buf1++ = inp[225];
2390 
2391     r1 = ixheaacd_add32_sat(buf1a[2], buf1a[8]);
2392     r4 = ixheaacd_sub32_sat(buf1a[2], buf1a[8]);
2393     r3 = ixheaacd_add32_sat(buf1a[4], buf1a[6]);
2394     r2 = ixheaacd_sub32_sat(buf1a[4], buf1a[6]);
2395 
2396     t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(r1, r3), cos_54);
2397 
2398     r1 = ixheaacd_add32_sat(r1, r3);
2399 
2400     temp1 = ixheaacd_add32_sat(buf1a[0], r1);
2401 
2402     r1 = ixheaacd_add32_sat(
2403         temp1, ixheaacd_shl32_sat((ixheaacd_mult32_shl(r1, cos_55)), 1));
2404 
2405     r3 = ixheaacd_sub32_sat(r1, t);
2406     r1 = ixheaacd_add32_sat(r1, t);
2407 
2408     t = ixheaacd_mult32_shl(ixheaacd_add32_sat(r4, r2), cos_51);
2409     r4 = ixheaacd_add32_sat(
2410         t, ixheaacd_shl32_sat(ixheaacd_mult32_shl(r4, cos_52), 1));
2411     r2 = ixheaacd_add32_sat(t, ixheaacd_mult32_shl(r2, cos_53));
2412 
2413     s1 = ixheaacd_add32_sat(buf1a[3], buf1a[9]);
2414     s4 = ixheaacd_sub32_sat(buf1a[3], buf1a[9]);
2415     s3 = ixheaacd_add32_sat(buf1a[5], buf1a[7]);
2416     s2 = ixheaacd_sub32_sat(buf1a[5], buf1a[7]);
2417 
2418     t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(s1, s3), cos_54);
2419 
2420     s1 = ixheaacd_add32_sat(s1, s3);
2421 
2422     temp2 = ixheaacd_add32_sat(buf1a[1], s1);
2423 
2424     s1 = ixheaacd_add32_sat(
2425         temp2, ixheaacd_shl32_sat((ixheaacd_mult32_shl(s1, cos_55)), 1));
2426 
2427     s3 = ixheaacd_sub32_sat(s1, t);
2428     s1 = ixheaacd_add32_sat(s1, t);
2429 
2430     t = ixheaacd_mult32_shl(ixheaacd_add32_sat(s4, s2), cos_51);
2431     s4 = ixheaacd_add32_sat(
2432         t, ixheaacd_shl32_sat((ixheaacd_mult32_shl(s4, cos_52)), 1));
2433     s2 = ixheaacd_add32_sat(t, (ixheaacd_mult32_shl(s2, cos_53)));
2434 
2435     *buf2++ = temp1;
2436     *buf2++ = temp2;
2437     *buf2++ = ixheaacd_add32_sat(r1, s2);
2438     *buf2++ = ixheaacd_sub32_sat(s1, r2);
2439     *buf2++ = ixheaacd_sub32_sat(r3, s4);
2440     *buf2++ = ixheaacd_add32_sat(s3, r4);
2441     *buf2++ = ixheaacd_add32_sat(r3, s4);
2442     *buf2++ = ixheaacd_sub32_sat(s3, r4);
2443     *buf2++ = ixheaacd_sub32_sat(r1, s2);
2444     *buf2++ = ixheaacd_add32_sat(s1, r2);
2445     buf1a = buf1;
2446     ;
2447   }
2448 
2449   n = 0;
2450   for (i = 0; i < FFT5; i++) {
2451     xr_0 = fft3outptr[0];
2452     xi_0 = fft3outptr[1];
2453 
2454     xr_1 = fft3outptr[10];
2455     xi_1 = fft3outptr[11];
2456 
2457     xr_2 = fft3outptr[20];
2458     xi_2 = fft3outptr[21];
2459 
2460     x01_real = ixheaacd_add32_sat(xr_0, xr_1);
2461     x_01_imag = ixheaacd_add32_sat(xi_0, xi_1);
2462 
2463     add_r = ixheaacd_add32_sat(xr_1, xr_2);
2464     add_i = ixheaacd_add32_sat(xi_1, xi_2);
2465 
2466     sub_r = ixheaacd_sub32_sat(xr_1, xr_2);
2467     sub_i = ixheaacd_sub32_sat(xi_1, xi_2);
2468 
2469     p1 = add_r >> 1;
2470 
2471     p2 = ixheaacd_mult32_shl(sub_i, sinmu);
2472     p3 = ixheaacd_mult32_shl(sub_r, sinmu);
2473 
2474     p4 = add_i >> 1;
2475 
2476     temp = ixheaacd_sub32_sat(xr_0, p1);
2477     temp1 = ixheaacd_add32_sat(xi_0, p3);
2478     temp2 = ixheaacd_sub32_sat(xi_0, p3);
2479 
2480     idx = re_arr_tab_sml_240_ptr[n++] << 1;
2481     op[idx] = ixheaacd_add32_sat(x01_real, xr_2);
2482     op[idx + 1] = ixheaacd_add32_sat(x_01_imag, xi_2);
2483 
2484     idx = re_arr_tab_sml_240_ptr[n++] << 1;
2485     op[idx] = ixheaacd_add32_sat(temp, p2);
2486     op[idx + 1] = ixheaacd_sub32_sat(temp2, p4);
2487 
2488     idx = re_arr_tab_sml_240_ptr[n++] << 1;
2489     op[idx] = ixheaacd_sub32_sat(temp, p2);
2490     op[idx + 1] = ixheaacd_sub32_sat(temp1, p4);
2491     fft3outptr += 2;
2492   }
2493 }