• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2  *                                                                            *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 #include "ixheaacd_sbr_common.h"
21 #include "ixheaacd_type_def.h"
22 
23 #include "ixheaacd_constants.h"
24 #include "ixheaacd_basic_ops32.h"
25 #include "ixheaacd_basic_ops16.h"
26 #include "ixheaacd_basic_ops40.h"
27 #include "ixheaacd_basic_ops.h"
28 
29 #include "ixheaacd_defines.h"
30 #include "ixheaacd_common_rom.h"
31 #include "ixheaacd_basic_funcs.h"
32 #include "ixheaacd_aac_rom.h"
33 #include "ixheaacd_aac_imdct.h"
34 #include "ixheaacd_intrinsics.h"
35 
36 #include "ixheaacd_basic_op.h"
37 #include "ixheaacd_function_selector.h"
38 
39 #include "ixheaacd_audioobjtypes.h"
40 #include "ixheaacd_tns.h"
41 
/* Reverse the order of the 2-bit groups of (i), held in an `unsigned`, then
 * shift the reversed value right by (m) and assign the result to (j).
 * Each step swaps neighbouring fields of twice the previous width: 2-bit
 * pairs, nibbles, bytes and finally half-words. The masks are written for a
 * 32-bit `unsigned`. (i) and (m) are evaluated once; (j) must be an lvalue. */
#define DIG_REV(i, m, j)                                      \
  do {                                                        \
    unsigned dig_rev_acc_ = (i);                              \
    dig_rev_acc_ = ((dig_rev_acc_ & 0x33333333u) << 2) |      \
                   ((dig_rev_acc_ & ~0x33333333u) >> 2);      \
    dig_rev_acc_ = ((dig_rev_acc_ & 0x0F0F0F0Fu) << 4) |      \
                   ((dig_rev_acc_ & ~0x0F0F0F0Fu) >> 4);      \
    dig_rev_acc_ = ((dig_rev_acc_ & 0x00FF00FFu) << 8) |      \
                   ((dig_rev_acc_ & ~0x00FF00FFu) >> 8);      \
    dig_rev_acc_ = ((dig_rev_acc_ & 0x0000FFFFu) << 16) |     \
                   ((dig_rev_acc_ & ~0x0000FFFFu) >> 16);     \
    (j) = dig_rev_acc_ >> (m);                                \
  } while (0)
51 
/* Multiply the upper 16 bits of x (signed) by the 32-bit value y, split into
 * its unsigned lower half and signed upper half:
 *   ((hi(x) * lo_u(y) + 0x4000) >> 15) + 2 * (hi(x) * hi(y))
 * Both arguments are fully parenthesized so that argument expressions built
 * from low-precedence operators (|, ^, &, ?:) expand as intended. The final
 * doubling is written as a multiplication because left-shifting a negative
 * value is undefined in C. Each argument is evaluated twice, so avoid
 * arguments with side effects. */
#define MPYHIRC(x, y)                                                        \
  (((WORD32)((WORD16)((x) >> 16) * (UWORD16)((y) & 0x0000FFFF) + 0x4000) >> \
    15) +                                                                    \
   ((WORD32)((WORD16)((x) >> 16) * (WORD16)((y) >> 16)) * 2))
57 
/* Product of the unsigned lower 16 bits of x and the signed upper 16 bits of
 * y, as a WORD32. Arguments are parenthesized so that compound argument
 * expressions (for example `a | b`) are shifted/masked as a whole. */
#define MPYLUHS(x, y) \
  ((WORD32)((UWORD16)((x) & 0x0000FFFF) * (WORD16)((y) >> 16)))
60 
/* Multiply the lower 16 bits of x (signed) by the 32-bit value y, split into
 * its unsigned lower half and signed upper half:
 *   ((lo(x) * lo_u(y) + 0x4000) >> 15) + 2 * (lo(x) * hi(y))
 * y is parenthesized everywhere it is used, and the doubling is a
 * multiplication rather than a left shift so that negative products stay
 * well defined. Each argument is evaluated twice. */
#define MPYLIRC(x, y)                                                   \
  (((WORD32)((WORD16)(x) * (UWORD16)((y) & 0x0000FFFF) + 0x4000) >> 15) + \
   ((WORD32)((WORD16)(x) * (WORD16)((y) >> 16)) * 2))
64 
65 WORD32 rev_dig[] = { 0, 8, 2, 10 };
66 
67 #define MDCT_LEN 480
68 #define FFT15X2 30
69 #define MDCT_LEN_BY2 240
70 #define FFT5 5
71 #define FFT16 16
72 #define FFT4 4
73 #define FFT3 3
74 #define FFT15 15
75 #define FFT16X2 32
76 #define MDCT_LEN_960 960
77 
78 WORD32 ixheaacd_fft5out[FFT15X2];
79 
ixheaacd_mult32x16lin32(WORD32 a,WORD32 b)80 static PLATFORM_INLINE WORD32 ixheaacd_mult32x16lin32(WORD32 a, WORD32 b) {
81   WORD32 result;
82   WORD64 temp_result;
83   temp_result = (WORD64)a * (WORD64)(((b & 0xFFFF) << 16) >> 16);
84   result = (WORD32)(temp_result >> 16);
85   return (result);
86 }
87 
ixheaacd_mac32x16lin32(WORD32 a,WORD32 b,WORD32 c)88 static PLATFORM_INLINE WORD32 ixheaacd_mac32x16lin32(WORD32 a, WORD32 b,
89                                                      WORD32 c) {
90   WORD32 result;
91   result = a + ixheaacd_mult32x16lin32(b, c);
92   return (result);
93 }
94 
ixheaacd_mult32x16lin32_sat(WORD32 a,WORD32 b)95 static PLATFORM_INLINE WORD32 ixheaacd_mult32x16lin32_sat(WORD32 a, WORD32 b) {
96   WORD32 result;
97   WORD64 temp_result;
98   temp_result = (WORD64)a * (WORD64)(((b & 0xFFFF) << 16) >> 16);
99   if (temp_result < (WORD64)MIN_32)
100     result = MIN_32;
101   else if (temp_result > (WORD64)MAX_32)
102     result = MAX_32;
103   else
104     result = (WORD32)(temp_result);
105   return (result);
106 }
107 
/* Returns neg_expo increased by 2, narrowed back to WORD16. */
WORD16 ixheaacd_neg_expo_inc_dec(WORD16 neg_expo) {
  const WORD16 adjusted = (WORD16)(neg_expo + 2);

  return adjusted;
}
109 
/* Returns neg_expo increased by 3, narrowed back to WORD16. */
WORD16 ixheaacd_neg_expo_inc_arm(WORD16 neg_expo) {
  const WORD16 adjusted = (WORD16)(neg_expo + 3);

  return adjusted;
}
111 
ixheaacd_pretwiddle_compute_960_dec(WORD32 * spec_data1,WORD32 * spec_data2,WORD32 * out_ptr,ia_aac_dec_imdct_tables_struct * ptr_imdct_tables,WORD npoints4,WORD32 neg_expo)112 VOID ixheaacd_pretwiddle_compute_960_dec(
113     WORD32 *spec_data1, WORD32 *spec_data2, WORD32 *out_ptr,
114     ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints4,
115     WORD32 neg_expo) {
116 
117   WORD32 i;
118   WORD32 tempr, tempi;
119 
120   WORD16 c, c1, s, s1;
121   WORD32 *out_ptr1 = out_ptr + ((npoints4 << 2) - 1);
122   const WORD16 *cos_sin_ptr = ptr_imdct_tables->cosine_array_240;
123 
124   for (i = 0; i < npoints4; i++) {
125     c = *cos_sin_ptr++;
126     s = *cos_sin_ptr++;
127 
128     tempr = *spec_data1++;
129     tempi = *spec_data2--;
130 
131     *out_ptr =
132         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, c), tempi, s);
133 
134 
135     *out_ptr = ixheaacd_shl32(*out_ptr, neg_expo);
136     out_ptr++;
137 
138     *out_ptr = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi, c),
139                               ixheaacd_mult32x16in32(tempr, s));
140 
141     *out_ptr = ixheaacd_shl32(*out_ptr, neg_expo);
142     out_ptr++;
143 
144     c1 = *cos_sin_ptr++;
145     s1 = *cos_sin_ptr++;
146 
147     tempi = *spec_data1++;
148     tempr = *spec_data2--;
149 
150 
151     *out_ptr1 = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi, c1),
152                                ixheaacd_mult32x16in32(tempr, s1));
153 
154     *out_ptr1 = ixheaacd_shl32(*out_ptr1, neg_expo);
155     out_ptr1--;
156 
157     *out_ptr1 =
158         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, c1), tempi, s1);
159 
160     *out_ptr1 = ixheaacd_shl32(*out_ptr1, neg_expo);
161     out_ptr1--;
162   }
163 }
164 
ixheaacd_pretwiddle_compute_dec(WORD32 * spec_data1,WORD32 * spec_data2,WORD32 * out_ptr,ia_aac_dec_imdct_tables_struct * ptr_imdct_tables,WORD npoints4,WORD32 neg_expo)165 VOID ixheaacd_pretwiddle_compute_dec(
166     WORD32 *spec_data1, WORD32 *spec_data2, WORD32 *out_ptr,
167     ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints4,
168     WORD32 neg_expo) {
169   WORD32 i;
170   WORD32 tempr, tempi;
171   WORD32 tempr1, tempi1;
172   WORD32 npoints2 = npoints4 * 2;
173   WORD32 *out_ptr1 = out_ptr + (npoints2 << 1) - 1;
174   const WORD16 *cos_sin_ptr = ptr_imdct_tables->cosine_array_2048_256;
175 
176   WORD16 cos = 0, cos1 = 0, sin = 0, sin1 = 0;
177   if (neg_expo < 0) {
178     neg_expo = -neg_expo;
179     if (npoints4 == 256) {
180       cos = *cos_sin_ptr++;
181       sin = *cos_sin_ptr++;
182     } else if (npoints4 == 32) {
183       cos = *cos_sin_ptr++;
184       sin = *cos_sin_ptr;
185       cos_sin_ptr += 15;
186     }
187     tempr = *spec_data1++;
188     tempi = *spec_data2--;
189 
190     *out_ptr =
191         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos), tempi, sin);
192 
193     *out_ptr = ixheaacd_shl32(*out_ptr, neg_expo);
194     out_ptr++;
195 
196     *out_ptr = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi, cos),
197                               ixheaacd_mult32x16in32(tempr, sin));
198 
199     *out_ptr = ixheaacd_shl32(*out_ptr, neg_expo);
200     out_ptr++;
201 
202     for (i = 0; i < npoints4 - 1; i++) {
203       if (npoints4 == 256) {
204         sin = *cos_sin_ptr++;
205         cos = *cos_sin_ptr++;
206       } else if (npoints4 == 32) {
207         sin = *cos_sin_ptr++;
208         cos = *cos_sin_ptr;
209         cos_sin_ptr += 15;
210       }
211 
212       tempi1 = *spec_data1++;
213       tempr = *spec_data1++;
214       tempr1 = *spec_data2--;
215       tempi = *spec_data2--;
216 
217       *out_ptr1 = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi1, cos),
218                                  ixheaacd_mult32x16in32(tempr1, sin));
219 
220       *out_ptr1 = ixheaacd_shl32(*out_ptr1, neg_expo);
221       out_ptr1--;
222 
223       *out_ptr1 = ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr1, cos),
224                                         tempi1, sin);
225       *out_ptr1 = ixheaacd_shl32(*out_ptr1, neg_expo);
226       out_ptr1--;
227 
228       *out_ptr =
229           ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, sin), tempi, cos);
230       *out_ptr = ixheaacd_shl32(*out_ptr, neg_expo);
231       out_ptr++;
232 
233       *out_ptr = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi, sin),
234                                 ixheaacd_mult32x16in32(tempr, cos));
235       *out_ptr = ixheaacd_shl32(*out_ptr, neg_expo);
236       out_ptr++;
237     }
238     cos1 = *cos_sin_ptr++;
239     sin1 = *cos_sin_ptr;
240 
241     tempr1 = *spec_data2;
242     tempi1 = *spec_data1;
243 
244     *out_ptr1 = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi1, cos1),
245                                ixheaacd_mult32x16in32(tempr1, sin1));
246     *out_ptr1 = ixheaacd_shl32(*out_ptr1, neg_expo);
247     out_ptr1--;
248 
249     *out_ptr1 = ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr1, cos1),
250                                       tempi1, sin1);
251     *out_ptr1 = ixheaacd_shl32(*out_ptr1, neg_expo);
252     out_ptr1--;
253 
254   } else {
255     if (npoints4 == 256) {
256       cos = *cos_sin_ptr++;
257       sin = *cos_sin_ptr++;
258 
259     } else if (npoints4 == 32) {
260       cos = *cos_sin_ptr++;
261       sin = *cos_sin_ptr;
262       cos_sin_ptr += 15;
263     }
264     tempr = *spec_data1++;
265     tempi = *spec_data2--;
266 
267     *out_ptr =
268         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos), tempi, sin);
269     *out_ptr = ixheaacd_shr32(*out_ptr, neg_expo);
270     out_ptr++;
271 
272     *out_ptr = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi, cos),
273                               ixheaacd_mult32x16in32(tempr, sin));
274 
275     *out_ptr = ixheaacd_shr32(*out_ptr, neg_expo);
276     out_ptr++;
277 
278     for (i = 0; i < npoints4 - 1; i++) {
279       if (npoints4 == 256) {
280         sin = *cos_sin_ptr++;
281         cos = *cos_sin_ptr++;
282       } else if (npoints4 == 32) {
283         sin = *cos_sin_ptr++;
284         cos = *cos_sin_ptr;
285         cos_sin_ptr += 15;
286       }
287 
288       tempi1 = *spec_data1++;
289       tempr = *spec_data1++;
290       tempr1 = *spec_data2--;
291       tempi = *spec_data2--;
292 
293       *out_ptr1 = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi1, cos),
294                                  ixheaacd_mult32x16in32(tempr1, sin));
295       *out_ptr1 = ixheaacd_shr32(*out_ptr1, neg_expo);
296       out_ptr1--;
297 
298       *out_ptr1 = ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr1, cos),
299                                         tempi1, sin);
300       *out_ptr1 = ixheaacd_shr32(*out_ptr1, neg_expo);
301       out_ptr1--;
302 
303       *out_ptr =
304           ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, sin), tempi, cos);
305       *out_ptr = ixheaacd_shr32(*out_ptr, neg_expo);
306       out_ptr++;
307 
308       *out_ptr = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi, sin),
309                                 ixheaacd_mult32x16in32(tempr, cos));
310       *out_ptr = ixheaacd_shr32(*out_ptr, neg_expo);
311       out_ptr++;
312     }
313     cos1 = *cos_sin_ptr++;
314     sin1 = *cos_sin_ptr;
315 
316     tempr1 = *spec_data2;
317     tempi1 = *spec_data1;
318 
319     *out_ptr1 = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi1, cos1),
320                                ixheaacd_mult32x16in32(tempr1, sin1));
321     *out_ptr1 = ixheaacd_shr32(*out_ptr1, neg_expo);
322     out_ptr1--;
323 
324     *out_ptr1 = ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr1, cos1),
325                                       tempi1, sin1);
326     *out_ptr1 = ixheaacd_shr32(*out_ptr1, neg_expo);
327     out_ptr1--;
328   }
329 }
330 
ixheaacd_post_twiddle_dec(WORD32 out_ptr[],WORD32 spec_data[],ia_aac_dec_imdct_tables_struct * ptr_imdct_tables,WORD npoints)331 VOID ixheaacd_post_twiddle_dec(WORD32 out_ptr[], WORD32 spec_data[],
332                                ia_aac_dec_imdct_tables_struct *ptr_imdct_tables,
333                                WORD npoints) {
334   WORD i;
335   WORD16 cos, cos1, sin, sin1;
336   WORD32 *spec_data1 = spec_data + npoints - 1;
337   WORD32 *out_ptr1 = out_ptr + npoints - 1;
338   WORD16 adjust = 50, adjust1 = -50;
339   const WORD16 *cos_sin_ptr = ptr_imdct_tables->cosine_array_2048_256;
340 
341   if (npoints == 1024) {
342     WORD32 tempr, tempi, outi, outr, temp1, temp2;
343     tempr = *spec_data++;
344     tempi = *spec_data++;
345 
346     cos = *cos_sin_ptr;
347     cos_sin_ptr++;
348     sin = *cos_sin_ptr;
349     cos_sin_ptr++;
350 
351     outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
352                           ixheaacd_mult32x16in32(tempi, cos));
353     outr =
354         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos), tempi, sin);
355 
356     temp1 = ixheaacd_mult32x16in32(outi, adjust1);
357     temp2 = ixheaacd_mult32x16in32(outr, adjust);
358 
359     outr = outr + temp1;
360     outi = outi + temp2;
361     *out_ptr1-- = outi;
362     *out_ptr++ = outr;
363 
364     for (i = 0; i < (npoints / 2 - 2); i++) {
365       sin = *cos_sin_ptr++;
366       cos = *cos_sin_ptr++;
367 
368       tempi = *spec_data1--;
369       tempr = *spec_data1--;
370 
371       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
372                             ixheaacd_mult32x16in32(tempi, cos));
373       outr =
374           ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos), tempi, sin);
375 
376       temp1 = ixheaacd_mult32x16in32(outi, adjust1);
377       temp2 = ixheaacd_mult32x16in32(outr, adjust);
378 
379       outr = outr + temp1;
380       outi = outi + temp2;
381 
382       *out_ptr++ = outi;
383       *out_ptr1-- = outr;
384 
385       i++;
386       tempr = *spec_data++;
387       tempi = *spec_data++;
388 
389       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, cos),
390                             ixheaacd_mult32x16in32(tempi, sin));
391       outr =
392           ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, sin), tempi, cos);
393 
394       temp1 = ixheaacd_mult32x16in32(outi, adjust1);
395       temp2 = ixheaacd_mult32x16in32(outr, adjust);
396 
397       outr = outr + temp1;
398       outi = outi + temp2;
399 
400       *out_ptr1-- = outi;
401       *out_ptr++ = outr;
402     }
403     cos1 = *cos_sin_ptr++;
404     sin1 = *cos_sin_ptr;
405 
406     tempi = *spec_data1--;
407     tempr = *spec_data1--;
408 
409     outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin1),
410                           ixheaacd_mult32x16in32(tempi, cos1));
411     outr =
412         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos1), tempi, sin1);
413 
414     temp1 = ixheaacd_mult32x16in32(outi, adjust1);
415     temp2 = ixheaacd_mult32x16in32(outr, adjust);
416 
417     outr = outr + temp1;
418     outi = outi + temp2;
419 
420     *out_ptr++ = outi;
421     *out_ptr1-- = outr;
422   } else if (npoints == 128) {
423     WORD32 tempr, tempi, outi, outr, temp1, temp2;
424     tempr = *spec_data++;
425     tempi = *spec_data++;
426 
427     cos = *cos_sin_ptr++;
428     sin = *cos_sin_ptr;
429     cos_sin_ptr += 15;
430 
431     outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
432                           ixheaacd_mult32x16in32(tempi, cos));
433     outr =
434         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos), tempi, sin);
435 
436     temp1 = ixheaacd_mult32x16in32(outi, -(201 << 1));
437     temp2 = ixheaacd_mult32x16in32(outr, 201 << 1);
438 
439     outr = outr + temp1;
440     outi = outi + temp2;
441     *out_ptr1-- = outi;
442     *out_ptr++ = outr;
443 
444     for (i = 0; i < (npoints / 2 - 2); i++) {
445       sin = *cos_sin_ptr++;
446       cos = *cos_sin_ptr;
447       cos_sin_ptr += 15;
448 
449       tempi = *spec_data1--;
450       tempr = *spec_data1--;
451 
452       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
453                             ixheaacd_mult32x16in32(tempi, cos));
454       outr =
455           ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos), tempi, sin);
456 
457       temp1 = ixheaacd_mult32x16in32(outi, -(201 << 1));
458       temp2 = ixheaacd_mult32x16in32(outr, 201 << 1);
459 
460       outr = outr + temp1;
461       outi = outi + temp2;
462 
463       *out_ptr++ = outi;
464       *out_ptr1-- = outr;
465 
466       i++;
467       tempr = *spec_data++;
468       tempi = *spec_data++;
469 
470       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, cos),
471                             ixheaacd_mult32x16in32(tempi, sin));
472       outr =
473           ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, sin), tempi, cos);
474 
475       temp1 = ixheaacd_mult32x16in32(outi, -(201 << 1));
476       temp2 = ixheaacd_mult32x16in32(outr, 201 << 1);
477 
478       outr = outr + temp1;
479       outi = outi + temp2;
480 
481       *out_ptr1-- = outi;
482       *out_ptr++ = outr;
483     }
484     cos1 = *cos_sin_ptr++;
485     sin1 = *cos_sin_ptr;
486 
487     tempi = *spec_data1--;
488     tempr = *spec_data1--;
489 
490     outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin1),
491                           ixheaacd_mult32x16in32(tempi, cos1));
492     outr =
493         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos1), tempi, sin1);
494 
495     temp1 = ixheaacd_mult32x16in32(outi, -(201 << 1));
496     temp2 = ixheaacd_mult32x16in32(outr, 201 << 1);
497 
498     outr = outr + temp1;
499     outi = outi + temp2;
500 
501     *out_ptr++ = outi;
502     *out_ptr1-- = outr;
503   }
504 }
505 
ixheaacd_post_twid_overlap_add_dec(WORD32 pcm_out[],WORD32 spec_data[],ia_aac_dec_imdct_tables_struct * ptr_imdct_tables,WORD npoints,WORD32 * ptr_overlap_buf,WORD16 q_shift,const WORD16 * window,WORD16 ch_fac)506 VOID ixheaacd_post_twid_overlap_add_dec(
507     WORD32 pcm_out[], WORD32 spec_data[],
508     ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints,
509     WORD32 *ptr_overlap_buf, WORD16 q_shift, const WORD16 *window,
510     WORD16 ch_fac) {
511   WORD i;
512   WORD16 cos, cos1, sin, sin1;
513   WORD32 size = npoints / 2;
514   WORD32 *pcmout1 = pcm_out + (ch_fac * size);
515   const WORD16 *cos_sin_ptr = ptr_imdct_tables->cosine_array_2048_256;
516 
517   pcm_out = pcmout1 - ch_fac;
518   spec_data += size;
519 
520   if (q_shift > 0) {
521     WORD32 tempr, tempi, outr, outi, win1, accu, temp1, temp2;
522     WORD16 adjust, adjust1;
523     WORD32 overlap_data;
524 
525     tempr = *(spec_data - size);
526     tempi = *(spec_data - size + 1);
527     adjust = 50;
528     adjust1 = -50;
529     cos = *cos_sin_ptr++;
530     sin = *cos_sin_ptr++;
531     outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
532                           ixheaacd_mult32x16in32(tempi, cos));
533     outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, cos),
534                           ixheaacd_mult32x16in32(tempi, sin));
535 
536     overlap_data = *ptr_overlap_buf;
537 
538     temp1 = ixheaacd_mult32x16in32(outi, adjust1);
539     temp2 = ixheaacd_mult32x16in32(outr, adjust);
540 
541     outr = outr + temp1;
542     outi = outi + temp2;
543 
544     *ptr_overlap_buf++ = ixheaacd_shr32_sat(outr, 16 - q_shift);
545 
546     win1 = *((WORD32 *)window + size - 1);
547     accu = ixheaacd_sub32_sat(
548         ixheaacd_shl32_sat(ixheaacd_mult32x16lin32(outi, win1), q_shift),
549         ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1 >> 16)));
550 
551     *pcm_out = accu;
552 
553     pcm_out -= ch_fac;
554     accu = ixheaacd_sub32_sat(
555         ixheaacd_shl32_sat(
556             ixheaacd_mult32x16hin32(ixheaacd_negate32_sat(outi), win1),
557             q_shift),
558         ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1)));
559 
560     *pcmout1 = accu;
561 
562     pcmout1 += ch_fac;
563 
564     for (i = size - 2; i != 0;) {
565       sin = *cos_sin_ptr++;
566       cos = *cos_sin_ptr++;
567 
568       tempr = *(spec_data + i);
569       tempi = *(spec_data + i + 1);
570 
571       outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, cos),
572                             ixheaacd_mult32x16in32(tempi, sin));
573       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
574                             ixheaacd_mult32x16in32(tempi, cos));
575 
576       temp1 = ixheaacd_mult32x16in32(outi, adjust1);
577       temp2 = ixheaacd_mult32x16in32(outr, adjust);
578 
579       outr = outr + temp1;
580       outi = outi + temp2;
581 
582       overlap_data = *ptr_overlap_buf;
583 
584       *ptr_overlap_buf++ = ixheaacd_shr32_sat(outi, 16 - q_shift);
585 
586       win1 = *((WORD32 *)window + i);
587       accu = ixheaacd_sub32_sat(
588           ixheaacd_shl32_sat(ixheaacd_mult32x16lin32(outr, win1), q_shift),
589           ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1 >> 16)));
590 
591       *pcm_out = accu;
592       pcm_out -= ch_fac;
593       accu = ixheaacd_sub32_sat(
594           ixheaacd_shl32_sat(
595               ixheaacd_mult32x16hin32(ixheaacd_negate32_sat(outr), win1),
596               q_shift),
597           ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)win1));
598 
599       *pcmout1 = accu;
600       pcmout1 += ch_fac;
601 
602       tempr = *(spec_data - i);
603       tempi = *(spec_data - i + 1);
604 
605       i -= 2;
606 
607       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, cos),
608                             ixheaacd_mult32x16in32(tempi, sin));
609       outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, sin),
610                             ixheaacd_mult32x16in32(tempi, cos));
611 
612       overlap_data = *ptr_overlap_buf;
613 
614       temp1 = ixheaacd_mult32x16in32(outi, adjust1);
615 
616       temp2 = ixheaacd_mult32x16in32(outr, adjust);
617 
618       outr = outr + temp1;
619       outi = outi + temp2;
620 
621       *ptr_overlap_buf++ = ixheaacd_shr32_sat(outr, 16 - q_shift);
622 
623       win1 = *((WORD32 *)window + i + 1);
624       accu = ixheaacd_sub32_sat(
625           ixheaacd_shl32_sat(ixheaacd_mult32x16lin32(outi, win1), q_shift),
626           ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1 >> 16)));
627 
628       *pcm_out = accu;
629       pcm_out -= ch_fac;
630       accu = ixheaacd_sub32_sat(
631           ixheaacd_shl32_sat(
632               ixheaacd_mult32x16hin32(ixheaacd_negate32_sat(outi), win1),
633               q_shift),
634           ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1)));
635       *pcmout1 = accu;
636       pcmout1 += ch_fac;
637     }
638     cos1 = *cos_sin_ptr++;
639     sin1 = *cos_sin_ptr;
640 
641     tempr = *(spec_data + i);
642     tempi = *(spec_data + i + 1);
643 
644     outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, cos1),
645                           ixheaacd_mult32x16in32(tempi, sin1));
646     outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin1),
647                           ixheaacd_mult32x16in32(tempi, cos1));
648 
649     temp1 = ixheaacd_mult32x16in32(outi, adjust1);
650 
651     temp2 = ixheaacd_mult32x16in32(outr, adjust);
652 
653     outr = outr + temp1;
654     outi = outi + temp2;
655 
656     overlap_data = *ptr_overlap_buf;
657 
658     *ptr_overlap_buf++ = ixheaacd_shr32_sat(outi, 16 - q_shift);
659     win1 = *((WORD32 *)window + i);
660     accu = ixheaacd_sub32_sat(
661         ixheaacd_shl32_sat(ixheaacd_mult32x16lin32(outr, win1), q_shift),
662         ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1 >> 16)));
663 
664     *pcm_out = accu;
665     pcm_out -= ch_fac;
666     accu = ixheaacd_sub32_sat(
667         ixheaacd_shl32_sat(
668             ixheaacd_mult32x16hin32(ixheaacd_negate32_sat(outr), win1),
669             q_shift),
670         ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)win1));
671 
672     *pcmout1 = accu;
673     pcmout1 += ch_fac;
674   } else {
675     q_shift = -q_shift;
676     {
677       WORD32 tempr, tempi, temp1, temp2, outr, outi, win1, accu;
678       WORD16 adjust, adjust1;
679       WORD16 overlap_data;
680       tempr = *(spec_data - size);
681       tempi = *(spec_data - size + 1);
682 
683       adjust = 50;
684       adjust1 = -50;
685       cos = *cos_sin_ptr++;
686       sin = *cos_sin_ptr++;
687 
688       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
689                             ixheaacd_mult32x16in32(tempi, cos));
690       outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, cos),
691                             ixheaacd_mult32x16in32(tempi, sin));
692 
693       overlap_data = *ptr_overlap_buf;
694 
695       temp1 = ixheaacd_mult32x16in32(outi, adjust1);
696       temp2 = ixheaacd_mult32x16in32(outr, adjust);
697 
698       outr = outr + temp1;
699       outi = outi + temp2;
700 
701       *ptr_overlap_buf++ = ixheaacd_shr32_sat(outr, 16 + q_shift);
702 
703       win1 = *((WORD32 *)window + size - 1);
704       accu = ixheaacd_sub32_sat(
705           ixheaacd_shr32(ixheaacd_mult32x16lin32(outi, win1), q_shift),
706           ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1 >> 16)));
707 
708       *pcm_out = accu;
709 
710       pcm_out -= ch_fac;
711       accu = ixheaacd_sub32_sat(
712           ixheaacd_shr32(
713               ixheaacd_mult32x16hin32(ixheaacd_negate32_sat(outi), win1),
714               q_shift),
715           ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1)));
716 
717       *pcmout1 = accu;
718       pcmout1 += ch_fac;
719 
720       for (i = size - 2; i != 0;) {
721         sin = *cos_sin_ptr++;
722         cos = *cos_sin_ptr++;
723 
724         tempr = *(spec_data + i);
725         tempi = *(spec_data + i + 1);
726 
727         outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, cos),
728                               ixheaacd_mult32x16in32(tempi, sin));
729         outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
730                               ixheaacd_mult32x16in32(tempi, cos));
731 
732         overlap_data = *ptr_overlap_buf;
733 
734         temp1 = ixheaacd_mult32x16in32(outi, adjust1);
735 
736         temp2 = ixheaacd_mult32x16in32(outr, adjust);
737         outr = outr + temp1;
738         outi = outi + temp2;
739         *ptr_overlap_buf++ = ixheaacd_shr32_sat(outi, 16 + q_shift);
740 
741         win1 = *((WORD32 *)window + i);
742         accu = ixheaacd_sub32_sat(
743             ixheaacd_shr32(ixheaacd_mult32x16lin32(outr, win1), q_shift),
744             ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1 >> 16)));
745 
746         *pcm_out = accu;
747         pcm_out -= ch_fac;
748 
749         accu = ixheaacd_sub32_sat(
750             ixheaacd_shr32(
751                 ixheaacd_mult32x16hin32(ixheaacd_negate32_sat(outr), win1),
752                 q_shift),
753             ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)win1));
754 
755         *pcmout1 = accu;
756         pcmout1 += ch_fac;
757 
758         tempr = *(spec_data - i);
759         tempi = *(spec_data - i + 1);
760         i -= 2;
761 
762         outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, cos),
763                               ixheaacd_mult32x16in32(tempi, sin));
764         outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, sin),
765                               ixheaacd_mult32x16in32(tempi, cos));
766 
767         overlap_data = *ptr_overlap_buf;
768 
769         temp1 = ixheaacd_mult32x16in32(outi, adjust1);
770         temp2 = ixheaacd_mult32x16in32(outr, adjust);
771 
772         outr = outr + temp1;
773         outi = outi + temp2;
774 
775         *ptr_overlap_buf++ = ixheaacd_shr32_sat(outr, 16 + q_shift);
776 
777         win1 = *((WORD32 *)window + i + 1);
778         accu = ixheaacd_sub32_sat(
779             ixheaacd_shr32(ixheaacd_mult32x16lin32(outi, win1), q_shift),
780             ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1 >> 16)));
781 
782         *pcm_out = accu;
783         pcm_out -= ch_fac;
784 
785         accu = ixheaacd_sub32_sat(
786             ixheaacd_shr32(
787                 ixheaacd_mult32x16hin32(ixheaacd_negate32_sat(outi), win1),
788                 q_shift),
789             ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1)));
790 
791         *pcmout1 = accu;
792         pcmout1 += ch_fac;
793       }
794       cos1 = *cos_sin_ptr++;
795       sin1 = *cos_sin_ptr++;
796 
797       tempr = *(spec_data + i);
798       tempi = *(spec_data + i + 1);
799 
800       outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, cos1),
801                             ixheaacd_mult32x16in32(tempi, sin1));
802       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin1),
803                             ixheaacd_mult32x16in32(tempi, cos1));
804 
805       overlap_data = *ptr_overlap_buf;
806 
807       temp1 = ixheaacd_mult32x16in32(outi, adjust1);
808 
809       temp2 = ixheaacd_mult32x16in32(outr, adjust);
810 
811       outr = outr + temp1;
812       outi = outi + temp2;
813 
814       *ptr_overlap_buf++ = ixheaacd_shr32_sat(outi, 16 + q_shift);
815 
816       win1 = *((WORD32 *)window + i);
817       accu = ixheaacd_sub32_sat(
818           ixheaacd_shr32(ixheaacd_mult32x16lin32(outr, win1), q_shift),
819           ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)(win1 >> 16)));
820 
821       *pcm_out = accu;
822       pcm_out -= ch_fac;
823       accu = ixheaacd_sub32_sat(
824           ixheaacd_shr32(
825               ixheaacd_mult32x16hin32(ixheaacd_negate32_sat(outr), win1),
826               q_shift),
827           ixheaacd_mult32x16lin32_sat(overlap_data, (WORD16)win1));
828       *pcmout1 = accu;
829       pcmout1 += ch_fac;
830     }
831   }
832 }
833 
/*
 * ixheaacd_imdct_using_fft_dec
 *
 * Fixed-point complex FFT built from radix-8 butterflies. Data are stored as
 * interleaved (real, imaginary) WORD32 pairs.
 *
 *   ptr_imdct_tables  supplies fft_twiddle and the two digit-reversal tables
 *                     (dig_rev_table8_long / dig_rev_table8_short).
 *   npoints           transform length; the "long" digit-reversal table is
 *                     selected when (npoints << 1) == 1024, otherwise the
 *                     "short" one is used.
 *   ptr_x             input buffer, only read by this function.
 *   ptr_y             output buffer; written by the first pass and then
 *                     updated in place by the later passes.
 *
 * Structure:
 *   1. a first pass that gathers eight inputs per butterfly from ptr_x via the
 *      digit-reversal table and writes 16 consecutive words to ptr_y;
 *   2. (n_stages - 2) middle passes working in place on ptr_y, each split into
 *      a twiddle-free loop and a twiddled loop;
 *   3. one final twiddled pass, again in place on ptr_y.
 *
 * NOTE(review): n_stages is presumably log8(npoints), assuming
 * ixheaacd_norm32() returns the left-shift normalisation count of its
 * argument - confirm against the helper's definition.
 * NOTE(review): each twiddle word is fed to both the "lin32" (low half) and
 * "hin32" (high half) multiply helpers, so it presumably packs two 16-bit
 * factors in one WORD32 - confirm against the table layout.
 */
ixheaacd_imdct_using_fft_dec(ia_aac_dec_imdct_tables_struct * ptr_imdct_tables,WORD32 npoints,WORD32 * ptr_x,WORD32 * ptr_y)834 VOID ixheaacd_imdct_using_fft_dec(
835     ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD32 npoints,
836     WORD32 *ptr_x, WORD32 *ptr_y)
837 
838 {
839   WORD32 i, j, k, k1, n_stages;
840   WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i,
841       x7r, x7i;
842   WORD32 del, nodespacing, in_loop_cnt, tmp, twiddle_val, *ptr_tmp;
843   const WORD32 *ptr_twiddle;
844   WORD8 *ptr_dig_rev_table;
845   n_stages = ixheaacd_norm32(npoints);
846 
847   n_stages = (30 - n_stages) / 3;
848 
849   ptr_tmp = ptr_y;
850 
851   ptr_twiddle = ptr_imdct_tables->fft_twiddle;
852   ptr_dig_rev_table = ((npoints << 1) == 1024)
853                           ? ptr_imdct_tables->dig_rev_table8_long
854                           : ptr_imdct_tables->dig_rev_table8_short;
855 
/*
 * First pass: npoints / 8 butterflies. Each iteration consumes one entry of
 * the digit-reversal table, reads eight complex inputs from ptr_x spaced
 * (npoints >> 2) words apart (even-numbered ones first, then odd-numbered
 * ones), and writes 16 consecutive words through ptr_tmp. No twiddle table
 * lookup is needed here; only the constant 0x5A82 is used.
 */
856   for (i = npoints; i != 0; i -= 8) {
857     WORD32 *data = ptr_x;
858     data = data + (*ptr_dig_rev_table++ << 1);
859 
860     x0r = *data;
861     x0i = *(data + 1);
862     data += (npoints >> 1);
863 
864     x2r = *data;
865     x2i = *(data + 1);
866     data += (npoints >> 1);
867 
868     x4r = *data;
869     x4i = *(data + 1);
870     data += (npoints >> 1);
871 
872     x6r = *data;
873     x6i = *(data + 1);
874     data -= 5 * (npoints >> 2);
875 
876     x0r = x0r + x4r;
877     x0i = x0i + x4i;
878     x4r = x0r - (x4r << 1);
879     x4i = x0i - (x4i << 1);
880 
881     x2r = x2r + x6r;
882     x2i = x2i + x6i;
883     x6r = x2r - (x6r << 1);
884     x6i = x2i - (x6i << 1);
885 
886     x0r = x0r + x2r;
887     x0i = x0i + x2i;
888     x2r = x0r - (x2r << 1);
889     x2i = x0i - (x2i << 1);
890 
891     x4r = x4r + x6i;
892     x4i = x4i - x6r;
893     tmp = x6r;
894     x6r = x4r - (x6i << 1);
895     x6i = x4i + (tmp << 1);
896 
897     x1r = *data;
898     x1i = *(data + 1);
899     data += (npoints >> 1);
900 
901     x3r = *data;
902     x3i = *(data + 1);
903     data += (npoints >> 1);
904 
905     x5r = *data;
906     x5i = *(data + 1);
907     data += (npoints >> 1);
908 
909     x7r = *data;
910     x7i = *(data + 1);
911     data -= 7 * (npoints >> 2);
912 
913     x1r = x1r + x5r;
914     x1i = x1i + x5i;
915     x5r = x1r - (x5r << 1);
916     x5i = x1i - (x5i << 1);
917 
918     x3r = x3r + x7r;
919     x3i = x3i + x7i;
920     x7r = x3r - (x7r << 1);
921     x7i = x3i - (x7i << 1);
922 
923     x1r = x1r + x3r;
924     x1i = x1i + x3i;
925     x3r = x1r - (x3r << 1);
926     x3i = x1i - (x3i << 1);
927 
928     x5r = x5r + x5i;
929     x5i = x5r - (x5i << 1);
930 
931     x7r = x7r + x7i;
932     x7i = x7r - (x7i << 1);
933 
934     x7i = x5r - x7i;
935     x5r = x7i - (x5r << 1);
936 
937     x5i = x7r - x5i;
938     x7r = x5i - (x7r << 1);
939 
940     x7i = x7i << 1;
941     x5r = x5r << 1;
942     x5i = x5i << 1;
943     x7r = x7r << 1;
944 
945     x0r = x0r + x1r;
946     x0i = x0i + x1i;
947     x1r = x0r - (x1r << 1);
948     x1i = x0i - (x1i << 1);
949 
950     x2r = x2r + x3i;
951     tmp = x2r - (x3i << 1);
952     x2i = x2i - x3r;
953     x3i = x2i + (x3r << 1);
954 
/* Four of the eight outputs go to word offsets 0, 4, 8 and 12 of this group. */
955     *ptr_tmp = x0r;
956     *(ptr_tmp + 1) = x0i;
957     ptr_tmp += 4;
958 
959     *ptr_tmp = x2r;
960     *(ptr_tmp + 1) = x2i;
961     ptr_tmp += 4;
962 
963     *ptr_tmp = x1r;
964     *(ptr_tmp + 1) = x1i;
965     ptr_tmp += 4;
966 
967     *ptr_tmp = tmp;
968     *(ptr_tmp + 1) = x3i;
969     ptr_tmp -= 10;
970 
/* 0x5A82 == 23170, i.e. roughly 0.7071 * 2^15 (presumably cos(pi/4) in Q15). */
971     tmp = 0x5A82;
972 
973     x7i = x4r + (ixheaacd_mult32x16lin32(x7i, tmp));
974     x4r = x7i - (x4r << 1);
975 
976     x7r = x4i + (ixheaacd_mult32x16lin32(x7r, tmp));
977     x4i = x7r - (x4i << 1);
978 
979     x5i = x6r + (ixheaacd_mult32x16lin32(x5i, tmp));
980     x6r = x5i - (x6r << 1);
981 
982     x5r = x6i + (ixheaacd_mult32x16lin32(x5r, tmp));
983     x6i = x5r - (x6i << 1);
984 
/* The remaining four outputs fill word offsets 2, 6, 10 and 14; the final
 * "+= 2" leaves ptr_tmp at the start of the next 16-word group. */
985     *ptr_tmp = x7i;
986     *(ptr_tmp + 1) = x7r;
987     ptr_tmp += 4;
988 
989     *ptr_tmp = x5i;
990     *(ptr_tmp + 1) = x5r;
991     ptr_tmp += 4;
992 
993     *ptr_tmp = -x4r;
994     *(ptr_tmp + 1) = -x4i;
995     ptr_tmp += 4;
996 
997     *ptr_tmp = -x6r;
998     *(ptr_tmp + 1) = -x6i;
999     ptr_tmp += 2;
1000   }
1001 
/*
 * State for the in-place passes. Butterfly legs are (del << 2) words apart;
 * nodespacing is the step of the twiddle loop counter j; in_loop_cnt is the
 * number of butterflies sharing one twiddle set. After every middle pass del
 * is multiplied by 8 while nodespacing and in_loop_cnt are divided by 8.
 */
1002   del = 8;
1003 
1004   nodespacing = 64;
1005   in_loop_cnt = npoints >> 6;
1006 
/* Middle passes: executed (n_stages - 2) times, in place on ptr_y. */
1007   for (k1 = n_stages - 2; k1 > 0; k1--) {
1008     WORD32 *data = ptr_y;
1009     const WORD32 *twiddles;
1010 
/* Twiddle-free butterflies of this pass: one per block of 8 * del complex
 * points, starting at the first point of the block. */
1011     for (i = 0; i != npoints; i += 8 * del) {
1012       data = ptr_y + (i << 1);
1013       x0r = *data;
1014       x0i = *(data + 1);
1015       data += (del << 2);
1016 
1017       x2r = *data;
1018       x2i = *(data + 1);
1019       data += (del << 2);
1020 
1021       x4r = *data;
1022       x4i = *(data + 1);
1023       data += (del << 2);
1024 
1025       x6r = *data;
1026       x6i = *(data + 1);
1027       data -= 5 * (del << 1);
1028 
1029       x0r = x0r + x4r;
1030       x0i = x0i + x4i;
1031       x4r = x0r - (x4r << 1);
1032       x4i = x0i - (x4i << 1);
1033 
1034       x2r = x2r + x6r;
1035       x2i = x2i + x6i;
1036       x6r = x2r - (x6r << 1);
1037       x6i = x2i - (x6i << 1);
1038 
1039       x0r = x0r + x2r;
1040       x0i = x0i + x2i;
1041       x2r = x0r - (x2r << 1);
1042       x2i = x0i - (x2i << 1);
1043 
1044       x4r = x4r + x6i;
1045       x4i = x4i - x6r;
1046       tmp = x6r;
1047       x6r = x4r - (x6i << 1);
1048       x6i = x4i + (tmp << 1);
1049 
1050       x1r = *data;
1051       x1i = *(data + 1);
1052       data += (del << 2);
1053 
1054       x3r = *data;
1055       x3i = *(data + 1);
1056       data += (del << 2);
1057 
1058       x5r = *data;
1059       x5i = *(data + 1);
1060       data += (del << 2);
1061 
1062       x7r = *data;
1063       x7i = *(data + 1);
1064       data -= 7 * (del << 1);
1065 
1066       x1r = x1r + x5r;
1067       x1i = x1i + x5i;
1068       x5r = x1r - (x5r << 1);
1069       x5i = x1i - (x5i << 1);
1070 
1071       x3r = x3r + x7r;
1072       x3i = x3i + x7i;
1073       x7r = x3r - (x7r << 1);
1074       x7i = x3i - (x7i << 1);
1075 
1076       x1r = x1r + x3r;
1077       x1i = x1i + x3i;
1078       x3r = x1r - (x3r << 1);
1079       x3i = x1i - (x3i << 1);
1080 
1081       x5r = x5r + x5i;
1082       x5i = x5r - (x5i << 1);
1083 
1084       x7r = x7r + x7i;
1085       x7i = x7r - (x7i << 1);
1086 
1087       x7i = x5r - x7i;
1088       x5r = x7i - (x5r << 1);
1089 
1090       x5i = x7r - x5i;
1091       x7r = x5i - (x7r << 1);
1092 
1093       x7i = x7i << 1;
1094       x5r = x5r << 1;
1095       x5i = x5i << 1;
1096       x7r = x7r << 1;
1097 
1098       x0r = x0r + x1r;
1099       x0i = x0i + x1i;
1100       x1r = x0r - (x1r << 1);
1101       x1i = x0i - (x1i << 1);
1102 
1103       x2r = x2r + x3i;
1104       tmp = x2r - (x3i << 1);
1105       x2i = x2i - x3r;
1106       x3i = x2i + (x3r << 1);
1107 
1108       *data = x0r;
1109       *(data + 1) = x0i;
1110       data += (del << 2);
1111 
1112       *data = x2r;
1113       *(data + 1) = x2i;
1114       data += (del << 2);
1115 
1116       *data = x1r;
1117       *(data + 1) = x1i;
1118       data += (del << 2);
1119 
1120       *data = tmp;
1121       *(data + 1) = x3i;
1122       data -= 5 * (del << 1);
1123 
1124       tmp = 0x5A82;
1125 
1126       x7i = x4r + (ixheaacd_mult32x16lin32(x7i, tmp));
1127       x4r = x7i - (x4r << 1);
1128       x7r = x4i + (ixheaacd_mult32x16lin32(x7r, tmp));
1129       x4i = x7r - (x4i << 1);
1130 
1131       x5i = x6r + (ixheaacd_mult32x16lin32(x5i, tmp));
1132       x6r = x5i - (x6r << 1);
1133       x5r = x6i + (ixheaacd_mult32x16lin32(x5r, tmp));
1134       x6i = x5r - (x6i << 1);
1135 
1136       *data = x7i;
1137       *(data + 1) = x7r;
1138       data += (del << 2);
1139 
1140       *data = x5i;
1141       *(data + 1) = x5r;
1142       data += (del << 2);
1143 
1144       *data = -x4r;
1145       *(data + 1) = -x4i;
1146       data += (del << 2);
1147 
1148       *data = -x6r;
1149       *(data + 1) = -x6i;
1150 
1151       data -= 7 * (del << 1);
1152     }
1153 
1154     twiddles = ptr_twiddle;
1155     data = ptr_y;
1156 
/*
 * Twiddled butterflies of this pass. j starts at nodespacing, so the j == 0
 * case handled by the loop above is not repeated. For every j the data
 * pointer advances by one complex point, and in_loop_cnt butterflies
 * (one per block of 8 * del complex points) reuse the same seven twiddle
 * words. The twiddles pointer is stepped forwards and backwards inside the
 * body and returns to its starting value at the end of each butterfly.
 */
1157     for (j = nodespacing; j < nodespacing * del; j += nodespacing) {
1158       data = data + 2;
1159 
1160       for (k = in_loop_cnt; k != 0; k--) {
1161         data += (del << 2);
1162         x2r = *data;
1163         x2i = *(data + 1);
1164 
1165         data += (del << 2);
1166         x4r = *data;
1167         x4i = *(data + 1);
1168 
1169         data += (del << 2);
1170         x6r = *data;
1171         x6i = *(data + 1);
1172 
1173         data -= 6 * (del << 1);
1174 
1175         twiddles += (j >> 2);
1176 
1177         twiddle_val = *(twiddles);
1178 
1179         tmp = (ixheaacd_mult32x16lin32(x2r, twiddle_val) -
1180                ixheaacd_mult32x16hin32(x2i, twiddle_val));
1181         x2i = (ixheaacd_mac32x16lin32(
1182                   ixheaacd_mult32x16hin32(x2r, twiddle_val), x2i,
1183                   twiddle_val))
1184               << 1;
1185         x2r = tmp << 1;
1186 
1187         twiddles += (j >> 2);
1188         twiddle_val = *(twiddles);
1189 
1190         tmp = (ixheaacd_mult32x16lin32(x4r, twiddle_val) -
1191                ixheaacd_mult32x16hin32(x4i, twiddle_val));
1192         x4i = (ixheaacd_mac32x16lin32(
1193                   ixheaacd_mult32x16hin32(x4r, twiddle_val), x4i,
1194                   twiddle_val))
1195               << 1;
1196         x4r = tmp << 1;
1197 
1198         twiddles += (j >> 2);
1199         twiddle_val = *(twiddles);
1200 
1201         tmp = (ixheaacd_mult32x16lin32(x6r, twiddle_val) -
1202                ixheaacd_mult32x16hin32(x6i, twiddle_val));
1203         x6i = (ixheaacd_mac32x16lin32(
1204                   ixheaacd_mult32x16hin32(x6r, twiddle_val), x6i,
1205                   twiddle_val))
1206               << 1;
1207         x6r = tmp << 1;
1208 
1209         x0r = *data;
1210         x0i = *(data + 1);
1211         data += (del << 1);
1212 
1213         x0r = x0r + x4r;
1214         x0i = x0i + x4i;
1215         x4r = x0r - (x4r << 1);
1216         x4i = x0i - (x4i << 1);
1217 
1218         x2r = x2r + x6r;
1219         x2i = x2i + x6i;
1220         x6r = x2r - (x6r << 1);
1221         x6i = x2i - (x6i << 1);
1222 
1223         x0r = x0r + x2r;
1224         x0i = x0i + x2i;
1225         x2r = x0r - (x2r << 1);
1226         x2i = x0i - (x2i << 1);
1227 
1228         x4r = x4r + x6i;
1229         x4i = x4i - x6r;
1230         tmp = x6r;
1231         x6r = x4r - (x6i << 1);
1232         x6i = x4i + (tmp << 1);
1233 
1234         x1r = *data;
1235         x1i = *(data + 1);
1236         data += (del << 2);
1237 
1238         twiddles -= 5 * (j >> 3);
1239         twiddle_val = *(twiddles);
1240 
1241         tmp = (ixheaacd_mult32x16lin32(x1r, twiddle_val) -
1242                ixheaacd_mult32x16hin32(x1i, twiddle_val));
1243         x1i = (ixheaacd_mac32x16lin32(
1244                   ixheaacd_mult32x16hin32(x1r, twiddle_val), x1i,
1245                   twiddle_val))
1246               << 1;
1247         x1r = tmp << 1;
1248 
1249         x3r = *data;
1250         x3i = *(data + 1);
1251         data += (del << 2);
1252 
1253         twiddles += (j >> 2);
1254         twiddle_val = *(twiddles);
1255 
/* Unlike x1/x2/x4/x6, the x3, x5 and x7 products are not shifted left by one
 * here; the extra "<< 1" / "<< 2" factors appear in the sums further down. */
1256         tmp = (ixheaacd_mult32x16lin32(x3r, twiddle_val) -
1257                ixheaacd_mult32x16hin32(x3i, twiddle_val));
1258         x3i = (ixheaacd_mac32x16lin32(
1259             ixheaacd_mult32x16hin32(x3r, twiddle_val), x3i, twiddle_val));
1260         x3r = tmp;
1261 
1262         x5r = *data;
1263         x5i = *(data + 1);
1264         data += (del << 2);
1265 
1266         twiddles += (j >> 2);
1267         twiddle_val = *(twiddles);
1268 
1269         tmp = (ixheaacd_mult32x16lin32(x5r, twiddle_val) -
1270                ixheaacd_mult32x16hin32(x5i, twiddle_val));
1271         x5i = (ixheaacd_mac32x16lin32(
1272             ixheaacd_mult32x16hin32(x5r, twiddle_val), x5i, twiddle_val));
1273         x5r = tmp;
1274 
1275         x7r = *data;
1276         x7i = *(data + 1);
1277         data -= 7 * (del << 1);
1278 
1279         twiddles += (j >> 2);
1280         twiddle_val = *(twiddles);
1281         twiddles -= 7 * (j >> 3);
1282 
1283         tmp = (ixheaacd_mult32x16lin32(x7r, twiddle_val) -
1284                ixheaacd_mult32x16hin32(x7i, twiddle_val));
1285         x7i = (ixheaacd_mac32x16lin32(
1286             ixheaacd_mult32x16hin32(x7r, twiddle_val), x7i, twiddle_val));
1287         x7r = tmp;
1288 
1289         x1r = x1r + (x5r << 1);
1290         x1i = x1i + (x5i << 1);
1291         x5r = x1r - (x5r << 2);
1292         x5i = x1i - (x5i << 2);
1293 
1294         x3r = x3r + x7r;
1295         x3i = x3i + x7i;
1296         x7r = x3r - (x7r << 1);
1297         x7i = x3i - (x7i << 1);
1298 
1299         x1r = x1r + (x3r << 1);
1300         x1i = x1i + (x3i << 1);
1301         x3r = x1r - (x3r << 2);
1302         x3i = x1i - (x3i << 2);
1303 
1304         x5r = x5r + x5i;
1305         x5i = x5r - (x5i << 1);
1306 
1307         x7r = x7r + x7i;
1308         x7i = x7r - (x7i << 1);
1309 
1310         x7i = x5r - (x7i << 1);
1311         x5r = x7i - (x5r << 1);
1312 
1313         x5i = (x7r << 1) - x5i;
1314         x7r = x5i - (x7r << 2);
1315 
1316         x7i = x7i << 1;
1317         x5r = x5r << 1;
1318         x5i = x5i << 1;
1319         x7r = x7r << 1;
1320 
1321         x0r = x0r + x1r;
1322         x0i = x0i + x1i;
1323         x1r = x0r - (x1r << 1);
1324         x1i = x0i - (x1i << 1);
1325 
1326         x2r = x2r + x3i;
1327         tmp = x2r - (x3i << 1);
1328         x2i = x2i - x3r;
1329         x3i = x2i + (x3r << 1);
1330 
1331         *data = x0r;
1332         *(data + 1) = x0i;
1333         data += (del << 2);
1334 
1335         *data = x2r;
1336         *(data + 1) = x2i;
1337         data += (del << 2);
1338 
1339         *data = x1r;
1340         *(data + 1) = x1i;
1341         data += (del << 2);
1342 
1343         *data = tmp;
1344         *(data + 1) = x3i;
1345         data -= 5 * (del << 1);
1346 
1347         tmp = 0x5A82;
1348 
1349         x7i = x4r + (ixheaacd_mult32x16lin32(x7i, tmp));
1350         x4r = x7i - (x4r << 1);
1351 
1352         x7r = x4i + (ixheaacd_mult32x16lin32(x7r, tmp));
1353         x4i = x7r - (x4i << 1);
1354 
1355         x5i = x6r + (ixheaacd_mult32x16lin32(x5i, tmp));
1356         x6r = x5i - (x6r << 1);
1357 
1358         x5r = x6i + (ixheaacd_mult32x16lin32(x5r, tmp));
1359         x6i = x5r - (x6i << 1);
1360 
1361         *data = x7i;
1362         *(data + 1) = x7r;
1363         data += (del << 2);
1364 
1365         *data = x5i;
1366         *(data + 1) = x5r;
1367         data += (del << 2);
1368 
1369         *data = -x4r;
1370         *(data + 1) = -x4i;
1371         data += (del << 2);
1372 
1373         *data = -x6r;
1374         *(data + 1) = -x6i;
1375 
/* Return to the first leg, then jump to the same position in the next block
 * of 8 * del complex points. */
1376         data -= 7 * (del << 1);
1377         data += (del << 4);
1378       }
/* Undo the block-to-block jumps so the next j starts one complex point
 * further into the first block. */
1379       data -= npoints << 1;
1380     }
1381     nodespacing >>= 3;
1382     del <<= 3;
1383     in_loop_cnt >>= 3;
1384   }
1385 
/*
 * Final pass: same twiddled butterfly as above, but with a single butterfly
 * per j (no inner k loop) and with j starting at 0. data starts at ptr_y - 2
 * because the loop body pre-increments it by 2 before the first access.
 */
1386   {
1387     WORD32 *data = ptr_y;
1388     const WORD32 *twiddles;
1389     twiddles = ptr_twiddle;
1390     data = ptr_y;
1391     data = data - 2;
1392 
1393     for (j = 0; j < nodespacing * del; j += nodespacing) {
1394       data = data + 2;
1395 
1396       {
1397         data += (del << 2);
1398         x2r = *data;
1399         x2i = *(data + 1);
1400 
1401         data += (del << 2);
1402         x4r = *data;
1403         x4i = *(data + 1);
1404 
1405         data += (del << 2);
1406         x6r = *data;
1407         x6i = *(data + 1);
1408 
1409         data -= 6 * (del << 1);
1410 
1411         twiddles += (j >> 2);
1412 
1413         twiddle_val = *(twiddles);
1414 
1415         tmp = (ixheaacd_mult32x16lin32(x2r, twiddle_val) -
1416                ixheaacd_mult32x16hin32(x2i, twiddle_val));
1417         x2i = (ixheaacd_mac32x16lin32(
1418                   ixheaacd_mult32x16hin32(x2r, twiddle_val), x2i,
1419                   twiddle_val))
1420               << 1;
1421         x2r = tmp << 1;
1422 
1423         twiddles += (j >> 2);
1424         twiddle_val = *(twiddles);
1425 
1426         tmp = (ixheaacd_mult32x16lin32(x4r, twiddle_val) -
1427                ixheaacd_mult32x16hin32(x4i, twiddle_val));
1428         x4i = (ixheaacd_mac32x16lin32(
1429                   ixheaacd_mult32x16hin32(x4r, twiddle_val), x4i,
1430                   twiddle_val))
1431               << 1;
1432         x4r = tmp << 1;
1433 
1434         twiddles += (j >> 2);
1435         twiddle_val = *(twiddles);
1436 
1437         tmp = (ixheaacd_mult32x16lin32(x6r, twiddle_val) -
1438                ixheaacd_mult32x16hin32(x6i, twiddle_val));
1439         x6i = (ixheaacd_mac32x16lin32(
1440                   ixheaacd_mult32x16hin32(x6r, twiddle_val), x6i,
1441                   twiddle_val))
1442               << 1;
1443         x6r = tmp << 1;
1444 
1445         x0r = *data;
1446         x0i = *(data + 1);
1447         data += (del << 1);
1448 
1449         x0r = x0r + x4r;
1450         x0i = x0i + x4i;
1451         x4r = x0r - (x4r << 1);
1452         x4i = x0i - (x4i << 1);
1453 
1454         x2r = x2r + x6r;
1455         x2i = x2i + x6i;
1456         x6r = x2r - (x6r << 1);
1457         x6i = x2i - (x6i << 1);
1458 
1459         x0r = x0r + x2r;
1460         x0i = x0i + x2i;
1461         x2r = x0r - (x2r << 1);
1462         x2i = x0i - (x2i << 1);
1463 
1464         x4r = x4r + x6i;
1465         x4i = x4i - x6r;
1466         tmp = x6r;
1467         x6r = x4r - (x6i << 1);
1468         x6i = x4i + (tmp << 1);
1469 
1470         x1r = *data;
1471         x1i = *(data + 1);
1472         data += (del << 2);
1473 
1474         twiddles -= 5 * (j >> 3);
1475         twiddle_val = *(twiddles);
1476 
1477         tmp = (ixheaacd_mult32x16lin32(x1r, twiddle_val) -
1478                ixheaacd_mult32x16hin32(x1i, twiddle_val));
1479         x1i = (ixheaacd_mac32x16lin32(
1480                   ixheaacd_mult32x16hin32(x1r, twiddle_val), x1i,
1481                   twiddle_val))
1482               << 1;
1483         x1r = tmp << 1;
1484 
1485         x3r = *data;
1486         x3i = *(data + 1);
1487         data += (del << 2);
1488 
1489         twiddles += (j >> 2);
1490         twiddle_val = *(twiddles);
1491 
1492         tmp = (ixheaacd_mult32x16lin32(x3r, twiddle_val) -
1493                ixheaacd_mult32x16hin32(x3i, twiddle_val));
1494         x3i = (ixheaacd_mac32x16lin32(
1495             ixheaacd_mult32x16hin32(x3r, twiddle_val), x3i, twiddle_val));
1496         x3r = tmp;
1497 
1498         x5r = *data;
1499         x5i = *(data + 1);
1500         data += (del << 2);
1501 
1502         twiddles += (j >> 2);
1503         twiddle_val = *(twiddles);
1504 
1505         tmp = (ixheaacd_mult32x16lin32(x5r, twiddle_val) -
1506                ixheaacd_mult32x16hin32(x5i, twiddle_val));
1507         x5i = (ixheaacd_mac32x16lin32(
1508             ixheaacd_mult32x16hin32(x5r, twiddle_val), x5i, twiddle_val));
1509         x5r = tmp;
1510 
1511         x7r = *data;
1512         x7i = *(data + 1);
1513         data -= 7 * (del << 1);
1514 
1515         twiddles += (j >> 2);
1516         twiddle_val = *(twiddles);
1517         twiddles -= 7 * (j >> 3);
1518 
1519         tmp = (ixheaacd_mult32x16lin32(x7r, twiddle_val) -
1520                ixheaacd_mult32x16hin32(x7i, twiddle_val));
1521         x7i = (ixheaacd_mac32x16lin32(
1522             ixheaacd_mult32x16hin32(x7r, twiddle_val), x7i, twiddle_val));
1523         x7r = tmp;
1524 
1525         x1r = x1r + (x5r << 1);
1526         x1i = x1i + (x5i << 1);
1527         x5r = x1r - (x5r << 2);
1528         x5i = x1i - (x5i << 2);
1529 
1530         x3r = x3r + x7r;
1531         x3i = x3i + x7i;
1532         x7r = x3r - (x7r << 1);
1533         x7i = x3i - (x7i << 1);
1534 
1535         x1r = x1r + (x3r << 1);
1536         x1i = x1i + (x3i << 1);
1537         x3r = x1r - (x3r << 2);
1538         x3i = x1i - (x3i << 2);
1539 
1540         x5r = x5r + x5i;
1541         x5i = x5r - (x5i << 1);
1542 
1543         x7r = x7r + x7i;
1544         x7i = x7r - (x7i << 1);
1545 
1546         x7i = x5r - (x7i << 1);
1547         x5r = x7i - (x5r << 1);
1548 
1549         x5i = (x7r << 1) - x5i;
1550         x7r = x5i - (x7r << 2);
1551 
1552         x7i = x7i << 1;
1553         x5r = x5r << 1;
1554         x5i = x5i << 1;
1555         x7r = x7r << 1;
1556 
1557         x0r = x0r + x1r;
1558         x0i = x0i + x1i;
1559         x1r = x0r - (x1r << 1);
1560         x1i = x0i - (x1i << 1);
1561 
1562         x2r = x2r + x3i;
1563         tmp = x2r - (x3i << 1);
1564         x2i = x2i - x3r;
1565         x3i = x2i + (x3r << 1);
1566 
1567         *data = x0r;
1568         *(data + 1) = x0i;
1569         data += (del << 2);
1570 
1571         *data = x2r;
1572         *(data + 1) = x2i;
1573         data += (del << 2);
1574 
1575         *data = x1r;
1576         *(data + 1) = x1i;
1577         data += (del << 2);
1578 
1579         *data = tmp;
1580         *(data + 1) = x3i;
1581         data -= 5 * (del << 1);
1582 
1583         tmp = 0x5A82;
1584 
1585         x7i = x4r + (ixheaacd_mult32x16lin32(x7i, tmp));
1586         x4r = x7i - (x4r << 1);
1587 
1588         x7r = x4i + (ixheaacd_mult32x16lin32(x7r, tmp));
1589         x4i = x7r - (x4i << 1);
1590 
1591         x5i = x6r + (ixheaacd_mult32x16lin32(x5i, tmp));
1592         x6r = x5i - (x6r << 1);
1593 
1594         x5r = x6i + (ixheaacd_mult32x16lin32(x5r, tmp));
1595         x6i = x5r - (x6i << 1);
1596 
1597         *data = x7i;
1598         *(data + 1) = x7r;
1599         data += (del << 2);
1600 
1601         *data = x5i;
1602         *(data + 1) = x5r;
1603         data += (del << 2);
1604 
1605         *data = -x4r;
1606         *(data + 1) = -x4i;
1607         data += (del << 2);
1608 
1609         *data = -x6r;
1610         *(data + 1) = -x6i;
1611 
1612         data -= 7 * (del << 1);
1613         data += (del << 4);
1614       }
1615       data -= npoints << 1;
1616     }
1617 
/* NOTE(review): these three updates are not read again before the function
 * returns. */
1618     nodespacing >>= 3;
1619     del <<= 3;
1620     in_loop_cnt >>= 3;
1621   }
1622 }
1623 
ixheaacd_inverse_transform_960(WORD32 spec_data[],WORD32 scratch[],ia_aac_dec_imdct_tables_struct * ptr_imdct_tables,WORD32 expo,WORD32 * imdct_scale)1624 VOID ixheaacd_inverse_transform_960(
1625     WORD32 spec_data[], WORD32 scratch[],
1626     ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD32 expo,
1627     WORD32 *imdct_scale) {
1628 
1629   WORD32 n;
1630   WORD32 Nd2;
1631   WORD16 const_mltfac;
1632   WORD32 neg_expo;
1633 
1634   WORD32 i;
1635 
1636   n = 120;
1637   Nd2 = n >> 1;
1638   neg_expo = 4;
1639 
1640   ixheaacd_pre_twiddle_120(spec_data, scratch, n, ptr_imdct_tables->cosine_array_240,
1641                            neg_expo - expo);
1642 
1643   ixheaacd_fft_120(ptr_imdct_tables, Nd2, spec_data, scratch);
1644 
1645   neg_expo += 2;
1646   *imdct_scale = neg_expo + 1;
1647 
1648   ixheaacd_post_twiddle_120(spec_data, scratch, ptr_imdct_tables->cosine_array_240,
1649                             n);
1650   const_mltfac = 17476;
1651   for (i = 0; i < 120; i++)
1652   {
1653     spec_data[i] = ixheaacd_mult32x16in32_shl(spec_data[i], const_mltfac);
1654   }
1655 }
1656 
/*
 * ixheaacd_inverse_transform
 *
 * Calls the selected pre-twiddle routine (spec_data -> scratch) followed by
 * the selected FFT routine (scratch -> spec_data), both through the
 * ixheaacd_pretwiddle_compute / ixheaacd_imdct_using_fft function selectors.
 *
 *   spec_data         spectral buffer; the pre-twiddle is given pointers to
 *                     its first element and to element (npoints - 1).
 *   scratch           intermediate buffer between the two stages.
 *   ptr_imdct_tables  table set forwarded to both stages.
 *   expo              input exponent, forwarded to the pre-twiddle.
 *   npoints           transform size; the pre-twiddle receives npoints >> 2
 *                     and the FFT receives npoints >> 1.
 *
 * Returns expo + 2.
 */
WORD32 ixheaacd_inverse_transform(
    WORD32 spec_data[], WORD32 scratch[],
    ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD32 expo,
    WORD32 npoints) {
  WORD32 *spec_last = spec_data + npoints - 1;
  const WORD32 quarter_len = npoints >> 2;
  const WORD32 half_len = npoints >> 1;

  (*ixheaacd_pretwiddle_compute)(spec_data, spec_last, scratch,
                                 ptr_imdct_tables, quarter_len, expo);

  (*ixheaacd_imdct_using_fft)(ptr_imdct_tables, half_len, scratch, spec_data);

  return expo + 2;
}
1671 
ixheaacd_mdct_960(WORD32 * inp,WORD32 * scratch,WORD32 * mdct_scale,WORD32 mdct_flag,ia_aac_dec_imdct_tables_struct * imdct_tables_ptr)1672 VOID ixheaacd_mdct_960(WORD32 *inp, WORD32 *scratch, WORD32 *mdct_scale,
1673                        WORD32 mdct_flag,
1674                        ia_aac_dec_imdct_tables_struct *imdct_tables_ptr) {
1675   WORD32 expo, neg_expo = 0, k;
1676 
1677   WORD16 const_mltfac = 17476;
1678 
1679   expo = (*ixheaacd_calc_max_spectral_line)(inp, MDCT_LEN_960) - 1;
1680   ;
1681 
1682   memcpy(scratch, inp, sizeof(WORD32) * MDCT_LEN_960);
1683 
1684   neg_expo = 7 - expo;
1685 
1686   ixheaacd_pre_twiddle_960(inp, scratch, MDCT_LEN_960, imdct_tables_ptr->cosine_array_1920,
1687                            neg_expo);
1688 
1689   ixheaacd_fft_960(inp, scratch, imdct_tables_ptr);
1690 
1691   ixheaacd_post_twiddle_960(inp, scratch, imdct_tables_ptr->cosine_array_1920,
1692                             MDCT_LEN_960);
1693 
1694   if (0 == mdct_flag) {
1695     WORD32 *data = inp;
1696 
1697     for (k = MDCT_LEN_960 - 1; k >= 0; k -= 2) {
1698       *data = ixheaacd_mult32x16in32_shl(*data, const_mltfac);
1699       data++;
1700       *data = ixheaacd_mult32x16in32_shl(*data, const_mltfac);
1701       data++;
1702     }
1703   }
1704   *mdct_scale = neg_expo + 1 + 1 + 1;
1705 }
1706 
ixheaacd_mdct_480_ld(WORD32 * inp,WORD32 * scratch,WORD32 * mdct_scale,WORD32 mdct_flag,ia_aac_dec_imdct_tables_struct * imdct_tables_ptr,WORD32 object_type)1707 VOID ixheaacd_mdct_480_ld(WORD32 *inp, WORD32 *scratch, WORD32 *mdct_scale,
1708                           WORD32 mdct_flag,
1709                           ia_aac_dec_imdct_tables_struct *imdct_tables_ptr,
1710                           WORD32 object_type) {
1711   WORD32 expo, neg_expo = 0, k;
1712 
1713   WORD32 const_mltfac = 1145324612;
1714 
1715   expo = (*ixheaacd_calc_max_spectral_line)(inp, MDCT_LEN) - 1;
1716   ;
1717 
1718   memcpy(scratch, inp, sizeof(WORD32) * MDCT_LEN);
1719 
1720   neg_expo = 7 - expo;
1721 
1722   ixheaacd_pre_twiddle(inp, scratch, 480, imdct_tables_ptr->cosine_array_960,
1723                        neg_expo);
1724 
1725   ixheaacd_fft_480_ld(inp, scratch, imdct_tables_ptr);
1726 
1727   if (object_type == AOT_ER_AAC_LD) {
1728     ixheaacd_post_twiddle_ld(inp, scratch, imdct_tables_ptr->cosine_array_960,
1729                              480);
1730   } else if (object_type == AOT_ER_AAC_ELD) {
1731     ixheaacd_post_twiddle_eld(inp + (480), scratch,
1732                               imdct_tables_ptr->cosine_array_960, 480);
1733   }
1734 
1735   if (0 == mdct_flag) {
1736     WORD32 *data = inp;
1737 
1738     if (object_type != AOT_ER_AAC_ELD) {
1739       for (k = MDCT_LEN - 1; k >= 0; k -= 2) {
1740         *data = ixheaacd_mult32_shl(*data, const_mltfac);
1741         data++;
1742         *data = ixheaacd_mult32_shl(*data, const_mltfac);
1743         data++;
1744       }
1745       neg_expo += 1;
1746     } else {
1747       data = inp + 480;
1748       for (k = (MDCT_LEN << 1) - 1; k >= 0; k -= 2) {
1749         *data = ixheaacd_mult32_shl(*data, const_mltfac);
1750         data++;
1751         *data = ixheaacd_mult32_shl(*data, const_mltfac);
1752         data++;
1753       }
1754       neg_expo += 1;
1755     }
1756   }
1757 
1758   *mdct_scale = neg_expo + 3;
1759 }
1760 
ixheaacd_inverse_transform_512(WORD32 data[],WORD32 temp[],WORD32 * imdct_scale,WORD32 * cos_sin_ptr,ia_aac_dec_imdct_tables_struct * imdct_tables_ptr,WORD32 object_type)1761 VOID ixheaacd_inverse_transform_512(
1762     WORD32 data[], WORD32 temp[], WORD32 *imdct_scale, WORD32 *cos_sin_ptr,
1763     ia_aac_dec_imdct_tables_struct *imdct_tables_ptr, WORD32 object_type) {
1764   WORD32 n;
1765   WORD32 npoints_2;
1766   WORD16 expo, neg_expo;
1767 
1768   n = 512;
1769 
1770   npoints_2 = n >> 1;
1771 
1772   expo = (*ixheaacd_calc_max_spectral_line)(data, n) - 1;
1773 
1774   memcpy(temp, data, sizeof(WORD32) * n);
1775 
1776   neg_expo = 7 - expo;
1777 
1778   ixheaacd_pre_twiddle(data, temp, n, cos_sin_ptr, neg_expo);
1779 
1780   (*ixheaacd_fft32x32_ld)(imdct_tables_ptr, npoints_2, data, temp);
1781 
1782   neg_expo = (*ixheaacd_neg_expo_inc)(neg_expo);
1783 
1784   *imdct_scale = neg_expo + 1;
1785 
1786   if (object_type == AOT_ER_AAC_ELD)
1787     ixheaacd_post_twiddle_eld((data + n), temp, cos_sin_ptr, n);
1788   else
1789     ixheaacd_post_twiddle_ld((data), temp, cos_sin_ptr, n);
1790 }
1791 
ixheaacd_fft_960(WORD32 * inp,WORD32 * op,ia_aac_dec_imdct_tables_struct * imdct_tables_ptr)1792 VOID ixheaacd_fft_960(WORD32 *inp, WORD32 *op,
1793                       ia_aac_dec_imdct_tables_struct *imdct_tables_ptr) {
1794   WORD32 i;
1795   WORD32 *buf1, *buf2;
1796   WORD16 *re_arr_tab_sml_480_ptr;
1797 
1798   (*ixheaacd_aac_ld_dec_rearrange_960)(inp, op, 480,
1799                                        imdct_tables_ptr->re_arr_tab_32);
1800 
1801   buf1 = op;
1802   buf2 = inp;
1803 
1804   for (i = 0; i < FFT15; i++) {
1805     ixheaacd_fft_32_points(imdct_tables_ptr->w_32,
1806                            32, buf1, buf2);
1807 
1808     buf1 += (FFT16X2 * 2);
1809     buf2 += (FFT16X2 * 2);
1810   }
1811 
1812   re_arr_tab_sml_480_ptr = imdct_tables_ptr->re_arr_tab_sml_480;
1813   buf1 = inp;
1814 
1815   for (i = 0; i < FFT16 * 2; i++) {
1816     ixheaacd_ld_dec_fft_15_opt(buf1, op,
1817                                ixheaacd_fft5out, re_arr_tab_sml_480_ptr);
1818     buf1 += 2;
1819     re_arr_tab_sml_480_ptr += FFT15;
1820   }
1821 }
1822 
/*
 * ixheaacd_fft_32_points
 *
 * Fixed-point FFT on interleaved (real, imaginary) WORD32 data using
 * saturating adds/subtracts and the MPYLIRC multiply macro.
 *
 *   ptr_w    16-bit twiddle table; read as (si, co) pairs, three pairs per
 *            butterfly.
 *   npoints  number of complex points (the visible caller passes 32).
 *   ptr_x    input buffer; overwritten in place by the radix-4 passes.
 *   ptr_y    output buffer; written by the last pass at positions derived
 *            from rev_dig[].
 *
 * Structure: a while-loop of radix-4 passes that runs for stride == 32 and
 * stride == 8 (it stops once stride is no longer greater than radix == 2),
 * followed by one loop of add/subtract pairs that produces the output.
 *
 * NOTE(review): several locals are assigned but never read afterwards
 * (l1 = norm + 1, and the yt0_0 / yt2_0 reassignments after the stores);
 * whether they can be dropped depends on MPYLIRC having no side effects -
 * confirm against the macro definition.
 */
ixheaacd_fft_32_points(WORD16 * ptr_w,WORD32 npoints,WORD32 * ptr_x,WORD32 * ptr_y)1823 VOID ixheaacd_fft_32_points(WORD16 *ptr_w, WORD32 npoints,
1824                             WORD32* ptr_x, WORD32* ptr_y) {
1825   WORD32   i, j, l1, l2, h2, predj, tw_offset, stride, fft_jmp;
1826   WORD32   xt0_0, yt0_0, xt1_0, yt1_0, xt2_0, yt2_0;
1827   WORD32   xh0_0, xh1_0, xh20_0, xh21_0, xl0_0, xl1_0, xl20_0, xl21_0;
1828   WORD32   x_0, x_1, x_l1_0, x_l1_1, x_l2_0, x_l2_1;
1829   WORD32   x_h2_0, x_h2_1;
1830   WORD16 si10, si20, si30, co10, co20, co30;
1831   WORD16 *w;
1832   WORD32   *x, *x2, *x0;
1833   WORD32   *y0, *y1, *y2, *y3;
1834   WORD32   n0, j0;
1835   WORD32   radix;
1836   WORD32   norm;
1837 
1838   radix = 2;
1839   norm = 25;
1840 
1841   stride = 32;
1842   tw_offset = 0;
1843   fft_jmp = 192;
1844 
/*
 * Radix-4 passes. fft_jmp is divided by 4 on entry to each pass (192 -> 48 ->
 * 12); the twiddle base for a pass is ptr_w + tw_offset, and tw_offset grows
 * by the current fft_jmp afterwards.
 */
1845   while (stride > radix) {
1846     j = 0;
1847     fft_jmp >>= 2;
1848 
/* Word offsets of the other three butterfly legs relative to x. */
1849     h2 = stride >> 1;
1850     l1 = stride;
1851     l2 = stride + (stride >> 1);
1852 
1853     x = ptr_x;
1854     w = ptr_w + tw_offset;
1855     tw_offset += fft_jmp;
1856 
/* npoints / 4 butterflies per pass, each working in place on four legs. */
1857     for (i = 0; i < npoints; i += 4) {
1858       co10 = w[j + 1];            si10 = w[j + 0];
1859       co20 = w[j + 3];            si20 = w[j + 2];
1860       co30 = w[j + 5];            si30 = w[j + 4];
1861 
1862       x_0 = x[0];             x_1 = x[1];
1863       x_l1_0 = x[l1];         x_l1_1 = x[l1 + 1];
1864       x_l2_0 = x[l2];         x_l2_1 = x[l2 + 1];
1865       x_h2_0 = x[h2];         x_h2_1 = x[h2 + 1];
1866 
1867       xh0_0 = ixheaacd_add32_sat(x_0, x_l1_0);
1868       xh1_0 = ixheaacd_add32_sat(x_1, x_l1_1);
1869       xl0_0 = ixheaacd_sub32_sat(x_0, x_l1_0);
1870       xl1_0 = ixheaacd_sub32_sat(x_1, x_l1_1);
1871       xh20_0 = ixheaacd_add32_sat(x_h2_0, x_l2_0);
1872       xh21_0 = ixheaacd_add32_sat(x_h2_1, x_l2_1);
1873       xl20_0 = ixheaacd_sub32_sat(x_h2_0, x_l2_0);
1874       xl21_0 = ixheaacd_sub32_sat(x_h2_1, x_l2_1);
1875 
/* x0 / x2 keep the current butterfly's base address for the stores below,
 * because x itself is advanced before the results are written. */
1876       x0 = x;
1877       x2 = x0;
1878 
/* Advance to the next butterfly: six twiddle entries and one complex point.
 * predj is zero once j reaches fft_jmp; in that case x skips a further
 * fft_jmp words and the twiddle index restarts at 0. */
1879       j += 6;
1880       x += 2;
1881       predj = (j - fft_jmp);
1882       if (!predj) x += fft_jmp;
1883       if (!predj) j = 0;
1884 
1885       x0[0] = ixheaacd_add32_sat(xh0_0, xh20_0);
1886       x0[1] = ixheaacd_add32_sat(xh1_0, xh21_0);
1887       xt0_0 = ixheaacd_sub32_sat(xh0_0, xh20_0);
1888       yt0_0 = ixheaacd_sub32_sat(xh1_0, xh21_0);
1889       xt1_0 = ixheaacd_add32_sat(xl0_0, xl21_0);
1890       yt2_0 = ixheaacd_add32_sat(xl1_0, xl20_0);
1891       xt2_0 = ixheaacd_sub32_sat(xl0_0, xl21_0);
1892       yt1_0 = ixheaacd_sub32_sat(xl1_0, xl20_0);
1893 
1894       x2[h2] = ixheaacd_add32_sat(MPYLIRC(si10, yt1_0), MPYLIRC(co10, xt1_0));
1895 
1896       x2[h2 + 1] = ixheaacd_sub32_sat(MPYLIRC(co10, yt1_0), MPYLIRC(si10, xt1_0));
1897 
1898       x2[l1] = ixheaacd_add32_sat(MPYLIRC(si20, yt0_0), MPYLIRC(co20, xt0_0));
1899 
1900       x2[l1 + 1] = ixheaacd_sub32_sat(MPYLIRC(co20, yt0_0), MPYLIRC(si20, xt0_0));
1901       yt0_0 = MPYLIRC(si20, yt0_0);
1902 
1903       x2[l2] = ixheaacd_add32_sat(MPYLIRC(si30, yt2_0), MPYLIRC(co30, xt2_0));
1904 
1905       x2[l2 + 1] = ixheaacd_sub32_sat(MPYLIRC(co30, yt2_0), MPYLIRC(si30, xt2_0));
1906       yt2_0 = MPYLIRC(si30, yt2_0);
1907 
1908     }
1909     stride >>= 2;
1910   }
1911 
/*
 * Output pass. y0..y3 are four write bases inside ptr_y, at word offsets 0,
 * npoints >> 2, npoints and npoints + (npoints >> 2); x0 / x2 are two read
 * bases inside ptr_x, (npoints >> 1) words apart.
 */
1912   y0 = ptr_y;
1913   y2 = ptr_y + (int)npoints;
1914   x0 = ptr_x;
1915   x2 = ptr_x + (int)(npoints >> 1);
1916 
1917   y1 = y0 + (int)(npoints >> 2);
1918   y3 = y2 + (int)(npoints >> 2);
1919   l1 = norm + 1;
1920   j0 = 8;
1921   n0 = npoints >> 1;
1922 
1923   j = 0;
/* Four iterations; each reads 8 words from x0 and 8 from x2 and writes sums
 * to y0 / y1 and differences to y2 / y3 at index pair (t1, t2), first with
 * t1 = rev_dig[i] << 1 and then with t1 advanced by 2. */
1924   for (i = 0; i < 4; i++) {
1925     int t1, t2;
1926     h2 = rev_dig[i];
1927 
1928     t1 = h2 << 1;
1929     t2 = t1 + 1;
1930 
1931     y0[t1] = ixheaacd_add32_sat(x0[0], x0[2]);
1932     y2[t1] = ixheaacd_sub32_sat(x0[0], x0[2]);
1933     y0[t2] = ixheaacd_add32_sat(x0[1], x0[3]);
1934     y2[t2] = ixheaacd_sub32_sat(x0[1], x0[3]);
1935     y1[t1] = ixheaacd_add32_sat(x0[4], x0[6]);
1936     y3[t1] = ixheaacd_sub32_sat(x0[4], x0[6]);
1937     y1[t2] = ixheaacd_add32_sat(x0[5], x0[7]);
1938     y3[t2] = ixheaacd_sub32_sat(x0[5], x0[7]);
1939     x0 += 8;
1940 
1941     t1 += 2;
1942     t2 += 2;
1943 
1944     y0[t1] = ixheaacd_add32_sat(x2[0], x2[2]);
1945     y2[t1] = ixheaacd_sub32_sat(x2[0], x2[2]);
1946     y0[t2] = ixheaacd_add32_sat(x2[1], x2[3]);
1947     y2[t2] = ixheaacd_sub32_sat(x2[1], x2[3]);
1948     y1[t1] = ixheaacd_add32_sat(x2[4], x2[6]);
1949     y3[t1] = ixheaacd_sub32_sat(x2[4], x2[6]);
1950     y1[t2] = ixheaacd_add32_sat(x2[5], x2[7]);
1951     y3[t2] = ixheaacd_sub32_sat(x2[5], x2[7]);
1952     x2 += 8;
1953 
1954     j += j0;
1955 
/* Once j reaches npoints >> 1, both read pointers skip a further
 * npoints >> 1 words (the region the other pointer has been reading). */
1956     if (j == n0)
1957     {
1958       j += n0;
1959       x0 += (int)npoints >> 1;
1960       x2 += (int)npoints >> 1;
1961     }
1962   }
1963 }
1964 
ixheaacd_dec_rearrange_short(WORD32 * ip,WORD32 * op,WORD32 mdct_len_2,WORD16 * re_arr_tab)1965 VOID ixheaacd_dec_rearrange_short(WORD32 *ip, WORD32 *op, WORD32 mdct_len_2, WORD16 *re_arr_tab) {
1966   WORD32 n, i = 0;
1967 
1968   for (n = 0; n < mdct_len_2; n++) {
1969     WORD32 idx = re_arr_tab[n] << 1;
1970     op[i++] = ip[idx];
1971     op[i++] = ip[idx + 1];
1972   }
1973 }
1974 
ixheaacd_ld_dec_fft_15_opt(WORD32 * inp,WORD32 * op,WORD32 * fft3out,WORD16 * ptr_re_arr_tab_sml_240)1975 VOID ixheaacd_ld_dec_fft_15_opt(WORD32 *inp, WORD32 *op, WORD32 *fft3out,
1976                                 WORD16 *ptr_re_arr_tab_sml_240) {
1977   WORD32 i, n, idx;
1978   WORD32 *buf1, *buf2, *buf1a;
1979   WORD32 add_r, sub_r;
1980   WORD32 add_i, sub_i;
1981   WORD32 x_01_r, x_01_i, temp;
1982   WORD32 p1, p2, p3, p4;
1983 
1984   WORD32 sinmu = 1859775393;
1985   WORD32 c_51 = 2042378317;
1986   WORD32 c_52 = -1652318768;
1987   WORD32 c_53 = -780119100;
1988   WORD32 c_54 = 1200479854;
1989   WORD32 c_55 = -1342177280;
1990 
1991   WORD32 r1, r2, r3, r4;
1992   WORD32 s1, s2, s3, s4, t, temp1, temp2;
1993   WORD32 *fft3outptr = fft3out;
1994 
1995   WORD32 xr_0, xr_1, xr_2;
1996   WORD32 xi_0, xi_1, xi_2;
1997 
1998   buf2 = fft3out;
1999   buf1 = buf1a = fft3out;
2000   n = 0;
2001 
2002   {
2003     *buf1++ = inp[0];
2004     *buf1++ = inp[1];
2005 
2006     *buf1++ = inp[192];
2007     *buf1++ = inp[193];
2008 
2009     *buf1++ = inp[384];
2010     *buf1++ = inp[385];
2011 
2012     *buf1++ = inp[576];
2013     *buf1++ = inp[577];
2014 
2015     *buf1++ = inp[768];
2016     *buf1++ = inp[769];
2017 
2018     r1 = ixheaacd_add32_sat(buf1a[2], buf1a[8]);
2019     r4 = ixheaacd_sub32_sat(buf1a[2], buf1a[8]);
2020     r3 = ixheaacd_add32_sat(buf1a[4], buf1a[6]);
2021     r2 = ixheaacd_sub32_sat(buf1a[4], buf1a[6]);
2022 
2023     t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(r1, r3), c_54);
2024 
2025     r1 = ixheaacd_add32_sat(r1, r3);
2026 
2027     temp1 = ixheaacd_add32_sat(buf1a[0], r1);
2028 
2029     r1 = ixheaacd_add32_sat(temp1, (ixheaacd_mult32_shl(r1, c_55) << 1));
2030 
2031     r3 = ixheaacd_sub32_sat(r1, t);
2032     r1 = ixheaacd_add32_sat(r1, t);
2033 
2034     t = ixheaacd_mult32_shl(ixheaacd_add32_sat(r4, r2), c_51);
2035     r4 = ixheaacd_add32_sat(t, (ixheaacd_mult32_shl(r4, c_52) << 1));
2036     r2 = ixheaacd_add32_sat(t, ixheaacd_mult32_shl(r2, c_53));
2037 
2038     s1 = ixheaacd_add32_sat(buf1a[3], buf1a[9]);
2039     s4 = ixheaacd_sub32_sat(buf1a[3], buf1a[9]);
2040     s3 = ixheaacd_add32_sat(buf1a[5], buf1a[7]);
2041     s2 = ixheaacd_sub32_sat(buf1a[5], buf1a[7]);
2042 
2043     t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(s1, s3), c_54);
2044 
2045     s1 = ixheaacd_add32_sat(s1, s3);
2046 
2047     temp2 = ixheaacd_add32_sat(buf1a[1], s1);
2048 
2049 
2050     s1 = ixheaacd_add32_sat(temp2, (ixheaacd_mult32_shl(s1, c_55) << 1));
2051 
2052     s3 = ixheaacd_sub32_sat(s1, t);
2053     s1 = ixheaacd_add32_sat(s1, t);
2054 
2055     t = ixheaacd_mult32_shl(ixheaacd_add32_sat(s4, s2), c_51);
2056     s4 = ixheaacd_add32_sat(t, (ixheaacd_mult32_shl(s4, c_52) << 1));
2057     s2 = ixheaacd_add32_sat(t, (ixheaacd_mult32_shl(s2, c_53)));
2058 
2059     *buf2++ = temp1;
2060     *buf2++ = temp2;
2061     *buf2++ = ixheaacd_add32_sat(r1, s2);
2062     *buf2++ = ixheaacd_sub32_sat(s1, r2);
2063     *buf2++ = ixheaacd_sub32_sat(r3, s4);
2064     *buf2++ = ixheaacd_add32_sat(s3, r4);
2065     *buf2++ = ixheaacd_add32_sat(r3, s4);
2066     *buf2++ = ixheaacd_sub32_sat(s3, r4);
2067     *buf2++ = ixheaacd_sub32_sat(r1, s2);
2068     *buf2++ = ixheaacd_add32_sat(s1, r2);
2069     buf1a = buf1;
2070 
2071     *buf1++ = inp[320];
2072     *buf1++ = inp[321];
2073 
2074     *buf1++ = inp[512];
2075     *buf1++ = inp[513];
2076 
2077     *buf1++ = inp[704];
2078     *buf1++ = inp[705];
2079 
2080     *buf1++ = inp[896];
2081     *buf1++ = inp[897];
2082 
2083     *buf1++ = inp[128];
2084     *buf1++ = inp[129];
2085 
2086     r1 = ixheaacd_add32_sat(buf1a[2], buf1a[8]);
2087     r4 = ixheaacd_sub32_sat(buf1a[2], buf1a[8]);
2088     r3 = ixheaacd_add32_sat(buf1a[4], buf1a[6]);
2089     r2 = ixheaacd_sub32_sat(buf1a[4], buf1a[6]);
2090 
2091     t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(r1, r3), c_54);
2092 
2093     r1 = ixheaacd_add32_sat(r1, r3);
2094 
2095     temp1 = ixheaacd_add32_sat(buf1a[0], r1);
2096 
2097     r1 = ixheaacd_add32_sat(temp1, (ixheaacd_mult32_shl(r1, c_55) << 1));
2098 
2099     r3 = ixheaacd_sub32_sat(r1, t);
2100     r1 = ixheaacd_add32_sat(r1, t);
2101 
2102     t = ixheaacd_mult32_shl(ixheaacd_add32_sat(r4, r2), c_51);
2103     r4 = ixheaacd_add32_sat(t, (ixheaacd_mult32_shl(r4, c_52) << 1));
2104     r2 = ixheaacd_add32_sat(t, ixheaacd_mult32_shl(r2, c_53));
2105 
2106     s1 = ixheaacd_add32_sat(buf1a[3], buf1a[9]);
2107     s4 = ixheaacd_sub32_sat(buf1a[3], buf1a[9]);
2108     s3 = ixheaacd_add32_sat(buf1a[5], buf1a[7]);
2109     s2 = ixheaacd_sub32_sat(buf1a[5], buf1a[7]);
2110 
2111     t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(s1, s3), c_54);
2112 
2113     s1 = ixheaacd_add32_sat(s1, s3);
2114 
2115     temp2 = ixheaacd_add32_sat(buf1a[1], s1);
2116 
2117     s1 = ixheaacd_add32_sat(temp2, (ixheaacd_mult32_shl(s1, c_55) << 1));
2118 
2119     s3 = ixheaacd_sub32_sat(s1, t);
2120     s1 = ixheaacd_add32_sat(s1, t);
2121 
2122     t = ixheaacd_mult32_shl(ixheaacd_add32_sat(s4, s2), c_51);
2123     s4 = ixheaacd_add32_sat(t, (ixheaacd_mult32_shl(s4, c_52) << 1));
2124     s2 = ixheaacd_add32_sat(t, ixheaacd_mult32_shl(s2, c_53));
2125 
2126     *buf2++ = temp1;
2127     *buf2++ = temp2;
2128     *buf2++ = ixheaacd_add32_sat(r1, s2);
2129     *buf2++ = ixheaacd_sub32_sat(s1, r2);
2130     *buf2++ = ixheaacd_sub32_sat(r3, s4);
2131     *buf2++ = ixheaacd_add32_sat(s3, r4);
2132     *buf2++ = ixheaacd_add32_sat(r3, s4);
2133     *buf2++ = ixheaacd_sub32_sat(s3, r4);
2134     *buf2++ = ixheaacd_sub32_sat(r1, s2);
2135     *buf2++ = ixheaacd_add32_sat(s1, r2);
2136     buf1a = buf1;
2137 
2138     *buf1++ = inp[640];
2139     *buf1++ = inp[641];
2140 
2141     *buf1++ = inp[832];
2142     *buf1++ = inp[833];
2143 
2144     *buf1++ = inp[64];
2145     *buf1++ = inp[65];
2146 
2147     *buf1++ = inp[256];
2148     *buf1++ = inp[257];
2149 
2150     *buf1++ = inp[448];
2151     *buf1++ = inp[449];
2152 
2153     r1 = ixheaacd_add32_sat(buf1a[2], buf1a[8]);
2154     r4 = ixheaacd_sub32_sat(buf1a[2], buf1a[8]);
2155     r3 = ixheaacd_add32_sat(buf1a[4], buf1a[6]);
2156     r2 = ixheaacd_sub32_sat(buf1a[4], buf1a[6]);
2157 
2158     t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(r1, r3), c_54);
2159 
2160     r1 = ixheaacd_add32_sat(r1, r3);
2161 
2162     temp1 = ixheaacd_add32_sat(buf1a[0], r1);
2163 
2164     r1 = ixheaacd_add32_sat(temp1, (ixheaacd_mult32_shl(r1, c_55) << 1));
2165 
2166     r3 = ixheaacd_sub32_sat(r1, t);
2167     r1 = ixheaacd_add32_sat(r1, t);
2168 
2169     t = ixheaacd_mult32_shl(ixheaacd_add32_sat(r4, r2), c_51);
2170     r4 = ixheaacd_add32_sat(t, ixheaacd_mult32_shl(r4, c_52) << 1);
2171     r2 = ixheaacd_add32_sat(t, ixheaacd_mult32_shl(r2, c_53));
2172 
2173     s1 = ixheaacd_add32_sat(buf1a[3], buf1a[9]);
2174     s4 = ixheaacd_sub32_sat(buf1a[3], buf1a[9]);
2175     s3 = ixheaacd_add32_sat(buf1a[5], buf1a[7]);
2176     s2 = ixheaacd_sub32_sat(buf1a[5], buf1a[7]);
2177 
2178     t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(s1, s3), c_54);
2179 
2180     s1 = ixheaacd_add32_sat(s1, s3);
2181 
2182     temp2 = ixheaacd_add32_sat(buf1a[1], s1);
2183 
2184     s1 = ixheaacd_add32_sat(temp2, (ixheaacd_mult32_shl(s1, c_55) << 1));
2185 
2186     s3 = ixheaacd_sub32_sat(s1, t);
2187     s1 = ixheaacd_add32_sat(s1, t);
2188 
2189     t = ixheaacd_mult32_shl(ixheaacd_add32_sat(s4, s2), c_51);
2190     s4 = ixheaacd_add32_sat(t, (ixheaacd_mult32_shl(s4, c_52) << 1));
2191     s2 = ixheaacd_add32_sat(t, ixheaacd_mult32_shl(s2, c_53));
2192 
2193     *buf2++ = temp1;
2194     *buf2++ = temp2;
2195     *buf2++ = ixheaacd_add32_sat(r1, s2);
2196     *buf2++ = ixheaacd_sub32_sat(s1, r2);
2197     *buf2++ = ixheaacd_sub32_sat(r3, s4);
2198     *buf2++ = ixheaacd_add32_sat(s3, r4);
2199     *buf2++ = ixheaacd_add32_sat(r3, s4);
2200     *buf2++ = ixheaacd_sub32_sat(s3, r4);
2201     *buf2++ = ixheaacd_sub32_sat(r1, s2);
2202     *buf2++ = ixheaacd_add32_sat(s1, r2);
2203     buf1a = buf1;
2204   }
2205 
2206   n = 0;
2207   for (i = 0; i < FFT5; i++) {
2208     xr_0 = fft3outptr[0];
2209     xi_0 = fft3outptr[1];
2210 
2211     xr_1 = fft3outptr[10];
2212     xi_1 = fft3outptr[11];
2213 
2214     xr_2 = fft3outptr[20];
2215     xi_2 = fft3outptr[21];
2216 
2217     x_01_r = ixheaacd_add32_sat(xr_0, xr_1);
2218     x_01_i = ixheaacd_add32_sat(xi_0, xi_1);
2219 
2220     add_r = ixheaacd_add32_sat(xr_1, xr_2);
2221     add_i = ixheaacd_add32_sat(xi_1, xi_2);
2222 
2223     sub_r = ixheaacd_sub32_sat(xr_1, xr_2);
2224     sub_i = ixheaacd_sub32_sat(xi_1, xi_2);
2225 
2226     p1 = add_r >> 1;
2227 
2228     p2 = ixheaacd_mult32_shl(sub_i, sinmu);
2229     p3 = ixheaacd_mult32_shl(sub_r, sinmu);
2230 
2231     p4 = add_i >> 1;
2232 
2233     temp = ixheaacd_sub32_sat(xr_0, p1);
2234     temp1 = ixheaacd_add32_sat(xi_0, p3);
2235     temp2 = ixheaacd_sub32_sat(xi_0, p3);
2236 
2237     idx = ptr_re_arr_tab_sml_240[n++] << 1;
2238     op[idx] = ixheaacd_add32_sat(x_01_r, xr_2);
2239     op[idx + 1] = ixheaacd_add32_sat(x_01_i, xi_2);
2240 
2241     idx = ptr_re_arr_tab_sml_240[n++] << 1;
2242     op[idx] = ixheaacd_add32_sat(temp, p2);
2243     op[idx + 1] = ixheaacd_sub32_sat(temp2, p4);
2244 
2245     idx = ptr_re_arr_tab_sml_240[n++] << 1;
2246     op[idx] = ixheaacd_sub32_sat(temp, p2);
2247     op[idx + 1] = ixheaacd_sub32_sat(temp1, p4);
2248     fft3outptr += 2;
2249   }
2250   return;
2251 }
2252 
ixheaacd_fft_120(ia_aac_dec_imdct_tables_struct * imdct_tables_ptr,WORD32 npoints,WORD32 * ptr_x,WORD32 * ptr_y)2253 VOID ixheaacd_fft_120(ia_aac_dec_imdct_tables_struct *imdct_tables_ptr,
2254                       WORD32 npoints, WORD32* ptr_x, WORD32* ptr_y) {
2255   WORD32 i;
2256   WORD32 *buf1, *buf2;
2257   WORD32 *inp, *op;
2258 
2259   inp = ptr_x;
2260   op = ptr_y;
2261 
2262   ixheaacd_dec_rearrange_short(inp, op, 60, imdct_tables_ptr->re_arr_tab_4);
2263   buf1 = op;
2264   buf2 = inp;
2265 
2266   for (i = 0; i < FFT15; i++) {
2267     {
2268       WORD32   x_0, x_1, x_2, x_3, x_4, x_5, x_6, x_7;
2269       WORD32   *y0, *y1, *y2, *y3;
2270       WORD32   *x0;
2271       WORD32   xh0_0, xh1_0, xh0_1, xh1_1, xl0_0, xl1_0, xl0_1, xl1_1;
2272       WORD32   h2;
2273       WORD32   n00, n01, n10, n11, n20, n21, n30, n31;
2274 
2275       ptr_x = buf1;
2276       ptr_y = buf2;
2277       npoints = 4;
2278       h2 = 0;
2279 
2280       y0 = ptr_y;
2281       y2 = ptr_y + (WORD32)npoints;
2282       x0 = ptr_x;
2283 
2284       y1 = y0 + (WORD32)(npoints >> 1);
2285       y3 = y2 + (WORD32)(npoints >> 1);
2286 
2287       x_0 = x0[0];         x_1 = x0[1];
2288       x_2 = x0[2];         x_3 = x0[3];
2289       x_4 = x0[4];         x_5 = x0[5];
2290       x_6 = x0[6];         x_7 = x0[7];
2291       x0 += 8;
2292 
2293       xh0_0 = ixheaacd_add32_sat(x_0, x_4);
2294       xh1_0 = ixheaacd_add32_sat(x_1, x_5);
2295       xl0_0 = ixheaacd_sub32_sat(x_0, x_4);
2296       xl1_0 = ixheaacd_sub32_sat(x_1, x_5);
2297       xh0_1 = ixheaacd_add32_sat(x_2, x_6);
2298       xh1_1 = ixheaacd_add32_sat(x_3, x_7);
2299       xl0_1 = ixheaacd_sub32_sat(x_2, x_6);
2300       xl1_1 = ixheaacd_sub32_sat(x_3, x_7);
2301 
2302       n00 = ixheaacd_add32_sat(xh0_0, xh0_1);
2303       n01 = ixheaacd_add32_sat(xh1_0, xh1_1);
2304       n10 = ixheaacd_add32_sat(xl0_0, xl1_1);
2305       n11 = ixheaacd_sub32_sat(xl1_0, xl0_1);
2306       n20 = ixheaacd_sub32_sat(xh0_0, xh0_1);
2307       n21 = ixheaacd_sub32_sat(xh1_0, xh1_1);
2308       n30 = ixheaacd_sub32_sat(xl0_0, xl1_1);
2309       n31 = ixheaacd_add32_sat(xl1_0, xl0_1);
2310 
2311       y0[2 * h2] = n00;            y0[2 * h2 + 1] = n01;
2312       y1[2 * h2] = n10;            y1[2 * h2 + 1] = n11;
2313       y2[2 * h2] = n20;            y2[2 * h2 + 1] = n21;
2314       y3[2 * h2] = n30;            y3[2 * h2 + 1] = n31;
2315     }
2316     buf1 += (FFT4 * 2);
2317     buf2 += (FFT4 * 2);
2318   }
2319 
2320   ixheaacd_dec_rearrange_short(inp, op, 60, imdct_tables_ptr->re_arr_tab_15_4);
2321 
2322   buf1 = op;
2323   buf2 = inp;
2324 
2325   for (i = 0; i < FFT4; i++) {
2326     ixheaacd_fft_960_15(buf1, buf2, imdct_tables_ptr);
2327     buf1 += (FFT15 * 2);
2328     buf2 += (FFT15 * 2);
2329   }
2330 
2331   ixheaacd_dec_rearrange_short(inp, op, 60, imdct_tables_ptr->re_arr_tab_120);
2332 }
2333 
ixheaacd_fft_960_15(WORD32 * inp,WORD32 * op,ia_aac_dec_imdct_tables_struct * imdct_tables_ptr)2334 VOID ixheaacd_fft_960_15(WORD32 *inp, WORD32 *op,
2335                          ia_aac_dec_imdct_tables_struct *imdct_tables_ptr) {
2336   WORD32 i;
2337   WORD32 *buf1, *buf2;
2338 
2339   ixheaacd_dec_rearrange_short(inp, op, FFT15, imdct_tables_ptr->re_arr_tab_5);
2340 
2341   buf1 = op;
2342   buf2 = inp;
2343 
2344   for (i = 0; i < FFT3; i++) {
2345     ixheaacd_fft_5(buf1, buf2);
2346 
2347     buf1 += (FFT5 * 2);
2348     buf2 += (FFT5 * 2);
2349   }
2350 
2351   ixheaacd_dec_rearrange_short(inp, op, FFT15, imdct_tables_ptr->re_arr_tab_3);
2352 
2353   buf1 = op;
2354   buf2 = inp;
2355 
2356   for (i = 0; i < FFT5; i++) {
2357     ixheaacd_fft_3(buf1, buf2);
2358 
2359     buf1 += (FFT3 * 2);
2360     buf2 += (FFT3 * 2);
2361   }
2362 
2363   ixheaacd_dec_rearrange_short(inp, op, FFT15, imdct_tables_ptr->re_arr_tab_sml);
2364 }
2365 
ixheaacd_fft_3(WORD32 * inp,WORD32 * op)2366 VOID ixheaacd_fft_3(WORD32 *inp, WORD32 *op) {
2367   WORD32 add_r, sub_r;
2368   WORD32 add_i, sub_i;
2369   WORD32 x_01_r, x_01_i, temp;
2370 
2371   WORD32 p1, p2, p3, p4;
2372   WORD32 sinmu = 1859775393;
2373 
2374   x_01_r = ixheaacd_add32_sat(inp[0], inp[2]);
2375   x_01_i = ixheaacd_add32_sat(inp[1], inp[3]);
2376 
2377   add_r = ixheaacd_add32_sat(inp[2], inp[4]);
2378   add_i = ixheaacd_add32_sat(inp[3], inp[5]);
2379 
2380   sub_r = ixheaacd_sub32_sat(inp[2], inp[4]);
2381   sub_i = ixheaacd_sub32_sat(inp[3], inp[5]);
2382 
2383   p1 = add_r >> 1;
2384   p2 = ixheaacd_mult32_shl(sub_i, sinmu);
2385   p3 = ixheaacd_mult32_shl(sub_r, sinmu);
2386   p4 = add_i >> 1;
2387 
2388   temp = ixheaacd_sub32_sat(inp[0], p1);
2389 
2390   op[0] = ixheaacd_add32_sat(x_01_r, inp[4]);
2391   op[1] = ixheaacd_add32_sat(x_01_i, inp[5]);
2392   op[2] = ixheaacd_add32_sat(temp, p2);
2393   op[3] = ixheaacd_sub32_sat(ixheaacd_sub32_sat(inp[1], p3), p4);
2394   op[4] = ixheaacd_sub32_sat(temp, p2);
2395   op[5] = ixheaacd_sub32_sat(ixheaacd_add32_sat(inp[1], p3), p4);
2396 }
2397 
ixheaacd_fft_5(WORD32 * inp,WORD32 * op)2398 VOID ixheaacd_fft_5(WORD32 *inp, WORD32 *op) {
2399   WORD32 c_51 = 2042378317;
2400   WORD32 c_52 = -1652318768;
2401   WORD32 c_53 = -780119100;
2402   WORD32 c_54 = 1200479854;
2403   WORD32 c_55 = -1342177280;
2404 
2405   WORD32 r1, r2, r3, r4;
2406   WORD32 s1, s2, s3, s4, t, temp1, temp2;
2407 
2408   r1 = ixheaacd_add32_sat(inp[2], inp[8]);
2409   r4 = ixheaacd_sub32_sat(inp[2], inp[8]);
2410   r3 = ixheaacd_add32_sat(inp[4], inp[6]);
2411   r2 = ixheaacd_sub32_sat(inp[4], inp[6]);
2412 
2413   t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(r1, r3), c_54);
2414   r1 = ixheaacd_add32_sat(r1, r3);
2415 
2416   temp1 = ixheaacd_add32_sat(inp[0], r1);
2417   r1 = ixheaacd_add32_sat(
2418       temp1, ixheaacd_shl32_sat((ixheaacd_mult32_shl(r1, c_55)), 1));
2419 
2420   r3 = ixheaacd_sub32_sat(r1, t);
2421   r1 = ixheaacd_add32_sat(r1, t);
2422 
2423   t = ixheaacd_mult32_shl(ixheaacd_add32_sat(r4, r2), c_51);
2424   r4 = ixheaacd_add32_sat(
2425       t, ixheaacd_shl32_sat(ixheaacd_mult32_shl(r4, c_52), 1));
2426   r2 = ixheaacd_add32_sat(t, ixheaacd_mult32_shl(r2, c_53));
2427 
2428   s1 = ixheaacd_add32_sat(inp[3], inp[9]);
2429   s4 = ixheaacd_sub32_sat(inp[3], inp[9]);
2430   s3 = ixheaacd_add32_sat(inp[5], inp[7]);
2431   s2 = ixheaacd_sub32_sat(inp[5], inp[7]);
2432 
2433   t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(s1, s3), c_54);
2434   s1 = ixheaacd_add32_sat(s1, s3);
2435 
2436   temp2 = ixheaacd_add32_sat(inp[1], s1);
2437 
2438   s1 = ixheaacd_add32_sat(
2439       temp2, ixheaacd_shl32_sat((ixheaacd_mult32_shl(s1, c_55)), 1));
2440 
2441   s3 = ixheaacd_sub32_sat(s1, t);
2442   s1 = ixheaacd_add32_sat(s1, t);
2443 
2444   t = ixheaacd_mult32_shl(ixheaacd_add32_sat(s4, s2), c_51);
2445   s4 = ixheaacd_add32_sat(
2446       t, ixheaacd_shl32_sat((ixheaacd_mult32_shl(s4, c_52)), 1));
2447   s2 = ixheaacd_add32_sat(t, (ixheaacd_mult32_shl(s2, c_53)));
2448 
2449   op[0] = temp1;
2450   op[1] = temp2;
2451   op[2] = ixheaacd_add32_sat(r1, s2);
2452   op[3] = ixheaacd_sub32_sat(s1, r2);
2453   op[4] = ixheaacd_sub32_sat(r3, s4);
2454   op[5] = ixheaacd_add32_sat(s3, r4);
2455   op[6] = ixheaacd_add32_sat(r3, s4);
2456   op[7] = ixheaacd_sub32_sat(s3, r4);
2457   op[8] = ixheaacd_sub32_sat(r1, s2);
2458   op[9] = ixheaacd_add32_sat(s1, r2);
2459 }
2460 
ixheaacd_fft_480_ld(WORD32 * inp,WORD32 * op,ia_aac_dec_imdct_tables_struct * imdct_tables_ptr)2461 VOID ixheaacd_fft_480_ld(WORD32 *inp, WORD32 *op,
2462                          ia_aac_dec_imdct_tables_struct *imdct_tables_ptr) {
2463   WORD32 i;
2464   WORD32 *buf1, *buf2;
2465   UWORD8 *re_arr_tab_sml_240_ptr;
2466 
2467   (*ixheaacd_aac_ld_dec_rearrange)(inp, op, MDCT_LEN_BY2,
2468                                    imdct_tables_ptr->re_arr_tab_16);
2469 
2470   buf1 = op;
2471   buf2 = inp;
2472 
2473   for (i = 0; i < FFT15; i++) {
2474     (*ixheaacd_fft32x32_ld2)(imdct_tables_ptr, 16, buf1, buf2);
2475 
2476     buf1 += (FFT16X2);
2477     buf2 += (FFT16X2);
2478   }
2479   re_arr_tab_sml_240_ptr = imdct_tables_ptr->re_arr_tab_sml_240;
2480   buf1 = inp;
2481 
2482   for (i = 0; i < FFT16; i++) {
2483     (*ixheaacd_fft_15_ld)(buf1, op, ixheaacd_fft5out, re_arr_tab_sml_240_ptr);
2484     re_arr_tab_sml_240_ptr += FFT15;
2485     buf1 += 2;
2486   }
2487 }
2488 
ixheaacd_pre_twiddle_960(WORD32 * xptr,WORD32 * data,WORD32 n,WORD32 * cos_sin_ptr,WORD32 neg_expo)2489 VOID ixheaacd_pre_twiddle_960(WORD32 *xptr, WORD32 *data, WORD32 n,
2490                               WORD32 *cos_sin_ptr, WORD32 neg_expo) {
2491   WORD npoints_4, i;
2492   WORD32 tempr, tempi, temp;
2493   WORD32 c, c1, s, s1;
2494   WORD32 *in_ptr1, *in_ptr2;
2495   WORD32 *xprt1 = xptr + (n - 1);
2496 
2497   npoints_4 = n >> 2;
2498 
2499   in_ptr1 = data;
2500   in_ptr2 = data + n - 1;
2501 
2502   for (i = 0; i < npoints_4; i++) {
2503     c = *cos_sin_ptr++;
2504     s = *cos_sin_ptr++;
2505 
2506     tempr = *in_ptr1++;
2507     tempi = *in_ptr2--;
2508 
2509     temp = -ixheaacd_add32(ixheaacd_mult32x32in32(tempr, c),
2510                            ixheaacd_mult32x32in32(tempi, s));
2511     *xptr++ = ixheaacd_shr32_dir_sat(temp, neg_expo);
2512 
2513     temp = -ixheaacd_sub32(ixheaacd_mult32x32in32(tempi, c),
2514                            ixheaacd_mult32x32in32(tempr, s));
2515     *xptr++ = ixheaacd_shr32_dir_sat(temp, neg_expo);
2516 
2517     c1 = *cos_sin_ptr++;
2518     s1 = *cos_sin_ptr++;
2519 
2520     tempi = *in_ptr1++;
2521     tempr = *in_ptr2--;
2522 
2523     temp = -ixheaacd_sub32(ixheaacd_mult32x32in32(tempi, c1),
2524                            ixheaacd_mult32x32in32(tempr, s1));
2525     *xprt1-- = ixheaacd_shr32_dir_sat(temp, neg_expo);
2526 
2527     temp = -ixheaacd_add32(ixheaacd_mult32x32in32(tempr, c1),
2528                            ixheaacd_mult32x32in32(tempi, s1));
2529     *xprt1-- = ixheaacd_shr32_dir_sat(temp, neg_expo);
2530   }
2531 }
2532 
ixheaacd_pre_twiddle_120(WORD32 * xptr,WORD32 * data,WORD32 n,WORD16 * cos_sin_ptr,WORD32 neg_expo)2533 VOID ixheaacd_pre_twiddle_120(WORD32 *xptr, WORD32 *data, WORD32 n,
2534                               WORD16 *cos_sin_ptr, WORD32 neg_expo) {
2535   WORD npoints_4, i;
2536   WORD32 tempr, tempi, temp;
2537   WORD16 c, c1, s, s1;
2538   WORD32 *in_ptr1, *in_ptr2;
2539   WORD32 *xprt1 = xptr + (n - 1);
2540 
2541   npoints_4 = n >> 2;
2542 
2543   in_ptr1 = data;
2544   in_ptr2 = data + n - 1;
2545 
2546   for (i = 0; i < npoints_4; i++) {
2547     c = *cos_sin_ptr++;
2548     s = *cos_sin_ptr++;
2549 
2550     tempr = *in_ptr1++;
2551     tempi = *in_ptr2--;
2552 
2553     temp = -ixheaacd_add32(ixheaacd_mult32x16in32(tempr, c),
2554                            ixheaacd_mult32x16in32(tempi, s));
2555     *xptr++ = ixheaacd_shr32_dir_sat(temp, neg_expo);
2556 
2557     temp = -ixheaacd_sub32(ixheaacd_mult32x16in32(tempi, c),
2558                            ixheaacd_mult32x16in32(tempr, s));
2559     *xptr++ = ixheaacd_shr32_dir_sat(temp, neg_expo);
2560 
2561     c1 = *cos_sin_ptr++;
2562     s1 = *cos_sin_ptr++;
2563 
2564     tempi = *in_ptr1++;
2565     tempr = *in_ptr2--;
2566 
2567     temp = -ixheaacd_sub32(ixheaacd_mult32x16in32(tempi, c1),
2568                            ixheaacd_mult32x16in32(tempr, s1));
2569     *xprt1-- = ixheaacd_shr32_dir_sat(temp, neg_expo);
2570 
2571     temp = -ixheaacd_add32(ixheaacd_mult32x16in32(tempr, c1),
2572                            ixheaacd_mult32x16in32(tempi, s1));
2573     *xprt1-- = ixheaacd_shr32_dir_sat(temp, neg_expo);
2574   }
2575 }
2576 
ixheaacd_pre_twiddle(WORD32 * xptr,WORD32 * data,WORD32 n,WORD32 * cos_sin_ptr,WORD32 neg_expo)2577 VOID ixheaacd_pre_twiddle(WORD32 *xptr, WORD32 *data, WORD32 n,
2578                           WORD32 *cos_sin_ptr, WORD32 neg_expo) {
2579   WORD npoints_4, i;
2580   WORD32 tempr, tempi, temp;
2581   WORD32 c, c1, s, s1;
2582   WORD32 *in_ptr1, *in_ptr2;
2583 
2584   npoints_4 = n >> 2;
2585 
2586   in_ptr1 = data;
2587   in_ptr2 = data + n - 1;
2588 
2589   if (neg_expo >= 0) {
2590     for (i = npoints_4 - 1; i >= 0; i--) {
2591       c = *cos_sin_ptr++;
2592       c1 = *cos_sin_ptr++;
2593       s = *cos_sin_ptr++;
2594       s1 = *cos_sin_ptr++;
2595 
2596       tempr = *in_ptr1;
2597       tempi = *in_ptr2;
2598 
2599       in_ptr1 += 2;
2600       in_ptr2 -= 2;
2601 
2602       temp =
2603           -ixheaacd_add32(ixheaacd_mult32(tempr, c), ixheaacd_mult32(tempi, s));
2604       *xptr++ = ixheaacd_shr32(temp, neg_expo);
2605 
2606       temp =
2607           ixheaacd_sub32(ixheaacd_mult32(tempr, s), ixheaacd_mult32(tempi, c));
2608       *xptr++ = ixheaacd_shr32(temp, neg_expo);
2609 
2610       tempr = *in_ptr1;
2611       tempi = *in_ptr2;
2612 
2613       in_ptr1 += 2;
2614       in_ptr2 -= 2;
2615 
2616       temp = -ixheaacd_add32(ixheaacd_mult32(tempr, c1),
2617                              ixheaacd_mult32(tempi, s1));
2618       *xptr++ = ixheaacd_shr32(temp, neg_expo);
2619 
2620       temp = ixheaacd_sub32(ixheaacd_mult32(tempr, s1),
2621                             ixheaacd_mult32(tempi, c1));
2622       *xptr++ = ixheaacd_shr32(temp, neg_expo);
2623     }
2624   } else {
2625     neg_expo = -neg_expo;
2626 
2627     for (i = npoints_4 - 1; i >= 0; i--) {
2628       c = *cos_sin_ptr++;
2629       c1 = *cos_sin_ptr++;
2630       s = *cos_sin_ptr++;
2631       s1 = *cos_sin_ptr++;
2632 
2633       tempr = *in_ptr1;
2634       tempi = *in_ptr2;
2635 
2636       in_ptr1 += 2;
2637       in_ptr2 -= 2;
2638 
2639       temp =
2640           -ixheaacd_add32(ixheaacd_mult32(tempr, c), ixheaacd_mult32(tempi, s));
2641       *xptr++ = ixheaacd_shl32(temp, neg_expo);
2642 
2643       temp =
2644           ixheaacd_sub32(ixheaacd_mult32(tempr, s), ixheaacd_mult32(tempi, c));
2645       *xptr++ = ixheaacd_shl32(temp, neg_expo);
2646 
2647       tempr = *in_ptr1;
2648       tempi = *in_ptr2;
2649 
2650       in_ptr1 += 2;
2651       in_ptr2 -= 2;
2652 
2653       temp = -ixheaacd_add32(ixheaacd_mult32(tempr, c1),
2654                              ixheaacd_mult32(tempi, s1));
2655       *xptr++ = ixheaacd_shl32(temp, neg_expo);
2656 
2657       temp = ixheaacd_sub32(ixheaacd_mult32(tempr, s1),
2658                             ixheaacd_mult32(tempi, c1));
2659       *xptr++ = ixheaacd_shl32(temp, neg_expo);
2660     }
2661   }
2662 }
2663 
ixheaacd_post_twiddle_120(WORD32 out[],WORD32 x[],const WORD16 * cos_sin_ptr,WORD m)2664 VOID ixheaacd_post_twiddle_120(WORD32 out[], WORD32 x[],
2665                                const WORD16 *cos_sin_ptr, WORD m) {
2666   WORD i;
2667   WORD16 c, c1, s, s1;
2668   WORD32 tempr, tempi, temp;
2669   WORD32 *in_ptr2 = x + (m - 1);
2670   WORD32 *in_ptr1 = x;
2671   WORD32 *xptr = out;
2672   WORD32 *xptr1 = out + (m - 1);
2673 
2674   for (i = 0; i < m; i += 4) {
2675     c = *cos_sin_ptr++;
2676     s = *cos_sin_ptr++;
2677     c1 = *cos_sin_ptr++;
2678     s1 = *cos_sin_ptr++;
2679 
2680     tempr = *in_ptr1++;
2681     tempi = *in_ptr1++;
2682 
2683     temp = -ixheaacd_sub32_sat(ixheaacd_mult32x16in32(tempr, s),
2684                                ixheaacd_mult32x16in32(tempi, c));
2685     *xptr1-- = temp;
2686 
2687     temp = -ixheaacd_add32_sat(ixheaacd_mult32x16in32(tempr, c),
2688                                ixheaacd_mult32x16in32(tempi, s));
2689     *xptr++ = temp;
2690 
2691     tempi = *in_ptr2--;
2692     tempr = *in_ptr2--;
2693 
2694     temp = -ixheaacd_sub32_sat(ixheaacd_mult32x16in32(tempr, s1),
2695                                ixheaacd_mult32x16in32(tempi, c1));
2696     *xptr++ = temp;
2697 
2698     temp = -ixheaacd_add32_sat(ixheaacd_mult32x16in32(tempr, c1),
2699                                ixheaacd_mult32x16in32(tempi, s1));
2700     *xptr1-- = temp;
2701   }
2702 }
2703 
ixheaacd_post_twiddle_960(WORD32 out[],WORD32 x[],const WORD32 * cos_sin_ptr,WORD m)2704 VOID ixheaacd_post_twiddle_960(WORD32 out[], WORD32 x[],
2705                                const WORD32 *cos_sin_ptr, WORD m) {
2706   WORD i;
2707   WORD32 c, c1, s, s1;
2708   WORD32 tempr, tempi, temp;
2709   WORD32 *in_ptr2 = x + (m - 1);
2710   WORD32 *in_ptr1 = x;
2711   WORD32 *xptr = out;
2712   WORD32 *xptr1 = out + (m - 1);
2713 
2714   for (i = 0; i < m; i += 4) {
2715     c = *cos_sin_ptr++;
2716     s = *cos_sin_ptr++;
2717     c1 = *cos_sin_ptr++;
2718     s1 = *cos_sin_ptr++;
2719 
2720     tempr = *in_ptr1++;
2721     tempi = *in_ptr1++;
2722 
2723     temp = -ixheaacd_sub32_sat(ixheaacd_mult32x32in32(tempr, s),
2724                                ixheaacd_mult32x32in32(tempi, c));
2725     *xptr1-- = temp;
2726 
2727     temp = -ixheaacd_add32_sat(ixheaacd_mult32x32in32(tempr, c),
2728                                ixheaacd_mult32x32in32(tempi, s));
2729     *xptr++ = temp;
2730 
2731     tempi = *in_ptr2--;
2732     tempr = *in_ptr2--;
2733 
2734     temp = -ixheaacd_sub32_sat(ixheaacd_mult32x32in32(tempr, s1),
2735                                ixheaacd_mult32x32in32(tempi, c1));
2736     *xptr++ = temp;
2737 
2738     temp = -ixheaacd_add32_sat(ixheaacd_mult32x32in32(tempr, c1),
2739                                ixheaacd_mult32x32in32(tempi, s1));
2740     *xptr1-- = temp;
2741   }
2742 }
2743 
ixheaacd_post_twiddle_ld(WORD32 out[],WORD32 x[],const WORD32 * cos_sin_ptr,WORD m)2744 VOID ixheaacd_post_twiddle_ld(WORD32 out[], WORD32 x[],
2745                               const WORD32 *cos_sin_ptr, WORD m) {
2746   WORD i;
2747 
2748   WORD32 *ptr_x = &x[0];
2749   WORD32 *ptr_out, *ptr_out1;
2750 
2751   ptr_out = &out[0];
2752   ptr_out1 = &out[m - 1];
2753 
2754   for (i = (m >> 2) - 1; i >= 0; i--) {
2755     WORD32 c, c1, s, s1;
2756     WORD32 re, im;
2757 
2758     c = *cos_sin_ptr++;
2759     c1 = *cos_sin_ptr++;
2760     s = *cos_sin_ptr++;
2761     s1 = *cos_sin_ptr++;
2762 
2763     re = *ptr_x++;
2764     im = *ptr_x++;
2765 
2766     *ptr_out1 = ixheaacd_sub32(ixheaacd_mult32(im, c), ixheaacd_mult32(re, s));
2767 
2768     *ptr_out = -ixheaacd_add32(ixheaacd_mult32(re, c), ixheaacd_mult32(im, s));
2769 
2770     ptr_out += 2;
2771     ptr_out1 -= 2;
2772 
2773     re = *ptr_x++;
2774     im = *ptr_x++;
2775 
2776     *ptr_out1 =
2777         ixheaacd_sub32(ixheaacd_mult32(im, c1), ixheaacd_mult32(re, s1));
2778     *ptr_out =
2779         -ixheaacd_add32(ixheaacd_mult32(re, c1), ixheaacd_mult32(im, s1));
2780 
2781     ptr_out += 2;
2782     ptr_out1 -= 2;
2783   }
2784 }
2785 
ixheaacd_post_twiddle_eld(WORD32 out[],WORD32 x[],const WORD32 * cos_sin_ptr,WORD m)2786 VOID ixheaacd_post_twiddle_eld(WORD32 out[], WORD32 x[],
2787                                const WORD32 *cos_sin_ptr, WORD m) {
2788   WORD i = 0;
2789 
2790   WORD32 *ptr_x = &x[0];
2791   WORD32 *ptr_out_767, *ptr_out_256;
2792   WORD32 *ptr_out_768, *ptr_out_255;
2793   WORD32 *ptr_out_0, *ptr_out_1279;
2794   WORD32 tempr, tempi;
2795 
2796   ptr_out_767 = &out[m + (m >> 1) - 1 - 2 * i];
2797   ptr_out_256 = &out[(m >> 1) + 2 * i];
2798 
2799   ptr_out_768 = &out[m + (m >> 1) + 2 * i];
2800   ptr_out_255 = &out[(m >> 1) - 1 - 2 * i];
2801 
2802   for (i = 0; i < (m >> 3); i++) {
2803     WORD32 c, c1, s, s1;
2804     WORD32 re, im;
2805 
2806     c = *cos_sin_ptr++;
2807     c1 = *cos_sin_ptr++;
2808     s = *cos_sin_ptr++;
2809     s1 = *cos_sin_ptr++;
2810 
2811     re = *ptr_x++;
2812     im = *ptr_x++;
2813 
2814     tempi = ixheaacd_sub32(ixheaacd_mult32(im, c), ixheaacd_mult32(re, s));
2815     tempr = -ixheaacd_add32(ixheaacd_mult32(re, c), ixheaacd_mult32(im, s));
2816 
2817     *ptr_out_767 = tempr;
2818     *ptr_out_256 = tempi;
2819 
2820     *ptr_out_768 = *ptr_out_767;
2821     *ptr_out_255 = -*ptr_out_256;
2822 
2823     ptr_out_256 += 2;
2824     ptr_out_767 -= 2;
2825     ptr_out_768 += 2;
2826     ptr_out_255 -= 2;
2827 
2828     re = *ptr_x++;
2829     im = *ptr_x++;
2830 
2831     tempi = ixheaacd_sub32(ixheaacd_mult32(im, c1), ixheaacd_mult32(re, s1));
2832     tempr = -ixheaacd_add32(ixheaacd_mult32(re, c1), ixheaacd_mult32(im, s1));
2833 
2834     *ptr_out_767 = tempr;
2835     *ptr_out_256 = tempi;
2836 
2837     *ptr_out_768 = *ptr_out_767;
2838     *ptr_out_255 = -*ptr_out_256;
2839 
2840     ptr_out_256 += 2;
2841     ptr_out_767 -= 2;
2842     ptr_out_768 += 2;
2843     ptr_out_255 -= 2;
2844   }
2845 
2846   ptr_out_0 = &out[2 * 2 * i - (m >> 1)];
2847   ptr_out_1279 = &out[m + m + (m >> 1) - 1 - 2 * 2 * i];
2848 
2849   for (; i < (m >> 2); i++) {
2850     WORD32 c, c1, s, s1;
2851     WORD32 re, im;
2852 
2853     c = *cos_sin_ptr++;
2854     c1 = *cos_sin_ptr++;
2855     s = *cos_sin_ptr++;
2856     s1 = *cos_sin_ptr++;
2857 
2858     re = *ptr_x++;
2859     im = *ptr_x++;
2860 
2861     tempi = ixheaacd_sub32(ixheaacd_mult32(im, c), ixheaacd_mult32(re, s));
2862     tempr = -ixheaacd_add32(ixheaacd_mult32(re, c), ixheaacd_mult32(im, s));
2863 
2864     *ptr_out_767 = tempr;
2865     *ptr_out_256 = tempi;
2866 
2867     *ptr_out_0 = -*ptr_out_767;
2868     *ptr_out_1279 = *ptr_out_256;
2869 
2870     ptr_out_256 += 2;
2871     ptr_out_767 -= 2;
2872     ptr_out_0 += 2;
2873     ptr_out_1279 -= 2;
2874 
2875     re = *ptr_x++;
2876     im = *ptr_x++;
2877 
2878     tempi = ixheaacd_sub32(ixheaacd_mult32(im, c1), ixheaacd_mult32(re, s1));
2879     tempr = -ixheaacd_add32(ixheaacd_mult32(re, c1), ixheaacd_mult32(im, s1));
2880 
2881     *ptr_out_767 = tempr;
2882     *ptr_out_256 = tempi;
2883 
2884     *ptr_out_0 = -*ptr_out_767;
2885     *ptr_out_1279 = *ptr_out_256;
2886 
2887     ptr_out_256 += 2;
2888     ptr_out_767 -= 2;
2889     ptr_out_0 += 2;
2890     ptr_out_1279 -= 2;
2891   }
2892 }
2893 
ixheaacd_fft32x32_ld_dec(ia_aac_dec_imdct_tables_struct * imdct_tables_ptr,WORD32 npoints,WORD32 * ptr_x,WORD32 * ptr_y)2894 VOID ixheaacd_fft32x32_ld_dec(ia_aac_dec_imdct_tables_struct *imdct_tables_ptr,
2895                               WORD32 npoints, WORD32 *ptr_x, WORD32 *ptr_y) {
2896   WORD32 i, j, l1, l2, h2, predj, tw_offset, stride, fft_jmp, k;
2897   WORD32 xt0_0, yt0_0, xt1_0, yt1_0, xt2_0, yt2_0;
2898   WORD32 xh0_0, xh1_0, xh20_0, xh21_0, xl0_0, xl1_0, xl20_0, xl21_0;
2899   WORD32 xh0_1, xh1_1, xl0_1, xl1_1;
2900   WORD32 x_0, x_1, x_2, x_3, x_l1_0, x_l1_1, x_l2_0, x_l2_1;
2901   WORD32 xh0_2, xh1_2, xl0_2, xl1_2, xh0_3, xh1_3, xl0_3, xl1_3;
2902   WORD32 x_4, x_5, x_6, x_7, x_h2_0, x_h2_1;
2903   WORD32 x_8, x_9, x_a, x_b, x_c, x_d, x_e, x_f;
2904   WORD32 si10, si20, si30, co10, co20, co30;
2905   WORD32 *w;
2906   WORD32 *x, *x2, *x0;
2907   WORD32 *y0, *y1, *y2, *y3;
2908   WORD32 n00, n10, n20, n30, n01, n11, n21, n31;
2909   WORD32 n02, n12, n22, n32, n03, n13, n23, n33;
2910   WORD32 n0, j0;
2911   WORD32 radix;
2912   WORD32 norm;
2913   WORD32 m;
2914   WORD32 *ptr_w;
2915 
2916   if (npoints == 256)
2917     ptr_w = imdct_tables_ptr->w_256;
2918   else if (npoints == 32)
2919     ptr_w = (WORD32*)imdct_tables_ptr->w_32;
2920   else
2921     ptr_w = imdct_tables_ptr->w_16;
2922 
2923   for (i = 31, m = 1; (npoints & (1 << i)) == 0; i--, m++)
2924     ;
2925   radix = m & 1 ? 2 : 4;
2926   norm = m - 2;
2927 
2928   stride = npoints;
2929   tw_offset = 0;
2930   fft_jmp = 6 * stride;
2931 
2932   while (stride > radix) {
2933     j = 0;
2934     fft_jmp >>= 2;
2935 
2936     h2 = stride >> 1;
2937     l1 = stride;
2938     l2 = stride + (stride >> 1);
2939 
2940     x = ptr_x;
2941     w = ptr_w + tw_offset;
2942     tw_offset += fft_jmp;
2943 
2944     stride >>= 2;
2945 
2946     for (i = 0; i < npoints; i += 4) {
2947       co10 = w[j + 1];
2948       si10 = w[j + 0];
2949       co20 = w[j + 3];
2950       si20 = w[j + 2];
2951       co30 = w[j + 5];
2952       si30 = w[j + 4];
2953 
2954       x_0 = x[0];
2955       x_1 = x[1];
2956       x_l1_0 = x[l1];
2957       x_l1_1 = x[l1 + 1];
2958       x_l2_0 = x[l2];
2959       x_l2_1 = x[l2 + 1];
2960       x_h2_0 = x[h2];
2961       x_h2_1 = x[h2 + 1];
2962 
2963       xh0_0 = ixheaacd_add32_sat(x_0, x_l1_0);
2964       xh1_0 = ixheaacd_add32_sat(x_1, x_l1_1);
2965       xl0_0 = ixheaacd_sub32_sat(x_0, x_l1_0);
2966       xl1_0 = ixheaacd_sub32_sat(x_1, x_l1_1);
2967       xh20_0 = ixheaacd_add32_sat(x_h2_0, x_l2_0);
2968       xh21_0 = ixheaacd_add32_sat(x_h2_1, x_l2_1);
2969       xl20_0 = ixheaacd_sub32_sat(x_h2_0, x_l2_0);
2970       xl21_0 = ixheaacd_sub32_sat(x_h2_1, x_l2_1);
2971 
2972       x0 = x;
2973       x2 = x0;
2974 
2975       j += 6;
2976       x += 2;
2977       predj = (j - fft_jmp);
2978       if (!predj) x += fft_jmp;
2979       if (!predj) j = 0;
2980 
2981       x0[0] = ixheaacd_add32_sat(xh0_0, xh20_0);
2982       x0[1] = ixheaacd_add32_sat(xh1_0, xh21_0);
2983       xt0_0 = ixheaacd_sub32_sat(xh0_0, xh20_0);
2984       yt0_0 = ixheaacd_sub32_sat(xh1_0, xh21_0);
2985       xt1_0 = ixheaacd_add32_sat(xl0_0, xl21_0);
2986       yt2_0 = ixheaacd_add32_sat(xl1_0, xl20_0);
2987       xt2_0 = ixheaacd_sub32_sat(xl0_0, xl21_0);
2988       yt1_0 = ixheaacd_sub32_sat(xl1_0, xl20_0);
2989 
2990       x2[h2] =
2991           MPYHIRC(si10, yt1_0) + MPYHIRC(co10, xt1_0) +
2992           (((MPYLUHS(si10, yt1_0) + MPYLUHS(co10, xt1_0) + 0x8000) >> 16) << 1);
2993 
2994       x2[h2 + 1] =
2995           MPYHIRC(co10, yt1_0) - MPYHIRC(si10, xt1_0) +
2996           (((MPYLUHS(co10, yt1_0) - MPYLUHS(si10, xt1_0) + 0x8000) >> 16) << 1);
2997 
2998       x2[l1] =
2999           MPYHIRC(si20, yt0_0) + MPYHIRC(co20, xt0_0) +
3000           (((MPYLUHS(si20, yt0_0) + MPYLUHS(co20, xt0_0) + 0x8000) >> 16) << 1);
3001 
3002       x2[l1 + 1] =
3003           MPYHIRC(co20, yt0_0) - MPYHIRC(si20, xt0_0) +
3004           (((MPYLUHS(co20, yt0_0) - MPYLUHS(si20, xt0_0) + 0x8000) >> 16) << 1);
3005 
3006       x2[l2] =
3007           MPYHIRC(si30, yt2_0) + MPYHIRC(co30, xt2_0) +
3008           (((MPYLUHS(si30, yt2_0) + MPYLUHS(co30, xt2_0) + 0x8000) >> 16) << 1);
3009 
3010       x2[l2 + 1] =
3011           MPYHIRC(co30, yt2_0) - MPYHIRC(si30, xt2_0) +
3012           (((MPYLUHS(co30, yt2_0) - MPYLUHS(si30, xt2_0) + 0x8000) >> 16) << 1);
3013     }
3014   }
3015 
3016   y0 = ptr_y;
3017   y2 = ptr_y + (WORD32)npoints;
3018   x0 = ptr_x;
3019   x2 = ptr_x + (WORD32)(npoints >> 1);
3020 
3021   if (radix == 2) {
3022     y1 = y0 + (WORD32)(npoints >> 2);
3023     y3 = y2 + (WORD32)(npoints >> 2);
3024     l1 = norm + 1;
3025     j0 = 8;
3026     n0 = npoints >> 1;
3027   } else {
3028     y1 = y0 + (WORD32)(npoints >> 1);
3029     y3 = y2 + (WORD32)(npoints >> 1);
3030     l1 = norm + 2;
3031     j0 = 4;
3032     n0 = npoints >> 2;
3033   }
3034 
3035   j = 0;
3036   k = 0;
3037 
3038   for (i = 0; i < npoints; i += 8) {
3039     if (npoints == 32)
3040       h2 = rev_dig[k++];
3041     else
3042       DIG_REV(j, l1, h2);
3043 
3044     x_0 = x0[0];
3045     x_1 = x0[1];
3046     x_2 = x0[2];
3047     x_3 = x0[3];
3048     x_4 = x0[4];
3049     x_5 = x0[5];
3050     x_6 = x0[6];
3051     x_7 = x0[7];
3052     x0 += 8;
3053 
3054     xh0_0 = ixheaacd_add32_sat(x_0, x_4);
3055     xh1_0 = ixheaacd_add32_sat(x_1, x_5);
3056     xl0_0 = ixheaacd_sub32_sat(x_0, x_4);
3057     xl1_0 = ixheaacd_sub32_sat(x_1, x_5);
3058     xh0_1 = ixheaacd_add32_sat(x_2, x_6);
3059     xh1_1 = ixheaacd_add32_sat(x_3, x_7);
3060     xl0_1 = ixheaacd_sub32_sat(x_2, x_6);
3061     xl1_1 = ixheaacd_sub32_sat(x_3, x_7);
3062 
3063     n00 = ixheaacd_add32_sat(xh0_0, xh0_1);
3064     n01 = ixheaacd_add32_sat(xh1_0, xh1_1);
3065     n10 = ixheaacd_add32_sat(xl0_0, xl1_1);
3066     n11 = ixheaacd_sub32_sat(xl1_0, xl0_1);
3067     n20 = ixheaacd_sub32_sat(xh0_0, xh0_1);
3068     n21 = ixheaacd_sub32_sat(xh1_0, xh1_1);
3069     n30 = ixheaacd_sub32_sat(xl0_0, xl1_1);
3070     n31 = ixheaacd_add32_sat(xl1_0, xl0_1);
3071 
3072     if (radix == 2) {
3073       n00 = ixheaacd_add32_sat(x_0, x_2);
3074       n01 = ixheaacd_add32_sat(x_1, x_3);
3075       n20 = ixheaacd_sub32_sat(x_0, x_2);
3076       n21 = ixheaacd_sub32_sat(x_1, x_3);
3077       n10 = ixheaacd_add32_sat(x_4, x_6);
3078       n11 = ixheaacd_add32_sat(x_5, x_7);
3079       n30 = ixheaacd_sub32_sat(x_4, x_6);
3080       n31 = ixheaacd_sub32_sat(x_5, x_7);
3081     }
3082 
3083     y0[2 * h2] = n00;
3084     y0[2 * h2 + 1] = n01;
3085     y1[2 * h2] = n10;
3086     y1[2 * h2 + 1] = n11;
3087     y2[2 * h2] = n20;
3088     y2[2 * h2 + 1] = n21;
3089     y3[2 * h2] = n30;
3090     y3[2 * h2 + 1] = n31;
3091 
3092     x_8 = x2[0];
3093     x_9 = x2[1];
3094     x_a = x2[2];
3095     x_b = x2[3];
3096     x_c = x2[4];
3097     x_d = x2[5];
3098     x_e = x2[6];
3099     x_f = x2[7];
3100     x2 += 8;
3101 
3102     xh0_2 = ixheaacd_add32_sat(x_8, x_c);
3103     xh1_2 = ixheaacd_add32_sat(x_9, x_d);
3104     xl0_2 = ixheaacd_sub32_sat(x_8, x_c);
3105     xl1_2 = ixheaacd_sub32_sat(x_9, x_d);
3106     xh0_3 = ixheaacd_add32_sat(x_a, x_e);
3107     xh1_3 = ixheaacd_add32_sat(x_b, x_f);
3108     xl0_3 = ixheaacd_sub32_sat(x_a, x_e);
3109     xl1_3 = ixheaacd_sub32_sat(x_b, x_f);
3110 
3111     n02 = ixheaacd_add32_sat(xh0_2, xh0_3);
3112     n03 = ixheaacd_add32_sat(xh1_2, xh1_3);
3113     n12 = ixheaacd_add32_sat(xl0_2, xl1_3);
3114     n13 = ixheaacd_sub32_sat(xl1_2, xl0_3);
3115     n22 = ixheaacd_sub32_sat(xh0_2, xh0_3);
3116     n23 = ixheaacd_sub32_sat(xh1_2, xh1_3);
3117     n32 = ixheaacd_sub32_sat(xl0_2, xl1_3);
3118     n33 = ixheaacd_add32_sat(xl1_2, xl0_3);
3119 
3120     if (radix == 2) {
3121       n02 = ixheaacd_add32_sat(x_8, x_a);
3122       n03 = ixheaacd_add32_sat(x_9, x_b);
3123       n22 = ixheaacd_sub32_sat(x_8, x_a);
3124       n23 = ixheaacd_sub32_sat(x_9, x_b);
3125       n12 = ixheaacd_add32_sat(x_c, x_e);
3126       n13 = ixheaacd_add32_sat(x_d, x_f);
3127       n32 = ixheaacd_sub32_sat(x_c, x_e);
3128       n33 = ixheaacd_sub32_sat(x_d, x_f);
3129     }
3130 
3131     y0[2 * h2 + 2] = n02;
3132     y0[2 * h2 + 3] = n03;
3133     y1[2 * h2 + 2] = n12;
3134     y1[2 * h2 + 3] = n13;
3135     y2[2 * h2 + 2] = n22;
3136     y2[2 * h2 + 3] = n23;
3137     y3[2 * h2 + 2] = n32;
3138     y3[2 * h2 + 3] = n33;
3139 
3140     j += j0;
3141 
3142     if (j == n0) {
3143       j += n0;
3144       x0 += (WORD32)npoints >> 1;
3145       x2 += (WORD32)npoints >> 1;
3146     }
3147   }
3148 }
3149 
ixheaacd_rearrange_dec(WORD32 * ip,WORD32 * op,WORD32 mdct_len_2,UWORD8 * re_arr_tab)3150 VOID ixheaacd_rearrange_dec(WORD32 *ip, WORD32 *op, WORD32 mdct_len_2,
3151                             UWORD8 *re_arr_tab) {
3152   WORD32 n, i = 0;
3153 
3154   for (n = 0; n < mdct_len_2; n++) {
3155     WORD32 idx = re_arr_tab[n] << 1;
3156 
3157     op[i++] = ip[idx];
3158     op[i++] = ip[idx + 1];
3159   }
3160 }
3161 
ixheaacd_fft_15_ld_dec(WORD32 * inp,WORD32 * op,WORD32 * fft3out,UWORD8 * re_arr_tab_sml_240_ptr)3162 VOID ixheaacd_fft_15_ld_dec(WORD32 *inp, WORD32 *op, WORD32 *fft3out,
3163                             UWORD8 *re_arr_tab_sml_240_ptr) {
3164   WORD32 i, n, idx;
3165   WORD32 *buf1, *buf2, *buf1a;
3166   WORD32 add_r, sub_r;
3167   WORD32 add_i, sub_i;
3168   WORD32 x01_real, x_01_imag, temp;
3169   WORD32 p1, p2, p3, p4;
3170 
3171   WORD32 sinmu = 1859775393;
3172   WORD32 cos_51 = 2042378317;
3173   WORD32 cos_52 = -1652318768;
3174   WORD32 cos_53 = -780119100;
3175   WORD32 cos_54 = 1200479854;
3176   WORD32 cos_55 = -1342177280;
3177 
3178   WORD32 r1, r2, r3, r4;
3179   WORD32 s1, s2, s3, s4, t, temp1, temp2;
3180   WORD32 *fft3outptr = fft3out;
3181 
3182   WORD32 xr_0, xr_1, xr_2;
3183   WORD32 xi_0, xi_1, xi_2;
3184 
3185   buf2 = fft3out;
3186   buf1 = buf1a = fft3out;
3187   n = 0;
3188 
3189   {
3190     *buf1++ = inp[0];
3191     *buf1++ = inp[1];
3192 
3193     *buf1++ = inp[96];
3194     *buf1++ = inp[97];
3195 
3196     *buf1++ = inp[192];
3197     *buf1++ = inp[193];
3198 
3199     *buf1++ = inp[288];
3200     *buf1++ = inp[289];
3201 
3202     *buf1++ = inp[384];
3203     *buf1++ = inp[385];
3204 
3205     r1 = ixheaacd_add32_sat(buf1a[2], buf1a[8]);
3206     r4 = ixheaacd_sub32_sat(buf1a[2], buf1a[8]);
3207     r3 = ixheaacd_add32_sat(buf1a[4], buf1a[6]);
3208     r2 = ixheaacd_sub32_sat(buf1a[4], buf1a[6]);
3209 
3210     t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(r1, r3), cos_54);
3211 
3212     r1 = ixheaacd_add32_sat(r1, r3);
3213 
3214     temp1 = ixheaacd_add32_sat(buf1a[0], r1);
3215 
3216     r1 = ixheaacd_add32_sat(
3217         temp1, ixheaacd_shl32_sat((ixheaacd_mult32_shl(r1, cos_55)), 1));
3218 
3219     r3 = ixheaacd_sub32_sat(r1, t);
3220     r1 = ixheaacd_add32_sat(r1, t);
3221 
3222     t = ixheaacd_mult32_shl(ixheaacd_add32_sat(r4, r2), cos_51);
3223     r4 = ixheaacd_add32_sat(
3224         t, ixheaacd_shl32_sat(ixheaacd_mult32_shl(r4, cos_52), 1));
3225     r2 = ixheaacd_add32_sat(t, ixheaacd_mult32_shl(r2, cos_53));
3226 
3227     s1 = ixheaacd_add32_sat(buf1a[3], buf1a[9]);
3228     s4 = ixheaacd_sub32_sat(buf1a[3], buf1a[9]);
3229     s3 = ixheaacd_add32_sat(buf1a[5], buf1a[7]);
3230     s2 = ixheaacd_sub32_sat(buf1a[5], buf1a[7]);
3231 
3232     t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(s1, s3), cos_54);
3233     s1 = ixheaacd_add32_sat(s1, s3);
3234 
3235     temp2 = ixheaacd_add32_sat(buf1a[1], s1);
3236 
3237     s1 = ixheaacd_add32_sat(
3238         temp2, ixheaacd_shl32_sat((ixheaacd_mult32_shl(s1, cos_55)), 1));
3239 
3240     s3 = ixheaacd_sub32_sat(s1, t);
3241     s1 = ixheaacd_add32_sat(s1, t);
3242 
3243     t = ixheaacd_mult32_shl(ixheaacd_add32_sat(s4, s2), cos_51);
3244     s4 = ixheaacd_add32_sat(
3245         t, ixheaacd_shl32_sat((ixheaacd_mult32_shl(s4, cos_52)), 1));
3246     s2 = ixheaacd_add32_sat(t, (ixheaacd_mult32_shl(s2, cos_53)));
3247 
3248     *buf2++ = temp1;
3249     *buf2++ = temp2;
3250     *buf2++ = ixheaacd_add32_sat(r1, s2);
3251     *buf2++ = ixheaacd_sub32_sat(s1, r2);
3252     *buf2++ = ixheaacd_sub32_sat(r3, s4);
3253     *buf2++ = ixheaacd_add32_sat(s3, r4);
3254     *buf2++ = ixheaacd_add32_sat(r3, s4);
3255     *buf2++ = ixheaacd_sub32_sat(s3, r4);
3256     *buf2++ = ixheaacd_sub32_sat(r1, s2);
3257     *buf2++ = ixheaacd_add32_sat(s1, r2);
3258     buf1a = buf1;
3259 
3260     *buf1++ = inp[160];
3261     *buf1++ = inp[161];
3262 
3263     *buf1++ = inp[256];
3264     *buf1++ = inp[257];
3265 
3266     *buf1++ = inp[352];
3267     *buf1++ = inp[353];
3268 
3269     *buf1++ = inp[448];
3270     *buf1++ = inp[449];
3271 
3272     *buf1++ = inp[64];
3273     *buf1++ = inp[65];
3274 
3275     r1 = ixheaacd_add32_sat(buf1a[2], buf1a[8]);
3276     r4 = ixheaacd_sub32_sat(buf1a[2], buf1a[8]);
3277     r3 = ixheaacd_add32_sat(buf1a[4], buf1a[6]);
3278     r2 = ixheaacd_sub32_sat(buf1a[4], buf1a[6]);
3279 
3280     t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(r1, r3), cos_54);
3281 
3282     r1 = ixheaacd_add32_sat(r1, r3);
3283 
3284     temp1 = ixheaacd_add32_sat(buf1a[0], r1);
3285 
3286     r1 = ixheaacd_add32_sat(
3287         temp1, ixheaacd_shl32_sat((ixheaacd_mult32_shl(r1, cos_55)), 1));
3288 
3289     r3 = ixheaacd_sub32_sat(r1, t);
3290     r1 = ixheaacd_add32_sat(r1, t);
3291 
3292     t = ixheaacd_mult32_shl(ixheaacd_add32_sat(r4, r2), cos_51);
3293     r4 = ixheaacd_add32_sat(
3294         t, ixheaacd_shl32_sat(ixheaacd_mult32_shl(r4, cos_52), 1));
3295     r2 = ixheaacd_add32_sat(t, ixheaacd_mult32_shl(r2, cos_53));
3296 
3297     s1 = ixheaacd_add32_sat(buf1a[3], buf1a[9]);
3298     s4 = ixheaacd_sub32_sat(buf1a[3], buf1a[9]);
3299     s3 = ixheaacd_add32_sat(buf1a[5], buf1a[7]);
3300     s2 = ixheaacd_sub32_sat(buf1a[5], buf1a[7]);
3301 
3302     t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(s1, s3), cos_54);
3303 
3304     s1 = ixheaacd_add32_sat(s1, s3);
3305 
3306     temp2 = ixheaacd_add32_sat(buf1a[1], s1);
3307 
3308     s1 = ixheaacd_add32_sat(
3309         temp2, ixheaacd_shl32_sat((ixheaacd_mult32_shl(s1, cos_55)), 1));
3310 
3311     s3 = ixheaacd_sub32_sat(s1, t);
3312     s1 = ixheaacd_add32_sat(s1, t);
3313 
3314     t = ixheaacd_mult32_shl(ixheaacd_add32_sat(s4, s2), cos_51);
3315     s4 = ixheaacd_add32_sat(
3316         t, ixheaacd_shl32_sat((ixheaacd_mult32_shl(s4, cos_52)), 1));
3317     s2 = ixheaacd_add32_sat(t, (ixheaacd_mult32_shl(s2, cos_53)));
3318 
3319     *buf2++ = temp1;
3320     *buf2++ = temp2;
3321     *buf2++ = ixheaacd_add32_sat(r1, s2);
3322     *buf2++ = ixheaacd_sub32_sat(s1, r2);
3323     *buf2++ = ixheaacd_sub32_sat(r3, s4);
3324     *buf2++ = ixheaacd_add32_sat(s3, r4);
3325     *buf2++ = ixheaacd_add32_sat(r3, s4);
3326     *buf2++ = ixheaacd_sub32_sat(s3, r4);
3327     *buf2++ = ixheaacd_sub32_sat(r1, s2);
3328     *buf2++ = ixheaacd_add32_sat(s1, r2);
3329     buf1a = buf1;
3330     ;
3331 
3332     *buf1++ = inp[320];
3333     *buf1++ = inp[321];
3334 
3335     *buf1++ = inp[416];
3336     *buf1++ = inp[417];
3337 
3338     *buf1++ = inp[32];
3339     *buf1++ = inp[33];
3340 
3341     *buf1++ = inp[128];
3342     *buf1++ = inp[129];
3343 
3344     *buf1++ = inp[224];
3345     *buf1++ = inp[225];
3346 
3347     r1 = ixheaacd_add32_sat(buf1a[2], buf1a[8]);
3348     r4 = ixheaacd_sub32_sat(buf1a[2], buf1a[8]);
3349     r3 = ixheaacd_add32_sat(buf1a[4], buf1a[6]);
3350     r2 = ixheaacd_sub32_sat(buf1a[4], buf1a[6]);
3351 
3352     t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(r1, r3), cos_54);
3353 
3354     r1 = ixheaacd_add32_sat(r1, r3);
3355 
3356     temp1 = ixheaacd_add32_sat(buf1a[0], r1);
3357 
3358     r1 = ixheaacd_add32_sat(
3359         temp1, ixheaacd_shl32_sat((ixheaacd_mult32_shl(r1, cos_55)), 1));
3360 
3361     r3 = ixheaacd_sub32_sat(r1, t);
3362     r1 = ixheaacd_add32_sat(r1, t);
3363 
3364     t = ixheaacd_mult32_shl(ixheaacd_add32_sat(r4, r2), cos_51);
3365     r4 = ixheaacd_add32_sat(
3366         t, ixheaacd_shl32_sat(ixheaacd_mult32_shl(r4, cos_52), 1));
3367     r2 = ixheaacd_add32_sat(t, ixheaacd_mult32_shl(r2, cos_53));
3368 
3369     s1 = ixheaacd_add32_sat(buf1a[3], buf1a[9]);
3370     s4 = ixheaacd_sub32_sat(buf1a[3], buf1a[9]);
3371     s3 = ixheaacd_add32_sat(buf1a[5], buf1a[7]);
3372     s2 = ixheaacd_sub32_sat(buf1a[5], buf1a[7]);
3373 
3374     t = ixheaacd_mult32_shl(ixheaacd_sub32_sat(s1, s3), cos_54);
3375 
3376     s1 = ixheaacd_add32_sat(s1, s3);
3377 
3378     temp2 = ixheaacd_add32_sat(buf1a[1], s1);
3379 
3380     s1 = ixheaacd_add32_sat(
3381         temp2, ixheaacd_shl32_sat((ixheaacd_mult32_shl(s1, cos_55)), 1));
3382 
3383     s3 = ixheaacd_sub32_sat(s1, t);
3384     s1 = ixheaacd_add32_sat(s1, t);
3385 
3386     t = ixheaacd_mult32_shl(ixheaacd_add32_sat(s4, s2), cos_51);
3387     s4 = ixheaacd_add32_sat(
3388         t, ixheaacd_shl32_sat((ixheaacd_mult32_shl(s4, cos_52)), 1));
3389     s2 = ixheaacd_add32_sat(t, (ixheaacd_mult32_shl(s2, cos_53)));
3390 
3391     *buf2++ = temp1;
3392     *buf2++ = temp2;
3393     *buf2++ = ixheaacd_add32_sat(r1, s2);
3394     *buf2++ = ixheaacd_sub32_sat(s1, r2);
3395     *buf2++ = ixheaacd_sub32_sat(r3, s4);
3396     *buf2++ = ixheaacd_add32_sat(s3, r4);
3397     *buf2++ = ixheaacd_add32_sat(r3, s4);
3398     *buf2++ = ixheaacd_sub32_sat(s3, r4);
3399     *buf2++ = ixheaacd_sub32_sat(r1, s2);
3400     *buf2++ = ixheaacd_add32_sat(s1, r2);
3401     buf1a = buf1;
3402     ;
3403   }
3404 
3405   n = 0;
3406   for (i = 0; i < FFT5; i++) {
3407     xr_0 = fft3outptr[0];
3408     xi_0 = fft3outptr[1];
3409 
3410     xr_1 = fft3outptr[10];
3411     xi_1 = fft3outptr[11];
3412 
3413     xr_2 = fft3outptr[20];
3414     xi_2 = fft3outptr[21];
3415 
3416     x01_real = ixheaacd_add32_sat(xr_0, xr_1);
3417     x_01_imag = ixheaacd_add32_sat(xi_0, xi_1);
3418 
3419     add_r = ixheaacd_add32_sat(xr_1, xr_2);
3420     add_i = ixheaacd_add32_sat(xi_1, xi_2);
3421 
3422     sub_r = ixheaacd_sub32_sat(xr_1, xr_2);
3423     sub_i = ixheaacd_sub32_sat(xi_1, xi_2);
3424 
3425     p1 = add_r >> 1;
3426 
3427     p2 = ixheaacd_mult32_shl(sub_i, sinmu);
3428     p3 = ixheaacd_mult32_shl(sub_r, sinmu);
3429 
3430     p4 = add_i >> 1;
3431 
3432     temp = ixheaacd_sub32_sat(xr_0, p1);
3433     temp1 = ixheaacd_add32_sat(xi_0, p3);
3434     temp2 = ixheaacd_sub32_sat(xi_0, p3);
3435 
3436     idx = re_arr_tab_sml_240_ptr[n++] << 1;
3437     op[idx] = ixheaacd_add32_sat(x01_real, xr_2);
3438     op[idx + 1] = ixheaacd_add32_sat(x_01_imag, xi_2);
3439 
3440     idx = re_arr_tab_sml_240_ptr[n++] << 1;
3441     op[idx] = ixheaacd_add32_sat(temp, p2);
3442     op[idx + 1] = ixheaacd_sub32_sat(temp2, p4);
3443 
3444     idx = re_arr_tab_sml_240_ptr[n++] << 1;
3445     op[idx] = ixheaacd_sub32_sat(temp, p2);
3446     op[idx + 1] = ixheaacd_sub32_sat(temp1, p4);
3447     fft3outptr += 2;
3448   }
3449 }