• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/******************************************************************************
 *                                                                            *
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
20 #include <stdlib.h>
21 #include <stdio.h>
22 
23 #include "ixheaacd_type_def.h"
24 #include "ixheaacd_interface.h"
25 #include "ixheaacd_constants.h"
26 #include "ixheaacd_basic_ops32.h"
27 #include "ixheaacd_basic_ops40.h"
28 #include "ixheaacd_function_selector.h"
29 
30 extern const WORD32 ixheaacd_twiddle_table_fft_32x32[514];
31 extern const FLOAT32 ixheaacd_twiddle_table_fft[514];
32 extern const FLOAT32 ixheaacd_twiddle_table_fft_flt[16];
33 extern const WORD32 ixheaacd_twiddle_table_3pr[1155];
34 extern const WORD32 ixheaacd_twiddle_table_3pi[1155];
35 extern const WORD8 ixheaacd_mps_dig_rev[8];
36 
37 #define PLATFORM_INLINE __inline
38 
/*
 * DIG_REV(i, m, j)
 *
 * Reverses the order of the eight 2-bit (base-4) digits inside each 16-bit
 * half of `i`, shifts the result right by `m` bits and assigns it to `j`.
 * The two 16-bit halves are processed independently; they are never swapped
 * with each other.
 *
 *   i : value to permute (evaluated once, converted to unsigned)
 *   m : right-shift count applied after the permutation; must be
 *       non-negative and smaller than the width of unsigned
 *   j : lvalue receiving the result
 */
#define DIG_REV(i, m, j)                                                     \
  do {                                                                       \
    unsigned dig_rev_val_ = (i);                                             \
    /* swap neighbouring 2-bit digits */                                     \
    dig_rev_val_ = ((dig_rev_val_ & 0x33333333u) << 2) |                     \
                   ((dig_rev_val_ & ~0x33333333u) >> 2);                     \
    /* swap neighbouring nibbles */                                          \
    dig_rev_val_ = ((dig_rev_val_ & 0x0F0F0F0Fu) << 4) |                     \
                   ((dig_rev_val_ & ~0x0F0F0F0Fu) >> 4);                     \
    /* swap the two bytes of each 16-bit half */                             \
    dig_rev_val_ = ((dig_rev_val_ & 0x00FF00FFu) << 8) |                     \
                   ((dig_rev_val_ & ~0x00FF00FFu) >> 8);                     \
    (j) = dig_rev_val_ >> (m);                                               \
  } while (0)
47 
ixheaacd_mult32_sat(WORD32 a,WORD32 b)48 static PLATFORM_INLINE WORD32 ixheaacd_mult32_sat(WORD32 a, WORD32 b) {
49   WORD32 result;
50   WORD64 temp_result;
51 
52   temp_result = (WORD64)a * (WORD64)b;
53   result = ixheaacd_sat64_32(temp_result >> 31);
54 
55   return (result);
56 }
57 
ixheaacd_mac32_sat(WORD32 a,WORD32 b,WORD32 c)58 static PLATFORM_INLINE WORD32 ixheaacd_mac32_sat(WORD32 a, WORD32 b, WORD32 c) {
59   WORD32 result;
60 
61   result = ixheaacd_add32_sat(a, ixheaacd_mult32_sat(b, c));
62 
63   return (result);
64 }
65 
ixheaacd_mult32X32float(FLOAT32 a,FLOAT32 b)66 static PLATFORM_INLINE FLOAT32 ixheaacd_mult32X32float(FLOAT32 a, FLOAT32 b) {
67   FLOAT32 result;
68 
69   result = a * b;
70 
71   return result;
72 }
73 
ixheaacd_mac32X32float(FLOAT32 a,FLOAT32 b,FLOAT32 c)74 static PLATFORM_INLINE FLOAT32 ixheaacd_mac32X32float(FLOAT32 a, FLOAT32 b, FLOAT32 c) {
75   FLOAT32 result;
76 
77   result = a + b * c;
78 
79   return result;
80 }
81 
ixheaacd_mps_synth_calc_fft(FLOAT32 * ptr_xr,FLOAT32 * ptr_xi,WORD32 npoints)82 VOID ixheaacd_mps_synth_calc_fft(FLOAT32 *ptr_xr, FLOAT32 *ptr_xi,
83                                  WORD32 npoints) {
84   WORD32 i, j, k;
85   FLOAT32 y[64], z[64];
86   FLOAT32 *ptr_y = y, *ptr_z = z;
87   const FLOAT32 *ptr_w = ixheaacd_twiddle_table_fft_flt;
88 
89   for (i = 0; i < npoints; i += 4) {
90     FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
91     FLOAT32 *inp = ptr_xr;
92     FLOAT32 tmk;
93 
94     WORD32 h2 = ixheaacd_mps_dig_rev[i >> 2];
95 
96     inp += (h2);
97 
98     x0r = *inp;
99     x0i = *(inp + 1);
100     inp += 16;
101 
102     x1r = *inp;
103     x1i = *(inp + 1);
104     inp += 16;
105 
106     x2r = *inp;
107     x2i = *(inp + 1);
108     inp += 16;
109 
110     x3r = *inp;
111     x3i = *(inp + 1);
112 
113     x0r = x0r + x2r;
114     x0i = x0i + x2i;
115 
116     tmk = x0r - x2r;
117     x2r = tmk - x2r;
118     tmk = x0i - x2i;
119     x2i = tmk - x2i;
120 
121     x1r = x1r + x3r;
122     x1i = x1i + x3i;
123 
124     tmk = x1r - x3r;
125     x3r = tmk - x3r;
126     tmk = x1i - x3i;
127     x3i = tmk - x3i;
128 
129     x0r = x0r + x1r;
130     x0i = x0i + x1i;
131 
132     tmk = x0r - x1r;
133     x1r = tmk - x1r;
134     tmk = x0i - x1i;
135     x1i = tmk - x1i;
136 
137     x2r = x2r + x3i;
138     x2i = x2i - x3r;
139 
140     tmk = x2r - x3i;
141     x3i = tmk - x3i;
142     tmk = x2i + x3r;
143     x3r = tmk + x3r;
144 
145     *ptr_y++ = x0r;
146     *ptr_y++ = x0i;
147     *ptr_y++ = x2r;
148     *ptr_y++ = x2i;
149     *ptr_y++ = x1r;
150     *ptr_y++ = x1i;
151     *ptr_y++ = x3i;
152     *ptr_y++ = x3r;
153 
154     inp = ptr_xi;
155 
156     inp += (h2);
157 
158     x0r = *inp;
159     x0i = *(inp + 1);
160     inp += 16;
161 
162     x1r = *inp;
163     x1i = *(inp + 1);
164     inp += 16;
165 
166     x2r = *inp;
167     x2i = *(inp + 1);
168     inp += 16;
169 
170     x3r = *inp;
171     x3i = *(inp + 1);
172 
173     x0r = x0r + x2r;
174     x0i = x0i + x2i;
175 
176     tmk = x0r - x2r;
177     x2r = tmk - x2r;
178     tmk = x0i - x2i;
179     x2i = tmk - x2i;
180 
181     x1r = x1r + x3r;
182     x1i = x1i + x3i;
183 
184     tmk = x1r - x3r;
185     x3r = tmk - x3r;
186     tmk = x1i - x3i;
187     x3i = tmk - x3i;
188 
189     x0r = x0r + x1r;
190     x0i = x0i + x1i;
191 
192     tmk = x0r - x1r;
193     x1r = tmk - x1r;
194     tmk = x0i - x1i;
195     x1i = tmk - x1i;
196 
197     x2r = x2r + x3i;
198     x2i = x2i - x3r;
199 
200     tmk = x2r - x3i;
201     x3i = tmk - x3i;
202     tmk = x2i + x3r;
203     x3r = tmk + x3r;
204 
205     *ptr_z++ = x0r;
206     *ptr_z++ = x0i;
207     *ptr_z++ = x2r;
208     *ptr_z++ = x2i;
209     *ptr_z++ = x1r;
210     *ptr_z++ = x1i;
211     *ptr_z++ = x3i;
212     *ptr_z++ = x3r;
213   }
214   ptr_y -= 64;
215   ptr_z -= 64;
216   {
217     FLOAT32 *data_r = ptr_y;
218     FLOAT32 *data_i = ptr_z;
219     for (k = 2; k != 0; k--) {
220       FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
221 
222       x0r = (*data_r);
223       x0i = (*(data_r + 1));
224       data_r += 8;
225 
226       x1r = (*data_r);
227       x1i = (*(data_r + 1));
228       data_r += 8;
229 
230       x2r = (*data_r);
231       x2i = (*(data_r + 1));
232       data_r += 8;
233 
234       x3r = (*data_r);
235       x3i = (*(data_r + 1));
236       data_r -= 24;
237 
238       x0r = x0r + x2r;
239       x0i = x0i + x2i;
240       x2r = x0r - (x2r * 2);
241       x2i = x0i - (x2i * 2);
242       x1r = x1r + x3r;
243       x1i = x1i + x3i;
244       x3r = x1r - (x3r * 2);
245       x3i = x1i - (x3i * 2);
246 
247       x0r = x0r + x1r;
248       x0i = x0i + x1i;
249       x1r = x0r - (x1r * 2);
250       x1i = x0i - (x1i * 2);
251       x2r = x2r + x3i;
252       x2i = x2i - x3r;
253       x3i = x2r - (x3i * 2);
254       x3r = x2i + (x3r * 2);
255 
256       *data_r = x0r;
257       *(data_r + 1) = x0i;
258       data_r += 8;
259 
260       *data_r = x2r;
261       *(data_r + 1) = x2i;
262       data_r += 8;
263 
264       *data_r = x1r;
265       *(data_r + 1) = x1i;
266       data_r += 8;
267 
268       *data_r = x3i;
269       *(data_r + 1) = x3r;
270       data_r += 8;
271 
272       x0r = (*data_i);
273       x0i = (*(data_i + 1));
274       data_i += 8;
275 
276       x1r = (*data_i);
277       x1i = (*(data_i + 1));
278       data_i += 8;
279 
280       x2r = (*data_i);
281       x2i = (*(data_i + 1));
282       data_i += 8;
283 
284       x3r = (*data_i);
285       x3i = (*(data_i + 1));
286       data_i -= 24;
287 
288       x0r = x0r + x2r;
289       x0i = x0i + x2i;
290       x2r = x0r - (x2r * 2);
291       x2i = x0i - (x2i * 2);
292       x1r = x1r + x3r;
293       x1i = x1i + x3i;
294       x3r = x1r - (x3r * 2);
295       x3i = x1i - (x3i * 2);
296 
297       x0r = x0r + x1r;
298       x0i = x0i + x1i;
299       x1r = x0r - (x1r * 2);
300       x1i = x0i - (x1i * 2);
301       x2r = x2r + x3i;
302       x2i = x2i - x3r;
303       x3i = x2r - (x3i * 2);
304       x3r = x2i + (x3r * 2);
305 
306       *data_i = x0r;
307       *(data_i + 1) = x0i;
308       data_i += 8;
309 
310       *data_i = x2r;
311       *(data_i + 1) = x2i;
312       data_i += 8;
313 
314       *data_i = x1r;
315       *(data_i + 1) = x1i;
316       data_i += 8;
317 
318       *data_i = x3i;
319       *(data_i + 1) = x3r;
320       data_i += 8;
321     }
322     data_r = ptr_y + 2;
323     data_i = ptr_z + 2;
324 
325     for (k = 2; k != 0; k--) {
326       FLOAT32 tmp;
327       FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
328 
329       data_r += 8;
330 
331       x1r = *data_r;
332       x1i = *(data_r + 1);
333       data_r += 8;
334 
335       x2r = *data_r;
336       x2i = *(data_r + 1);
337       data_r += 8;
338 
339       x3r = *data_r;
340       x3i = *(data_r + 1);
341       data_r -= 24;
342 
343       tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.923880f) -
344                       ixheaacd_mult32X32float((FLOAT32)x1i, -0.382683f));
345       x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.382683f),
346                                    (FLOAT32)x1i, 0.923880f);
347       x1r = tmp;
348 
349       tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) -
350                       ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
351       x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f),
352                                    (FLOAT32)x2i, 0.707107f);
353       x2r = tmp;
354 
355       tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, 0.382683f) -
356                       ixheaacd_mult32X32float((FLOAT32)x3i, -0.923880f));
357       x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.923880f),
358                                    (FLOAT32)x3i, 0.382683f);
359       x3r = tmp;
360 
361       x0r = (*data_r);
362       x0i = (*(data_r + 1));
363 
364       x0r = x0r + (x2r);
365       x0i = x0i + (x2i);
366       x2r = x0r - (x2r * 2);
367       x2i = x0i - (x2i * 2);
368       x1r = x1r + x3r;
369       x1i = x1i + x3i;
370       x3r = x1r - (x3r * 2);
371       x3i = x1i - (x3i * 2);
372 
373       x0r = x0r + (x1r);
374       x0i = x0i + (x1i);
375       x1r = x0r - (x1r * 2);
376       x1i = x0i - (x1i * 2);
377       x2r = x2r + (x3i);
378       x2i = x2i - (x3r);
379       x3i = x2r - (x3i * 2);
380       x3r = x2i + (x3r * 2);
381 
382       *data_r = x0r;
383       *(data_r + 1) = x0i;
384       data_r += 8;
385 
386       *data_r = x2r;
387       *(data_r + 1) = x2i;
388       data_r += 8;
389 
390       *data_r = x1r;
391       *(data_r + 1) = x1i;
392       data_r += 8;
393 
394       *data_r = x3i;
395       *(data_r + 1) = x3r;
396       data_r += 8;
397       data_i += 8;
398 
399       x1r = *data_i;
400       x1i = *(data_i + 1);
401       data_i += 8;
402 
403       x2r = *data_i;
404       x2i = *(data_i + 1);
405       data_i += 8;
406 
407       x3r = *data_i;
408       x3i = *(data_i + 1);
409       data_i -= 24;
410 
411       tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.923880f) -
412                       ixheaacd_mult32X32float((FLOAT32)x1i, -0.382683f));
413       x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.382683f),
414                                    (FLOAT32)x1i, 0.923880f);
415       x1r = tmp;
416 
417       tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) -
418                       ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
419       x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f),
420                                    (FLOAT32)x2i, 0.707107f);
421       x2r = tmp;
422 
423       tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, 0.382683f) -
424                       ixheaacd_mult32X32float((FLOAT32)x3i, -0.923880f));
425       x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.923880f),
426                                    (FLOAT32)x3i, 0.382683f);
427       x3r = tmp;
428 
429       x0r = (*data_i);
430       x0i = (*(data_i + 1));
431 
432       x0r = x0r + (x2r);
433       x0i = x0i + (x2i);
434       x2r = x0r - (x2r * 2);
435       x2i = x0i - (x2i * 2);
436       x1r = x1r + x3r;
437       x1i = x1i + x3i;
438       x3r = x1r - (x3r * 2);
439       x3i = x1i - (x3i * 2);
440 
441       x0r = x0r + (x1r);
442       x0i = x0i + (x1i);
443       x1r = x0r - (x1r * 2);
444       x1i = x0i - (x1i * 2);
445       x2r = x2r + (x3i);
446       x2i = x2i - (x3r);
447       x3i = x2r - (x3i * 2);
448       x3r = x2i + (x3r * 2);
449 
450       *data_i = x0r;
451       *(data_i + 1) = x0i;
452       data_i += 8;
453 
454       *data_i = x2r;
455       *(data_i + 1) = x2i;
456       data_i += 8;
457 
458       *data_i = x1r;
459       *(data_i + 1) = x1i;
460       data_i += 8;
461 
462       *data_i = x3i;
463       *(data_i + 1) = x3r;
464       data_i += 8;
465     }
466     data_r -= 62;
467     data_i -= 62;
468     for (k = 2; k != 0; k--) {
469       FLOAT32 tmp;
470       FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
471 
472       data_r += 8;
473 
474       x1r = *data_r;
475       x1i = *(data_r + 1);
476       data_r += 8;
477 
478       x2r = *data_r;
479       x2i = *(data_r + 1);
480       data_r += 8;
481 
482       x3r = *data_r;
483       x3i = *(data_r + 1);
484       data_r -= 24;
485 
486       tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.707107f) -
487                       ixheaacd_mult32X32float((FLOAT32)x1i, -0.707107f));
488       x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.707107f),
489                                    (FLOAT32)x1i, 0.707107f);
490       x1r = tmp;
491 
492       tmp = x2i;
493       x2i = -x2r;
494       x2r = tmp;
495 
496       tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, -0.707107f) +
497                       ixheaacd_mult32X32float((FLOAT32)x3i, 0.707107f));
498       x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.707107f) +
499                       ixheaacd_mult32X32float((FLOAT32)x3i, -0.707107f));
500       x3r = tmp;
501 
502       x0r = (*data_r);
503       x0i = (*(data_r + 1));
504 
505       x0r = x0r + (x2r);
506       x0i = x0i + (x2i);
507       x2r = x0r - (x2r * 2);
508       x2i = x0i - (x2i * 2);
509       x1r = x1r + x3r;
510       x1i = x1i + x3i;
511       x3r = x1r - (x3r * 2);
512       x3i = x1i - (x3i * 2);
513 
514       x0r = x0r + (x1r);
515       x0i = x0i + (x1i);
516       x1r = x0r - (x1r * 2);
517       x1i = x0i - (x1i * 2);
518       x2r = x2r + (x3i);
519       x2i = x2i - (x3r);
520       x3i = x2r - (x3i * 2);
521       x3r = x2i + (x3r * 2);
522 
523       *data_r = x0r;
524       *(data_r + 1) = x0i;
525       data_r += 8;
526 
527       *data_r = x2r;
528       *(data_r + 1) = x2i;
529       data_r += 8;
530 
531       *data_r = x1r;
532       *(data_r + 1) = x1i;
533       data_r += 8;
534 
535       *data_r = x3i;
536       *(data_r + 1) = x3r;
537       data_r += 8;
538       data_i += 8;
539 
540       x1r = *data_i;
541       x1i = *(data_i + 1);
542       data_i += 8;
543 
544       x2r = *data_i;
545       x2i = *(data_i + 1);
546       data_i += 8;
547 
548       x3r = *data_i;
549       x3i = *(data_i + 1);
550       data_i -= 24;
551 
552       tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.707107f) -
553                       ixheaacd_mult32X32float((FLOAT32)x1i, -0.707107f));
554       x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.707107f),
555                                    (FLOAT32)x1i, 0.707107f);
556       x1r = tmp;
557 
558       tmp = x2i;
559       x2i = -x2r;
560       x2r = tmp;
561 
562       tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, -0.707107f) +
563                       ixheaacd_mult32X32float((FLOAT32)x3i, 0.707107f));
564       x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.707107f) +
565                       ixheaacd_mult32X32float((FLOAT32)x3i, -0.707107f));
566       x3r = tmp;
567 
568       x0r = (*data_i);
569       x0i = (*(data_i + 1));
570 
571       x0r = x0r + (x2r);
572       x0i = x0i + (x2i);
573       x2r = x0r - (x2r * 2);
574       x2i = x0i - (x2i * 2);
575       x1r = x1r + x3r;
576       x1i = x1i + x3i;
577       x3r = x1r - (x3r * 2);
578       x3i = x1i - (x3i * 2);
579 
580       x0r = x0r + (x1r);
581       x0i = x0i + (x1i);
582       x1r = x0r - (x1r * 2);
583       x1i = x0i - (x1i * 2);
584       x2r = x2r + (x3i);
585       x2i = x2i - (x3r);
586       x3i = x2r - (x3i * 2);
587       x3r = x2i + (x3r * 2);
588 
589       *data_i = x0r;
590       *(data_i + 1) = x0i;
591       data_i += 8;
592 
593       *data_i = x2r;
594       *(data_i + 1) = x2i;
595       data_i += 8;
596 
597       *data_i = x1r;
598       *(data_i + 1) = x1i;
599       data_i += 8;
600 
601       *data_i = x3i;
602       *(data_i + 1) = x3r;
603       data_i += 8;
604     }
605     data_r -= 62;
606     data_i -= 62;
607     for (k = 2; k != 0; k--) {
608       FLOAT32 tmp;
609       FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
610 
611       data_r += 8;
612 
613       x1r = *data_r;
614       x1i = *(data_r + 1);
615       data_r += 8;
616 
617       x2r = *data_r;
618       x2i = *(data_r + 1);
619       data_r += 8;
620 
621       x3r = *data_r;
622       x3i = *(data_r + 1);
623       data_r -= 24;
624 
625       tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.382683f) -
626                       ixheaacd_mult32X32float((FLOAT32)x1i, -0.923880f));
627       x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.923880f),
628                                    (FLOAT32)x1i, 0.382683f);
629       x1r = tmp;
630 
631       tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f) +
632                       ixheaacd_mult32X32float((FLOAT32)x2i, 0.707107f));
633       x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) +
634                       ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
635       x2r = tmp;
636 
637       tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.923880f) +
638                       ixheaacd_mult32X32float((FLOAT32)x3i, -0.382683f));
639       x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.382683f),
640                                    (FLOAT32)x3i, 0.923880f);
641       x3r = tmp;
642 
643       x0r = (*data_r);
644       x0i = (*(data_r + 1));
645 
646       x0r = x0r + (x2r);
647       x0i = x0i + (x2i);
648       x2r = x0r - (x2r * 2);
649       x2i = x0i - (x2i * 2);
650       x1r = x1r + x3r;
651       x1i = x1i - x3i;
652       x3r = x1r - (x3r * 2);
653       x3i = x1i + (x3i * 2);
654 
655       x0r = x0r + (x1r);
656       x0i = x0i + (x1i);
657       x1r = x0r - (x1r * 2);
658       x1i = x0i - (x1i * 2);
659       x2r = x2r + (x3i);
660       x2i = x2i - (x3r);
661       x3i = x2r - (x3i * 2);
662       x3r = x2i + (x3r * 2);
663 
664       *data_r = x0r;
665       *(data_r + 1) = x0i;
666       data_r += 8;
667 
668       *data_r = x2r;
669       *(data_r + 1) = x2i;
670       data_r += 8;
671 
672       *data_r = x1r;
673       *(data_r + 1) = x1i;
674       data_r += 8;
675 
676       *data_r = x3i;
677       *(data_r + 1) = x3r;
678       data_r += 8;
679       data_i += 8;
680 
681       x1r = *data_i;
682       x1i = *(data_i + 1);
683       data_i += 8;
684 
685       x2r = *data_i;
686       x2i = *(data_i + 1);
687       data_i += 8;
688 
689       x3r = *data_i;
690       x3i = *(data_i + 1);
691       data_i -= 24;
692 
693       tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.382683f) -
694                       ixheaacd_mult32X32float((FLOAT32)x1i, -0.923880f));
695       x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.923880f),
696                                    (FLOAT32)x1i, 0.382683f);
697       x1r = tmp;
698 
699       tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f) +
700                       ixheaacd_mult32X32float((FLOAT32)x2i, 0.707107f));
701       x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) +
702                       ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
703       x2r = tmp;
704 
705       tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.923880f) +
706                       ixheaacd_mult32X32float((FLOAT32)x3i, -0.382683f));
707       x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.382683f),
708                                    (FLOAT32)x3i, 0.923880f);
709       x3r = tmp;
710 
711       x0r = (*data_i);
712       x0i = (*(data_i + 1));
713 
714       x0r = x0r + (x2r);
715       x0i = x0i + (x2i);
716       x2r = x0r - (x2r * 2);
717       x2i = x0i - (x2i * 2);
718       x1r = x1r + x3r;
719       x1i = x1i - x3i;
720       x3r = x1r - (x3r * 2);
721       x3i = x1i + (x3i * 2);
722 
723       x0r = x0r + (x1r);
724       x0i = x0i + (x1i);
725       x1r = x0r - (x1r * 2);
726       x1i = x0i - (x1i * 2);
727       x2r = x2r + (x3i);
728       x2i = x2i - (x3r);
729       x3i = x2r - (x3i * 2);
730       x3r = x2i + (x3r * 2);
731 
732       *data_i = x0r;
733       *(data_i + 1) = x0i;
734       data_i += 8;
735 
736       *data_i = x2r;
737       *(data_i + 1) = x2i;
738       data_i += 8;
739 
740       *data_i = x1r;
741       *(data_i + 1) = x1i;
742       data_i += 8;
743 
744       *data_i = x3i;
745       *(data_i + 1) = x3r;
746       data_i += 8;
747     }
748     data_r -= 62;
749     data_i -= 62;
750   }
751   {
752     const FLOAT32 *twiddles = ptr_w;
753     FLOAT32 x0r, x0i, x1r, x1i;
754     for (j = 8; j != 0; j--) {
755       FLOAT32 W1 = *twiddles;
756       twiddles++;
757       FLOAT32 W4 = *twiddles;
758       twiddles++;
759       FLOAT32 tmp;
760 
761       x0r = *ptr_y;
762       x0i = *(ptr_y + 1);
763       ptr_y += 32;
764       ptr_xr += 32;
765 
766       x1r = *ptr_y;
767       x1i = *(ptr_y + 1);
768 
769       tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W1) -
770                       ixheaacd_mult32X32float((FLOAT32)x1i, W4));
771       x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, W4),
772                                    (FLOAT32)x1i, W1);
773       x1r = tmp;
774 
775       *ptr_xr = (x0r) - (x1r);
776       *(ptr_xr + 1) = (x0i) - (x1i);
777       ptr_y -= 32;
778       ptr_xr -= 32;
779 
780       *ptr_xr = (x0r) + (x1r);
781       *(ptr_xr + 1) = (x0i) + (x1i);
782       ptr_y += 2;
783       ptr_xr += 2;
784 
785       x0r = *ptr_z;
786       x0i = *(ptr_z + 1);
787       ptr_z += 32;
788       ptr_xi += 32;
789 
790       x1r = *ptr_z;
791       x1i = *(ptr_z + 1);
792 
793       tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W1) -
794                       ixheaacd_mult32X32float((FLOAT32)x1i, W4));
795       x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, W4),
796                                    (FLOAT32)x1i, W1);
797       x1r = tmp;
798 
799       *ptr_xi = (x0r) - (x1r);
800       *(ptr_xi + 1) = (x0i) - (x1i);
801       ptr_z -= 32;
802       ptr_xi -= 32;
803 
804       *ptr_xi = (x0r) + (x1r);
805       *(ptr_xi + 1) = (x0i) + (x1i);
806       ptr_z += 2;
807       ptr_xi += 2;
808     }
809     twiddles = ptr_w;
810     for (j = 8; j != 0; j--) {
811       FLOAT32 W1 = *twiddles;
812       twiddles++;
813       FLOAT32 W4 = *twiddles;
814       twiddles++;
815       FLOAT32 tmp;
816 
817       x0r = *ptr_y;
818       x0i = *(ptr_y + 1);
819       ptr_y += 32;
820       ptr_xr += 32;
821 
822       x1r = *ptr_y;
823       x1i = *(ptr_y + 1);
824 
825       tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W4) +
826                       ixheaacd_mult32X32float((FLOAT32)x1i, W1));
827       x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x1r, W1) +
828                       ixheaacd_mult32X32float((FLOAT32)x1i, W4));
829       x1r = tmp;
830 
831       *ptr_xr = (x0r) - (x1r);
832       *(ptr_xr + 1) = (x0i) - (x1i);
833       ptr_y -= 32;
834       ptr_xr -= 32;
835 
836       *ptr_xr = (x0r) + (x1r);
837       *(ptr_xr + 1) = (x0i) + (x1i);
838       ptr_y += 2;
839       ptr_xr += 2;
840 
841       x0r = *ptr_z;
842       x0i = *(ptr_z + 1);
843       ptr_z += 32;
844       ptr_xi += 32;
845 
846       x1r = *ptr_z;
847       x1i = *(ptr_z + 1);
848 
849       tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W4) +
850                       ixheaacd_mult32X32float((FLOAT32)x1i, W1));
851       x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x1r, W1) +
852                       ixheaacd_mult32X32float((FLOAT32)x1i, W4));
853       x1r = tmp;
854 
855       *ptr_xi = (x0r) - (x1r);
856       *(ptr_xi + 1) = (x0i) - (x1i);
857       ptr_z -= 32;
858       ptr_xi -= 32;
859 
860       *ptr_xi = (x0r) + (x1r);
861       *(ptr_xi + 1) = (x0i) + (x1i);
862       ptr_z += 2;
863       ptr_xi += 2;
864     }
865   }
866 }
867 
ixheaacd_mps_complex_fft(FLOAT32 * xr,FLOAT32 * xi,WORD32 nlength)868 VOID ixheaacd_mps_complex_fft(FLOAT32 *xr, FLOAT32 *xi, WORD32 nlength) {
869   WORD32 i, j, k, n_stages, h2;
870   FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
871   WORD32 del, nodespacing, in_loop_cnt;
872   WORD32 dig_rev_shift;
873   WORD32 not_power_4;
874   FLOAT32 ptr_x[256];
875   FLOAT32 y[256];
876   WORD32 npoints = nlength;
877   FLOAT32 *ptr_y = y;
878   const FLOAT32 *ptr_w;
879   dig_rev_shift = ixheaacd_norm32(npoints) + 1 - 16;
880   n_stages = 30 - ixheaacd_norm32(npoints);
881   not_power_4 = n_stages & 1;
882 
883   n_stages = n_stages >> 1;
884 
885 
886   for (i = 0; i<nlength; i++)
887   {
888     ptr_x[2 * i] = xr[i];
889     ptr_x[2 * i + 1] = xi[i];
890   }
891 
892   ptr_w = ixheaacd_twiddle_table_fft;
893 
894   for (i = 0; i<npoints; i += 4)
895   {
896     FLOAT32 *inp = ptr_x;
897 
898     DIG_REV(i, dig_rev_shift, h2);
899     if (not_power_4)
900     {
901       h2 += 1;
902       h2 &= ~1;
903     }
904     inp += (h2);
905 
906     x0r = *inp;
907     x0i = *(inp + 1);
908     inp += (npoints >> 1);
909 
910     x1r = *inp;
911     x1i = *(inp + 1);
912     inp += (npoints >> 1);
913 
914     x2r = *inp;
915     x2i = *(inp + 1);
916     inp += (npoints >> 1);
917 
918     x3r = *inp;
919     x3i = *(inp + 1);
920 
921     x0r = x0r + x2r;
922     x0i = x0i + x2i;
923     x2r = x0r - (x2r * 2);
924     x2i = x0i - (x2i * 2);
925     x1r = x1r + x3r;
926     x1i = x1i + x3i;
927     x3r = x1r - (x3r * 2);
928     x3i = x1i - (x3i * 2);
929 
930     x0r = x0r + x1r;
931     x0i = x0i + x1i;
932     x1r = x0r - (x1r * 2);
933     x1i = x0i - (x1i * 2);
934     x2r = x2r + x3i;
935     x2i = x2i - x3r;
936     x3i = x2r - (x3i * 2);
937     x3r = x2i + (x3r * 2);
938 
939     *ptr_y++ = x0r;
940     *ptr_y++ = x0i;
941     *ptr_y++ = x2r;
942     *ptr_y++ = x2i;
943     *ptr_y++ = x1r;
944     *ptr_y++ = x1i;
945     *ptr_y++ = x3i;
946     *ptr_y++ = x3r;
947   }
948   ptr_y -= 2 * npoints;
949   del = 4;
950   nodespacing = 64;
951   in_loop_cnt = npoints >> 4;
952   for (i = n_stages - 1; i>0; i--)
953   {
954     const FLOAT32 *twiddles = ptr_w;
955     FLOAT32 *data = ptr_y;
956     FLOAT32 w1h, w2h, w3h, w1l, w2l, w3l;
957     WORD32 sec_loop_cnt;
958 
959     for (k = in_loop_cnt; k != 0; k--)
960     {
961       x0r = (*data);
962       x0i = (*(data + 1));
963       data += (del << 1);
964 
965       x1r = (*data);
966       x1i = (*(data + 1));
967       data += (del << 1);
968 
969       x2r = (*data);
970       x2i = (*(data + 1));
971       data += (del << 1);
972 
973       x3r = (*data);
974       x3i = (*(data + 1));
975       data -= 3 * (del << 1);
976 
977       x0r = x0r + x2r;
978       x0i = x0i + x2i;
979       x2r = x0r - (x2r * 2);
980       x2i = x0i - (x2i * 2);
981       x1r = x1r + x3r;
982       x1i = x1i + x3i;
983       x3r = x1r - (x3r * 2);
984       x3i = x1i - (x3i * 2);
985 
986       x0r = x0r + x1r;
987       x0i = x0i + x1i;
988       x1r = x0r - (x1r * 2);
989       x1i = x0i - (x1i * 2);
990       x2r = x2r + x3i;
991       x2i = x2i - x3r;
992       x3i = x2r - (x3i * 2);
993       x3r = x2i + (x3r * 2);
994 
995       *data = x0r;
996       *(data + 1) = x0i;
997       data += (del << 1);
998 
999       *data = x2r;
1000       *(data + 1) = x2i;
1001       data += (del << 1);
1002 
1003       *data = x1r;
1004       *(data + 1) = x1i;
1005       data += (del << 1);
1006 
1007       *data = x3i;
1008       *(data + 1) = x3r;
1009       data += (del << 1);
1010     }
1011     data = ptr_y + 2;
1012 
1013     sec_loop_cnt = (nodespacing * del);
1014     sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) \
1015             + (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) \
1016             - (sec_loop_cnt / 256);
1017     j = nodespacing;
1018 
1019     for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing)
1020     {
1021       w1h = *(twiddles + 2 * j);
1022       w1l = *(twiddles + 2 * j + 1);
1023       w2h = *(twiddles + 2 * (j << 1));
1024       w2l = *(twiddles + 2 * (j << 1) + 1);
1025       w3h = *(twiddles + 2 * j + 2 * (j << 1));
1026       w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
1027 
1028       for (k = in_loop_cnt; k != 0; k--)
1029       {
1030         FLOAT32 tmp;
1031         FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1032 
1033         data += (del << 1);
1034 
1035         x1r = *data;
1036         x1i = *(data + 1);
1037         data += (del << 1);
1038 
1039         x2r = *data;
1040         x2i = *(data + 1);
1041         data += (del << 1);
1042 
1043         x3r = *data;
1044         x3i = *(data + 1);
1045         data -= 3 * (del << 1);
1046 
1047         tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1048         x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1049         x1r = tmp;
1050 
1051         tmp = (ixheaacd_mult32X32float(x2r, w2l) - ixheaacd_mult32X32float(x2i, w2h));
1052         x2i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x2r, w2h), x2i, w2l);
1053         x2r = tmp;
1054 
1055         tmp = (ixheaacd_mult32X32float(x3r, w3l) - ixheaacd_mult32X32float(x3i, w3h));
1056         x3i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x3r, w3h), x3i, w3l);
1057         x3r = tmp;
1058 
1059         x0r = (*data);
1060         x0i = (*(data + 1));
1061 
1062         x0r = x0r + (x2r);
1063         x0i = x0i + (x2i);
1064         x2r = x0r - (x2r * 2);
1065         x2i = x0i - (x2i * 2);
1066         x1r = x1r + x3r;
1067         x1i = x1i + x3i;
1068         x3r = x1r - (x3r * 2);
1069         x3i = x1i - (x3i * 2);
1070 
1071         x0r = x0r + (x1r);
1072         x0i = x0i + (x1i);
1073         x1r = x0r - (x1r * 2);
1074         x1i = x0i - (x1i * 2);
1075         x2r = x2r + (x3i);
1076         x2i = x2i - (x3r);
1077         x3i = x2r - (x3i * 2);
1078         x3r = x2i + (x3r * 2);
1079 
1080         *data = x0r;
1081         *(data + 1) = x0i;
1082         data += (del << 1);
1083 
1084         *data = x2r;
1085         *(data + 1) = x2i;
1086         data += (del << 1);
1087 
1088         *data = x1r;
1089         *(data + 1) = x1i;
1090         data += (del << 1);
1091 
1092         *data = x3i;
1093         *(data + 1) = x3r;
1094         data += (del << 1);
1095       }
1096       data -= 2 * npoints;
1097       data += 2;
1098     }
1099     for (; j <= (nodespacing * del) >> 1; j += nodespacing)
1100     {
1101       w1h = *(twiddles + 2 * j);
1102       w2h = *(twiddles + 2 * (j << 1));
1103       w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1104       w1l = *(twiddles + 2 * j + 1);
1105       w2l = *(twiddles + 2 * (j << 1) + 1);
1106       w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1107 
1108       for (k = in_loop_cnt; k != 0; k--)
1109       {
1110         FLOAT32 tmp;
1111         FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1112 
1113         data += (del << 1);
1114 
1115         x1r = *data;
1116         x1i = *(data + 1);
1117         data += (del << 1);
1118 
1119         x2r = *data;
1120         x2i = *(data + 1);
1121         data += (del << 1);
1122 
1123         x3r = *data;
1124         x3i = *(data + 1);
1125         data -= 3 * (del << 1);
1126 
1127         tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1128         x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1129         x1r = tmp;
1130 
1131         tmp = (ixheaacd_mult32X32float(x2r, w2l) - ixheaacd_mult32X32float(x2i, w2h));
1132         x2i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x2r, w2h), x2i, w2l);
1133         x2r = tmp;
1134 
1135         tmp = (ixheaacd_mult32X32float(x3r, w3h) + ixheaacd_mult32X32float(x3i, w3l));
1136         x3i = -ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h);
1137         x3r = tmp;
1138 
1139         x0r = (*data);
1140         x0i = (*(data + 1));
1141 
1142         x0r = x0r + (x2r);
1143         x0i = x0i + (x2i);
1144         x2r = x0r - (x2r * 2);
1145         x2i = x0i - (x2i * 2);
1146         x1r = x1r + x3r;
1147         x1i = x1i + x3i;
1148         x3r = x1r - (x3r * 2);
1149         x3i = x1i - (x3i * 2);
1150 
1151         x0r = x0r + (x1r);
1152         x0i = x0i + (x1i);
1153         x1r = x0r - (x1r * 2);
1154         x1i = x0i - (x1i * 2);
1155         x2r = x2r + (x3i);
1156         x2i = x2i - (x3r);
1157         x3i = x2r - (x3i * 2);
1158         x3r = x2i + (x3r * 2);
1159 
1160         *data = x0r;
1161         *(data + 1) = x0i;
1162         data += (del << 1);
1163 
1164         *data = x2r;
1165         *(data + 1) = x2i;
1166         data += (del << 1);
1167 
1168         *data = x1r;
1169         *(data + 1) = x1i;
1170         data += (del << 1);
1171 
1172         *data = x3i;
1173         *(data + 1) = x3r;
1174         data += (del << 1);
1175       }
1176       data -= 2 * npoints;
1177       data += 2;
1178     }
1179     for (; j <= sec_loop_cnt * 2; j += nodespacing)
1180     {
1181       w1h = *(twiddles + 2 * j);
1182       w2h = *(twiddles + 2 * (j << 1) - 512);
1183       w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1184       w1l = *(twiddles + 2 * j + 1);
1185       w2l = *(twiddles + 2 * (j << 1) - 511);
1186       w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1187 
1188       for (k = in_loop_cnt; k != 0; k--)
1189       {
1190         FLOAT32 tmp;
1191         FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1192 
1193         data += (del << 1);
1194 
1195         x1r = *data;
1196         x1i = *(data + 1);
1197         data += (del << 1);
1198 
1199         x2r = *data;
1200         x2i = *(data + 1);
1201         data += (del << 1);
1202 
1203         x3r = *data;
1204         x3i = *(data + 1);
1205         data -= 3 * (del << 1);
1206 
1207         tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1208         x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1209         x1r = tmp;
1210 
1211         tmp = (ixheaacd_mult32X32float(x2r, w2h) + ixheaacd_mult32X32float(x2i, w2l));
1212         x2i = -ixheaacd_mult32X32float(x2r, w2l) + ixheaacd_mult32X32float(x2i, w2h);
1213         x2r = tmp;
1214 
1215         tmp = (ixheaacd_mult32X32float(x3r, w3h) + ixheaacd_mult32X32float(x3i, w3l));
1216         x3i = -ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h);
1217         x3r = tmp;
1218 
1219         x0r = (*data);
1220         x0i = (*(data + 1));
1221 
1222         x0r = x0r + (x2r);
1223         x0i = x0i + (x2i);
1224         x2r = x0r - (x2r * 2);
1225         x2i = x0i - (x2i * 2);
1226         x1r = x1r + x3r;
1227         x1i = x1i + x3i;
1228         x3r = x1r - (x3r * 2);
1229         x3i = x1i - (x3i * 2);
1230 
1231         x0r = x0r + (x1r);
1232         x0i = x0i + (x1i);
1233         x1r = x0r - (x1r * 2);
1234         x1i = x0i - (x1i * 2);
1235         x2r = x2r + (x3i);
1236         x2i = x2i - (x3r);
1237         x3i = x2r - (x3i * 2);
1238         x3r = x2i + (x3r * 2);
1239 
1240         *data = x0r;
1241         *(data + 1) = x0i;
1242         data += (del << 1);
1243 
1244         *data = x2r;
1245         *(data + 1) = x2i;
1246         data += (del << 1);
1247 
1248         *data = x1r;
1249         *(data + 1) = x1i;
1250         data += (del << 1);
1251 
1252         *data = x3i;
1253         *(data + 1) = x3r;
1254         data += (del << 1);
1255       }
1256       data -= 2 * npoints;
1257       data += 2;
1258     }
1259     for (; j<nodespacing * del; j += nodespacing)
1260     {
1261       w1h = *(twiddles + 2 * j);
1262       w2h = *(twiddles + 2 * (j << 1) - 512);
1263       w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
1264       w1l = *(twiddles + 2 * j + 1);
1265       w2l = *(twiddles + 2 * (j << 1) - 511);
1266       w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
1267 
1268       for (k = in_loop_cnt; k != 0; k--)
1269       {
1270         FLOAT32 tmp;
1271         FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1272 
1273         data += (del << 1);
1274 
1275         x1r = *data;
1276         x1i = *(data + 1);
1277         data += (del << 1);
1278 
1279         x2r = *data;
1280         x2i = *(data + 1);
1281         data += (del << 1);
1282 
1283         x3r = *data;
1284         x3i = *(data + 1);
1285         data -= 3 * (del << 1);
1286 
1287         tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1288         x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1289         x1r = tmp;
1290 
1291         tmp = (ixheaacd_mult32X32float(x2r, w2h) + ixheaacd_mult32X32float(x2i, w2l));
1292         x2i = -ixheaacd_mult32X32float(x2r, w2l) + ixheaacd_mult32X32float(x2i, w2h);
1293         x2r = tmp;
1294 
1295         tmp = (-ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h));
1296         x3i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x3r, w3h), x3i, w3l);
1297         x3r = tmp;
1298 
1299         x0r = (*data);
1300         x0i = (*(data + 1));
1301 
1302         x0r = x0r + (x2r);
1303         x0i = x0i + (x2i);
1304         x2r = x0r - (x2r * 2);
1305         x2i = x0i - (x2i * 2);
1306         x1r = x1r + x3r;
1307         x1i = x1i - x3i;
1308         x3r = x1r - (x3r * 2);
1309         x3i = x1i + (x3i * 2);
1310 
1311         x0r = x0r + (x1r);
1312         x0i = x0i + (x1i);
1313         x1r = x0r - (x1r * 2);
1314         x1i = x0i - (x1i * 2);
1315         x2r = x2r + (x3i);
1316         x2i = x2i - (x3r);
1317         x3i = x2r - (x3i * 2);
1318         x3r = x2i + (x3r * 2);
1319 
1320         *data = x0r;
1321         *(data + 1) = x0i;
1322         data += (del << 1);
1323 
1324         *data = x2r;
1325         *(data + 1) = x2i;
1326         data += (del << 1);
1327 
1328         *data = x1r;
1329         *(data + 1) = x1i;
1330         data += (del << 1);
1331 
1332         *data = x3i;
1333         *(data + 1) = x3r;
1334         data += (del << 1);
1335       }
1336       data -= 2 * npoints;
1337       data += 2;
1338     }
1339     nodespacing >>= 2;
1340     del <<= 2;
1341     in_loop_cnt >>= 2;
1342   }
1343   if (not_power_4)
1344   {
1345     const FLOAT32 *twiddles = ptr_w;
1346     nodespacing <<= 1;
1347 
1348     for (j = del / 2; j != 0; j--)
1349     {
1350       FLOAT32 w1h = *twiddles;
1351       FLOAT32 w1l = *(twiddles + 1);
1352       FLOAT32 tmp;
1353       twiddles += nodespacing * 2;
1354 
1355       x0r = *ptr_y;
1356       x0i = *(ptr_y + 1);
1357       ptr_y += (del << 1);
1358 
1359       x1r = *ptr_y;
1360       x1i = *(ptr_y + 1);
1361 
1362       tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1363       x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1364       x1r = tmp;
1365 
1366       *ptr_y = (x0r) - (x1r);
1367       *(ptr_y + 1) = (x0i) - (x1i);
1368       ptr_y -= (del << 1);
1369 
1370       *ptr_y = (x0r) + (x1r);
1371       *(ptr_y + 1) = (x0i) + (x1i);
1372       ptr_y += 2;
1373     }
1374     twiddles = ptr_w;
1375     for (j = del / 2; j != 0; j--)
1376     {
1377       FLOAT32 w1h = *twiddles;
1378       FLOAT32 w1l = *(twiddles + 1);
1379       FLOAT32 tmp;
1380       twiddles += nodespacing * 2;
1381 
1382       x0r = *ptr_y;
1383       x0i = *(ptr_y + 1);
1384       ptr_y += (del << 1);
1385 
1386       x1r = *ptr_y;
1387       x1i = *(ptr_y + 1);
1388 
1389       tmp = (ixheaacd_mult32X32float(x1r, w1h) + ixheaacd_mult32X32float(x1i, w1l));
1390       x1i = -ixheaacd_mult32X32float(x1r, w1l) + ixheaacd_mult32X32float(x1i, w1h);
1391       x1r = tmp;
1392 
1393       *ptr_y = (x0r) - (x1r);
1394       *(ptr_y + 1) = (x0i) - (x1i);
1395       ptr_y -= (del << 1);
1396 
1397       *ptr_y = (x0r) + (x1r);
1398       *(ptr_y + 1) = (x0i) + (x1i);
1399       ptr_y += 2;
1400     }
1401   }
1402 
1403   for (i = 0; i<nlength; i++)
1404   {
1405     xr[i] = y[2 * i];
1406     xi[i] = y[2 * i + 1];
1407   }
1408 
1409   return;
1410 }
1411 
1412 VOID ixheaacd_complex_fft_p2_dec(WORD32 *xr, WORD32 *xi, WORD32 nlength,
1413                                  WORD32 fft_mode, WORD32 *preshift) {
1414   WORD32 i, j, k, n_stages;
1415   WORD32 h2, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1416   WORD32 del, nodespacing, in_loop_cnt;
1417   WORD32 not_power_4;
1418   WORD32 npts, shift;
1419   WORD32 dig_rev_shift;
1420   WORD32 ptr_x[1024];
1421   WORD32 y[1024];
1422   WORD32 npoints = nlength;
1423   WORD32 n = 0;
1424   WORD32 *ptr_y = y;
1425   const WORD32 *ptr_w;
1426   dig_rev_shift = ixheaacd_norm32(npoints) + 1 - 16;
1427   n_stages = 30 - ixheaacd_norm32(npoints);
1428   not_power_4 = n_stages & 1;
1429 
1430   n_stages = n_stages >> 1;
1431 
1432   npts = npoints;
1433   while (npts >> 1) {
1434     n++;
1435     npts = npts >> 1;
1436   }
1437 
1438   if (n % 2 == 0)
1439     shift = ((n + 4)) / 2;
1440   else
1441     shift = ((n + 3) / 2);
1442 
1443   for (i = 0; i < nlength; i++) {
1444     ptr_x[2 * i] = (xr[i] / (1 << (shift)));
1445     ptr_x[2 * i + 1] = (xi[i] / (1 << (shift)));
1446   }
1447 
1448   if (fft_mode == -1) {
1449     ptr_w = ixheaacd_twiddle_table_fft_32x32;
1450 
1451     for (i = 0; i < npoints; i += 4) {
1452       WORD32 *inp = ptr_x;
1453 
1454       DIG_REV(i, dig_rev_shift, h2);
1455       if (not_power_4) {
1456         h2 += 1;
1457         h2 &= ~1;
1458       }
1459       inp += (h2);
1460 
1461       x0r = *inp;
1462       x0i = *(inp + 1);
1463       inp += (npoints >> 1);
1464 
1465       x1r = *inp;
1466       x1i = *(inp + 1);
1467       inp += (npoints >> 1);
1468 
1469       x2r = *inp;
1470       x2i = *(inp + 1);
1471       inp += (npoints >> 1);
1472 
1473       x3r = *inp;
1474       x3i = *(inp + 1);
1475 
1476       x0r = ixheaacd_add32_sat(x0r, x2r);
1477       x0i = ixheaacd_add32_sat(x0i, x2i);
1478       x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1479       x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1480       x1r = ixheaacd_add32_sat(x1r, x3r);
1481       x1i = ixheaacd_add32_sat(x1i, x3i);
1482       x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
1483       x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
1484 
1485       x0r = ixheaacd_add32_sat(x0r, x1r);
1486       x0i = ixheaacd_add32_sat(x0i, x1i);
1487       x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1488       x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1489       x2r = ixheaacd_add32_sat(x2r, x3i);
1490       x2i = ixheaacd_sub32_sat(x2i, x3r);
1491       x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1492       x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1493 
1494       *ptr_y++ = x0r;
1495       *ptr_y++ = x0i;
1496       *ptr_y++ = x2r;
1497       *ptr_y++ = x2i;
1498       *ptr_y++ = x1r;
1499       *ptr_y++ = x1i;
1500       *ptr_y++ = x3i;
1501       *ptr_y++ = x3r;
1502     }
1503     ptr_y -= 2 * npoints;
1504     del = 4;
1505     nodespacing = 64;
1506     in_loop_cnt = npoints >> 4;
1507     for (i = n_stages - 1; i > 0; i--) {
1508       const WORD32 *twiddles = ptr_w;
1509       WORD32 *data = ptr_y;
1510       WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
1511       WORD32 sec_loop_cnt;
1512 
1513       for (k = in_loop_cnt; k != 0; k--) {
1514         x0r = (*data);
1515         x0i = (*(data + 1));
1516         data += (del << 1);
1517 
1518         x1r = (*data);
1519         x1i = (*(data + 1));
1520         data += (del << 1);
1521 
1522         x2r = (*data);
1523         x2i = (*(data + 1));
1524         data += (del << 1);
1525 
1526         x3r = (*data);
1527         x3i = (*(data + 1));
1528         data -= 3 * (del << 1);
1529 
1530         x0r = ixheaacd_add32_sat(x0r, x2r);
1531         x0i = ixheaacd_add32_sat(x0i, x2i);
1532         x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1533         x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1534         x1r = ixheaacd_add32_sat(x1r, x3r);
1535         x1i = ixheaacd_add32_sat(x1i, x3i);
1536         x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
1537         x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
1538 
1539         x0r = ixheaacd_add32_sat(x0r, x1r);
1540         x0i = ixheaacd_add32_sat(x0i, x1i);
1541         x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1542         x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1543         x2r = ixheaacd_add32_sat(x2r, x3i);
1544         x2i = ixheaacd_sub32_sat(x2i, x3r);
1545         x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1546         x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1547 
1548         *data = x0r;
1549         *(data + 1) = x0i;
1550         data += (del << 1);
1551 
1552         *data = x2r;
1553         *(data + 1) = x2i;
1554         data += (del << 1);
1555 
1556         *data = x1r;
1557         *(data + 1) = x1i;
1558         data += (del << 1);
1559 
1560         *data = x3i;
1561         *(data + 1) = x3r;
1562         data += (del << 1);
1563       }
1564       data = ptr_y + 2;
1565 
1566       sec_loop_cnt = (nodespacing * del);
1567       sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
1568                      (sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
1569                      (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
1570                      (sec_loop_cnt / 256);
1571       j = nodespacing;
1572 
1573       for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
1574         w1h = *(twiddles + 2 * j);
1575         w1l = *(twiddles + 2 * j + 1);
1576         w2h = *(twiddles + 2 * (j << 1));
1577         w2l = *(twiddles + 2 * (j << 1) + 1);
1578         w3h = *(twiddles + 2 * j + 2 * (j << 1));
1579         w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
1580 
1581         for (k = in_loop_cnt; k != 0; k--) {
1582           WORD32 tmp;
1583           WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1584 
1585           data += (del << 1);
1586 
1587           x1r = *data;
1588           x1i = *(data + 1);
1589           data += (del << 1);
1590 
1591           x2r = *data;
1592           x2i = *(data + 1);
1593           data += (del << 1);
1594 
1595           x3r = *data;
1596           x3i = *(data + 1);
1597           data -= 3 * (del << 1);
1598 
1599           tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1600                                    ixheaacd_mult32_sat(x1i, w1h));
1601           x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1602           x1r = tmp;
1603 
1604           tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2l),
1605                                    ixheaacd_mult32_sat(x2i, w2h));
1606           x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
1607           x2r = tmp;
1608 
1609           tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3r, w3l),
1610                                    ixheaacd_mult32_sat(x3i, w3h));
1611           x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
1612           x3r = tmp;
1613 
1614           x0r = (*data);
1615           x0i = (*(data + 1));
1616 
1617           x0r = ixheaacd_add32_sat(x0r, x2r);
1618           x0i = ixheaacd_add32_sat(x0i, x2i);
1619           x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1620           x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1621           x1r = ixheaacd_add32_sat(x1r, x3r);
1622           x1i = ixheaacd_add32_sat(x1i, x3i);
1623           x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
1624           x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
1625 
1626           x0r = ixheaacd_add32_sat(x0r, x1r);
1627           x0i = ixheaacd_add32_sat(x0i, x1i);
1628           x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1629           x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1630           x2r = ixheaacd_add32_sat(x2r, x3i);
1631           x2i = ixheaacd_sub32_sat(x2i, x3r);
1632           x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1633           x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1634 
1635           *data = x0r;
1636           *(data + 1) = x0i;
1637           data += (del << 1);
1638 
1639           *data = x2r;
1640           *(data + 1) = x2i;
1641           data += (del << 1);
1642 
1643           *data = x1r;
1644           *(data + 1) = x1i;
1645           data += (del << 1);
1646 
1647           *data = x3i;
1648           *(data + 1) = x3r;
1649           data += (del << 1);
1650         }
1651         data -= 2 * npoints;
1652         data += 2;
1653       }
1654       for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
1655         w1h = *(twiddles + 2 * j);
1656         w2h = *(twiddles + 2 * (j << 1));
1657         w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1658         w1l = *(twiddles + 2 * j + 1);
1659         w2l = *(twiddles + 2 * (j << 1) + 1);
1660         w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1661 
1662         for (k = in_loop_cnt; k != 0; k--) {
1663           WORD32 tmp;
1664           WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1665           data += (del << 1);
1666 
1667           x1r = *data;
1668           x1i = *(data + 1);
1669           data += (del << 1);
1670 
1671           x2r = *data;
1672           x2i = *(data + 1);
1673           data += (del << 1);
1674 
1675           x3r = *data;
1676           x3i = *(data + 1);
1677           data -= 3 * (del << 1);
1678 
1679           tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1680                                    ixheaacd_mult32_sat(x1i, w1h));
1681           x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1682           x1r = tmp;
1683 
1684           tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2l),
1685                                    ixheaacd_mult32_sat(x2i, w2h));
1686           x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
1687           x2r = tmp;
1688 
1689           tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3h),
1690                                    ixheaacd_mult32_sat(x3i, w3l));
1691           x3i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
1692                                    ixheaacd_mult32_sat(x3r, w3l));
1693           x3r = tmp;
1694 
1695           x0r = (*data);
1696           x0i = (*(data + 1));
1697 
1698           x0r = ixheaacd_add32_sat(x0r, x2r);
1699           x0i = ixheaacd_add32_sat(x0i, x2i);
1700           x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1701           x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1702           x1r = ixheaacd_add32_sat(x1r, x3r);
1703           x1i = ixheaacd_add32_sat(x1i, x3i);
1704           x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
1705           x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
1706 
1707           x0r = ixheaacd_add32_sat(x0r, x1r);
1708           x0i = ixheaacd_add32_sat(x0i, x1i);
1709           x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1710           x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1711           x2r = ixheaacd_add32_sat(x2r, x3i);
1712           x2i = ixheaacd_sub32_sat(x2i, x3r);
1713           x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1714           x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1715 
1716           *data = x0r;
1717           *(data + 1) = x0i;
1718           data += (del << 1);
1719 
1720           *data = x2r;
1721           *(data + 1) = x2i;
1722           data += (del << 1);
1723 
1724           *data = x1r;
1725           *(data + 1) = x1i;
1726           data += (del << 1);
1727 
1728           *data = x3i;
1729           *(data + 1) = x3r;
1730           data += (del << 1);
1731         }
1732         data -= 2 * npoints;
1733         data += 2;
1734       }
1735       for (; j <= sec_loop_cnt * 2; j += nodespacing) {
1736         w1h = *(twiddles + 2 * j);
1737         w2h = *(twiddles + 2 * (j << 1) - 512);
1738         w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1739         w1l = *(twiddles + 2 * j + 1);
1740         w2l = *(twiddles + 2 * (j << 1) - 511);
1741         w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1742 
1743         for (k = in_loop_cnt; k != 0; k--) {
1744           WORD32 tmp;
1745           WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1746 
1747           data += (del << 1);
1748 
1749           x1r = *data;
1750           x1i = *(data + 1);
1751           data += (del << 1);
1752 
1753           x2r = *data;
1754           x2i = *(data + 1);
1755           data += (del << 1);
1756 
1757           x3r = *data;
1758           x3i = *(data + 1);
1759           data -= 3 * (del << 1);
1760 
1761           tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1762                                    ixheaacd_mult32_sat(x1i, w1h));
1763           x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1764           x1r = tmp;
1765 
1766           tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2h),
1767                                    ixheaacd_mult32_sat(x2i, w2l));
1768           x2i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2i, w2h),
1769                                    ixheaacd_mult32_sat(x2r, w2l));
1770           x2r = tmp;
1771 
1772           tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3h),
1773                                    ixheaacd_mult32_sat(x3i, w3l));
1774           x3i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
1775                                    ixheaacd_mult32_sat(x3r, w3l));
1776           x3r = tmp;
1777 
1778           x0r = (*data);
1779           x0i = (*(data + 1));
1780 
1781           x0r = ixheaacd_add32_sat(x0r, x2r);
1782           x0i = ixheaacd_add32_sat(x0i, x2i);
1783           x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1784           x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1785           x1r = ixheaacd_add32_sat(x1r, x3r);
1786           x1i = ixheaacd_add32_sat(x1i, x3i);
1787           x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
1788           x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
1789 
1790           x0r = ixheaacd_add32_sat(x0r, x1r);
1791           x0i = ixheaacd_add32_sat(x0i, x1i);
1792           x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1793           x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1794           x2r = ixheaacd_add32_sat(x2r, x3i);
1795           x2i = ixheaacd_sub32_sat(x2i, x3r);
1796           x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1797           x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1798 
1799           *data = x0r;
1800           *(data + 1) = x0i;
1801           data += (del << 1);
1802 
1803           *data = x2r;
1804           *(data + 1) = x2i;
1805           data += (del << 1);
1806 
1807           *data = x1r;
1808           *(data + 1) = x1i;
1809           data += (del << 1);
1810 
1811           *data = x3i;
1812           *(data + 1) = x3r;
1813           data += (del << 1);
1814         }
1815         data -= 2 * npoints;
1816         data += 2;
1817       }
1818       for (; j < nodespacing * del; j += nodespacing) {
1819         w1h = *(twiddles + 2 * j);
1820         w2h = *(twiddles + 2 * (j << 1) - 512);
1821         w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
1822         w1l = *(twiddles + 2 * j + 1);
1823         w2l = *(twiddles + 2 * (j << 1) - 511);
1824         w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
1825 
1826         for (k = in_loop_cnt; k != 0; k--) {
1827           WORD32 tmp;
1828           WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1829 
1830           data += (del << 1);
1831 
1832           x1r = *data;
1833           x1i = *(data + 1);
1834           data += (del << 1);
1835 
1836           x2r = *data;
1837           x2i = *(data + 1);
1838           data += (del << 1);
1839 
1840           x3r = *data;
1841           x3i = *(data + 1);
1842           data -= 3 * (del << 1);
1843 
1844           tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1845                                    ixheaacd_mult32_sat(x1i, w1h));
1846           x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1847           x1r = tmp;
1848 
1849           tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2h),
1850                                    ixheaacd_mult32_sat(x2i, w2l));
1851           x2i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2i, w2h),
1852                                    ixheaacd_mult32_sat(x2r, w2l));
1853           x2r = tmp;
1854 
1855           tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
1856                                    ixheaacd_mult32_sat(x3r, w3l));
1857           x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
1858           x3r = tmp;
1859 
1860           x0r = (*data);
1861           x0i = (*(data + 1));
1862 
1863           x0r = ixheaacd_add32_sat(x0r, x2r);
1864           x0i = ixheaacd_add32_sat(x0i, x2i);
1865           x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1866           x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1867           x1r = ixheaacd_add32_sat(x1r, x3r);
1868           x1i = ixheaacd_sub32_sat(x1i, x3i);
1869           x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
1870           x3i = ixheaacd_add32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
1871 
1872           x0r = ixheaacd_add32_sat(x0r, x1r);
1873           x0i = ixheaacd_add32_sat(x0i, x1i);
1874           x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1875           x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1876           x2r = ixheaacd_add32_sat(x2r, x3i);
1877           x2i = ixheaacd_sub32_sat(x2i, x3r);
1878           x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1879           x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1880 
1881           *data = x0r;
1882           *(data + 1) = x0i;
1883           data += (del << 1);
1884 
1885           *data = x2r;
1886           *(data + 1) = x2i;
1887           data += (del << 1);
1888 
1889           *data = x1r;
1890           *(data + 1) = x1i;
1891           data += (del << 1);
1892 
1893           *data = x3i;
1894           *(data + 1) = x3r;
1895           data += (del << 1);
1896         }
1897         data -= 2 * npoints;
1898         data += 2;
1899       }
1900       nodespacing >>= 2;
1901       del <<= 2;
1902       in_loop_cnt >>= 2;
1903     }
1904     if (not_power_4) {
1905       const WORD32 *twiddles = ptr_w;
1906       nodespacing <<= 1;
1907       shift += 1;
1908 
1909       for (j = del / 2; j != 0; j--) {
1910         WORD32 w1h = *twiddles;
1911         WORD32 w1l = *(twiddles + 1);
1912         WORD32 tmp;
1913         twiddles += nodespacing * 2;
1914 
1915         x0r = *ptr_y;
1916         x0i = *(ptr_y + 1);
1917         ptr_y += (del << 1);
1918 
1919         x1r = *ptr_y;
1920         x1i = *(ptr_y + 1);
1921 
1922         tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1923                                  ixheaacd_mult32_sat(x1i, w1h));
1924         x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1925         x1r = tmp;
1926 
1927         *ptr_y = (x0r) / 2 - (x1r) / 2;
1928         *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
1929         ptr_y -= (del << 1);
1930 
1931         *ptr_y = (x0r) / 2 + (x1r) / 2;
1932         *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
1933         ptr_y += 2;
1934       }
1935       twiddles = ptr_w;
1936       for (j = del / 2; j != 0; j--) {
1937         WORD32 w1h = *twiddles;
1938         WORD32 w1l = *(twiddles + 1);
1939         WORD32 tmp;
1940         twiddles += nodespacing * 2;
1941 
1942         x0r = *ptr_y;
1943         x0i = *(ptr_y + 1);
1944         ptr_y += (del << 1);
1945 
1946         x1r = *ptr_y;
1947         x1i = *(ptr_y + 1);
1948 
1949         tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1h),
1950                                  ixheaacd_mult32_sat(x1i, w1l));
1951         x1i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1i, w1h),
1952                                  ixheaacd_mult32_sat(x1r, w1l));
1953         x1r = tmp;
1954 
1955         *ptr_y = (x0r) / 2 - (x1r) / 2;
1956         *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
1957         ptr_y -= (del << 1);
1958 
1959         *ptr_y = (x0r) / 2 + (x1r) / 2;
1960         *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
1961         ptr_y += 2;
1962       }
1963     }
1964   }
1965 
1966   else {
1967     ptr_w = ixheaacd_twiddle_table_fft_32x32;
1968 
1969     for (i = 0; i < npoints; i += 4) {
1970       WORD32 *inp = ptr_x;
1971 
1972       DIG_REV(i, dig_rev_shift, h2);
1973       if (not_power_4) {
1974         h2 += 1;
1975         h2 &= ~1;
1976       }
1977       inp += (h2);
1978 
1979       x0r = *inp;
1980       x0i = *(inp + 1);
1981       inp += (npoints >> 1);
1982 
1983       x1r = *inp;
1984       x1i = *(inp + 1);
1985       inp += (npoints >> 1);
1986 
1987       x2r = *inp;
1988       x2i = *(inp + 1);
1989       inp += (npoints >> 1);
1990 
1991       x3r = *inp;
1992       x3i = *(inp + 1);
1993 
1994       x0r = ixheaacd_add32_sat(x0r, x2r);
1995       x0i = ixheaacd_add32_sat(x0i, x2i);
1996       x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1997       x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1998       x1r = ixheaacd_add32_sat(x1r, x3r);
1999       x1i = ixheaacd_add32_sat(x1i, x3i);
2000       x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
2001       x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
2002 
2003       x0r = ixheaacd_add32_sat(x0r, x1r);
2004       x0i = ixheaacd_add32_sat(x0i, x1i);
2005       x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
2006       x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
2007       x2r = ixheaacd_sub32_sat(x2r, x3i);
2008       x2i = ixheaacd_add32_sat(x2i, x3r);
2009       x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
2010       x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
2011 
2012       *ptr_y++ = x0r;
2013       *ptr_y++ = x0i;
2014       *ptr_y++ = x2r;
2015       *ptr_y++ = x2i;
2016       *ptr_y++ = x1r;
2017       *ptr_y++ = x1i;
2018       *ptr_y++ = x3i;
2019       *ptr_y++ = x3r;
2020     }
2021     ptr_y -= 2 * npoints;
2022     del = 4;
2023     nodespacing = 64;
2024     in_loop_cnt = npoints >> 4;
2025     for (i = n_stages - 1; i > 0; i--) {
2026       const WORD32 *twiddles = ptr_w;
2027       WORD32 *data = ptr_y;
2028       WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
2029       WORD32 sec_loop_cnt;
2030 
2031       for (k = in_loop_cnt; k != 0; k--) {
2032         x0r = (*data);
2033         x0i = (*(data + 1));
2034         data += (del << 1);
2035 
2036         x1r = (*data);
2037         x1i = (*(data + 1));
2038         data += (del << 1);
2039 
2040         x2r = (*data);
2041         x2i = (*(data + 1));
2042         data += (del << 1);
2043 
2044         x3r = (*data);
2045         x3i = (*(data + 1));
2046         data -= 3 * (del << 1);
2047 
2048         x0r = ixheaacd_add32_sat(x0r, x2r);
2049         x0i = ixheaacd_add32_sat(x0i, x2i);
2050         x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
2051         x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
2052         x1r = ixheaacd_add32_sat(x1r, x3r);
2053         x1i = ixheaacd_add32_sat(x1i, x3i);
2054         x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
2055         x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
2056 
2057         x0r = ixheaacd_add32_sat(x0r, x1r);
2058         x0i = ixheaacd_add32_sat(x0i, x1i);
2059         x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
2060         x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
2061         x2r = ixheaacd_sub32_sat(x2r, x3i);
2062         x2i = ixheaacd_add32_sat(x2i, x3r);
2063         x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
2064         x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
2065 
2066         *data = x0r;
2067         *(data + 1) = x0i;
2068         data += (del << 1);
2069 
2070         *data = x2r;
2071         *(data + 1) = x2i;
2072         data += (del << 1);
2073 
2074         *data = x1r;
2075         *(data + 1) = x1i;
2076         data += (del << 1);
2077 
2078         *data = x3i;
2079         *(data + 1) = x3r;
2080         data += (del << 1);
2081       }
2082       data = ptr_y + 2;
2083 
2084       sec_loop_cnt = (nodespacing * del);
2085       sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
2086                      (sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
2087                      (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
2088                      (sec_loop_cnt / 256);
2089       j = nodespacing;
2090 
2091       for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
2092         w1h = *(twiddles + 2 * j);
2093         w2h = *(twiddles + 2 * (j << 1));
2094         w3h = *(twiddles + 2 * j + 2 * (j << 1));
2095         w1l = *(twiddles + 2 * j + 1);
2096         w2l = *(twiddles + 2 * (j << 1) + 1);
2097         w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
2098 
2099         for (k = in_loop_cnt; k != 0; k--) {
2100           WORD32 tmp;
2101           WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2102 
2103           data += (del << 1);
2104 
2105           x1r = *data;
2106           x1i = *(data + 1);
2107           data += (del << 1);
2108 
2109           x2r = *data;
2110           x2i = *(data + 1);
2111           data += (del << 1);
2112 
2113           x3r = *data;
2114           x3i = *(data + 1);
2115           data -= 3 * (del << 1);
2116 
2117           tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2118                                    ixheaacd_mult32_sat(x1i, w1h));
2119           x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2120           x1r = tmp;
2121 
2122           tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2123                                    ixheaacd_mult32_sat(x2i, w2h));
2124           x2i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
2125           x2r = tmp;
2126 
2127           tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2128                                    ixheaacd_mult32_sat(x3i, w3h));
2129           x3i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
2130           x3r = tmp;
2131 
2132           x0r = (*data);
2133           x0i = (*(data + 1));
2134 
2135           x0r = ixheaacd_add32_sat(x0r, x2r);
2136           x0i = ixheaacd_add32_sat(x0i, x2i);
2137           x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
2138           x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
2139           x1r = ixheaacd_add32_sat(x1r, x3r);
2140           x1i = ixheaacd_add32_sat(x1i, x3i);
2141           x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
2142           x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
2143 
2144           x0r = ixheaacd_add32_sat(x0r, x1r);
2145           x0i = ixheaacd_add32_sat(x0i, x1i);
2146           x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
2147           x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
2148           x2r = ixheaacd_sub32_sat(x2r, x3i);
2149           x2i = ixheaacd_add32_sat(x2i, x3r);
2150           x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
2151           x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
2152 
2153           *data = x0r;
2154           *(data + 1) = x0i;
2155           data += (del << 1);
2156 
2157           *data = x2r;
2158           *(data + 1) = x2i;
2159           data += (del << 1);
2160 
2161           *data = x1r;
2162           *(data + 1) = x1i;
2163           data += (del << 1);
2164 
2165           *data = x3i;
2166           *(data + 1) = x3r;
2167           data += (del << 1);
2168         }
2169         data -= 2 * npoints;
2170         data += 2;
2171       }
2172       for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
2173         w1h = *(twiddles + 2 * j);
2174         w2h = *(twiddles + 2 * (j << 1));
2175         w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
2176         w1l = *(twiddles + 2 * j + 1);
2177         w2l = *(twiddles + 2 * (j << 1) + 1);
2178         w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
2179 
2180         for (k = in_loop_cnt; k != 0; k--) {
2181           WORD32 tmp;
2182           WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2183 
2184           data += (del << 1);
2185 
2186           x1r = *data;
2187           x1i = *(data + 1);
2188           data += (del << 1);
2189 
2190           x2r = *data;
2191           x2i = *(data + 1);
2192           data += (del << 1);
2193 
2194           x3r = *data;
2195           x3i = *(data + 1);
2196           data -= 3 * (del << 1);
2197 
2198           tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2199                                    ixheaacd_mult32_sat(x1i, w1h));
2200           x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2201           x1r = tmp;
2202 
2203           tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2204                                    ixheaacd_mult32_sat(x2i, w2h));
2205           x2i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
2206           x2r = tmp;
2207 
2208           tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3r, w3h),
2209                                    ixheaacd_mult32_sat(x3i, w3l));
2210           x3i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2211                                    ixheaacd_mult32_sat(x3i, w3h));
2212           x3r = tmp;
2213 
2214           x0r = (*data);
2215           x0i = (*(data + 1));
2216 
2217           x0r = ixheaacd_add32_sat(x0r, x2r);
2218           x0i = ixheaacd_add32_sat(x0i, x2i);
2219           x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
2220           x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
2221           x1r = ixheaacd_add32_sat(x1r, x3r);
2222           x1i = ixheaacd_add32_sat(x1i, x3i);
2223           x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
2224           x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
2225 
2226           x0r = ixheaacd_add32_sat(x0r, x1r);
2227           x0i = ixheaacd_add32_sat(x0i, x1i);
2228           x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
2229           x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
2230           x2r = ixheaacd_sub32_sat(x2r, x3i);
2231           x2i = ixheaacd_add32_sat(x2i, x3r);
2232           x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
2233           x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
2234 
2235           *data = x0r;
2236           *(data + 1) = x0i;
2237           data += (del << 1);
2238 
2239           *data = x2r;
2240           *(data + 1) = x2i;
2241           data += (del << 1);
2242 
2243           *data = x1r;
2244           *(data + 1) = x1i;
2245           data += (del << 1);
2246 
2247           *data = x3i;
2248           *(data + 1) = x3r;
2249           data += (del << 1);
2250         }
2251         data -= 2 * npoints;
2252         data += 2;
2253       }
2254       for (; j <= sec_loop_cnt * 2; j += nodespacing) {
2255         w1h = *(twiddles + 2 * j);
2256         w2h = *(twiddles + 2 * (j << 1) - 512);
2257         w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
2258         w1l = *(twiddles + 2 * j + 1);
2259         w2l = *(twiddles + 2 * (j << 1) - 511);
2260         w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
2261 
2262         for (k = in_loop_cnt; k != 0; k--) {
2263           WORD32 tmp;
2264           WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2265 
2266           data += (del << 1);
2267 
2268           x1r = *data;
2269           x1i = *(data + 1);
2270           data += (del << 1);
2271 
2272           x2r = *data;
2273           x2i = *(data + 1);
2274           data += (del << 1);
2275 
2276           x3r = *data;
2277           x3i = *(data + 1);
2278           data -= 3 * (del << 1);
2279 
2280           tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2281                                    ixheaacd_mult32_sat(x1i, w1h));
2282           x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2283           x1r = tmp;
2284 
2285           tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2h),
2286                                    ixheaacd_mult32_sat(x2i, w2l));
2287           x2i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2288                                    ixheaacd_mult32_sat(x2i, w2h));
2289           x2r = tmp;
2290 
2291           tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3r, w3h),
2292                                    ixheaacd_mult32_sat(x3i, w3l));
2293           x3i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2294                                    ixheaacd_mult32_sat(x3i, w3h));
2295           x3r = tmp;
2296 
2297           x0r = (*data);
2298           x0i = (*(data + 1));
2299 
2300           x0r = ixheaacd_add32_sat(x0r, x2r);
2301           x0i = ixheaacd_add32_sat(x0i, x2i);
2302           x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
2303           x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
2304           x1r = ixheaacd_add32_sat(x1r, x3r);
2305           x1i = ixheaacd_add32_sat(x1i, x3i);
2306           x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
2307           x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
2308 
2309           x0r = ixheaacd_add32_sat(x0r, x1r);
2310           x0i = ixheaacd_add32_sat(x0i, x1i);
2311           x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
2312           x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
2313           x2r = ixheaacd_sub32_sat(x2r, x3i);
2314           x2i = ixheaacd_add32_sat(x2i, x3r);
2315           x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
2316           x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
2317 
2318           *data = x0r;
2319           *(data + 1) = x0i;
2320           data += (del << 1);
2321 
2322           *data = x2r;
2323           *(data + 1) = x2i;
2324           data += (del << 1);
2325 
2326           *data = x1r;
2327           *(data + 1) = x1i;
2328           data += (del << 1);
2329 
2330           *data = x3i;
2331           *(data + 1) = x3r;
2332           data += (del << 1);
2333         }
2334         data -= 2 * npoints;
2335         data += 2;
2336       }
2337       for (; j < nodespacing * del; j += nodespacing) {
2338         w1h = *(twiddles + 2 * j);
2339         w2h = *(twiddles + 2 * (j << 1) - 512);
2340         w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
2341         w1l = *(twiddles + 2 * j + 1);
2342         w2l = *(twiddles + 2 * (j << 1) - 511);
2343         w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
2344 
2345         for (k = in_loop_cnt; k != 0; k--) {
2346           WORD32 tmp;
2347           WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2348 
2349           data += (del << 1);
2350 
2351           x1r = *data;
2352           x1i = *(data + 1);
2353           data += (del << 1);
2354 
2355           x2r = *data;
2356           x2i = *(data + 1);
2357           data += (del << 1);
2358 
2359           x3r = *data;
2360           x3i = *(data + 1);
2361           data -= 3 * (del << 1);
2362 
2363           tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2364                                    ixheaacd_mult32_sat(x1i, w1h));
2365           x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2366           x1r = tmp;
2367 
2368           tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2h),
2369                                    ixheaacd_mult32_sat(x2i, w2l));
2370           x2i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2371                                    ixheaacd_mult32_sat(x2i, w2h));
2372           x2r = tmp;
2373 
2374           tmp = -ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2375                                     ixheaacd_mult32_sat(x3i, w3h));
2376           x3i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
2377           x3r = tmp;
2378 
2379           x0r = (*data);
2380           x0i = (*(data + 1));
2381 
2382           x0r = ixheaacd_add32_sat(x0r, x2r);
2383           x0i = ixheaacd_add32_sat(x0i, x2i);
2384           x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
2385           x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
2386           x1r = ixheaacd_add32_sat(x1r, x3r);
2387           x1i = ixheaacd_sub32_sat(x1i, x3i);
2388           x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
2389           x3i = ixheaacd_add32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
2390 
2391           x0r = ixheaacd_add32_sat(x0r, x1r);
2392           x0i = ixheaacd_add32_sat(x0i, x1i);
2393           x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
2394           x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
2395           x2r = ixheaacd_sub32_sat(x2r, x3i);
2396           x2i = ixheaacd_add32_sat(x2i, x3r);
2397           x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
2398           x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
2399 
2400           *data = x0r;
2401           *(data + 1) = x0i;
2402           data += (del << 1);
2403 
2404           *data = x2r;
2405           *(data + 1) = x2i;
2406           data += (del << 1);
2407 
2408           *data = x1r;
2409           *(data + 1) = x1i;
2410           data += (del << 1);
2411 
2412           *data = x3i;
2413           *(data + 1) = x3r;
2414           data += (del << 1);
2415         }
2416         data -= 2 * npoints;
2417         data += 2;
2418       }
2419       nodespacing >>= 2;
2420       del <<= 2;
2421       in_loop_cnt >>= 2;
2422     }
2423     if (not_power_4) {
2424       const WORD32 *twiddles = ptr_w;
2425       nodespacing <<= 1;
2426       shift += 1;
2427       for (j = del / 2; j != 0; j--) {
2428         WORD32 w1h = *twiddles;
2429         WORD32 w1l = *(twiddles + 1);
2430 
2431         WORD32 tmp;
2432         twiddles += nodespacing * 2;
2433 
2434         x0r = *ptr_y;
2435         x0i = *(ptr_y + 1);
2436         ptr_y += (del << 1);
2437 
2438         x1r = *ptr_y;
2439         x1i = *(ptr_y + 1);
2440 
2441         tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2442                                  ixheaacd_mult32_sat(x1i, w1h));
2443         x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2444         x1r = tmp;
2445 
2446         *ptr_y = (x0r) / 2 - (x1r) / 2;
2447         *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
2448         ptr_y -= (del << 1);
2449 
2450         *ptr_y = (x0r) / 2 + (x1r) / 2;
2451         *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
2452         ptr_y += 2;
2453       }
2454       twiddles = ptr_w;
2455       for (j = del / 2; j != 0; j--) {
2456         WORD32 w1h = *twiddles;
2457         WORD32 w1l = *(twiddles + 1);
2458         WORD32 tmp;
2459         twiddles += nodespacing * 2;
2460 
2461         x0r = *ptr_y;
2462         x0i = *(ptr_y + 1);
2463         ptr_y += (del << 1);
2464 
2465         x1r = *ptr_y;
2466         x1i = *(ptr_y + 1);
2467 
2468         tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1h),
2469                                  ixheaacd_mult32_sat(x1i, w1l));
2470         x1i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2471                                  ixheaacd_mult32_sat(x1i, w1h));
2472         x1r = tmp;
2473 
2474         *ptr_y = (x0r) / 2 - (x1r) / 2;
2475         *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
2476         ptr_y -= (del << 1);
2477 
2478         *ptr_y = (x0r) / 2 + (x1r) / 2;
2479         *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
2480         ptr_y += 2;
2481       }
2482     }
2483   }
2484 
2485   for (i = 0; i < nlength; i++) {
2486     xr[i] = y[2 * i];
2487     xi[i] = y[2 * i + 1];
2488   }
2489 
2490   *preshift = shift - *preshift;
2491   return;
2492 }
2493 
ixheaacd_complex_3point_fft(WORD32 * inp,WORD32 * op,WORD32 sign_dir)2494 static PLATFORM_INLINE void ixheaacd_complex_3point_fft(WORD32 *inp, WORD32 *op,
2495                                                         WORD32 sign_dir) {
2496   WORD32 add_r, sub_r;
2497   WORD32 add_i, sub_i;
2498   WORD32 temp_real, temp_imag, temp;
2499 
2500   WORD32 p1, p2, p3, p4;
2501 
2502   WORD32 sinmu;
2503   sinmu = -1859775393 * sign_dir;
2504 
2505   temp_real = ixheaacd_add32_sat(inp[0], inp[2]);
2506   temp_imag = ixheaacd_add32_sat(inp[1], inp[3]);
2507 
2508   add_r = ixheaacd_add32_sat(inp[2], inp[4]);
2509   add_i = ixheaacd_add32_sat(inp[3], inp[5]);
2510 
2511   sub_r = ixheaacd_sub32_sat(inp[2], inp[4]);
2512   sub_i = ixheaacd_sub32_sat(inp[3], inp[5]);
2513 
2514   p1 = add_r >> 1;
2515   p4 = add_i >> 1;
2516   p2 = ixheaacd_mult32_shl(sub_i, sinmu);
2517   p3 = ixheaacd_mult32_shl(sub_r, sinmu);
2518 
2519   temp = ixheaacd_sub32(inp[0], p1);
2520 
2521   op[0] = ixheaacd_add32_sat(temp_real, inp[4]);
2522   op[1] = ixheaacd_add32_sat(temp_imag, inp[5]);
2523   op[2] = ixheaacd_add32_sat(temp, p2);
2524   op[3] = ixheaacd_sub32_sat(ixheaacd_sub32_sat(inp[1], p3), p4);
2525   op[4] = ixheaacd_sub32_sat(temp, p2);
2526   op[5] = ixheaacd_sub32_sat(ixheaacd_add32_sat(inp[1], p3), p4);
2527 
2528   return;
2529 }
2530 
ixheaacd_complex_fft_p3(WORD32 * xr,WORD32 * xi,WORD32 nlength,WORD32 fft_mode,WORD32 * preshift)2531 VOID ixheaacd_complex_fft_p3(WORD32 *xr, WORD32 *xi, WORD32 nlength,
2532                              WORD32 fft_mode, WORD32 *preshift) {
2533   WORD32 i, j;
2534   WORD32 shift = 0;
2535   WORD32 xr_3[384];
2536   WORD32 xi_3[384];
2537   WORD32 x[1024];
2538   WORD32 y[1024];
2539   WORD32 cnfac, npts;
2540   WORD32 mpass = nlength;
2541   WORD32 n = 0;
2542   WORD32 *ptr_x = x;
2543   WORD32 *ptr_y = y;
2544 
2545   cnfac = 0;
2546   while (mpass % 3 == 0) {
2547     mpass /= 3;
2548     cnfac++;
2549   }
2550   npts = mpass;
2551 
2552   for (i = 0; i < 3 * cnfac; i++) {
2553     for (j = 0; j < mpass; j++) {
2554       xr_3[j] = xr[3 * j + i];
2555       xi_3[j] = xi[3 * j + i];
2556     }
2557 
2558     (*ixheaacd_complex_fft_p2)(xr_3, xi_3, mpass, fft_mode, &shift);
2559 
2560     for (j = 0; j < mpass; j++) {
2561       xr[3 * j + i] = xr_3[j];
2562       xi[3 * j + i] = xi_3[j];
2563     }
2564   }
2565 
2566   while (npts >> 1) {
2567     n++;
2568     npts = npts >> 1;
2569   }
2570 
2571   if (n % 2 == 0)
2572     shift = ((n + 4)) / 2;
2573   else
2574     shift = ((n + 5) / 2);
2575 
2576   *preshift = shift - *preshift + 1;
2577 
2578   for (i = 0; i < nlength; i++) {
2579     ptr_x[2 * i] = (xr[i] >> 1);
2580     ptr_x[2 * i + 1] = (xi[i] >> 1);
2581   }
2582 
2583   {
2584     const WORD32 *w1r, *w1i;
2585     WORD32 tmp;
2586     w1r = ixheaacd_twiddle_table_3pr;
2587     w1i = ixheaacd_twiddle_table_3pi;
2588 
2589     if (fft_mode < 0) {
2590       for (i = 0; i < nlength; i += 3) {
2591         w1r++;
2592         w1i++;
2593 
2594         tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1r)),
2595                                  ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1i)));
2596         ptr_x[2 * i + 3] =
2597             ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1i)),
2598                                ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1r)));
2599         ptr_x[2 * i + 2] = tmp;
2600 
2601         w1r++;
2602         w1i++;
2603 
2604         tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1r)),
2605                                  ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1i)));
2606         ptr_x[2 * i + 5] =
2607             ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1i)),
2608                                ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1r)));
2609         ptr_x[2 * i + 4] = tmp;
2610 
2611         w1r += 3 * (128 / mpass - 1) + 1;
2612         w1i += 3 * (128 / mpass - 1) + 1;
2613       }
2614     }
2615 
2616     else {
2617       for (i = 0; i < nlength; i += 3) {
2618         w1r++;
2619         w1i++;
2620 
2621         tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1r)),
2622                                  ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1i)));
2623         ptr_x[2 * i + 3] =
2624             ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1r)),
2625                                ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1i)));
2626         ptr_x[2 * i + 2] = tmp;
2627 
2628         w1r++;
2629         w1i++;
2630 
2631         tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1r)),
2632                                  ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1i)));
2633         ptr_x[2 * i + 5] =
2634             ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1r)),
2635                                ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1i)));
2636         ptr_x[2 * i + 4] = tmp;
2637 
2638         w1r += 3 * (128 / mpass - 1) + 1;
2639         w1i += 3 * (128 / mpass - 1) + 1;
2640       }
2641     }
2642   }
2643 
2644   for (i = 0; i < mpass; i++) {
2645     ixheaacd_complex_3point_fft(ptr_x, ptr_y, fft_mode);
2646 
2647     ptr_x = ptr_x + 6;
2648     ptr_y = ptr_y + 6;
2649   }
2650 
2651   ptr_y = y;
2652   for (i = 0; i < mpass; i++) {
2653     xr[i] = *ptr_y++;
2654     xi[i] = *ptr_y++;
2655     xr[mpass + i] = *ptr_y++;
2656     xi[mpass + i] = *ptr_y++;
2657     xr[2 * mpass + i] = *ptr_y++;
2658     xi[2 * mpass + i] = *ptr_y++;
2659   }
2660 
2661   return;
2662 }
2663 
ixheaacd_complex_fft(WORD32 * data_r,WORD32 * data_i,WORD32 nlength,WORD32 fft_mode,WORD32 * preshift)2664 VOID ixheaacd_complex_fft(WORD32 *data_r, WORD32 *data_i, WORD32 nlength, WORD32 fft_mode,
2665                           WORD32 *preshift) {
2666   if (nlength & (nlength - 1)) {
2667     ixheaacd_complex_fft_p3(data_r, data_i, nlength, fft_mode, preshift);
2668   } else
2669     (*ixheaacd_complex_fft_p2)(data_r, data_i, nlength, fft_mode, preshift);
2670 
2671   return;
2672 }
2673