1 /******************************************************************************
2 * *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 #include <stdlib.h>
21 #include <stdio.h>
22
23 #include "ixheaacd_type_def.h"
24 #include "ixheaacd_interface.h"
25 #include "ixheaacd_constants.h"
26 #include "ixheaacd_basic_ops32.h"
27 #include "ixheaacd_basic_ops40.h"
28 #include "ixheaacd_function_selector.h"
29
30 extern const WORD32 ixheaacd_twiddle_table_fft_32x32[514];
31 extern const FLOAT32 ixheaacd_twiddle_table_fft[514];
32 extern const FLOAT32 ixheaacd_twiddle_table_fft_flt[16];
33 extern const WORD32 ixheaacd_twiddle_table_3pr[1155];
34 extern const WORD32 ixheaacd_twiddle_table_3pi[1155];
35 extern const WORD8 ixheaacd_mps_dig_rev[8];
36
37 #define PLATFORM_INLINE __inline
38
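/* DIG_REV: radix-4 digit reversal. It reverses the order of the 2-bit digits
 * in each 16-bit half of i (swap 2-bit pairs, then nibbles, then bytes) and
 * right-shifts the result by m to form the reordered butterfly index j. */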
39 #define DIG_REV(i, m, j) \
40 do { \
41 unsigned _ = (i); \
42 _ = ((_ & 0x33333333) << 2) | ((_ & ~0x33333333) >> 2); \
43 _ = ((_ & 0x0F0F0F0F) << 4) | ((_ & ~0x0F0F0F0F) >> 4); \
44 _ = ((_ & 0x00FF00FF) << 8) | ((_ & ~0x00FF00FF) >> 8); \
45 (j) = _ >> (m); \
46 } while (0)
47
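/* Saturating Q31 multiply: the 64-bit product (a * b) is shifted right by 31
 * and saturated to 32 bits, e.g. 0x40000000 * 0x40000000 (0.5 * 0.5 in Q31)
 * yields 0x20000000 (0.25 in Q31). */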
48 static PLATFORM_INLINE WORD32 ixheaacd_mult32_sat(WORD32 a, WORD32 b) {
49 WORD32 result;
50 WORD64 temp_result;
51
52 temp_result = (WORD64)a * (WORD64)b;
53 result = ixheaacd_sat64_32(temp_result >> 31);
54
55 return (result);
56 }
57
58 static PLATFORM_INLINE WORD32 ixheaacd_mac32_sat(WORD32 a, WORD32 b, WORD32 c) {
59 WORD32 result;
60
61 result = ixheaacd_add32_sat(a, ixheaacd_mult32_sat(b, c));
62
63 return (result);
64 }
65
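/* Floating-point multiply and multiply-accumulate helpers used by the
 * floating-point FFT routines below. */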
66 static PLATFORM_INLINE FLOAT32 ixheaacd_mult32X32float(FLOAT32 a, FLOAT32 b) {
67 FLOAT32 result;
68
69 result = a * b;
70
71 return result;
72 }
73
74 static PLATFORM_INLINE FLOAT32 ixheaacd_mac32X32float(FLOAT32 a, FLOAT32 b, FLOAT32 c) {
75 FLOAT32 result;
76
77 result = a + b * c;
78
79 return result;
80 }
81
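/* 32-point complex FFT used in the MPS synthesis calculation. The buffers at
 * ptr_xr and ptr_xi each hold interleaved (re, im) data and are transformed
 * independently through two radix-4 stages and a final radix-2 stage. The
 * 64-float work buffers and the 8-entry ixheaacd_mps_dig_rev table assume
 * npoints == 32. */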
82 VOID ixheaacd_mps_synth_calc_fft(FLOAT32 *ptr_xr, FLOAT32 *ptr_xi,
83 WORD32 npoints) {
84 WORD32 i, j, k;
85 FLOAT32 y[64], z[64];
86 FLOAT32 *ptr_y = y, *ptr_z = z;
87 const FLOAT32 *ptr_w = ixheaacd_twiddle_table_fft_flt;
88
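  /* Stage 1: radix-4 butterflies on digit-reversed input order, applied to
   * the ptr_xr data (results in y) and the ptr_xi data (results in z). */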
89 for (i = 0; i < npoints; i += 4) {
90 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
91 FLOAT32 *inp = ptr_xr;
92 FLOAT32 tmk;
93
94 WORD32 h2 = ixheaacd_mps_dig_rev[i >> 2];
95
96 inp += (h2);
97
98 x0r = *inp;
99 x0i = *(inp + 1);
100 inp += 16;
101
102 x1r = *inp;
103 x1i = *(inp + 1);
104 inp += 16;
105
106 x2r = *inp;
107 x2i = *(inp + 1);
108 inp += 16;
109
110 x3r = *inp;
111 x3i = *(inp + 1);
112
113 x0r = x0r + x2r;
114 x0i = x0i + x2i;
115
116 tmk = x0r - x2r;
117 x2r = tmk - x2r;
118 tmk = x0i - x2i;
119 x2i = tmk - x2i;
120
121 x1r = x1r + x3r;
122 x1i = x1i + x3i;
123
124 tmk = x1r - x3r;
125 x3r = tmk - x3r;
126 tmk = x1i - x3i;
127 x3i = tmk - x3i;
128
129 x0r = x0r + x1r;
130 x0i = x0i + x1i;
131
132 tmk = x0r - x1r;
133 x1r = tmk - x1r;
134 tmk = x0i - x1i;
135 x1i = tmk - x1i;
136
137 x2r = x2r + x3i;
138 x2i = x2i - x3r;
139
140 tmk = x2r - x3i;
141 x3i = tmk - x3i;
142 tmk = x2i + x3r;
143 x3r = tmk + x3r;
144
145 *ptr_y++ = x0r;
146 *ptr_y++ = x0i;
147 *ptr_y++ = x2r;
148 *ptr_y++ = x2i;
149 *ptr_y++ = x1r;
150 *ptr_y++ = x1i;
151 *ptr_y++ = x3i;
152 *ptr_y++ = x3r;
153
154 inp = ptr_xi;
155
156 inp += (h2);
157
158 x0r = *inp;
159 x0i = *(inp + 1);
160 inp += 16;
161
162 x1r = *inp;
163 x1i = *(inp + 1);
164 inp += 16;
165
166 x2r = *inp;
167 x2i = *(inp + 1);
168 inp += 16;
169
170 x3r = *inp;
171 x3i = *(inp + 1);
172
173 x0r = x0r + x2r;
174 x0i = x0i + x2i;
175
176 tmk = x0r - x2r;
177 x2r = tmk - x2r;
178 tmk = x0i - x2i;
179 x2i = tmk - x2i;
180
181 x1r = x1r + x3r;
182 x1i = x1i + x3i;
183
184 tmk = x1r - x3r;
185 x3r = tmk - x3r;
186 tmk = x1i - x3i;
187 x3i = tmk - x3i;
188
189 x0r = x0r + x1r;
190 x0i = x0i + x1i;
191
192 tmk = x0r - x1r;
193 x1r = tmk - x1r;
194 tmk = x0i - x1i;
195 x1i = tmk - x1i;
196
197 x2r = x2r + x3i;
198 x2i = x2i - x3r;
199
200 tmk = x2r - x3i;
201 x3i = tmk - x3i;
202 tmk = x2i + x3r;
203 x3r = tmk + x3r;
204
205 *ptr_z++ = x0r;
206 *ptr_z++ = x0i;
207 *ptr_z++ = x2r;
208 *ptr_z++ = x2i;
209 *ptr_z++ = x1r;
210 *ptr_z++ = x1i;
211 *ptr_z++ = x3i;
212 *ptr_z++ = x3r;
213 }
214 ptr_y -= 64;
215 ptr_z -= 64;
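  /* Stage 2: radix-4 butterflies with hard-coded twiddle factors
   * (0.923880 = cos(pi/8), 0.707107 = cos(pi/4), 0.382683 = sin(pi/8));
   * the first group of butterflies needs no twiddle rotation. */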
216 {
217 FLOAT32 *data_r = ptr_y;
218 FLOAT32 *data_i = ptr_z;
219 for (k = 2; k != 0; k--) {
220 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
221
222 x0r = (*data_r);
223 x0i = (*(data_r + 1));
224 data_r += 8;
225
226 x1r = (*data_r);
227 x1i = (*(data_r + 1));
228 data_r += 8;
229
230 x2r = (*data_r);
231 x2i = (*(data_r + 1));
232 data_r += 8;
233
234 x3r = (*data_r);
235 x3i = (*(data_r + 1));
236 data_r -= 24;
237
238 x0r = x0r + x2r;
239 x0i = x0i + x2i;
240 x2r = x0r - (x2r * 2);
241 x2i = x0i - (x2i * 2);
242 x1r = x1r + x3r;
243 x1i = x1i + x3i;
244 x3r = x1r - (x3r * 2);
245 x3i = x1i - (x3i * 2);
246
247 x0r = x0r + x1r;
248 x0i = x0i + x1i;
249 x1r = x0r - (x1r * 2);
250 x1i = x0i - (x1i * 2);
251 x2r = x2r + x3i;
252 x2i = x2i - x3r;
253 x3i = x2r - (x3i * 2);
254 x3r = x2i + (x3r * 2);
255
256 *data_r = x0r;
257 *(data_r + 1) = x0i;
258 data_r += 8;
259
260 *data_r = x2r;
261 *(data_r + 1) = x2i;
262 data_r += 8;
263
264 *data_r = x1r;
265 *(data_r + 1) = x1i;
266 data_r += 8;
267
268 *data_r = x3i;
269 *(data_r + 1) = x3r;
270 data_r += 8;
271
272 x0r = (*data_i);
273 x0i = (*(data_i + 1));
274 data_i += 8;
275
276 x1r = (*data_i);
277 x1i = (*(data_i + 1));
278 data_i += 8;
279
280 x2r = (*data_i);
281 x2i = (*(data_i + 1));
282 data_i += 8;
283
284 x3r = (*data_i);
285 x3i = (*(data_i + 1));
286 data_i -= 24;
287
288 x0r = x0r + x2r;
289 x0i = x0i + x2i;
290 x2r = x0r - (x2r * 2);
291 x2i = x0i - (x2i * 2);
292 x1r = x1r + x3r;
293 x1i = x1i + x3i;
294 x3r = x1r - (x3r * 2);
295 x3i = x1i - (x3i * 2);
296
297 x0r = x0r + x1r;
298 x0i = x0i + x1i;
299 x1r = x0r - (x1r * 2);
300 x1i = x0i - (x1i * 2);
301 x2r = x2r + x3i;
302 x2i = x2i - x3r;
303 x3i = x2r - (x3i * 2);
304 x3r = x2i + (x3r * 2);
305
306 *data_i = x0r;
307 *(data_i + 1) = x0i;
308 data_i += 8;
309
310 *data_i = x2r;
311 *(data_i + 1) = x2i;
312 data_i += 8;
313
314 *data_i = x1r;
315 *(data_i + 1) = x1i;
316 data_i += 8;
317
318 *data_i = x3i;
319 *(data_i + 1) = x3r;
320 data_i += 8;
321 }
322 data_r = ptr_y + 2;
323 data_i = ptr_z + 2;
324
325 for (k = 2; k != 0; k--) {
326 FLOAT32 tmp;
327 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
328
329 data_r += 8;
330
331 x1r = *data_r;
332 x1i = *(data_r + 1);
333 data_r += 8;
334
335 x2r = *data_r;
336 x2i = *(data_r + 1);
337 data_r += 8;
338
339 x3r = *data_r;
340 x3i = *(data_r + 1);
341 data_r -= 24;
342
343 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.923880f) -
344 ixheaacd_mult32X32float((FLOAT32)x1i, -0.382683f));
345 x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.382683f),
346 (FLOAT32)x1i, 0.923880f);
347 x1r = tmp;
348
349 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) -
350 ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
351 x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f),
352 (FLOAT32)x2i, 0.707107f);
353 x2r = tmp;
354
355 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, 0.382683f) -
356 ixheaacd_mult32X32float((FLOAT32)x3i, -0.923880f));
357 x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.923880f),
358 (FLOAT32)x3i, 0.382683f);
359 x3r = tmp;
360
361 x0r = (*data_r);
362 x0i = (*(data_r + 1));
363
364 x0r = x0r + (x2r);
365 x0i = x0i + (x2i);
366 x2r = x0r - (x2r * 2);
367 x2i = x0i - (x2i * 2);
368 x1r = x1r + x3r;
369 x1i = x1i + x3i;
370 x3r = x1r - (x3r * 2);
371 x3i = x1i - (x3i * 2);
372
373 x0r = x0r + (x1r);
374 x0i = x0i + (x1i);
375 x1r = x0r - (x1r * 2);
376 x1i = x0i - (x1i * 2);
377 x2r = x2r + (x3i);
378 x2i = x2i - (x3r);
379 x3i = x2r - (x3i * 2);
380 x3r = x2i + (x3r * 2);
381
382 *data_r = x0r;
383 *(data_r + 1) = x0i;
384 data_r += 8;
385
386 *data_r = x2r;
387 *(data_r + 1) = x2i;
388 data_r += 8;
389
390 *data_r = x1r;
391 *(data_r + 1) = x1i;
392 data_r += 8;
393
394 *data_r = x3i;
395 *(data_r + 1) = x3r;
396 data_r += 8;
397 data_i += 8;
398
399 x1r = *data_i;
400 x1i = *(data_i + 1);
401 data_i += 8;
402
403 x2r = *data_i;
404 x2i = *(data_i + 1);
405 data_i += 8;
406
407 x3r = *data_i;
408 x3i = *(data_i + 1);
409 data_i -= 24;
410
411 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.923880f) -
412 ixheaacd_mult32X32float((FLOAT32)x1i, -0.382683f));
413 x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.382683f),
414 (FLOAT32)x1i, 0.923880f);
415 x1r = tmp;
416
417 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) -
418 ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
419 x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f),
420 (FLOAT32)x2i, 0.707107f);
421 x2r = tmp;
422
423 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, 0.382683f) -
424 ixheaacd_mult32X32float((FLOAT32)x3i, -0.923880f));
425 x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.923880f),
426 (FLOAT32)x3i, 0.382683f);
427 x3r = tmp;
428
429 x0r = (*data_i);
430 x0i = (*(data_i + 1));
431
432 x0r = x0r + (x2r);
433 x0i = x0i + (x2i);
434 x2r = x0r - (x2r * 2);
435 x2i = x0i - (x2i * 2);
436 x1r = x1r + x3r;
437 x1i = x1i + x3i;
438 x3r = x1r - (x3r * 2);
439 x3i = x1i - (x3i * 2);
440
441 x0r = x0r + (x1r);
442 x0i = x0i + (x1i);
443 x1r = x0r - (x1r * 2);
444 x1i = x0i - (x1i * 2);
445 x2r = x2r + (x3i);
446 x2i = x2i - (x3r);
447 x3i = x2r - (x3i * 2);
448 x3r = x2i + (x3r * 2);
449
450 *data_i = x0r;
451 *(data_i + 1) = x0i;
452 data_i += 8;
453
454 *data_i = x2r;
455 *(data_i + 1) = x2i;
456 data_i += 8;
457
458 *data_i = x1r;
459 *(data_i + 1) = x1i;
460 data_i += 8;
461
462 *data_i = x3i;
463 *(data_i + 1) = x3r;
464 data_i += 8;
465 }
466 data_r -= 62;
467 data_i -= 62;
468 for (k = 2; k != 0; k--) {
469 FLOAT32 tmp;
470 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
471
472 data_r += 8;
473
474 x1r = *data_r;
475 x1i = *(data_r + 1);
476 data_r += 8;
477
478 x2r = *data_r;
479 x2i = *(data_r + 1);
480 data_r += 8;
481
482 x3r = *data_r;
483 x3i = *(data_r + 1);
484 data_r -= 24;
485
486 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.707107f) -
487 ixheaacd_mult32X32float((FLOAT32)x1i, -0.707107f));
488 x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.707107f),
489 (FLOAT32)x1i, 0.707107f);
490 x1r = tmp;
491
492 tmp = x2i;
493 x2i = -x2r;
494 x2r = tmp;
495
496 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, -0.707107f) +
497 ixheaacd_mult32X32float((FLOAT32)x3i, 0.707107f));
498 x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.707107f) +
499 ixheaacd_mult32X32float((FLOAT32)x3i, -0.707107f));
500 x3r = tmp;
501
502 x0r = (*data_r);
503 x0i = (*(data_r + 1));
504
505 x0r = x0r + (x2r);
506 x0i = x0i + (x2i);
507 x2r = x0r - (x2r * 2);
508 x2i = x0i - (x2i * 2);
509 x1r = x1r + x3r;
510 x1i = x1i + x3i;
511 x3r = x1r - (x3r * 2);
512 x3i = x1i - (x3i * 2);
513
514 x0r = x0r + (x1r);
515 x0i = x0i + (x1i);
516 x1r = x0r - (x1r * 2);
517 x1i = x0i - (x1i * 2);
518 x2r = x2r + (x3i);
519 x2i = x2i - (x3r);
520 x3i = x2r - (x3i * 2);
521 x3r = x2i + (x3r * 2);
522
523 *data_r = x0r;
524 *(data_r + 1) = x0i;
525 data_r += 8;
526
527 *data_r = x2r;
528 *(data_r + 1) = x2i;
529 data_r += 8;
530
531 *data_r = x1r;
532 *(data_r + 1) = x1i;
533 data_r += 8;
534
535 *data_r = x3i;
536 *(data_r + 1) = x3r;
537 data_r += 8;
538 data_i += 8;
539
540 x1r = *data_i;
541 x1i = *(data_i + 1);
542 data_i += 8;
543
544 x2r = *data_i;
545 x2i = *(data_i + 1);
546 data_i += 8;
547
548 x3r = *data_i;
549 x3i = *(data_i + 1);
550 data_i -= 24;
551
552 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.707107f) -
553 ixheaacd_mult32X32float((FLOAT32)x1i, -0.707107f));
554 x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.707107f),
555 (FLOAT32)x1i, 0.707107f);
556 x1r = tmp;
557
558 tmp = x2i;
559 x2i = -x2r;
560 x2r = tmp;
561
562 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, -0.707107f) +
563 ixheaacd_mult32X32float((FLOAT32)x3i, 0.707107f));
564 x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.707107f) +
565 ixheaacd_mult32X32float((FLOAT32)x3i, -0.707107f));
566 x3r = tmp;
567
568 x0r = (*data_i);
569 x0i = (*(data_i + 1));
570
571 x0r = x0r + (x2r);
572 x0i = x0i + (x2i);
573 x2r = x0r - (x2r * 2);
574 x2i = x0i - (x2i * 2);
575 x1r = x1r + x3r;
576 x1i = x1i + x3i;
577 x3r = x1r - (x3r * 2);
578 x3i = x1i - (x3i * 2);
579
580 x0r = x0r + (x1r);
581 x0i = x0i + (x1i);
582 x1r = x0r - (x1r * 2);
583 x1i = x0i - (x1i * 2);
584 x2r = x2r + (x3i);
585 x2i = x2i - (x3r);
586 x3i = x2r - (x3i * 2);
587 x3r = x2i + (x3r * 2);
588
589 *data_i = x0r;
590 *(data_i + 1) = x0i;
591 data_i += 8;
592
593 *data_i = x2r;
594 *(data_i + 1) = x2i;
595 data_i += 8;
596
597 *data_i = x1r;
598 *(data_i + 1) = x1i;
599 data_i += 8;
600
601 *data_i = x3i;
602 *(data_i + 1) = x3r;
603 data_i += 8;
604 }
605 data_r -= 62;
606 data_i -= 62;
607 for (k = 2; k != 0; k--) {
608 FLOAT32 tmp;
609 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
610
611 data_r += 8;
612
613 x1r = *data_r;
614 x1i = *(data_r + 1);
615 data_r += 8;
616
617 x2r = *data_r;
618 x2i = *(data_r + 1);
619 data_r += 8;
620
621 x3r = *data_r;
622 x3i = *(data_r + 1);
623 data_r -= 24;
624
625 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.382683f) -
626 ixheaacd_mult32X32float((FLOAT32)x1i, -0.923880f));
627 x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.923880f),
628 (FLOAT32)x1i, 0.382683f);
629 x1r = tmp;
630
631 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f) +
632 ixheaacd_mult32X32float((FLOAT32)x2i, 0.707107f));
633 x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) +
634 ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
635 x2r = tmp;
636
637 tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.923880f) +
638 ixheaacd_mult32X32float((FLOAT32)x3i, -0.382683f));
639 x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.382683f),
640 (FLOAT32)x3i, 0.923880f);
641 x3r = tmp;
642
643 x0r = (*data_r);
644 x0i = (*(data_r + 1));
645
646 x0r = x0r + (x2r);
647 x0i = x0i + (x2i);
648 x2r = x0r - (x2r * 2);
649 x2i = x0i - (x2i * 2);
650 x1r = x1r + x3r;
651 x1i = x1i - x3i;
652 x3r = x1r - (x3r * 2);
653 x3i = x1i + (x3i * 2);
654
655 x0r = x0r + (x1r);
656 x0i = x0i + (x1i);
657 x1r = x0r - (x1r * 2);
658 x1i = x0i - (x1i * 2);
659 x2r = x2r + (x3i);
660 x2i = x2i - (x3r);
661 x3i = x2r - (x3i * 2);
662 x3r = x2i + (x3r * 2);
663
664 *data_r = x0r;
665 *(data_r + 1) = x0i;
666 data_r += 8;
667
668 *data_r = x2r;
669 *(data_r + 1) = x2i;
670 data_r += 8;
671
672 *data_r = x1r;
673 *(data_r + 1) = x1i;
674 data_r += 8;
675
676 *data_r = x3i;
677 *(data_r + 1) = x3r;
678 data_r += 8;
679 data_i += 8;
680
681 x1r = *data_i;
682 x1i = *(data_i + 1);
683 data_i += 8;
684
685 x2r = *data_i;
686 x2i = *(data_i + 1);
687 data_i += 8;
688
689 x3r = *data_i;
690 x3i = *(data_i + 1);
691 data_i -= 24;
692
693 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.382683f) -
694 ixheaacd_mult32X32float((FLOAT32)x1i, -0.923880f));
695 x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.923880f),
696 (FLOAT32)x1i, 0.382683f);
697 x1r = tmp;
698
699 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f) +
700 ixheaacd_mult32X32float((FLOAT32)x2i, 0.707107f));
701 x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) +
702 ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
703 x2r = tmp;
704
705 tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.923880f) +
706 ixheaacd_mult32X32float((FLOAT32)x3i, -0.382683f));
707 x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.382683f),
708 (FLOAT32)x3i, 0.923880f);
709 x3r = tmp;
710
711 x0r = (*data_i);
712 x0i = (*(data_i + 1));
713
714 x0r = x0r + (x2r);
715 x0i = x0i + (x2i);
716 x2r = x0r - (x2r * 2);
717 x2i = x0i - (x2i * 2);
718 x1r = x1r + x3r;
719 x1i = x1i - x3i;
720 x3r = x1r - (x3r * 2);
721 x3i = x1i + (x3i * 2);
722
723 x0r = x0r + (x1r);
724 x0i = x0i + (x1i);
725 x1r = x0r - (x1r * 2);
726 x1i = x0i - (x1i * 2);
727 x2r = x2r + (x3i);
728 x2i = x2i - (x3r);
729 x3i = x2r - (x3i * 2);
730 x3r = x2i + (x3r * 2);
731
732 *data_i = x0r;
733 *(data_i + 1) = x0i;
734 data_i += 8;
735
736 *data_i = x2r;
737 *(data_i + 1) = x2i;
738 data_i += 8;
739
740 *data_i = x1r;
741 *(data_i + 1) = x1i;
742 data_i += 8;
743
744 *data_i = x3i;
745 *(data_i + 1) = x3r;
746 data_i += 8;
747 }
748 data_r -= 62;
749 data_i -= 62;
750 }
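  /* Final radix-2 stage: combines the intermediate results with the twiddles
   * from ixheaacd_twiddle_table_fft_flt and writes the output back to
   * ptr_xr and ptr_xi. */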
751 {
752 const FLOAT32 *twiddles = ptr_w;
753 FLOAT32 x0r, x0i, x1r, x1i;
754 for (j = 8; j != 0; j--) {
755 FLOAT32 W1 = *twiddles;
756 twiddles++;
757 FLOAT32 W4 = *twiddles;
758 twiddles++;
759 FLOAT32 tmp;
760
761 x0r = *ptr_y;
762 x0i = *(ptr_y + 1);
763 ptr_y += 32;
764 ptr_xr += 32;
765
766 x1r = *ptr_y;
767 x1i = *(ptr_y + 1);
768
769 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W1) -
770 ixheaacd_mult32X32float((FLOAT32)x1i, W4));
771 x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, W4),
772 (FLOAT32)x1i, W1);
773 x1r = tmp;
774
775 *ptr_xr = (x0r) - (x1r);
776 *(ptr_xr + 1) = (x0i) - (x1i);
777 ptr_y -= 32;
778 ptr_xr -= 32;
779
780 *ptr_xr = (x0r) + (x1r);
781 *(ptr_xr + 1) = (x0i) + (x1i);
782 ptr_y += 2;
783 ptr_xr += 2;
784
785 x0r = *ptr_z;
786 x0i = *(ptr_z + 1);
787 ptr_z += 32;
788 ptr_xi += 32;
789
790 x1r = *ptr_z;
791 x1i = *(ptr_z + 1);
792
793 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W1) -
794 ixheaacd_mult32X32float((FLOAT32)x1i, W4));
795 x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, W4),
796 (FLOAT32)x1i, W1);
797 x1r = tmp;
798
799 *ptr_xi = (x0r) - (x1r);
800 *(ptr_xi + 1) = (x0i) - (x1i);
801 ptr_z -= 32;
802 ptr_xi -= 32;
803
804 *ptr_xi = (x0r) + (x1r);
805 *(ptr_xi + 1) = (x0i) + (x1i);
806 ptr_z += 2;
807 ptr_xi += 2;
808 }
809 twiddles = ptr_w;
810 for (j = 8; j != 0; j--) {
811 FLOAT32 W1 = *twiddles;
812 twiddles++;
813 FLOAT32 W4 = *twiddles;
814 twiddles++;
815 FLOAT32 tmp;
816
817 x0r = *ptr_y;
818 x0i = *(ptr_y + 1);
819 ptr_y += 32;
820 ptr_xr += 32;
821
822 x1r = *ptr_y;
823 x1i = *(ptr_y + 1);
824
825 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W4) +
826 ixheaacd_mult32X32float((FLOAT32)x1i, W1));
827 x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x1r, W1) +
828 ixheaacd_mult32X32float((FLOAT32)x1i, W4));
829 x1r = tmp;
830
831 *ptr_xr = (x0r) - (x1r);
832 *(ptr_xr + 1) = (x0i) - (x1i);
833 ptr_y -= 32;
834 ptr_xr -= 32;
835
836 *ptr_xr = (x0r) + (x1r);
837 *(ptr_xr + 1) = (x0i) + (x1i);
838 ptr_y += 2;
839 ptr_xr += 2;
840
841 x0r = *ptr_z;
842 x0i = *(ptr_z + 1);
843 ptr_z += 32;
844 ptr_xi += 32;
845
846 x1r = *ptr_z;
847 x1i = *(ptr_z + 1);
848
849 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W4) +
850 ixheaacd_mult32X32float((FLOAT32)x1i, W1));
851 x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x1r, W1) +
852 ixheaacd_mult32X32float((FLOAT32)x1i, W4));
853 x1r = tmp;
854
855 *ptr_xi = (x0r) - (x1r);
856 *(ptr_xi + 1) = (x0i) - (x1i);
857 ptr_z -= 32;
858 ptr_xi -= 32;
859
860 *ptr_xi = (x0r) + (x1r);
861 *(ptr_xi + 1) = (x0i) + (x1i);
862 ptr_z += 2;
863 ptr_xi += 2;
864 }
865 }
866 }
867
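/* Floating-point complex FFT for power-of-two lengths (mixed radix-4/2).
 * xr and xi are interleaved into a local work buffer, transformed, and
 * de-interleaved back; the 256-float work buffers limit nlength to 128. */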
868 VOID ixheaacd_mps_complex_fft(FLOAT32 *xr, FLOAT32 *xi, WORD32 nlength) {
869 WORD32 i, j, k, n_stages, h2;
870 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
871 WORD32 del, nodespacing, in_loop_cnt;
872 WORD32 dig_rev_shift;
873 WORD32 not_power_4;
874 FLOAT32 ptr_x[256];
875 FLOAT32 y[256];
876 WORD32 npoints = nlength;
877 FLOAT32 *ptr_y = y;
878 const FLOAT32 *ptr_w;
879 dig_rev_shift = ixheaacd_norm32(npoints) + 1 - 16;
880 n_stages = 30 - ixheaacd_norm32(npoints);
881 not_power_4 = n_stages & 1;
882
883 n_stages = n_stages >> 1;
884
885
886   for (i = 0; i < nlength; i++)
887 {
888 ptr_x[2 * i] = xr[i];
889 ptr_x[2 * i + 1] = xi[i];
890 }
891
892 ptr_w = ixheaacd_twiddle_table_fft;
893
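  /* First radix-4 stage over digit-reversed input (DIG_REV). When the length
   * is not a power of 4, the reversed index is rounded up to an even offset
   * so that it still points at a (re, im) pair. */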
894   for (i = 0; i < npoints; i += 4)
895 {
896 FLOAT32 *inp = ptr_x;
897
898 DIG_REV(i, dig_rev_shift, h2);
899 if (not_power_4)
900 {
901 h2 += 1;
902 h2 &= ~1;
903 }
904 inp += (h2);
905
906 x0r = *inp;
907 x0i = *(inp + 1);
908 inp += (npoints >> 1);
909
910 x1r = *inp;
911 x1i = *(inp + 1);
912 inp += (npoints >> 1);
913
914 x2r = *inp;
915 x2i = *(inp + 1);
916 inp += (npoints >> 1);
917
918 x3r = *inp;
919 x3i = *(inp + 1);
920
921 x0r = x0r + x2r;
922 x0i = x0i + x2i;
923 x2r = x0r - (x2r * 2);
924 x2i = x0i - (x2i * 2);
925 x1r = x1r + x3r;
926 x1i = x1i + x3i;
927 x3r = x1r - (x3r * 2);
928 x3i = x1i - (x3i * 2);
929
930 x0r = x0r + x1r;
931 x0i = x0i + x1i;
932 x1r = x0r - (x1r * 2);
933 x1i = x0i - (x1i * 2);
934 x2r = x2r + x3i;
935 x2i = x2i - x3r;
936 x3i = x2r - (x3i * 2);
937 x3r = x2i + (x3r * 2);
938
939 *ptr_y++ = x0r;
940 *ptr_y++ = x0i;
941 *ptr_y++ = x2r;
942 *ptr_y++ = x2i;
943 *ptr_y++ = x1r;
944 *ptr_y++ = x1i;
945 *ptr_y++ = x3i;
946 *ptr_y++ = x3r;
947 }
948 ptr_y -= 2 * npoints;
949 del = 4;
950 nodespacing = 64;
951 in_loop_cnt = npoints >> 4;
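  /* Remaining radix-4 stages: del is the butterfly span, nodespacing the
   * twiddle stride and in_loop_cnt the number of butterflies per twiddle
   * group; all three are updated at the end of each stage. */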
952   for (i = n_stages - 1; i > 0; i--)
953 {
954 const FLOAT32 *twiddles = ptr_w;
955 FLOAT32 *data = ptr_y;
956 FLOAT32 w1h, w2h, w3h, w1l, w2l, w3l;
957 WORD32 sec_loop_cnt;
958
959 for (k = in_loop_cnt; k != 0; k--)
960 {
961 x0r = (*data);
962 x0i = (*(data + 1));
963 data += (del << 1);
964
965 x1r = (*data);
966 x1i = (*(data + 1));
967 data += (del << 1);
968
969 x2r = (*data);
970 x2i = (*(data + 1));
971 data += (del << 1);
972
973 x3r = (*data);
974 x3i = (*(data + 1));
975 data -= 3 * (del << 1);
976
977 x0r = x0r + x2r;
978 x0i = x0i + x2i;
979 x2r = x0r - (x2r * 2);
980 x2i = x0i - (x2i * 2);
981 x1r = x1r + x3r;
982 x1i = x1i + x3i;
983 x3r = x1r - (x3r * 2);
984 x3i = x1i - (x3i * 2);
985
986 x0r = x0r + x1r;
987 x0i = x0i + x1i;
988 x1r = x0r - (x1r * 2);
989 x1i = x0i - (x1i * 2);
990 x2r = x2r + x3i;
991 x2i = x2i - x3r;
992 x3i = x2r - (x3i * 2);
993 x3r = x2i + (x3r * 2);
994
995 *data = x0r;
996 *(data + 1) = x0i;
997 data += (del << 1);
998
999 *data = x2r;
1000 *(data + 1) = x2i;
1001 data += (del << 1);
1002
1003 *data = x1r;
1004 *(data + 1) = x1i;
1005 data += (del << 1);
1006
1007 *data = x3i;
1008 *(data + 1) = x3r;
1009 data += (del << 1);
1010 }
1011 data = ptr_y + 2;
1012
1013 sec_loop_cnt = (nodespacing * del);
1014     sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
1015                    (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
1016                    (sec_loop_cnt / 256);
1017 j = nodespacing;
1018
1019 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing)
1020 {
1021 w1h = *(twiddles + 2 * j);
1022 w1l = *(twiddles + 2 * j + 1);
1023 w2h = *(twiddles + 2 * (j << 1));
1024 w2l = *(twiddles + 2 * (j << 1) + 1);
1025 w3h = *(twiddles + 2 * j + 2 * (j << 1));
1026 w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
1027
1028 for (k = in_loop_cnt; k != 0; k--)
1029 {
1030 FLOAT32 tmp;
1031 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1032
1033 data += (del << 1);
1034
1035 x1r = *data;
1036 x1i = *(data + 1);
1037 data += (del << 1);
1038
1039 x2r = *data;
1040 x2i = *(data + 1);
1041 data += (del << 1);
1042
1043 x3r = *data;
1044 x3i = *(data + 1);
1045 data -= 3 * (del << 1);
1046
1047 tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1048 x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1049 x1r = tmp;
1050
1051 tmp = (ixheaacd_mult32X32float(x2r, w2l) - ixheaacd_mult32X32float(x2i, w2h));
1052 x2i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x2r, w2h), x2i, w2l);
1053 x2r = tmp;
1054
1055 tmp = (ixheaacd_mult32X32float(x3r, w3l) - ixheaacd_mult32X32float(x3i, w3h));
1056 x3i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x3r, w3h), x3i, w3l);
1057 x3r = tmp;
1058
1059 x0r = (*data);
1060 x0i = (*(data + 1));
1061
1062 x0r = x0r + (x2r);
1063 x0i = x0i + (x2i);
1064 x2r = x0r - (x2r * 2);
1065 x2i = x0i - (x2i * 2);
1066 x1r = x1r + x3r;
1067 x1i = x1i + x3i;
1068 x3r = x1r - (x3r * 2);
1069 x3i = x1i - (x3i * 2);
1070
1071 x0r = x0r + (x1r);
1072 x0i = x0i + (x1i);
1073 x1r = x0r - (x1r * 2);
1074 x1i = x0i - (x1i * 2);
1075 x2r = x2r + (x3i);
1076 x2i = x2i - (x3r);
1077 x3i = x2r - (x3i * 2);
1078 x3r = x2i + (x3r * 2);
1079
1080 *data = x0r;
1081 *(data + 1) = x0i;
1082 data += (del << 1);
1083
1084 *data = x2r;
1085 *(data + 1) = x2i;
1086 data += (del << 1);
1087
1088 *data = x1r;
1089 *(data + 1) = x1i;
1090 data += (del << 1);
1091
1092 *data = x3i;
1093 *(data + 1) = x3r;
1094 data += (del << 1);
1095 }
1096 data -= 2 * npoints;
1097 data += 2;
1098 }
1099 for (; j <= (nodespacing * del) >> 1; j += nodespacing)
1100 {
1101 w1h = *(twiddles + 2 * j);
1102 w2h = *(twiddles + 2 * (j << 1));
1103 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1104 w1l = *(twiddles + 2 * j + 1);
1105 w2l = *(twiddles + 2 * (j << 1) + 1);
1106 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1107
1108 for (k = in_loop_cnt; k != 0; k--)
1109 {
1110 FLOAT32 tmp;
1111 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1112
1113 data += (del << 1);
1114
1115 x1r = *data;
1116 x1i = *(data + 1);
1117 data += (del << 1);
1118
1119 x2r = *data;
1120 x2i = *(data + 1);
1121 data += (del << 1);
1122
1123 x3r = *data;
1124 x3i = *(data + 1);
1125 data -= 3 * (del << 1);
1126
1127 tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1128 x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1129 x1r = tmp;
1130
1131 tmp = (ixheaacd_mult32X32float(x2r, w2l) - ixheaacd_mult32X32float(x2i, w2h));
1132 x2i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x2r, w2h), x2i, w2l);
1133 x2r = tmp;
1134
1135 tmp = (ixheaacd_mult32X32float(x3r, w3h) + ixheaacd_mult32X32float(x3i, w3l));
1136 x3i = -ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h);
1137 x3r = tmp;
1138
1139 x0r = (*data);
1140 x0i = (*(data + 1));
1141
1142 x0r = x0r + (x2r);
1143 x0i = x0i + (x2i);
1144 x2r = x0r - (x2r * 2);
1145 x2i = x0i - (x2i * 2);
1146 x1r = x1r + x3r;
1147 x1i = x1i + x3i;
1148 x3r = x1r - (x3r * 2);
1149 x3i = x1i - (x3i * 2);
1150
1151 x0r = x0r + (x1r);
1152 x0i = x0i + (x1i);
1153 x1r = x0r - (x1r * 2);
1154 x1i = x0i - (x1i * 2);
1155 x2r = x2r + (x3i);
1156 x2i = x2i - (x3r);
1157 x3i = x2r - (x3i * 2);
1158 x3r = x2i + (x3r * 2);
1159
1160 *data = x0r;
1161 *(data + 1) = x0i;
1162 data += (del << 1);
1163
1164 *data = x2r;
1165 *(data + 1) = x2i;
1166 data += (del << 1);
1167
1168 *data = x1r;
1169 *(data + 1) = x1i;
1170 data += (del << 1);
1171
1172 *data = x3i;
1173 *(data + 1) = x3r;
1174 data += (del << 1);
1175 }
1176 data -= 2 * npoints;
1177 data += 2;
1178 }
1179 for (; j <= sec_loop_cnt * 2; j += nodespacing)
1180 {
1181 w1h = *(twiddles + 2 * j);
1182 w2h = *(twiddles + 2 * (j << 1) - 512);
1183 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1184 w1l = *(twiddles + 2 * j + 1);
1185 w2l = *(twiddles + 2 * (j << 1) - 511);
1186 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1187
1188 for (k = in_loop_cnt; k != 0; k--)
1189 {
1190 FLOAT32 tmp;
1191 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1192
1193 data += (del << 1);
1194
1195 x1r = *data;
1196 x1i = *(data + 1);
1197 data += (del << 1);
1198
1199 x2r = *data;
1200 x2i = *(data + 1);
1201 data += (del << 1);
1202
1203 x3r = *data;
1204 x3i = *(data + 1);
1205 data -= 3 * (del << 1);
1206
1207 tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1208 x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1209 x1r = tmp;
1210
1211 tmp = (ixheaacd_mult32X32float(x2r, w2h) + ixheaacd_mult32X32float(x2i, w2l));
1212 x2i = -ixheaacd_mult32X32float(x2r, w2l) + ixheaacd_mult32X32float(x2i, w2h);
1213 x2r = tmp;
1214
1215 tmp = (ixheaacd_mult32X32float(x3r, w3h) + ixheaacd_mult32X32float(x3i, w3l));
1216 x3i = -ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h);
1217 x3r = tmp;
1218
1219 x0r = (*data);
1220 x0i = (*(data + 1));
1221
1222 x0r = x0r + (x2r);
1223 x0i = x0i + (x2i);
1224 x2r = x0r - (x2r * 2);
1225 x2i = x0i - (x2i * 2);
1226 x1r = x1r + x3r;
1227 x1i = x1i + x3i;
1228 x3r = x1r - (x3r * 2);
1229 x3i = x1i - (x3i * 2);
1230
1231 x0r = x0r + (x1r);
1232 x0i = x0i + (x1i);
1233 x1r = x0r - (x1r * 2);
1234 x1i = x0i - (x1i * 2);
1235 x2r = x2r + (x3i);
1236 x2i = x2i - (x3r);
1237 x3i = x2r - (x3i * 2);
1238 x3r = x2i + (x3r * 2);
1239
1240 *data = x0r;
1241 *(data + 1) = x0i;
1242 data += (del << 1);
1243
1244 *data = x2r;
1245 *(data + 1) = x2i;
1246 data += (del << 1);
1247
1248 *data = x1r;
1249 *(data + 1) = x1i;
1250 data += (del << 1);
1251
1252 *data = x3i;
1253 *(data + 1) = x3r;
1254 data += (del << 1);
1255 }
1256 data -= 2 * npoints;
1257 data += 2;
1258 }
1259     for (; j < nodespacing * del; j += nodespacing)
1260 {
1261 w1h = *(twiddles + 2 * j);
1262 w2h = *(twiddles + 2 * (j << 1) - 512);
1263 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
1264 w1l = *(twiddles + 2 * j + 1);
1265 w2l = *(twiddles + 2 * (j << 1) - 511);
1266 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
1267
1268 for (k = in_loop_cnt; k != 0; k--)
1269 {
1270 FLOAT32 tmp;
1271 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1272
1273 data += (del << 1);
1274
1275 x1r = *data;
1276 x1i = *(data + 1);
1277 data += (del << 1);
1278
1279 x2r = *data;
1280 x2i = *(data + 1);
1281 data += (del << 1);
1282
1283 x3r = *data;
1284 x3i = *(data + 1);
1285 data -= 3 * (del << 1);
1286
1287 tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1288 x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1289 x1r = tmp;
1290
1291 tmp = (ixheaacd_mult32X32float(x2r, w2h) + ixheaacd_mult32X32float(x2i, w2l));
1292 x2i = -ixheaacd_mult32X32float(x2r, w2l) + ixheaacd_mult32X32float(x2i, w2h);
1293 x2r = tmp;
1294
1295 tmp = (-ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h));
1296 x3i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x3r, w3h), x3i, w3l);
1297 x3r = tmp;
1298
1299 x0r = (*data);
1300 x0i = (*(data + 1));
1301
1302 x0r = x0r + (x2r);
1303 x0i = x0i + (x2i);
1304 x2r = x0r - (x2r * 2);
1305 x2i = x0i - (x2i * 2);
1306 x1r = x1r + x3r;
1307 x1i = x1i - x3i;
1308 x3r = x1r - (x3r * 2);
1309 x3i = x1i + (x3i * 2);
1310
1311 x0r = x0r + (x1r);
1312 x0i = x0i + (x1i);
1313 x1r = x0r - (x1r * 2);
1314 x1i = x0i - (x1i * 2);
1315 x2r = x2r + (x3i);
1316 x2i = x2i - (x3r);
1317 x3i = x2r - (x3i * 2);
1318 x3r = x2i + (x3r * 2);
1319
1320 *data = x0r;
1321 *(data + 1) = x0i;
1322 data += (del << 1);
1323
1324 *data = x2r;
1325 *(data + 1) = x2i;
1326 data += (del << 1);
1327
1328 *data = x1r;
1329 *(data + 1) = x1i;
1330 data += (del << 1);
1331
1332 *data = x3i;
1333 *(data + 1) = x3r;
1334 data += (del << 1);
1335 }
1336 data -= 2 * npoints;
1337 data += 2;
1338 }
1339 nodespacing >>= 2;
1340 del <<= 2;
1341 in_loop_cnt >>= 2;
1342 }
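  /* If the length is not a power of 4, finish with a single radix-2 stage. */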
1343 if (not_power_4)
1344 {
1345 const FLOAT32 *twiddles = ptr_w;
1346 nodespacing <<= 1;
1347
1348 for (j = del / 2; j != 0; j--)
1349 {
1350 FLOAT32 w1h = *twiddles;
1351 FLOAT32 w1l = *(twiddles + 1);
1352 FLOAT32 tmp;
1353 twiddles += nodespacing * 2;
1354
1355 x0r = *ptr_y;
1356 x0i = *(ptr_y + 1);
1357 ptr_y += (del << 1);
1358
1359 x1r = *ptr_y;
1360 x1i = *(ptr_y + 1);
1361
1362 tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
1363 x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
1364 x1r = tmp;
1365
1366 *ptr_y = (x0r) - (x1r);
1367 *(ptr_y + 1) = (x0i) - (x1i);
1368 ptr_y -= (del << 1);
1369
1370 *ptr_y = (x0r) + (x1r);
1371 *(ptr_y + 1) = (x0i) + (x1i);
1372 ptr_y += 2;
1373 }
1374 twiddles = ptr_w;
1375 for (j = del / 2; j != 0; j--)
1376 {
1377 FLOAT32 w1h = *twiddles;
1378 FLOAT32 w1l = *(twiddles + 1);
1379 FLOAT32 tmp;
1380 twiddles += nodespacing * 2;
1381
1382 x0r = *ptr_y;
1383 x0i = *(ptr_y + 1);
1384 ptr_y += (del << 1);
1385
1386 x1r = *ptr_y;
1387 x1i = *(ptr_y + 1);
1388
1389 tmp = (ixheaacd_mult32X32float(x1r, w1h) + ixheaacd_mult32X32float(x1i, w1l));
1390 x1i = -ixheaacd_mult32X32float(x1r, w1l) + ixheaacd_mult32X32float(x1i, w1h);
1391 x1r = tmp;
1392
1393 *ptr_y = (x0r) - (x1r);
1394 *(ptr_y + 1) = (x0i) - (x1i);
1395 ptr_y -= (del << 1);
1396
1397 *ptr_y = (x0r) + (x1r);
1398 *(ptr_y + 1) = (x0i) + (x1i);
1399 ptr_y += 2;
1400 }
1401 }
1402
1403   for (i = 0; i < nlength; i++)
1404 {
1405 xr[i] = y[2 * i];
1406 xi[i] = y[2 * i + 1];
1407 }
1408
1409 return;
1410 }
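/* Usage sketch (illustrative): transform a 64-point buffer in place with the
 * routine above.
 *
 *   FLOAT32 re[64], im[64];
 *   // ... fill re[] and im[] ...
 *   ixheaacd_mps_complex_fft(re, im, 64);
 */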
1411
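/* 32-bit fixed-point complex FFT using saturating arithmetic. The input is
 * divided by 2^shift before the transform to keep headroom in the saturating
 * butterflies; fft_mode == -1 uses the twiddles as stored in
 * ixheaacd_twiddle_table_fft_32x32, any other value uses the conjugated
 * twiddle multiplies in the second branch. */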
1412 VOID ixheaacd_complex_fft_p2_dec(WORD32 *xr, WORD32 *xi, WORD32 nlength,
1413 WORD32 fft_mode, WORD32 *preshift) {
1414 WORD32 i, j, k, n_stages;
1415 WORD32 h2, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1416 WORD32 del, nodespacing, in_loop_cnt;
1417 WORD32 not_power_4;
1418 WORD32 npts, shift;
1419 WORD32 dig_rev_shift;
1420 WORD32 ptr_x[1024];
1421 WORD32 y[1024];
1422 WORD32 npoints = nlength;
1423 WORD32 n = 0;
1424 WORD32 *ptr_y = y;
1425 const WORD32 *ptr_w;
1426 dig_rev_shift = ixheaacd_norm32(npoints) + 1 - 16;
1427 n_stages = 30 - ixheaacd_norm32(npoints);
1428 not_power_4 = n_stages & 1;
1429
1430 n_stages = n_stages >> 1;
1431
1432 npts = npoints;
1433 while (npts >> 1) {
1434 n++;
1435 npts = npts >> 1;
1436 }
1437
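  /* n = log2(npoints); the inputs are pre-scaled by 1 / 2^shift with
   * shift = n/2 + 2 (rounded down), presumably to leave headroom for the
   * saturating butterfly stages. */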
1438 if (n % 2 == 0)
1439 shift = ((n + 4)) / 2;
1440 else
1441 shift = ((n + 3) / 2);
1442
1443 for (i = 0; i < nlength; i++) {
1444 ptr_x[2 * i] = (xr[i] / (1 << (shift)));
1445 ptr_x[2 * i + 1] = (xi[i] / (1 << (shift)));
1446 }
1447
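  /* fft_mode == -1: radix-4/2 stages with the twiddles applied as stored in
   * the table. */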
1448 if (fft_mode == -1) {
1449 ptr_w = ixheaacd_twiddle_table_fft_32x32;
1450
1451 for (i = 0; i < npoints; i += 4) {
1452 WORD32 *inp = ptr_x;
1453
1454 DIG_REV(i, dig_rev_shift, h2);
1455 if (not_power_4) {
1456 h2 += 1;
1457 h2 &= ~1;
1458 }
1459 inp += (h2);
1460
1461 x0r = *inp;
1462 x0i = *(inp + 1);
1463 inp += (npoints >> 1);
1464
1465 x1r = *inp;
1466 x1i = *(inp + 1);
1467 inp += (npoints >> 1);
1468
1469 x2r = *inp;
1470 x2i = *(inp + 1);
1471 inp += (npoints >> 1);
1472
1473 x3r = *inp;
1474 x3i = *(inp + 1);
1475
1476 x0r = ixheaacd_add32_sat(x0r, x2r);
1477 x0i = ixheaacd_add32_sat(x0i, x2i);
1478 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1479 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1480 x1r = ixheaacd_add32_sat(x1r, x3r);
1481 x1i = ixheaacd_add32_sat(x1i, x3i);
1482 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
1483 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
1484
1485 x0r = ixheaacd_add32_sat(x0r, x1r);
1486 x0i = ixheaacd_add32_sat(x0i, x1i);
1487 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1488 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1489 x2r = ixheaacd_add32_sat(x2r, x3i);
1490 x2i = ixheaacd_sub32_sat(x2i, x3r);
1491 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1492 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1493
1494 *ptr_y++ = x0r;
1495 *ptr_y++ = x0i;
1496 *ptr_y++ = x2r;
1497 *ptr_y++ = x2i;
1498 *ptr_y++ = x1r;
1499 *ptr_y++ = x1i;
1500 *ptr_y++ = x3i;
1501 *ptr_y++ = x3r;
1502 }
1503 ptr_y -= 2 * npoints;
1504 del = 4;
1505 nodespacing = 64;
1506 in_loop_cnt = npoints >> 4;
1507 for (i = n_stages - 1; i > 0; i--) {
1508 const WORD32 *twiddles = ptr_w;
1509 WORD32 *data = ptr_y;
1510 WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
1511 WORD32 sec_loop_cnt;
1512
1513 for (k = in_loop_cnt; k != 0; k--) {
1514 x0r = (*data);
1515 x0i = (*(data + 1));
1516 data += (del << 1);
1517
1518 x1r = (*data);
1519 x1i = (*(data + 1));
1520 data += (del << 1);
1521
1522 x2r = (*data);
1523 x2i = (*(data + 1));
1524 data += (del << 1);
1525
1526 x3r = (*data);
1527 x3i = (*(data + 1));
1528 data -= 3 * (del << 1);
1529
1530 x0r = ixheaacd_add32_sat(x0r, x2r);
1531 x0i = ixheaacd_add32_sat(x0i, x2i);
1532 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1533 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1534 x1r = ixheaacd_add32_sat(x1r, x3r);
1535 x1i = ixheaacd_add32_sat(x1i, x3i);
1536 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
1537 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
1538
1539 x0r = ixheaacd_add32_sat(x0r, x1r);
1540 x0i = ixheaacd_add32_sat(x0i, x1i);
1541 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1542 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1543 x2r = ixheaacd_add32_sat(x2r, x3i);
1544 x2i = ixheaacd_sub32_sat(x2i, x3r);
1545 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1546 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1547
1548 *data = x0r;
1549 *(data + 1) = x0i;
1550 data += (del << 1);
1551
1552 *data = x2r;
1553 *(data + 1) = x2i;
1554 data += (del << 1);
1555
1556 *data = x1r;
1557 *(data + 1) = x1i;
1558 data += (del << 1);
1559
1560 *data = x3i;
1561 *(data + 1) = x3r;
1562 data += (del << 1);
1563 }
1564 data = ptr_y + 2;
1565
1566 sec_loop_cnt = (nodespacing * del);
1567 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
1568 (sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
1569 (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
1570 (sec_loop_cnt / 256);
1571 j = nodespacing;
1572
1573 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
1574 w1h = *(twiddles + 2 * j);
1575 w1l = *(twiddles + 2 * j + 1);
1576 w2h = *(twiddles + 2 * (j << 1));
1577 w2l = *(twiddles + 2 * (j << 1) + 1);
1578 w3h = *(twiddles + 2 * j + 2 * (j << 1));
1579 w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
1580
1581 for (k = in_loop_cnt; k != 0; k--) {
1582 WORD32 tmp;
1583 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1584
1585 data += (del << 1);
1586
1587 x1r = *data;
1588 x1i = *(data + 1);
1589 data += (del << 1);
1590
1591 x2r = *data;
1592 x2i = *(data + 1);
1593 data += (del << 1);
1594
1595 x3r = *data;
1596 x3i = *(data + 1);
1597 data -= 3 * (del << 1);
1598
1599 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1600 ixheaacd_mult32_sat(x1i, w1h));
1601 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1602 x1r = tmp;
1603
1604 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2l),
1605 ixheaacd_mult32_sat(x2i, w2h));
1606 x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
1607 x2r = tmp;
1608
1609 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3r, w3l),
1610 ixheaacd_mult32_sat(x3i, w3h));
1611 x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
1612 x3r = tmp;
1613
1614 x0r = (*data);
1615 x0i = (*(data + 1));
1616
1617 x0r = ixheaacd_add32_sat(x0r, x2r);
1618 x0i = ixheaacd_add32_sat(x0i, x2i);
1619 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1620 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1621 x1r = ixheaacd_add32_sat(x1r, x3r);
1622 x1i = ixheaacd_add32_sat(x1i, x3i);
1623 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
1624 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
1625
1626 x0r = ixheaacd_add32_sat(x0r, x1r);
1627 x0i = ixheaacd_add32_sat(x0i, x1i);
1628 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1629 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1630 x2r = ixheaacd_add32_sat(x2r, x3i);
1631 x2i = ixheaacd_sub32_sat(x2i, x3r);
1632 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1633 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1634
1635 *data = x0r;
1636 *(data + 1) = x0i;
1637 data += (del << 1);
1638
1639 *data = x2r;
1640 *(data + 1) = x2i;
1641 data += (del << 1);
1642
1643 *data = x1r;
1644 *(data + 1) = x1i;
1645 data += (del << 1);
1646
1647 *data = x3i;
1648 *(data + 1) = x3r;
1649 data += (del << 1);
1650 }
1651 data -= 2 * npoints;
1652 data += 2;
1653 }
1654 for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
1655 w1h = *(twiddles + 2 * j);
1656 w2h = *(twiddles + 2 * (j << 1));
1657 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1658 w1l = *(twiddles + 2 * j + 1);
1659 w2l = *(twiddles + 2 * (j << 1) + 1);
1660 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1661
1662 for (k = in_loop_cnt; k != 0; k--) {
1663 WORD32 tmp;
1664 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1665 data += (del << 1);
1666
1667 x1r = *data;
1668 x1i = *(data + 1);
1669 data += (del << 1);
1670
1671 x2r = *data;
1672 x2i = *(data + 1);
1673 data += (del << 1);
1674
1675 x3r = *data;
1676 x3i = *(data + 1);
1677 data -= 3 * (del << 1);
1678
1679 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1680 ixheaacd_mult32_sat(x1i, w1h));
1681 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1682 x1r = tmp;
1683
1684 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2l),
1685 ixheaacd_mult32_sat(x2i, w2h));
1686 x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
1687 x2r = tmp;
1688
1689 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3h),
1690 ixheaacd_mult32_sat(x3i, w3l));
1691 x3i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
1692 ixheaacd_mult32_sat(x3r, w3l));
1693 x3r = tmp;
1694
1695 x0r = (*data);
1696 x0i = (*(data + 1));
1697
1698 x0r = ixheaacd_add32_sat(x0r, x2r);
1699 x0i = ixheaacd_add32_sat(x0i, x2i);
1700 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1701 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1702 x1r = ixheaacd_add32_sat(x1r, x3r);
1703 x1i = ixheaacd_add32_sat(x1i, x3i);
1704 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
1705 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
1706
1707 x0r = ixheaacd_add32_sat(x0r, x1r);
1708 x0i = ixheaacd_add32_sat(x0i, x1i);
1709 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1710 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1711 x2r = ixheaacd_add32_sat(x2r, x3i);
1712 x2i = ixheaacd_sub32_sat(x2i, x3r);
1713 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1714 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1715
1716 *data = x0r;
1717 *(data + 1) = x0i;
1718 data += (del << 1);
1719
1720 *data = x2r;
1721 *(data + 1) = x2i;
1722 data += (del << 1);
1723
1724 *data = x1r;
1725 *(data + 1) = x1i;
1726 data += (del << 1);
1727
1728 *data = x3i;
1729 *(data + 1) = x3r;
1730 data += (del << 1);
1731 }
1732 data -= 2 * npoints;
1733 data += 2;
1734 }
1735 for (; j <= sec_loop_cnt * 2; j += nodespacing) {
1736 w1h = *(twiddles + 2 * j);
1737 w2h = *(twiddles + 2 * (j << 1) - 512);
1738 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1739 w1l = *(twiddles + 2 * j + 1);
1740 w2l = *(twiddles + 2 * (j << 1) - 511);
1741 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1742
1743 for (k = in_loop_cnt; k != 0; k--) {
1744 WORD32 tmp;
1745 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1746
1747 data += (del << 1);
1748
1749 x1r = *data;
1750 x1i = *(data + 1);
1751 data += (del << 1);
1752
1753 x2r = *data;
1754 x2i = *(data + 1);
1755 data += (del << 1);
1756
1757 x3r = *data;
1758 x3i = *(data + 1);
1759 data -= 3 * (del << 1);
1760
1761 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1762 ixheaacd_mult32_sat(x1i, w1h));
1763 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1764 x1r = tmp;
1765
1766 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2h),
1767 ixheaacd_mult32_sat(x2i, w2l));
1768 x2i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2i, w2h),
1769 ixheaacd_mult32_sat(x2r, w2l));
1770 x2r = tmp;
1771
1772 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3h),
1773 ixheaacd_mult32_sat(x3i, w3l));
1774 x3i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
1775 ixheaacd_mult32_sat(x3r, w3l));
1776 x3r = tmp;
1777
1778 x0r = (*data);
1779 x0i = (*(data + 1));
1780
1781 x0r = ixheaacd_add32_sat(x0r, x2r);
1782 x0i = ixheaacd_add32_sat(x0i, x2i);
1783 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1784 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1785 x1r = ixheaacd_add32_sat(x1r, x3r);
1786 x1i = ixheaacd_add32_sat(x1i, x3i);
1787 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
1788 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
1789
1790 x0r = ixheaacd_add32_sat(x0r, x1r);
1791 x0i = ixheaacd_add32_sat(x0i, x1i);
1792 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1793 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1794 x2r = ixheaacd_add32_sat(x2r, x3i);
1795 x2i = ixheaacd_sub32_sat(x2i, x3r);
1796 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1797 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1798
1799 *data = x0r;
1800 *(data + 1) = x0i;
1801 data += (del << 1);
1802
1803 *data = x2r;
1804 *(data + 1) = x2i;
1805 data += (del << 1);
1806
1807 *data = x1r;
1808 *(data + 1) = x1i;
1809 data += (del << 1);
1810
1811 *data = x3i;
1812 *(data + 1) = x3r;
1813 data += (del << 1);
1814 }
1815 data -= 2 * npoints;
1816 data += 2;
1817 }
1818 for (; j < nodespacing * del; j += nodespacing) {
1819 w1h = *(twiddles + 2 * j);
1820 w2h = *(twiddles + 2 * (j << 1) - 512);
1821 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
1822 w1l = *(twiddles + 2 * j + 1);
1823 w2l = *(twiddles + 2 * (j << 1) - 511);
1824 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
1825
1826 for (k = in_loop_cnt; k != 0; k--) {
1827 WORD32 tmp;
1828 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1829
1830 data += (del << 1);
1831
1832 x1r = *data;
1833 x1i = *(data + 1);
1834 data += (del << 1);
1835
1836 x2r = *data;
1837 x2i = *(data + 1);
1838 data += (del << 1);
1839
1840 x3r = *data;
1841 x3i = *(data + 1);
1842 data -= 3 * (del << 1);
1843
1844 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1845 ixheaacd_mult32_sat(x1i, w1h));
1846 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1847 x1r = tmp;
1848
1849 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2h),
1850 ixheaacd_mult32_sat(x2i, w2l));
1851 x2i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2i, w2h),
1852 ixheaacd_mult32_sat(x2r, w2l));
1853 x2r = tmp;
1854
1855 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
1856 ixheaacd_mult32_sat(x3r, w3l));
1857 x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
1858 x3r = tmp;
1859
1860 x0r = (*data);
1861 x0i = (*(data + 1));
1862
1863 x0r = ixheaacd_add32_sat(x0r, x2r);
1864 x0i = ixheaacd_add32_sat(x0i, x2i);
1865 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1866 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1867 x1r = ixheaacd_add32_sat(x1r, x3r);
1868 x1i = ixheaacd_sub32_sat(x1i, x3i);
1869 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
1870 x3i = ixheaacd_add32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
1871
1872 x0r = ixheaacd_add32_sat(x0r, x1r);
1873 x0i = ixheaacd_add32_sat(x0i, x1i);
1874 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1875 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1876 x2r = ixheaacd_add32_sat(x2r, x3i);
1877 x2i = ixheaacd_sub32_sat(x2i, x3r);
1878 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1879 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1880
1881 *data = x0r;
1882 *(data + 1) = x0i;
1883 data += (del << 1);
1884
1885 *data = x2r;
1886 *(data + 1) = x2i;
1887 data += (del << 1);
1888
1889 *data = x1r;
1890 *(data + 1) = x1i;
1891 data += (del << 1);
1892
1893 *data = x3i;
1894 *(data + 1) = x3r;
1895 data += (del << 1);
1896 }
1897 data -= 2 * npoints;
1898 data += 2;
1899 }
1900 nodespacing >>= 2;
1901 del <<= 2;
1902 in_loop_cnt >>= 2;
1903 }
1904 if (not_power_4) {
1905 const WORD32 *twiddles = ptr_w;
1906 nodespacing <<= 1;
1907 shift += 1;
1908
1909 for (j = del / 2; j != 0; j--) {
1910 WORD32 w1h = *twiddles;
1911 WORD32 w1l = *(twiddles + 1);
1912 WORD32 tmp;
1913 twiddles += nodespacing * 2;
1914
1915 x0r = *ptr_y;
1916 x0i = *(ptr_y + 1);
1917 ptr_y += (del << 1);
1918
1919 x1r = *ptr_y;
1920 x1i = *(ptr_y + 1);
1921
1922 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1923 ixheaacd_mult32_sat(x1i, w1h));
1924 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1925 x1r = tmp;
1926
1927 *ptr_y = (x0r) / 2 - (x1r) / 2;
1928 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
1929 ptr_y -= (del << 1);
1930
1931 *ptr_y = (x0r) / 2 + (x1r) / 2;
1932 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
1933 ptr_y += 2;
1934 }
1935 twiddles = ptr_w;
1936 for (j = del / 2; j != 0; j--) {
1937 WORD32 w1h = *twiddles;
1938 WORD32 w1l = *(twiddles + 1);
1939 WORD32 tmp;
1940 twiddles += nodespacing * 2;
1941
1942 x0r = *ptr_y;
1943 x0i = *(ptr_y + 1);
1944 ptr_y += (del << 1);
1945
1946 x1r = *ptr_y;
1947 x1i = *(ptr_y + 1);
1948
1949 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1h),
1950 ixheaacd_mult32_sat(x1i, w1l));
1951 x1i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1i, w1h),
1952 ixheaacd_mult32_sat(x1r, w1l));
1953 x1r = tmp;
1954
1955 *ptr_y = (x0r) / 2 - (x1r) / 2;
1956 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
1957 ptr_y -= (del << 1);
1958
1959 *ptr_y = (x0r) / 2 + (x1r) / 2;
1960 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
1961 ptr_y += 2;
1962 }
1963 }
1964 }
1965
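  /* Other fft_mode values: the same stage structure, but with conjugated
   * twiddle multiplies and the mirrored +/- combination in the radix-4
   * butterfly. */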
1966 else {
1967 ptr_w = ixheaacd_twiddle_table_fft_32x32;
1968
1969 for (i = 0; i < npoints; i += 4) {
1970 WORD32 *inp = ptr_x;
1971
1972 DIG_REV(i, dig_rev_shift, h2);
1973 if (not_power_4) {
1974 h2 += 1;
1975 h2 &= ~1;
1976 }
1977 inp += (h2);
1978
1979 x0r = *inp;
1980 x0i = *(inp + 1);
1981 inp += (npoints >> 1);
1982
1983 x1r = *inp;
1984 x1i = *(inp + 1);
1985 inp += (npoints >> 1);
1986
1987 x2r = *inp;
1988 x2i = *(inp + 1);
1989 inp += (npoints >> 1);
1990
1991 x3r = *inp;
1992 x3i = *(inp + 1);
1993
1994 x0r = ixheaacd_add32_sat(x0r, x2r);
1995 x0i = ixheaacd_add32_sat(x0i, x2i);
1996 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1997 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1998 x1r = ixheaacd_add32_sat(x1r, x3r);
1999 x1i = ixheaacd_add32_sat(x1i, x3i);
2000 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
2001 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
2002
2003 x0r = ixheaacd_add32_sat(x0r, x1r);
2004 x0i = ixheaacd_add32_sat(x0i, x1i);
2005 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
2006 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
2007 x2r = ixheaacd_sub32_sat(x2r, x3i);
2008 x2i = ixheaacd_add32_sat(x2i, x3r);
2009 x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
2010 x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
2011
2012 *ptr_y++ = x0r;
2013 *ptr_y++ = x0i;
2014 *ptr_y++ = x2r;
2015 *ptr_y++ = x2i;
2016 *ptr_y++ = x1r;
2017 *ptr_y++ = x1i;
2018 *ptr_y++ = x3i;
2019 *ptr_y++ = x3r;
2020 }
2021 ptr_y -= 2 * npoints;
2022 del = 4;
2023 nodespacing = 64;
2024 in_loop_cnt = npoints >> 4;
2025 for (i = n_stages - 1; i > 0; i--) {
2026 const WORD32 *twiddles = ptr_w;
2027 WORD32 *data = ptr_y;
2028 WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
2029 WORD32 sec_loop_cnt;
2030
2031 for (k = in_loop_cnt; k != 0; k--) {
2032 x0r = (*data);
2033 x0i = (*(data + 1));
2034 data += (del << 1);
2035
2036 x1r = (*data);
2037 x1i = (*(data + 1));
2038 data += (del << 1);
2039
2040 x2r = (*data);
2041 x2i = (*(data + 1));
2042 data += (del << 1);
2043
2044 x3r = (*data);
2045 x3i = (*(data + 1));
2046 data -= 3 * (del << 1);
2047
2048 x0r = ixheaacd_add32_sat(x0r, x2r);
2049 x0i = ixheaacd_add32_sat(x0i, x2i);
2050 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
2051 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
2052 x1r = ixheaacd_add32_sat(x1r, x3r);
2053 x1i = ixheaacd_add32_sat(x1i, x3i);
2054 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
2055 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
2056
2057 x0r = ixheaacd_add32_sat(x0r, x1r);
2058 x0i = ixheaacd_add32_sat(x0i, x1i);
2059 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
2060 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
2061 x2r = ixheaacd_sub32_sat(x2r, x3i);
2062 x2i = ixheaacd_add32_sat(x2i, x3r);
2063 x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
2064 x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
2065
2066 *data = x0r;
2067 *(data + 1) = x0i;
2068 data += (del << 1);
2069
2070 *data = x2r;
2071 *(data + 1) = x2i;
2072 data += (del << 1);
2073
2074 *data = x1r;
2075 *(data + 1) = x1i;
2076 data += (del << 1);
2077
2078 *data = x3i;
2079 *(data + 1) = x3r;
2080 data += (del << 1);
2081 }
2082 data = ptr_y + 2;
2083
2084 sec_loop_cnt = (nodespacing * del);
2085 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
2086 (sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
2087 (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
2088 (sec_loop_cnt / 256);
2089 j = nodespacing;
2090
2091 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
2092 w1h = *(twiddles + 2 * j);
2093 w2h = *(twiddles + 2 * (j << 1));
2094 w3h = *(twiddles + 2 * j + 2 * (j << 1));
2095 w1l = *(twiddles + 2 * j + 1);
2096 w2l = *(twiddles + 2 * (j << 1) + 1);
2097 w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
2098
2099 for (k = in_loop_cnt; k != 0; k--) {
2100 WORD32 tmp;
2101 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2102
2103 data += (del << 1);
2104
2105 x1r = *data;
2106 x1i = *(data + 1);
2107 data += (del << 1);
2108
2109 x2r = *data;
2110 x2i = *(data + 1);
2111 data += (del << 1);
2112
2113 x3r = *data;
2114 x3i = *(data + 1);
2115 data -= 3 * (del << 1);
2116
2117 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2118 ixheaacd_mult32_sat(x1i, w1h));
2119 x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2120 x1r = tmp;
2121
2122 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2123 ixheaacd_mult32_sat(x2i, w2h));
2124 x2i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
2125 x2r = tmp;
2126
2127 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2128 ixheaacd_mult32_sat(x3i, w3h));
2129 x3i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
2130 x3r = tmp;
2131
2132 x0r = (*data);
2133 x0i = (*(data + 1));
2134
2135 x0r = ixheaacd_add32_sat(x0r, x2r);
2136 x0i = ixheaacd_add32_sat(x0i, x2i);
2137 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
2138 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
2139 x1r = ixheaacd_add32_sat(x1r, x3r);
2140 x1i = ixheaacd_add32_sat(x1i, x3i);
2141 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
2142 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
2143
2144 x0r = ixheaacd_add32_sat(x0r, x1r);
2145 x0i = ixheaacd_add32_sat(x0i, x1i);
2146 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
2147 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
2148 x2r = ixheaacd_sub32_sat(x2r, x3i);
2149 x2i = ixheaacd_add32_sat(x2i, x3r);
2150 x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
2151 x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
2152
2153 *data = x0r;
2154 *(data + 1) = x0i;
2155 data += (del << 1);
2156
2157 *data = x2r;
2158 *(data + 1) = x2i;
2159 data += (del << 1);
2160
2161 *data = x1r;
2162 *(data + 1) = x1i;
2163 data += (del << 1);
2164
2165 *data = x3i;
2166 *(data + 1) = x3r;
2167 data += (del << 1);
2168 }
2169 data -= 2 * npoints;
2170 data += 2;
2171 }
2172 for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
2173 w1h = *(twiddles + 2 * j);
2174 w2h = *(twiddles + 2 * (j << 1));
2175 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
2176 w1l = *(twiddles + 2 * j + 1);
2177 w2l = *(twiddles + 2 * (j << 1) + 1);
2178 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
2179
2180 for (k = in_loop_cnt; k != 0; k--) {
2181 WORD32 tmp;
2182 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2183
2184 data += (del << 1);
2185
2186 x1r = *data;
2187 x1i = *(data + 1);
2188 data += (del << 1);
2189
2190 x2r = *data;
2191 x2i = *(data + 1);
2192 data += (del << 1);
2193
2194 x3r = *data;
2195 x3i = *(data + 1);
2196 data -= 3 * (del << 1);
2197
2198 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2199 ixheaacd_mult32_sat(x1i, w1h));
2200 x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2201 x1r = tmp;
2202
2203 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2204 ixheaacd_mult32_sat(x2i, w2h));
2205 x2i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
2206 x2r = tmp;
2207
2208 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3r, w3h),
2209 ixheaacd_mult32_sat(x3i, w3l));
2210 x3i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2211 ixheaacd_mult32_sat(x3i, w3h));
2212 x3r = tmp;
2213
2214 x0r = (*data);
2215 x0i = (*(data + 1));
2216
2217 x0r = ixheaacd_add32_sat(x0r, x2r);
2218 x0i = ixheaacd_add32_sat(x0i, x2i);
2219 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
2220 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
2221 x1r = ixheaacd_add32_sat(x1r, x3r);
2222 x1i = ixheaacd_add32_sat(x1i, x3i);
2223 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
2224 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
2225
2226 x0r = ixheaacd_add32_sat(x0r, x1r);
2227 x0i = ixheaacd_add32_sat(x0i, x1i);
2228 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
2229 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
2230 x2r = ixheaacd_sub32_sat(x2r, x3i);
2231 x2i = ixheaacd_add32_sat(x2i, x3r);
2232 x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
2233 x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
2234
2235 *data = x0r;
2236 *(data + 1) = x0i;
2237 data += (del << 1);
2238
2239 *data = x2r;
2240 *(data + 1) = x2i;
2241 data += (del << 1);
2242
2243 *data = x1r;
2244 *(data + 1) = x1i;
2245 data += (del << 1);
2246
2247 *data = x3i;
2248 *(data + 1) = x3r;
2249 data += (del << 1);
2250 }
2251 data -= 2 * npoints;
2252 data += 2;
2253 }
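    /*
     * In the loop above only the w^(3j) index (6 * j) had outgrown the
     * table, hence the -512 / -511 offsets and the sub/add multiply form for
     * x3.  In the next range (up to roughly 2/3 of the span) the w^(2j)
     * index (4 * j) overflows as well, so x2 gets the same treatment.
     */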
2254 for (; j <= sec_loop_cnt * 2; j += nodespacing) {
2255 w1h = *(twiddles + 2 * j);
2256 w2h = *(twiddles + 2 * (j << 1) - 512);
2257 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
2258 w1l = *(twiddles + 2 * j + 1);
2259 w2l = *(twiddles + 2 * (j << 1) - 511);
2260 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
2261
2262 for (k = in_loop_cnt; k != 0; k--) {
2263 WORD32 tmp;
2264 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2265
2266 data += (del << 1);
2267
2268 x1r = *data;
2269 x1i = *(data + 1);
2270 data += (del << 1);
2271
2272 x2r = *data;
2273 x2i = *(data + 1);
2274 data += (del << 1);
2275
2276 x3r = *data;
2277 x3i = *(data + 1);
2278 data -= 3 * (del << 1);
2279
2280 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2281 ixheaacd_mult32_sat(x1i, w1h));
2282 x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2283 x1r = tmp;
2284
2285 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2h),
2286 ixheaacd_mult32_sat(x2i, w2l));
2287 x2i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2288 ixheaacd_mult32_sat(x2i, w2h));
2289 x2r = tmp;
2290
2291 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3r, w3h),
2292 ixheaacd_mult32_sat(x3i, w3l));
2293 x3i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2294 ixheaacd_mult32_sat(x3i, w3h));
2295 x3r = tmp;
2296
2297 x0r = (*data);
2298 x0i = (*(data + 1));
2299
2300 x0r = ixheaacd_add32_sat(x0r, x2r);
2301 x0i = ixheaacd_add32_sat(x0i, x2i);
2302 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
2303 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
2304 x1r = ixheaacd_add32_sat(x1r, x3r);
2305 x1i = ixheaacd_add32_sat(x1i, x3i);
2306 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
2307 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
2308
2309 x0r = ixheaacd_add32_sat(x0r, x1r);
2310 x0i = ixheaacd_add32_sat(x0i, x1i);
2311 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
2312 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
2313 x2r = ixheaacd_sub32_sat(x2r, x3i);
2314 x2i = ixheaacd_add32_sat(x2i, x3r);
2315 x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
2316 x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
2317
2318 *data = x0r;
2319 *(data + 1) = x0i;
2320 data += (del << 1);
2321
2322 *data = x2r;
2323 *(data + 1) = x2i;
2324 data += (del << 1);
2325
2326 *data = x1r;
2327 *(data + 1) = x1i;
2328 data += (del << 1);
2329
2330 *data = x3i;
2331 *(data + 1) = x3r;
2332 data += (del << 1);
2333 }
2334 data -= 2 * npoints;
2335 data += 2;
2336 }
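    /*
     * Final twiddle range: both 4 * j and 6 * j have outgrown the table, so
     * w2 keeps the -512 folding while w3 is folded by -1024 and negated
     * (note the leading minus on the x3 rotation and the swapped add/sub on
     * x1i/x3i further down).
     */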
2337 for (; j < nodespacing * del; j += nodespacing) {
2338 w1h = *(twiddles + 2 * j);
2339 w2h = *(twiddles + 2 * (j << 1) - 512);
2340 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
2341 w1l = *(twiddles + 2 * j + 1);
2342 w2l = *(twiddles + 2 * (j << 1) - 511);
2343 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
2344
2345 for (k = in_loop_cnt; k != 0; k--) {
2346 WORD32 tmp;
2347 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2348
2349 data += (del << 1);
2350
2351 x1r = *data;
2352 x1i = *(data + 1);
2353 data += (del << 1);
2354
2355 x2r = *data;
2356 x2i = *(data + 1);
2357 data += (del << 1);
2358
2359 x3r = *data;
2360 x3i = *(data + 1);
2361 data -= 3 * (del << 1);
2362
2363 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2364 ixheaacd_mult32_sat(x1i, w1h));
2365 x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2366 x1r = tmp;
2367
2368 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2h),
2369 ixheaacd_mult32_sat(x2i, w2l));
2370 x2i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
2371 ixheaacd_mult32_sat(x2i, w2h));
2372 x2r = tmp;
2373
2374 tmp = -ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
2375 ixheaacd_mult32_sat(x3i, w3h));
2376 x3i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
2377 x3r = tmp;
2378
2379 x0r = (*data);
2380 x0i = (*(data + 1));
2381
2382 x0r = ixheaacd_add32_sat(x0r, x2r);
2383 x0i = ixheaacd_add32_sat(x0i, x2i);
2384 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
2385 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
2386 x1r = ixheaacd_add32_sat(x1r, x3r);
2387 x1i = ixheaacd_sub32_sat(x1i, x3i);
2388 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
2389 x3i = ixheaacd_add32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
2390
2391 x0r = ixheaacd_add32_sat(x0r, x1r);
2392 x0i = ixheaacd_add32_sat(x0i, x1i);
2393 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
2394 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
2395 x2r = ixheaacd_sub32_sat(x2r, x3i);
2396 x2i = ixheaacd_add32_sat(x2i, x3r);
2397 x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
2398 x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
2399
2400 *data = x0r;
2401 *(data + 1) = x0i;
2402 data += (del << 1);
2403
2404 *data = x2r;
2405 *(data + 1) = x2i;
2406 data += (del << 1);
2407
2408 *data = x1r;
2409 *(data + 1) = x1i;
2410 data += (del << 1);
2411
2412 *data = x3i;
2413 *(data + 1) = x3r;
2414 data += (del << 1);
2415 }
2416 data -= 2 * npoints;
2417 data += 2;
2418 }
2419 nodespacing >>= 2;
2420 del <<= 2;
2421 in_loop_cnt >>= 2;
2422 }
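  /*
   * All full radix-4 stages are done; each stage widened the butterfly span
   * del by a factor of four and shrank nodespacing and in_loop_cnt to match.
   * When the transform length is 2 * 4^n rather than 4^n, one extra radix-2
   * pass is still required; that is the not_power_4 block below.
   */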
2423 if (not_power_4) {
2424 const WORD32 *twiddles = ptr_w;
2425 nodespacing <<= 1;
2426 shift += 1;
2427 for (j = del / 2; j != 0; j--) {
2428 WORD32 w1h = *twiddles;
2429 WORD32 w1l = *(twiddles + 1);
2430
2431 WORD32 tmp;
2432 twiddles += nodespacing * 2;
2433
2434 x0r = *ptr_y;
2435 x0i = *(ptr_y + 1);
2436 ptr_y += (del << 1);
2437
2438 x1r = *ptr_y;
2439 x1i = *(ptr_y + 1);
2440
2441 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2442 ixheaacd_mult32_sat(x1i, w1h));
2443 x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
2444 x1r = tmp;
2445
2446 *ptr_y = (x0r) / 2 - (x1r) / 2;
2447 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
2448 ptr_y -= (del << 1);
2449
2450 *ptr_y = (x0r) / 2 + (x1r) / 2;
2451 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
2452 ptr_y += 2;
2453 }
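    /*
     * Second half of the final radix-2 pass: the twiddle pointer is rewound
     * and the complex multiply switches to the alternate sign arrangement so
     * that the same table entries serve the upper half of the twiddle range;
     * the butterfly itself is unchanged.  Both halves halve their outputs,
     * which is what the shift += 1 above accounts for.
     */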
2454 twiddles = ptr_w;
2455 for (j = del / 2; j != 0; j--) {
2456 WORD32 w1h = *twiddles;
2457 WORD32 w1l = *(twiddles + 1);
2458 WORD32 tmp;
2459 twiddles += nodespacing * 2;
2460
2461 x0r = *ptr_y;
2462 x0i = *(ptr_y + 1);
2463 ptr_y += (del << 1);
2464
2465 x1r = *ptr_y;
2466 x1i = *(ptr_y + 1);
2467
2468 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1h),
2469 ixheaacd_mult32_sat(x1i, w1l));
2470 x1i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
2471 ixheaacd_mult32_sat(x1i, w1h));
2472 x1r = tmp;
2473
2474 *ptr_y = (x0r) / 2 - (x1r) / 2;
2475 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
2476 ptr_y -= (del << 1);
2477
2478 *ptr_y = (x0r) / 2 + (x1r) / 2;
2479 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
2480 ptr_y += 2;
2481 }
2482 }
2483 }
2484
2485 for (i = 0; i < nlength; i++) {
2486 xr[i] = y[2 * i];
2487 xi[i] = y[2 * i + 1];
2488 }
2489
2490 *preshift = shift - *preshift;
2491 return;
2492 }
2493
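/*
 * Length-3 DFT of one interleaved (re, im) triplet.  The constant 1859775393
 * is sin(2*pi/3) = 0.8660... in Q31, and sign_dir selects the forward or
 * inverse rotation.  Ignoring saturation, the outputs are the usual
 *
 *   X0 = x0 + x1 + x2
 *   X1 = x0 - (x1 + x2) / 2 - j * sin(2*pi/3) * (x1 - x2)
 *   X2 = x0 - (x1 + x2) / 2 + j * sin(2*pi/3) * (x1 - x2)
 *
 * (the signs of the j terms swap with sign_dir), computed through the shared
 * partial terms p1..p4 below.
 */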
2494 static PLATFORM_INLINE void ixheaacd_complex_3point_fft(WORD32 *inp, WORD32 *op,
2495 WORD32 sign_dir) {
2496 WORD32 add_r, sub_r;
2497 WORD32 add_i, sub_i;
2498 WORD32 temp_real, temp_imag, temp;
2499
2500 WORD32 p1, p2, p3, p4;
2501
2502 WORD32 sinmu;
2503 sinmu = -1859775393 * sign_dir;
2504
2505 temp_real = ixheaacd_add32_sat(inp[0], inp[2]);
2506 temp_imag = ixheaacd_add32_sat(inp[1], inp[3]);
2507
2508 add_r = ixheaacd_add32_sat(inp[2], inp[4]);
2509 add_i = ixheaacd_add32_sat(inp[3], inp[5]);
2510
2511 sub_r = ixheaacd_sub32_sat(inp[2], inp[4]);
2512 sub_i = ixheaacd_sub32_sat(inp[3], inp[5]);
2513
2514 p1 = add_r >> 1;
2515 p4 = add_i >> 1;
2516 p2 = ixheaacd_mult32_shl(sub_i, sinmu);
2517 p3 = ixheaacd_mult32_shl(sub_r, sinmu);
2518
2519 temp = ixheaacd_sub32(inp[0], p1);
2520
2521 op[0] = ixheaacd_add32_sat(temp_real, inp[4]);
2522 op[1] = ixheaacd_add32_sat(temp_imag, inp[5]);
2523 op[2] = ixheaacd_add32_sat(temp, p2);
2524 op[3] = ixheaacd_sub32_sat(ixheaacd_sub32_sat(inp[1], p3), p4);
2525 op[4] = ixheaacd_sub32_sat(temp, p2);
2526 op[5] = ixheaacd_sub32_sat(ixheaacd_add32_sat(inp[1], p3), p4);
2527
2528 return;
2529 }
2530
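/*
 * Mixed-radix FFT for lengths with a factor of 3 (nlength = 3^m * 2^k).  For
 * the usual single factor of 3 the input is split into three interleaved
 * subsequences, each subsequence is transformed with the platform-selected
 * power-of-two kernel (*ixheaacd_complex_fft_p2), the sub-spectra are rotated
 * by the radix-3 twiddles in ixheaacd_twiddle_table_3pr/3pi, recombined with
 * length-3 DFTs and finally reordered into xr/xi.
 */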
2531 VOID ixheaacd_complex_fft_p3(WORD32 *xr, WORD32 *xi, WORD32 nlength,
2532 WORD32 fft_mode, WORD32 *preshift) {
2533 WORD32 i, j;
2534 WORD32 shift = 0;
2535 WORD32 xr_3[384];
2536 WORD32 xi_3[384];
2537 WORD32 x[1024];
2538 WORD32 y[1024];
2539 WORD32 cnfac, npts;
2540 WORD32 mpass = nlength;
2541 WORD32 n = 0;
2542 WORD32 *ptr_x = x;
2543 WORD32 *ptr_y = y;
2544
2545 cnfac = 0;
2546 while (mpass % 3 == 0) {
2547 mpass /= 3;
2548 cnfac++;
2549 }
2550 npts = mpass;
2551
2552 for (i = 0; i < 3 * cnfac; i++) {
2553 for (j = 0; j < mpass; j++) {
2554 xr_3[j] = xr[3 * j + i];
2555 xi_3[j] = xi[3 * j + i];
2556 }
2557
2558 (*ixheaacd_complex_fft_p2)(xr_3, xi_3, mpass, fft_mode, &shift);
2559
2560 for (j = 0; j < mpass; j++) {
2561 xr[3 * j + i] = xr_3[j];
2562 xi[3 * j + i] = xi_3[j];
2563 }
2564 }
2565
2566 while (npts >> 1) {
2567 n++;
2568 npts = npts >> 1;
2569 }
2570
2571 if (n % 2 == 0)
2572     shift = (n + 4) / 2;
2573   else
2574     shift = (n + 5) / 2;
2575
2576 *preshift = shift - *preshift + 1;
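  /*
   * At this point mpass is nlength with all factors of 3 removed and n is
   * log2(mpass).  The fixed shift chosen above is folded into *preshift,
   * with the + 1 presumably covering the extra >> 1 pre-scaling applied to
   * every sample just below.
   */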
2577
2578 for (i = 0; i < nlength; i++) {
2579 ptr_x[2 * i] = (xr[i] >> 1);
2580 ptr_x[2 * i + 1] = (xi[i] >> 1);
2581 }
2582
2583 {
2584 const WORD32 *w1r, *w1i;
2585 WORD32 tmp;
2586 w1r = ixheaacd_twiddle_table_3pr;
2587 w1i = ixheaacd_twiddle_table_3pi;
2588
2589 if (fft_mode < 0) {
2590 for (i = 0; i < nlength; i += 3) {
2591 w1r++;
2592 w1i++;
2593
2594 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1r)),
2595 ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1i)));
2596 ptr_x[2 * i + 3] =
2597 ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1i)),
2598 ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1r)));
2599 ptr_x[2 * i + 2] = tmp;
2600
2601 w1r++;
2602 w1i++;
2603
2604 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1r)),
2605 ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1i)));
2606 ptr_x[2 * i + 5] =
2607 ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1i)),
2608 ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1r)));
2609 ptr_x[2 * i + 4] = tmp;
2610
2611 w1r += 3 * (128 / mpass - 1) + 1;
2612 w1i += 3 * (128 / mpass - 1) + 1;
2613 }
2614     } else {
2617 for (i = 0; i < nlength; i += 3) {
2618 w1r++;
2619 w1i++;
2620
2621 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1r)),
2622 ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1i)));
2623 ptr_x[2 * i + 3] =
2624 ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1r)),
2625 ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1i)));
2626 ptr_x[2 * i + 2] = tmp;
2627
2628 w1r++;
2629 w1i++;
2630
2631 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1r)),
2632 ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1i)));
2633 ptr_x[2 * i + 5] =
2634 ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1r)),
2635 ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1i)));
2636 ptr_x[2 * i + 4] = tmp;
2637
2638 w1r += 3 * (128 / mpass - 1) + 1;
2639 w1i += 3 * (128 / mpass - 1) + 1;
2640 }
2641 }
2642 }
2643
2644 for (i = 0; i < mpass; i++) {
2645 ixheaacd_complex_3point_fft(ptr_x, ptr_y, fft_mode);
2646
2647 ptr_x = ptr_x + 6;
2648 ptr_y = ptr_y + 6;
2649 }
2650
2651 ptr_y = y;
2652 for (i = 0; i < mpass; i++) {
2653 xr[i] = *ptr_y++;
2654 xi[i] = *ptr_y++;
2655 xr[mpass + i] = *ptr_y++;
2656 xi[mpass + i] = *ptr_y++;
2657 xr[2 * mpass + i] = *ptr_y++;
2658 xi[2 * mpass + i] = *ptr_y++;
2659 }
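  /*
   * Each consecutive triple of complex values in y[] is the output of one
   * length-3 DFT; the loop above scatters triple i to bins i, mpass + i and
   * 2 * mpass + i of the final spectrum.
   */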
2660
2661 return;
2662 }
2663
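/*
 * Top-level entry point: lengths that are an exact power of two go straight
 * to the platform-selected power-of-two kernel, every other length is routed
 * through ixheaacd_complex_fft_p3(), which expects the remaining factor after
 * removing 3s to be a power of two.
 */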
2664 VOID ixheaacd_complex_fft(WORD32 *data_r, WORD32 *data_i, WORD32 nlength, WORD32 fft_mode,
2665 WORD32 *preshift) {
2666 if (nlength & (nlength - 1)) {
2667 ixheaacd_complex_fft_p3(data_r, data_i, nlength, fft_mode, preshift);
2668 } else
2669 (*ixheaacd_complex_fft_p2)(data_r, data_i, nlength, fft_mode, preshift);
2670
2671 return;
2672 }
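/*
 * Illustrative reference only (not compiled into the decoder): a naive
 * O(N^2) floating-point DFT that can be used during development to
 * cross-check the fixed-point kernels above.  ref_dft is a hypothetical
 * helper name, and the sign convention tied to fft_mode is a guess; flip
 * `sign` if it does not match the kernel being compared.  The fixed-point
 * output must still be rescaled with the value returned through *preshift
 * before comparing.
 *
 *   #include <math.h>
 *
 *   static void ref_dft(const double *in_re, const double *in_im,
 *                       double *out_re, double *out_im, int n, int fft_mode) {
 *     double sign = (fft_mode < 0) ? -1.0 : 1.0;  // guessed mapping
 *     int k, t;
 *     for (k = 0; k < n; k++) {
 *       double sum_re = 0.0, sum_im = 0.0;
 *       for (t = 0; t < n; t++) {
 *         double ang = sign * 2.0 * M_PI * (double)k * (double)t / (double)n;
 *         sum_re += in_re[t] * cos(ang) - in_im[t] * sin(ang);
 *         sum_im += in_re[t] * sin(ang) + in_im[t] * cos(ang);
 *       }
 *       out_re[k] = sum_re;
 *       out_im[k] = sum_im;
 *     }
 *   }
 */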
2673