1 /******************************************************************************
2 * *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 #include <stdlib.h>
21 #include <stdio.h>
22
23 #include <ixheaacd_type_def.h>
24 #include "ixheaacd_interface.h"
25 #include "ixheaacd_constants.h"
26 #include <ixheaacd_basic_ops32.h>
27 #include "ixheaacd_function_selector.h"
28
29 extern const WORD32 ixheaacd_twiddle_table_fft_32x32[514];
30 extern const WORD32 ixheaacd_twiddle_table_3pr[1155];
31 extern const WORD32 ixheaacd_twiddle_table_3pi[1155];
32 extern const WORD8 ixheaacd_mps_dig_rev[16];
33
34 #define PLATFORM_INLINE __inline
35
/*
 * DIG_REV(i, m, j)
 *
 * Reorders the bits of (i) in three swap passes and stores the result,
 * shifted right by (m), into (j):
 *   pass 1: swaps the two 2-bit groups inside every 4-bit nibble,
 *   pass 2: swaps the two nibbles inside every byte,
 *   pass 3: swaps the two bytes inside every 16-bit half.
 * There is no pass that exchanges the 16-bit halves themselves, so each
 * half of the 32-bit value is reordered independently.
 *
 * (i) is evaluated once and converted to unsigned; (m) must be a valid
 * shift count for an unsigned value; (j) must be an assignable lvalue.
 */
#define DIG_REV(i, m, j)                                               \
  do {                                                                 \
    unsigned dig_rev_acc_ = (i);                                       \
    dig_rev_acc_ = ((dig_rev_acc_ << 2) & 0xCCCCCCCCu) |               \
                   ((dig_rev_acc_ >> 2) & 0x33333333u);                \
    dig_rev_acc_ = ((dig_rev_acc_ << 4) & 0xF0F0F0F0u) |               \
                   ((dig_rev_acc_ >> 4) & 0x0F0F0F0Fu);                \
    dig_rev_acc_ = ((dig_rev_acc_ << 8) & 0xFF00FF00u) |               \
                   ((dig_rev_acc_ >> 8) & 0x00FF00FFu);                \
    (j) = dig_rev_acc_ >> (m);                                         \
  } while (0)
44
ixheaacd_mult32(WORD32 a,WORD32 b)45 static PLATFORM_INLINE WORD32 ixheaacd_mult32(WORD32 a, WORD32 b) {
46 WORD32 result;
47 WORD64 temp_result;
48
49 temp_result = (WORD64)a * (WORD64)b;
50 result = (WORD32)(temp_result >> 31);
51
52 return (result);
53 }
54
ixheaacd_mac32(WORD32 a,WORD32 b,WORD32 c)55 static PLATFORM_INLINE WORD32 ixheaacd_mac32(WORD32 a, WORD32 b, WORD32 c) {
56 WORD32 result;
57
58 result = a + ixheaacd_mult32(b, c);
59
60 return (result);
61 }
62
ixheaacd_mult32_shl(WORD32 a,WORD32 b)63 static PLATFORM_INLINE WORD32 ixheaacd_mult32_shl(WORD32 a, WORD32 b) {
64 WORD32 result;
65 WORD64 temp_result;
66
67 temp_result = (WORD64)a * (WORD64)b;
68 result = (WORD32)(temp_result >> 32);
69
70 return (result << 1);
71 }
72
ixheaacd_mps_complex_fft_64_dec(WORD32 * ptr_x,WORD32 * fin_re,WORD32 * fin_im,WORD32 nlength)73 VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
74 WORD32 *fin_im, WORD32 nlength) {
75 WORD32 i, j, k, n_stages;
76 WORD32 h2, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
77 WORD32 del, nodespacing, in_loop_cnt;
78 WORD32 y[128];
79 WORD32 npoints = nlength;
80 WORD32 *ptr_y = y;
81 const WORD32 *ptr_w;
82 n_stages = 30 - ixheaacd_norm32(npoints);
83
84 n_stages = n_stages >> 1;
85
86 ptr_w = ixheaacd_twiddle_table_fft_32x32;
87
88 for (i = 0; i < npoints; i += 4) {
89 WORD32 *inp = ptr_x;
90 h2 = ixheaacd_mps_dig_rev[i >> 2];
91 inp += (h2);
92
93 x0r = *inp;
94 x0i = *(inp + 1);
95 inp += (npoints >> 1);
96
97 x1r = *inp;
98 x1i = *(inp + 1);
99 inp += (npoints >> 1);
100
101 x2r = *inp;
102 x2i = *(inp + 1);
103 inp += (npoints >> 1);
104
105 x3r = *inp;
106 x3i = *(inp + 1);
107
108 x0r = x0r + x2r;
109 x0i = x0i + x2i;
110 x2r = x0r - (x2r << 1);
111 x2i = x0i - (x2i << 1);
112 x1r = x1r + x3r;
113 x1i = x1i + x3i;
114 x3r = x1r - (x3r << 1);
115 x3i = x1i - (x3i << 1);
116
117 x0r = x0r + x1r;
118 x0i = x0i + x1i;
119 x1r = x0r - (x1r << 1);
120 x1i = x0i - (x1i << 1);
121 x2r = x2r + x3i;
122 x2i = x2i - x3r;
123 x3i = x2r - (x3i << 1);
124 x3r = x2i + (x3r << 1);
125
126 *ptr_y++ = x0r;
127 *ptr_y++ = x0i;
128 *ptr_y++ = x2r;
129 *ptr_y++ = x2i;
130 *ptr_y++ = x1r;
131 *ptr_y++ = x1i;
132 *ptr_y++ = x3i;
133 *ptr_y++ = x3r;
134 }
135 ptr_y -= 2 * npoints;
136 del = 4;
137 nodespacing = 64;
138 in_loop_cnt = npoints >> 4;
139 for (i = n_stages - 1; i > 0; i--) {
140 const WORD32 *twiddles = ptr_w;
141 WORD32 *data = ptr_y;
142 WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
143 WORD32 sec_loop_cnt;
144
145 for (k = in_loop_cnt; k != 0; k--) {
146 x0r = (*data);
147 x0i = (*(data + 1));
148 data += (del << 1);
149
150 x1r = (*data);
151 x1i = (*(data + 1));
152 data += (del << 1);
153
154 x2r = (*data);
155 x2i = (*(data + 1));
156 data += (del << 1);
157
158 x3r = (*data);
159 x3i = (*(data + 1));
160 data -= 3 * (del << 1);
161
162 x0r = x0r + x2r;
163 x0i = x0i + x2i;
164 x2r = x0r - (x2r << 1);
165 x2i = x0i - (x2i << 1);
166 x1r = x1r + x3r;
167 x1i = x1i + x3i;
168 x3r = x1r - (x3r << 1);
169 x3i = x1i - (x3i << 1);
170
171 x0r = x0r + x1r;
172 x0i = x0i + x1i;
173 x1r = x0r - (x1r << 1);
174 x1i = x0i - (x1i << 1);
175 x2r = x2r + x3i;
176 x2i = x2i - x3r;
177 x3i = x2r - (x3i << 1);
178 x3r = x2i + (x3r << 1);
179
180 *data = x0r;
181 *(data + 1) = x0i;
182 data += (del << 1);
183
184 *data = x2r;
185 *(data + 1) = x2i;
186 data += (del << 1);
187
188 *data = x1r;
189 *(data + 1) = x1i;
190 data += (del << 1);
191
192 *data = x3i;
193 *(data + 1) = x3r;
194 data += (del << 1);
195 }
196 data = ptr_y + 2;
197
198 sec_loop_cnt = (nodespacing * del);
199 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
200 (sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
201 (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
202 (sec_loop_cnt / 256);
203 j = nodespacing;
204
205 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
206 w1h = *(twiddles + 2 * j);
207 w1l = *(twiddles + 2 * j + 1);
208 w2h = *(twiddles + 2 * (j << 1));
209 w2l = *(twiddles + 2 * (j << 1) + 1);
210 w3h = *(twiddles + 2 * j + 2 * (j << 1));
211 w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
212
213 for (k = in_loop_cnt; k != 0; k--) {
214 WORD32 tmp;
215 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
216
217 data += (del << 1);
218
219 x1r = *data;
220 x1i = *(data + 1);
221 data += (del << 1);
222
223 x2r = *data;
224 x2i = *(data + 1);
225 data += (del << 1);
226
227 x3r = *data;
228 x3i = *(data + 1);
229 data -= 3 * (del << 1);
230
231 tmp = (ixheaacd_mult32(x1r, w1l) - ixheaacd_mult32(x1i, w1h));
232 x1i = ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l);
233 x1r = tmp;
234
235 tmp = (ixheaacd_mult32(x2r, w2l) - ixheaacd_mult32(x2i, w2h));
236 x2i = ixheaacd_mac32(ixheaacd_mult32(x2r, w2h), x2i, w2l);
237 x2r = tmp;
238
239 tmp = (ixheaacd_mult32(x3r, w3l) - ixheaacd_mult32(x3i, w3h));
240 x3i = ixheaacd_mac32(ixheaacd_mult32(x3r, w3h), x3i, w3l);
241 x3r = tmp;
242
243 x0r = (*data);
244 x0i = (*(data + 1));
245
246 x0r = x0r + (x2r);
247 x0i = x0i + (x2i);
248 x2r = x0r - (x2r << 1);
249 x2i = x0i - (x2i << 1);
250 x1r = x1r + x3r;
251 x1i = x1i + x3i;
252 x3r = x1r - (x3r << 1);
253 x3i = x1i - (x3i << 1);
254
255 x0r = x0r + (x1r);
256 x0i = x0i + (x1i);
257 x1r = x0r - (x1r << 1);
258 x1i = x0i - (x1i << 1);
259 x2r = x2r + (x3i);
260 x2i = x2i - (x3r);
261 x3i = x2r - (x3i << 1);
262 x3r = x2i + (x3r << 1);
263
264 *data = x0r;
265 *(data + 1) = x0i;
266 data += (del << 1);
267
268 *data = x2r;
269 *(data + 1) = x2i;
270 data += (del << 1);
271
272 *data = x1r;
273 *(data + 1) = x1i;
274 data += (del << 1);
275
276 *data = x3i;
277 *(data + 1) = x3r;
278 data += (del << 1);
279 }
280 data -= 2 * npoints;
281 data += 2;
282 }
283 for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
284 w1h = *(twiddles + 2 * j);
285 w2h = *(twiddles + 2 * (j << 1));
286 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
287 w1l = *(twiddles + 2 * j + 1);
288 w2l = *(twiddles + 2 * (j << 1) + 1);
289 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
290
291 for (k = in_loop_cnt; k != 0; k--) {
292 WORD32 tmp;
293 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
294
295 data += (del << 1);
296
297 x1r = *data;
298 x1i = *(data + 1);
299 data += (del << 1);
300
301 x2r = *data;
302 x2i = *(data + 1);
303 data += (del << 1);
304
305 x3r = *data;
306 x3i = *(data + 1);
307 data -= 3 * (del << 1);
308
309 tmp = (ixheaacd_mult32(x1r, w1l) - ixheaacd_mult32(x1i, w1h));
310 x1i = ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l);
311 x1r = tmp;
312
313 tmp = (ixheaacd_mult32(x2r, w2l) - ixheaacd_mult32(x2i, w2h));
314 x2i = ixheaacd_mac32(ixheaacd_mult32(x2r, w2h), x2i, w2l);
315 x2r = tmp;
316
317 tmp = (ixheaacd_mult32(x3r, w3h) + ixheaacd_mult32(x3i, w3l));
318 x3i = -ixheaacd_mult32(x3r, w3l) + ixheaacd_mult32(x3i, w3h);
319 x3r = tmp;
320
321 x0r = (*data);
322 x0i = (*(data + 1));
323
324 x0r = x0r + (x2r);
325 x0i = x0i + (x2i);
326 x2r = x0r - (x2r << 1);
327 x2i = x0i - (x2i << 1);
328 x1r = x1r + x3r;
329 x1i = x1i + x3i;
330 x3r = x1r - (x3r << 1);
331 x3i = x1i - (x3i << 1);
332
333 x0r = x0r + (x1r);
334 x0i = x0i + (x1i);
335 x1r = x0r - (x1r << 1);
336 x1i = x0i - (x1i << 1);
337 x2r = x2r + (x3i);
338 x2i = x2i - (x3r);
339 x3i = x2r - (x3i << 1);
340 x3r = x2i + (x3r << 1);
341
342 *data = x0r;
343 *(data + 1) = x0i;
344 data += (del << 1);
345
346 *data = x2r;
347 *(data + 1) = x2i;
348 data += (del << 1);
349
350 *data = x1r;
351 *(data + 1) = x1i;
352 data += (del << 1);
353
354 *data = x3i;
355 *(data + 1) = x3r;
356 data += (del << 1);
357 }
358 data -= 2 * npoints;
359 data += 2;
360 }
361 for (; j <= sec_loop_cnt * 2; j += nodespacing) {
362 w1h = *(twiddles + 2 * j);
363 w2h = *(twiddles + 2 * (j << 1) - 512);
364 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
365 w1l = *(twiddles + 2 * j + 1);
366 w2l = *(twiddles + 2 * (j << 1) - 511);
367 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
368
369 for (k = in_loop_cnt; k != 0; k--) {
370 WORD32 tmp;
371 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
372
373 data += (del << 1);
374
375 x1r = *data;
376 x1i = *(data + 1);
377 data += (del << 1);
378
379 x2r = *data;
380 x2i = *(data + 1);
381 data += (del << 1);
382
383 x3r = *data;
384 x3i = *(data + 1);
385 data -= 3 * (del << 1);
386
387 tmp = (ixheaacd_mult32(x1r, w1l) - ixheaacd_mult32(x1i, w1h));
388 x1i = ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l);
389 x1r = tmp;
390
391 tmp = (ixheaacd_mult32(x2r, w2h) + ixheaacd_mult32(x2i, w2l));
392 x2i = -ixheaacd_mult32(x2r, w2l) + ixheaacd_mult32(x2i, w2h);
393 x2r = tmp;
394
395 tmp = (ixheaacd_mult32(x3r, w3h) + ixheaacd_mult32(x3i, w3l));
396 x3i = -ixheaacd_mult32(x3r, w3l) + ixheaacd_mult32(x3i, w3h);
397 x3r = tmp;
398
399 x0r = (*data);
400 x0i = (*(data + 1));
401
402 x0r = x0r + (x2r);
403 x0i = x0i + (x2i);
404 x2r = x0r - (x2r << 1);
405 x2i = x0i - (x2i << 1);
406 x1r = x1r + x3r;
407 x1i = x1i + x3i;
408 x3r = x1r - (x3r << 1);
409 x3i = x1i - (x3i << 1);
410
411 x0r = x0r + (x1r);
412 x0i = x0i + (x1i);
413 x1r = x0r - (x1r << 1);
414 x1i = x0i - (x1i << 1);
415 x2r = x2r + (x3i);
416 x2i = x2i - (x3r);
417 x3i = x2r - (x3i << 1);
418 x3r = x2i + (x3r << 1);
419
420 *data = x0r;
421 *(data + 1) = x0i;
422 data += (del << 1);
423
424 *data = x2r;
425 *(data + 1) = x2i;
426 data += (del << 1);
427
428 *data = x1r;
429 *(data + 1) = x1i;
430 data += (del << 1);
431
432 *data = x3i;
433 *(data + 1) = x3r;
434 data += (del << 1);
435 }
436 data -= 2 * npoints;
437 data += 2;
438 }
439 for (; j < nodespacing * del; j += nodespacing) {
440 w1h = *(twiddles + 2 * j);
441 w2h = *(twiddles + 2 * (j << 1) - 512);
442 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
443 w1l = *(twiddles + 2 * j + 1);
444 w2l = *(twiddles + 2 * (j << 1) - 511);
445 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
446
447 for (k = in_loop_cnt; k != 0; k--) {
448 WORD32 tmp;
449 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
450
451 data += (del << 1);
452
453 x1r = *data;
454 x1i = *(data + 1);
455 data += (del << 1);
456
457 x2r = *data;
458 x2i = *(data + 1);
459 data += (del << 1);
460
461 x3r = *data;
462 x3i = *(data + 1);
463 data -= 3 * (del << 1);
464
465 tmp = (ixheaacd_mult32(x1r, w1l) - ixheaacd_mult32(x1i, w1h));
466 x1i = ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l);
467 x1r = tmp;
468
469 tmp = (ixheaacd_mult32(x2r, w2h) + ixheaacd_mult32(x2i, w2l));
470 x2i = -ixheaacd_mult32(x2r, w2l) + ixheaacd_mult32(x2i, w2h);
471 x2r = tmp;
472
473 tmp = (-ixheaacd_mult32(x3r, w3l) + ixheaacd_mult32(x3i, w3h));
474 x3i = ixheaacd_mac32(ixheaacd_mult32(x3r, w3h), x3i, w3l);
475 x3r = tmp;
476
477 x0r = (*data);
478 x0i = (*(data + 1));
479
480 x0r = x0r + (x2r);
481 x0i = x0i + (x2i);
482 x2r = x0r - (x2r << 1);
483 x2i = x0i - (x2i << 1);
484 x1r = x1r + x3r;
485 x1i = x1i - x3i;
486 x3r = x1r - (x3r << 1);
487 x3i = x1i + (x3i << 1);
488
489 x0r = x0r + (x1r);
490 x0i = x0i + (x1i);
491 x1r = x0r - (x1r << 1);
492 x1i = x0i - (x1i << 1);
493 x2r = x2r + (x3i);
494 x2i = x2i - (x3r);
495 x3i = x2r - (x3i << 1);
496 x3r = x2i + (x3r << 1);
497
498 *data = x0r;
499 *(data + 1) = x0i;
500 data += (del << 1);
501
502 *data = x2r;
503 *(data + 1) = x2i;
504 data += (del << 1);
505
506 *data = x1r;
507 *(data + 1) = x1i;
508 data += (del << 1);
509
510 *data = x3i;
511 *(data + 1) = x3r;
512 data += (del << 1);
513 }
514 data -= 2 * npoints;
515 data += 2;
516 }
517 nodespacing >>= 2;
518 del <<= 2;
519 in_loop_cnt >>= 2;
520 }
521
522 for (i = 0; i < 2 * nlength; i += 2) {
523 fin_re[i] = y[i];
524 fin_im[i] = y[i + 1];
525 }
526
527 return;
528 }
529
ixheaacd_complex_fft_p2_dec(WORD32 * xr,WORD32 * xi,WORD32 nlength,WORD32 fft_mode,WORD32 * preshift)530 VOID ixheaacd_complex_fft_p2_dec(WORD32 *xr, WORD32 *xi, WORD32 nlength,
531 WORD32 fft_mode, WORD32 *preshift) {
532 WORD32 i, j, k, n_stages;
533 WORD32 h2, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
534 WORD32 del, nodespacing, in_loop_cnt;
535 WORD32 not_power_4;
536 WORD32 npts, shift;
537 WORD32 dig_rev_shift;
538 WORD32 ptr_x[1024];
539 WORD32 y[1024];
540 WORD32 npoints = nlength;
541 WORD32 n = 0;
542 WORD32 *ptr_y = y;
543 const WORD32 *ptr_w;
544 dig_rev_shift = ixheaacd_norm32(npoints) + 1 - 16;
545 n_stages = 30 - ixheaacd_norm32(npoints);
546 not_power_4 = n_stages & 1;
547
548 n_stages = n_stages >> 1;
549
550 npts = npoints;
551 while (npts >> 1) {
552 n++;
553 npts = npts >> 1;
554 }
555
556 if (n % 2 == 0)
557 shift = ((n + 4)) / 2;
558 else
559 shift = ((n + 3) / 2);
560
561 for (i = 0; i < nlength; i++) {
562 ptr_x[2 * i] = (xr[i] / (1 << (shift)));
563 ptr_x[2 * i + 1] = (xi[i] / (1 << (shift)));
564 }
565
566 if (fft_mode == -1) {
567 ptr_w = ixheaacd_twiddle_table_fft_32x32;
568
569 for (i = 0; i < npoints; i += 4) {
570 WORD32 *inp = ptr_x;
571
572 DIG_REV(i, dig_rev_shift, h2);
573 if (not_power_4) {
574 h2 += 1;
575 h2 &= ~1;
576 }
577 inp += (h2);
578
579 x0r = *inp;
580 x0i = *(inp + 1);
581 inp += (npoints >> 1);
582
583 x1r = *inp;
584 x1i = *(inp + 1);
585 inp += (npoints >> 1);
586
587 x2r = *inp;
588 x2i = *(inp + 1);
589 inp += (npoints >> 1);
590
591 x3r = *inp;
592 x3i = *(inp + 1);
593
594 x0r = x0r + x2r;
595 x0i = x0i + x2i;
596 x2r = x0r - (x2r << 1);
597 x2i = x0i - (x2i << 1);
598 x1r = x1r + x3r;
599 x1i = x1i + x3i;
600 x3r = x1r - (x3r << 1);
601 x3i = x1i - (x3i << 1);
602
603 x0r = x0r + x1r;
604 x0i = x0i + x1i;
605 x1r = x0r - (x1r << 1);
606 x1i = x0i - (x1i << 1);
607 x2r = x2r + x3i;
608 x2i = x2i - x3r;
609 x3i = x2r - (x3i << 1);
610 x3r = x2i + (x3r << 1);
611
612 *ptr_y++ = x0r;
613 *ptr_y++ = x0i;
614 *ptr_y++ = x2r;
615 *ptr_y++ = x2i;
616 *ptr_y++ = x1r;
617 *ptr_y++ = x1i;
618 *ptr_y++ = x3i;
619 *ptr_y++ = x3r;
620 }
621 ptr_y -= 2 * npoints;
622 del = 4;
623 nodespacing = 64;
624 in_loop_cnt = npoints >> 4;
625 for (i = n_stages - 1; i > 0; i--) {
626 const WORD32 *twiddles = ptr_w;
627 WORD32 *data = ptr_y;
628 WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
629 WORD32 sec_loop_cnt;
630
631 for (k = in_loop_cnt; k != 0; k--) {
632 x0r = (*data);
633 x0i = (*(data + 1));
634 data += (del << 1);
635
636 x1r = (*data);
637 x1i = (*(data + 1));
638 data += (del << 1);
639
640 x2r = (*data);
641 x2i = (*(data + 1));
642 data += (del << 1);
643
644 x3r = (*data);
645 x3i = (*(data + 1));
646 data -= 3 * (del << 1);
647
648 x0r = x0r + x2r;
649 x0i = x0i + x2i;
650 x2r = x0r - (x2r << 1);
651 x2i = x0i - (x2i << 1);
652 x1r = x1r + x3r;
653 x1i = x1i + x3i;
654 x3r = x1r - (x3r << 1);
655 x3i = x1i - (x3i << 1);
656
657 x0r = x0r + x1r;
658 x0i = x0i + x1i;
659 x1r = x0r - (x1r << 1);
660 x1i = x0i - (x1i << 1);
661 x2r = x2r + x3i;
662 x2i = x2i - x3r;
663 x3i = x2r - (x3i << 1);
664 x3r = x2i + (x3r << 1);
665
666 *data = x0r;
667 *(data + 1) = x0i;
668 data += (del << 1);
669
670 *data = x2r;
671 *(data + 1) = x2i;
672 data += (del << 1);
673
674 *data = x1r;
675 *(data + 1) = x1i;
676 data += (del << 1);
677
678 *data = x3i;
679 *(data + 1) = x3r;
680 data += (del << 1);
681 }
682 data = ptr_y + 2;
683
684 sec_loop_cnt = (nodespacing * del);
685 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
686 (sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
687 (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
688 (sec_loop_cnt / 256);
689 j = nodespacing;
690
691 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
692 w1h = *(twiddles + 2 * j);
693 w1l = *(twiddles + 2 * j + 1);
694 w2h = *(twiddles + 2 * (j << 1));
695 w2l = *(twiddles + 2 * (j << 1) + 1);
696 w3h = *(twiddles + 2 * j + 2 * (j << 1));
697 w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
698
699 for (k = in_loop_cnt; k != 0; k--) {
700 WORD32 tmp;
701 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
702
703 data += (del << 1);
704
705 x1r = *data;
706 x1i = *(data + 1);
707 data += (del << 1);
708
709 x2r = *data;
710 x2i = *(data + 1);
711 data += (del << 1);
712
713 x3r = *data;
714 x3i = *(data + 1);
715 data -= 3 * (del << 1);
716
717 tmp = (ixheaacd_mult32(x1r, w1l) - ixheaacd_mult32(x1i, w1h));
718 x1i = ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l);
719 x1r = tmp;
720
721 tmp = (ixheaacd_mult32(x2r, w2l) - ixheaacd_mult32(x2i, w2h));
722 x2i = ixheaacd_mac32(ixheaacd_mult32(x2r, w2h), x2i, w2l);
723 x2r = tmp;
724
725 tmp = (ixheaacd_mult32(x3r, w3l) - ixheaacd_mult32(x3i, w3h));
726 x3i = ixheaacd_mac32(ixheaacd_mult32(x3r, w3h), x3i, w3l);
727 x3r = tmp;
728
729 x0r = (*data);
730 x0i = (*(data + 1));
731
732 x0r = x0r + (x2r);
733 x0i = x0i + (x2i);
734 x2r = x0r - (x2r << 1);
735 x2i = x0i - (x2i << 1);
736 x1r = x1r + x3r;
737 x1i = x1i + x3i;
738 x3r = x1r - (x3r << 1);
739 x3i = x1i - (x3i << 1);
740
741 x0r = x0r + (x1r);
742 x0i = x0i + (x1i);
743 x1r = x0r - (x1r << 1);
744 x1i = x0i - (x1i << 1);
745 x2r = x2r + (x3i);
746 x2i = x2i - (x3r);
747 x3i = x2r - (x3i << 1);
748 x3r = x2i + (x3r << 1);
749
750 *data = x0r;
751 *(data + 1) = x0i;
752 data += (del << 1);
753
754 *data = x2r;
755 *(data + 1) = x2i;
756 data += (del << 1);
757
758 *data = x1r;
759 *(data + 1) = x1i;
760 data += (del << 1);
761
762 *data = x3i;
763 *(data + 1) = x3r;
764 data += (del << 1);
765 }
766 data -= 2 * npoints;
767 data += 2;
768 }
769 for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
770 w1h = *(twiddles + 2 * j);
771 w2h = *(twiddles + 2 * (j << 1));
772 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
773 w1l = *(twiddles + 2 * j + 1);
774 w2l = *(twiddles + 2 * (j << 1) + 1);
775 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
776
777 for (k = in_loop_cnt; k != 0; k--) {
778 WORD32 tmp;
779 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
780 data += (del << 1);
781
782 x1r = *data;
783 x1i = *(data + 1);
784 data += (del << 1);
785
786 x2r = *data;
787 x2i = *(data + 1);
788 data += (del << 1);
789
790 x3r = *data;
791 x3i = *(data + 1);
792 data -= 3 * (del << 1);
793
794 tmp = (ixheaacd_mult32(x1r, w1l) - ixheaacd_mult32(x1i, w1h));
795 x1i = ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l);
796 x1r = tmp;
797
798 tmp = (ixheaacd_mult32(x2r, w2l) - ixheaacd_mult32(x2i, w2h));
799 x2i = ixheaacd_mac32(ixheaacd_mult32(x2r, w2h), x2i, w2l);
800 x2r = tmp;
801
802 tmp = (ixheaacd_mult32(x3r, w3h) + ixheaacd_mult32(x3i, w3l));
803 x3i = -ixheaacd_mult32(x3r, w3l) + ixheaacd_mult32(x3i, w3h);
804 x3r = tmp;
805
806 x0r = (*data);
807 x0i = (*(data + 1));
808
809 x0r = x0r + (x2r);
810 x0i = x0i + (x2i);
811 x2r = x0r - (x2r << 1);
812 x2i = x0i - (x2i << 1);
813 x1r = x1r + x3r;
814 x1i = x1i + x3i;
815 x3r = x1r - (x3r << 1);
816 x3i = x1i - (x3i << 1);
817
818 x0r = x0r + (x1r);
819 x0i = x0i + (x1i);
820 x1r = x0r - (x1r << 1);
821 x1i = x0i - (x1i << 1);
822 x2r = x2r + (x3i);
823 x2i = x2i - (x3r);
824 x3i = x2r - (x3i << 1);
825 x3r = x2i + (x3r << 1);
826
827 *data = x0r;
828 *(data + 1) = x0i;
829 data += (del << 1);
830
831 *data = x2r;
832 *(data + 1) = x2i;
833 data += (del << 1);
834
835 *data = x1r;
836 *(data + 1) = x1i;
837 data += (del << 1);
838
839 *data = x3i;
840 *(data + 1) = x3r;
841 data += (del << 1);
842 }
843 data -= 2 * npoints;
844 data += 2;
845 }
846 for (; j <= sec_loop_cnt * 2; j += nodespacing) {
847 w1h = *(twiddles + 2 * j);
848 w2h = *(twiddles + 2 * (j << 1) - 512);
849 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
850 w1l = *(twiddles + 2 * j + 1);
851 w2l = *(twiddles + 2 * (j << 1) - 511);
852 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
853
854 for (k = in_loop_cnt; k != 0; k--) {
855 WORD32 tmp;
856 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
857
858 data += (del << 1);
859
860 x1r = *data;
861 x1i = *(data + 1);
862 data += (del << 1);
863
864 x2r = *data;
865 x2i = *(data + 1);
866 data += (del << 1);
867
868 x3r = *data;
869 x3i = *(data + 1);
870 data -= 3 * (del << 1);
871
872 tmp = (ixheaacd_mult32(x1r, w1l) - ixheaacd_mult32(x1i, w1h));
873 x1i = ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l);
874 x1r = tmp;
875
876 tmp = (ixheaacd_mult32(x2r, w2h) + ixheaacd_mult32(x2i, w2l));
877 x2i = -ixheaacd_mult32(x2r, w2l) + ixheaacd_mult32(x2i, w2h);
878 x2r = tmp;
879
880 tmp = (ixheaacd_mult32(x3r, w3h) + ixheaacd_mult32(x3i, w3l));
881 x3i = -ixheaacd_mult32(x3r, w3l) + ixheaacd_mult32(x3i, w3h);
882 x3r = tmp;
883
884 x0r = (*data);
885 x0i = (*(data + 1));
886
887 x0r = x0r + (x2r);
888 x0i = x0i + (x2i);
889 x2r = x0r - (x2r << 1);
890 x2i = x0i - (x2i << 1);
891 x1r = x1r + x3r;
892 x1i = x1i + x3i;
893 x3r = x1r - (x3r << 1);
894 x3i = x1i - (x3i << 1);
895
896 x0r = x0r + (x1r);
897 x0i = x0i + (x1i);
898 x1r = x0r - (x1r << 1);
899 x1i = x0i - (x1i << 1);
900 x2r = x2r + (x3i);
901 x2i = x2i - (x3r);
902 x3i = x2r - (x3i << 1);
903 x3r = x2i + (x3r << 1);
904
905 *data = x0r;
906 *(data + 1) = x0i;
907 data += (del << 1);
908
909 *data = x2r;
910 *(data + 1) = x2i;
911 data += (del << 1);
912
913 *data = x1r;
914 *(data + 1) = x1i;
915 data += (del << 1);
916
917 *data = x3i;
918 *(data + 1) = x3r;
919 data += (del << 1);
920 }
921 data -= 2 * npoints;
922 data += 2;
923 }
924 for (; j < nodespacing * del; j += nodespacing) {
925 w1h = *(twiddles + 2 * j);
926 w2h = *(twiddles + 2 * (j << 1) - 512);
927 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
928 w1l = *(twiddles + 2 * j + 1);
929 w2l = *(twiddles + 2 * (j << 1) - 511);
930 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
931
932 for (k = in_loop_cnt; k != 0; k--) {
933 WORD32 tmp;
934 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
935
936 data += (del << 1);
937
938 x1r = *data;
939 x1i = *(data + 1);
940 data += (del << 1);
941
942 x2r = *data;
943 x2i = *(data + 1);
944 data += (del << 1);
945
946 x3r = *data;
947 x3i = *(data + 1);
948 data -= 3 * (del << 1);
949
950 tmp = (ixheaacd_mult32(x1r, w1l) - ixheaacd_mult32(x1i, w1h));
951 x1i = ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l);
952 x1r = tmp;
953
954 tmp = (ixheaacd_mult32(x2r, w2h) + ixheaacd_mult32(x2i, w2l));
955 x2i = -ixheaacd_mult32(x2r, w2l) + ixheaacd_mult32(x2i, w2h);
956 x2r = tmp;
957
958 tmp = (-ixheaacd_mult32(x3r, w3l) + ixheaacd_mult32(x3i, w3h));
959 x3i = ixheaacd_mac32(ixheaacd_mult32(x3r, w3h), x3i, w3l);
960 x3r = tmp;
961
962 x0r = (*data);
963 x0i = (*(data + 1));
964
965 x0r = x0r + (x2r);
966 x0i = x0i + (x2i);
967 x2r = x0r - (x2r << 1);
968 x2i = x0i - (x2i << 1);
969 x1r = x1r + x3r;
970 x1i = x1i - x3i;
971 x3r = x1r - (x3r << 1);
972 x3i = x1i + (x3i << 1);
973
974 x0r = x0r + (x1r);
975 x0i = x0i + (x1i);
976 x1r = x0r - (x1r << 1);
977 x1i = x0i - (x1i << 1);
978 x2r = x2r + (x3i);
979 x2i = x2i - (x3r);
980 x3i = x2r - (x3i << 1);
981 x3r = x2i + (x3r << 1);
982
983 *data = x0r;
984 *(data + 1) = x0i;
985 data += (del << 1);
986
987 *data = x2r;
988 *(data + 1) = x2i;
989 data += (del << 1);
990
991 *data = x1r;
992 *(data + 1) = x1i;
993 data += (del << 1);
994
995 *data = x3i;
996 *(data + 1) = x3r;
997 data += (del << 1);
998 }
999 data -= 2 * npoints;
1000 data += 2;
1001 }
1002 nodespacing >>= 2;
1003 del <<= 2;
1004 in_loop_cnt >>= 2;
1005 }
1006 if (not_power_4) {
1007 const WORD32 *twiddles = ptr_w;
1008 nodespacing <<= 1;
1009 shift += 1;
1010
1011 for (j = del / 2; j != 0; j--) {
1012 WORD32 w1h = *twiddles;
1013 WORD32 w1l = *(twiddles + 1);
1014 WORD32 tmp;
1015 twiddles += nodespacing * 2;
1016
1017 x0r = *ptr_y;
1018 x0i = *(ptr_y + 1);
1019 ptr_y += (del << 1);
1020
1021 x1r = *ptr_y;
1022 x1i = *(ptr_y + 1);
1023
1024 tmp = (ixheaacd_mult32(x1r, w1l) - ixheaacd_mult32(x1i, w1h));
1025 x1i = ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l);
1026 x1r = tmp;
1027
1028 *ptr_y = (x0r) / 2 - (x1r) / 2;
1029 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
1030 ptr_y -= (del << 1);
1031
1032 *ptr_y = (x0r) / 2 + (x1r) / 2;
1033 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
1034 ptr_y += 2;
1035 }
1036 twiddles = ptr_w;
1037 for (j = del / 2; j != 0; j--) {
1038 WORD32 w1h = *twiddles;
1039 WORD32 w1l = *(twiddles + 1);
1040 WORD32 tmp;
1041 twiddles += nodespacing * 2;
1042
1043 x0r = *ptr_y;
1044 x0i = *(ptr_y + 1);
1045 ptr_y += (del << 1);
1046
1047 x1r = *ptr_y;
1048 x1i = *(ptr_y + 1);
1049
1050 tmp = (ixheaacd_mult32(x1r, w1h) + ixheaacd_mult32(x1i, w1l));
1051 x1i = -ixheaacd_mult32(x1r, w1l) + ixheaacd_mult32(x1i, w1h);
1052 x1r = tmp;
1053
1054 *ptr_y = (x0r) / 2 - (x1r) / 2;
1055 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
1056 ptr_y -= (del << 1);
1057
1058 *ptr_y = (x0r) / 2 + (x1r) / 2;
1059 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
1060 ptr_y += 2;
1061 }
1062 }
1063
1064 }
1065
1066 else {
1067 ptr_w = ixheaacd_twiddle_table_fft_32x32;
1068
1069 for (i = 0; i < npoints; i += 4) {
1070 WORD32 *inp = ptr_x;
1071
1072 DIG_REV(i, dig_rev_shift, h2);
1073 if (not_power_4) {
1074 h2 += 1;
1075 h2 &= ~1;
1076 }
1077 inp += (h2);
1078
1079 x0r = *inp;
1080 x0i = *(inp + 1);
1081 inp += (npoints >> 1);
1082
1083 x1r = *inp;
1084 x1i = *(inp + 1);
1085 inp += (npoints >> 1);
1086
1087 x2r = *inp;
1088 x2i = *(inp + 1);
1089 inp += (npoints >> 1);
1090
1091 x3r = *inp;
1092 x3i = *(inp + 1);
1093
1094 x0r = x0r + x2r;
1095 x0i = x0i + x2i;
1096 x2r = x0r - (x2r << 1);
1097 x2i = x0i - (x2i << 1);
1098 x1r = x1r + x3r;
1099 x1i = x1i + x3i;
1100 x3r = x1r - (x3r << 1);
1101 x3i = x1i - (x3i << 1);
1102
1103 x0r = x0r + x1r;
1104 x0i = x0i + x1i;
1105 x1r = x0r - (x1r << 1);
1106 x1i = x0i - (x1i << 1);
1107 x2r = x2r - x3i;
1108 x2i = x2i + x3r;
1109 x3i = x2r + (x3i << 1);
1110 x3r = x2i - (x3r << 1);
1111
1112 *ptr_y++ = x0r;
1113 *ptr_y++ = x0i;
1114 *ptr_y++ = x2r;
1115 *ptr_y++ = x2i;
1116 *ptr_y++ = x1r;
1117 *ptr_y++ = x1i;
1118 *ptr_y++ = x3i;
1119 *ptr_y++ = x3r;
1120 }
1121 ptr_y -= 2 * npoints;
1122 del = 4;
1123 nodespacing = 64;
1124 in_loop_cnt = npoints >> 4;
1125 for (i = n_stages - 1; i > 0; i--) {
1126 const WORD32 *twiddles = ptr_w;
1127 WORD32 *data = ptr_y;
1128 WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
1129 WORD32 sec_loop_cnt;
1130
1131 for (k = in_loop_cnt; k != 0; k--) {
1132 x0r = (*data);
1133 x0i = (*(data + 1));
1134 data += (del << 1);
1135
1136 x1r = (*data);
1137 x1i = (*(data + 1));
1138 data += (del << 1);
1139
1140 x2r = (*data);
1141 x2i = (*(data + 1));
1142 data += (del << 1);
1143
1144 x3r = (*data);
1145 x3i = (*(data + 1));
1146 data -= 3 * (del << 1);
1147
1148 x0r = x0r + x2r;
1149 x0i = x0i + x2i;
1150 x2r = x0r - (x2r << 1);
1151 x2i = x0i - (x2i << 1);
1152 x1r = x1r + x3r;
1153 x1i = x1i + x3i;
1154 x3r = x1r - (x3r << 1);
1155 x3i = x1i - (x3i << 1);
1156
1157 x0r = ixheaacd_add32_sat(x0r, x1r);
1158 x0i = ixheaacd_add32_sat(x0i, x1i);
1159 x1r = ixheaacd_sub32_sat(x0r, (x1r << 1));
1160 x1i = ixheaacd_sub32_sat(x0i, (x1i << 1));
1161 x2r = ixheaacd_sub32_sat(x2r, x3i);
1162 x2i = ixheaacd_add32_sat(x2i, x3r);
1163 x3i = ixheaacd_add32_sat(x2r, (x3i << 1));
1164 x3r = ixheaacd_sub32_sat(x2i, (x3r << 1));
1165
1166 *data = x0r;
1167 *(data + 1) = x0i;
1168 data += (del << 1);
1169
1170 *data = x2r;
1171 *(data + 1) = x2i;
1172 data += (del << 1);
1173
1174 *data = x1r;
1175 *(data + 1) = x1i;
1176 data += (del << 1);
1177
1178 *data = x3i;
1179 *(data + 1) = x3r;
1180 data += (del << 1);
1181 }
1182 data = ptr_y + 2;
1183
1184 sec_loop_cnt = (nodespacing * del);
1185 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
1186 (sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
1187 (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
1188 (sec_loop_cnt / 256);
1189 j = nodespacing;
1190
1191 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
1192 w1h = *(twiddles + 2 * j);
1193 w2h = *(twiddles + 2 * (j << 1));
1194 w3h = *(twiddles + 2 * j + 2 * (j << 1));
1195 w1l = *(twiddles + 2 * j + 1);
1196 w2l = *(twiddles + 2 * (j << 1) + 1);
1197 w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
1198
1199 for (k = in_loop_cnt; k != 0; k--) {
1200 WORD32 tmp;
1201 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1202
1203 data += (del << 1);
1204
1205 x1r = *data;
1206 x1i = *(data + 1);
1207 data += (del << 1);
1208
1209 x2r = *data;
1210 x2i = *(data + 1);
1211 data += (del << 1);
1212
1213 x3r = *data;
1214 x3i = *(data + 1);
1215 data -= 3 * (del << 1);
1216
1217 tmp = (ixheaacd_mult32(x1r, w1l) + ixheaacd_mult32(x1i, w1h));
1218 x1i = ixheaacd_mac32(-ixheaacd_mult32(x1r, w1h), x1i, w1l);
1219 x1r = tmp;
1220
1221 tmp = (ixheaacd_mult32(x2r, w2l) + ixheaacd_mult32(x2i, w2h));
1222 x2i = ixheaacd_mac32(-ixheaacd_mult32(x2r, w2h), x2i, w2l);
1223 x2r = tmp;
1224
1225 tmp = (ixheaacd_mult32(x3r, w3l) + ixheaacd_mult32(x3i, w3h));
1226 x3i = ixheaacd_mac32(-ixheaacd_mult32(x3r, w3h), x3i, w3l);
1227 x3r = tmp;
1228
1229 x0r = (*data);
1230 x0i = (*(data + 1));
1231
1232 x0r = x0r + (x2r);
1233 x0i = x0i + (x2i);
1234 x2r = x0r - (x2r << 1);
1235 x2i = x0i - (x2i << 1);
1236 x1r = x1r + x3r;
1237 x1i = x1i + x3i;
1238 x3r = x1r - (x3r << 1);
1239 x3i = x1i - (x3i << 1);
1240
1241 x0r = x0r + (x1r);
1242 x0i = x0i + (x1i);
1243 x1r = x0r - (x1r << 1);
1244 x1i = x0i - (x1i << 1);
1245 x2r = x2r - (x3i);
1246 x2i = x2i + (x3r);
1247 x3i = x2r + (x3i << 1);
1248 x3r = x2i - (x3r << 1);
1249
1250 *data = x0r;
1251 *(data + 1) = x0i;
1252 data += (del << 1);
1253
1254 *data = x2r;
1255 *(data + 1) = x2i;
1256 data += (del << 1);
1257
1258 *data = x1r;
1259 *(data + 1) = x1i;
1260 data += (del << 1);
1261
1262 *data = x3i;
1263 *(data + 1) = x3r;
1264 data += (del << 1);
1265 }
1266 data -= 2 * npoints;
1267 data += 2;
1268 }
1269 for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
1270 w1h = *(twiddles + 2 * j);
1271 w2h = *(twiddles + 2 * (j << 1));
1272 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1273 w1l = *(twiddles + 2 * j + 1);
1274 w2l = *(twiddles + 2 * (j << 1) + 1);
1275 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1276
1277 for (k = in_loop_cnt; k != 0; k--) {
1278 WORD32 tmp;
1279 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1280
1281 data += (del << 1);
1282
1283 x1r = *data;
1284 x1i = *(data + 1);
1285 data += (del << 1);
1286
1287 x2r = *data;
1288 x2i = *(data + 1);
1289 data += (del << 1);
1290
1291 x3r = *data;
1292 x3i = *(data + 1);
1293 data -= 3 * (del << 1);
1294
1295 tmp = (ixheaacd_mult32(x1r, w1l) + ixheaacd_mult32(x1i, w1h));
1296 x1i = ixheaacd_mac32(-ixheaacd_mult32(x1r, w1h), x1i, w1l);
1297 x1r = tmp;
1298
1299 tmp = (ixheaacd_mult32(x2r, w2l) + ixheaacd_mult32(x2i, w2h));
1300 x2i = ixheaacd_mac32(-ixheaacd_mult32(x2r, w2h), x2i, w2l);
1301 x2r = tmp;
1302
1303 tmp = (ixheaacd_mult32(x3r, w3h) - ixheaacd_mult32(x3i, w3l));
1304 x3i = ixheaacd_mult32(x3r, w3l) + ixheaacd_mult32(x3i, w3h);
1305 x3r = tmp;
1306
1307 x0r = (*data);
1308 x0i = (*(data + 1));
1309
1310 x0r = x0r + (x2r);
1311 x0i = x0i + (x2i);
1312 x2r = x0r - (x2r << 1);
1313 x2i = x0i - (x2i << 1);
1314 x1r = x1r + x3r;
1315 x1i = x1i + x3i;
1316 x3r = x1r - (x3r << 1);
1317 x3i = x1i - (x3i << 1);
1318
1319 x0r = x0r + (x1r);
1320 x0i = x0i + (x1i);
1321 x1r = x0r - (x1r << 1);
1322 x1i = x0i - (x1i << 1);
1323 x2r = x2r - (x3i);
1324 x2i = x2i + (x3r);
1325 x3i = x2r + (x3i << 1);
1326 x3r = x2i - (x3r << 1);
1327
1328 *data = x0r;
1329 *(data + 1) = x0i;
1330 data += (del << 1);
1331
1332 *data = x2r;
1333 *(data + 1) = x2i;
1334 data += (del << 1);
1335
1336 *data = x1r;
1337 *(data + 1) = x1i;
1338 data += (del << 1);
1339
1340 *data = x3i;
1341 *(data + 1) = x3r;
1342 data += (del << 1);
1343 }
1344 data -= 2 * npoints;
1345 data += 2;
1346 }
1347 for (; j <= sec_loop_cnt * 2; j += nodespacing) {
1348 w1h = *(twiddles + 2 * j);
1349 w2h = *(twiddles + 2 * (j << 1) - 512);
1350 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1351 w1l = *(twiddles + 2 * j + 1);
1352 w2l = *(twiddles + 2 * (j << 1) - 511);
1353 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1354
1355 for (k = in_loop_cnt; k != 0; k--) {
1356 WORD32 tmp;
1357 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1358
1359 data += (del << 1);
1360
1361 x1r = *data;
1362 x1i = *(data + 1);
1363 data += (del << 1);
1364
1365 x2r = *data;
1366 x2i = *(data + 1);
1367 data += (del << 1);
1368
1369 x3r = *data;
1370 x3i = *(data + 1);
1371 data -= 3 * (del << 1);
1372
1373 tmp = (ixheaacd_mult32(x1r, w1l) + ixheaacd_mult32(x1i, w1h));
1374 x1i = ixheaacd_mac32(-ixheaacd_mult32(x1r, w1h), x1i, w1l);
1375 x1r = tmp;
1376
1377 tmp = (ixheaacd_mult32(x2r, w2h) - ixheaacd_mult32(x2i, w2l));
1378 x2i = ixheaacd_mult32(x2r, w2l) + ixheaacd_mult32(x2i, w2h);
1379 x2r = tmp;
1380
1381 tmp = (ixheaacd_mult32(x3r, w3h) - ixheaacd_mult32(x3i, w3l));
1382 x3i = ixheaacd_mult32(x3r, w3l) + ixheaacd_mult32(x3i, w3h);
1383 x3r = tmp;
1384
1385 x0r = (*data);
1386 x0i = (*(data + 1));
1387
1388 x0r = x0r + (x2r);
1389 x0i = x0i + (x2i);
1390 x2r = x0r - (x2r << 1);
1391 x2i = x0i - (x2i << 1);
1392 x1r = x1r + x3r;
1393 x1i = x1i + x3i;
1394 x3r = x1r - (x3r << 1);
1395 x3i = x1i - (x3i << 1);
1396
1397 x0r = x0r + (x1r);
1398 x0i = x0i + (x1i);
1399 x1r = x0r - (x1r << 1);
1400 x1i = x0i - (x1i << 1);
1401 x2r = x2r - (x3i);
1402 x2i = x2i + (x3r);
1403 x3i = x2r + (x3i << 1);
1404 x3r = x2i - (x3r << 1);
1405
1406 *data = x0r;
1407 *(data + 1) = x0i;
1408 data += (del << 1);
1409
1410 *data = x2r;
1411 *(data + 1) = x2i;
1412 data += (del << 1);
1413
1414 *data = x1r;
1415 *(data + 1) = x1i;
1416 data += (del << 1);
1417
1418 *data = x3i;
1419 *(data + 1) = x3r;
1420 data += (del << 1);
1421 }
1422 data -= 2 * npoints;
1423 data += 2;
1424 }
1425 for (; j < nodespacing * del; j += nodespacing) {
1426 w1h = *(twiddles + 2 * j);
1427 w2h = *(twiddles + 2 * (j << 1) - 512);
1428 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
1429 w1l = *(twiddles + 2 * j + 1);
1430 w2l = *(twiddles + 2 * (j << 1) - 511);
1431 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
1432
1433 for (k = in_loop_cnt; k != 0; k--) {
1434 WORD32 tmp;
1435 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1436
1437 data += (del << 1);
1438
1439 x1r = *data;
1440 x1i = *(data + 1);
1441 data += (del << 1);
1442
1443 x2r = *data;
1444 x2i = *(data + 1);
1445 data += (del << 1);
1446
1447 x3r = *data;
1448 x3i = *(data + 1);
1449 data -= 3 * (del << 1);
1450
1451 tmp = (ixheaacd_mult32(x1r, w1l) + ixheaacd_mult32(x1i, w1h));
1452 x1i = ixheaacd_mac32(-ixheaacd_mult32(x1r, w1h), x1i, w1l);
1453 x1r = tmp;
1454
1455 tmp = (ixheaacd_mult32(x2r, w2h) - ixheaacd_mult32(x2i, w2l));
1456 x2i = ixheaacd_mult32(x2r, w2l) + ixheaacd_mult32(x2i, w2h);
1457 x2r = tmp;
1458
1459 tmp = (-ixheaacd_mult32(x3r, w3l) - ixheaacd_mult32(x3i, w3h));
1460 x3i = ixheaacd_mac32(-ixheaacd_mult32(x3r, w3h), x3i, w3l);
1461 x3r = tmp;
1462
1463 x0r = (*data);
1464 x0i = (*(data + 1));
1465
1466 x0r = x0r + (x2r);
1467 x0i = x0i + (x2i);
1468 x2r = x0r - (x2r << 1);
1469 x2i = x0i - (x2i << 1);
1470 x1r = x1r + x3r;
1471 x1i = x1i - x3i;
1472 x3r = x1r - (x3r << 1);
1473 x3i = x1i + (x3i << 1);
1474
1475 x0r = x0r + (x1r);
1476 x0i = x0i + (x1i);
1477 x1r = x0r - (x1r << 1);
1478 x1i = x0i - (x1i << 1);
1479 x2r = x2r - (x3i);
1480 x2i = x2i + (x3r);
1481 x3i = x2r + (x3i << 1);
1482 x3r = x2i - (x3r << 1);
1483
1484 *data = x0r;
1485 *(data + 1) = x0i;
1486 data += (del << 1);
1487
1488 *data = x2r;
1489 *(data + 1) = x2i;
1490 data += (del << 1);
1491
1492 *data = x1r;
1493 *(data + 1) = x1i;
1494 data += (del << 1);
1495
1496 *data = x3i;
1497 *(data + 1) = x3r;
1498 data += (del << 1);
1499 }
1500 data -= 2 * npoints;
1501 data += 2;
1502 }
1503 nodespacing >>= 2;
1504 del <<= 2;
1505 in_loop_cnt >>= 2;
1506 }
1507 if (not_power_4) {
1508 const WORD32 *twiddles = ptr_w;
1509 nodespacing <<= 1;
1510 shift += 1;
1511 for (j = del / 2; j != 0; j--) {
1512 WORD32 w1h = *twiddles;
1513 WORD32 w1l = *(twiddles + 1);
1514
1515 WORD32 tmp;
1516 twiddles += nodespacing * 2;
1517
1518 x0r = *ptr_y;
1519 x0i = *(ptr_y + 1);
1520 ptr_y += (del << 1);
1521
1522 x1r = *ptr_y;
1523 x1i = *(ptr_y + 1);
1524
1525 tmp = (ixheaacd_mult32(x1r, w1l) + ixheaacd_mult32(x1i, w1h));
1526 x1i = ixheaacd_mac32(-ixheaacd_mult32(x1r, w1h), x1i, w1l);
1527 x1r = tmp;
1528
1529 *ptr_y = (x0r) / 2 - (x1r) / 2;
1530 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
1531 ptr_y -= (del << 1);
1532
1533 *ptr_y = (x0r) / 2 + (x1r) / 2;
1534 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
1535 ptr_y += 2;
1536 }
1537 twiddles = ptr_w;
1538 for (j = del / 2; j != 0; j--) {
1539 WORD32 w1h = *twiddles;
1540 WORD32 w1l = *(twiddles + 1);
1541 WORD32 tmp;
1542 twiddles += nodespacing * 2;
1543
1544 x0r = *ptr_y;
1545 x0i = *(ptr_y + 1);
1546 ptr_y += (del << 1);
1547
1548 x1r = *ptr_y;
1549 x1i = *(ptr_y + 1);
1550
1551 tmp = (ixheaacd_mult32(x1r, w1h) - ixheaacd_mult32(x1i, w1l));
1552 x1i = ixheaacd_mult32(x1r, w1l) + ixheaacd_mult32(x1i, w1h);
1553 x1r = tmp;
1554
1555 *ptr_y = (x0r) / 2 - (x1r) / 2;
1556 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
1557 ptr_y -= (del << 1);
1558
1559 *ptr_y = (x0r) / 2 + (x1r) / 2;
1560 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
1561 ptr_y += 2;
1562 }
1563 }
1564 }
1565
1566 for (i = 0; i < nlength; i++) {
1567 xr[i] = y[2 * i];
1568 xi[i] = y[2 * i + 1];
1569 }
1570
1571 *preshift = shift - *preshift;
1572 return;
1573 }
1574
ixheaacd_complex_3point_fft(WORD32 * inp,WORD32 * op,WORD32 sign_dir)1575 static PLATFORM_INLINE void ixheaacd_complex_3point_fft(WORD32 *inp, WORD32 *op,
1576 WORD32 sign_dir) {
1577 WORD32 add_r, sub_r;
1578 WORD32 add_i, sub_i;
1579 WORD32 temp_real, temp_imag, temp;
1580
1581 WORD32 p1, p2, p3, p4;
1582
1583 WORD32 sinmu;
1584 sinmu = -1859775393 * sign_dir;
1585
1586 temp_real = ixheaacd_add32_sat(inp[0], inp[2]);
1587 temp_imag = ixheaacd_add32_sat(inp[1], inp[3]);
1588
1589 add_r = ixheaacd_add32_sat(inp[2], inp[4]);
1590 add_i = ixheaacd_add32_sat(inp[3], inp[5]);
1591
1592 sub_r = ixheaacd_sub32_sat(inp[2], inp[4]);
1593 sub_i = ixheaacd_sub32_sat(inp[3], inp[5]);
1594
1595 p1 = add_r >> 1;
1596 p4 = add_i >> 1;
1597 p2 = ixheaacd_mult32_shl(sub_i, sinmu);
1598 p3 = ixheaacd_mult32_shl(sub_r, sinmu);
1599
1600 temp = ixheaacd_sub32(inp[0], p1);
1601
1602 op[0] = ixheaacd_add32_sat(temp_real, inp[4]);
1603 op[1] = ixheaacd_add32_sat(temp_imag, inp[5]);
1604 op[2] = ixheaacd_add32_sat(temp, p2);
1605 op[3] = ixheaacd_sub32_sat(ixheaacd_sub32_sat(inp[1], p3), p4);
1606 op[4] = ixheaacd_sub32_sat(temp, p2);
1607 op[5] = ixheaacd_sub32_sat(ixheaacd_add32_sat(inp[1], p3), p4);
1608
1609 return;
1610 }
1611
ixheaacd_complex_fft_p3(WORD32 * xr,WORD32 * xi,WORD32 nlength,WORD32 fft_mode,WORD32 * preshift)1612 VOID ixheaacd_complex_fft_p3(WORD32 *xr, WORD32 *xi, WORD32 nlength,
1613 WORD32 fft_mode, WORD32 *preshift) {
1614 WORD32 i, j;
1615 WORD32 shift = 0;
1616 WORD32 xr_3[384];
1617 WORD32 xi_3[384];
1618 WORD32 x[1024];
1619 WORD32 y[1024];
1620 WORD32 cnfac, npts;
1621 WORD32 mpass = nlength;
1622 WORD32 n = 0;
1623 WORD32 *ptr_x = x;
1624 WORD32 *ptr_y = y;
1625
1626 cnfac = 0;
1627 while (mpass % 3 == 0) {
1628 mpass /= 3;
1629 cnfac++;
1630 }
1631 npts = mpass;
1632
1633 for (i = 0; i < 3 * cnfac; i++) {
1634 for (j = 0; j < mpass; j++) {
1635 xr_3[j] = xr[3 * j + i];
1636 xi_3[j] = xi[3 * j + i];
1637 }
1638
1639 (*ixheaacd_complex_fft_p2)(xr_3, xi_3, mpass, fft_mode, &shift);
1640
1641 for (j = 0; j < mpass; j++) {
1642 xr[3 * j + i] = xr_3[j];
1643 xi[3 * j + i] = xi_3[j];
1644 }
1645 }
1646
1647 while (npts >> 1) {
1648 n++;
1649 npts = npts >> 1;
1650 }
1651
1652 if (n % 2 == 0)
1653 shift = ((n + 4)) / 2;
1654 else
1655 shift = ((n + 5) / 2);
1656
1657 *preshift = shift - *preshift + 1;
1658
1659 for (i = 0; i < nlength; i++) {
1660 ptr_x[2 * i] = (xr[i] >> 1);
1661 ptr_x[2 * i + 1] = (xi[i] >> 1);
1662 }
1663
1664 {
1665 const WORD32 *w1r, *w1i;
1666 WORD32 tmp;
1667 w1r = ixheaacd_twiddle_table_3pr;
1668 w1i = ixheaacd_twiddle_table_3pi;
1669
1670 if (fft_mode < 0) {
1671 for (i = 0; i < nlength; i += 3) {
1672 tmp = ixheaacd_sub32_sat(ixheaacd_mult32(ptr_x[2 * i], (*w1r)),
1673 ixheaacd_mult32(ptr_x[2 * i + 1], (*w1i)));
1674 ptr_x[2 * i + 1] =
1675 ixheaacd_add32_sat(ixheaacd_mult32(ptr_x[2 * i], (*w1i)),
1676 ixheaacd_mult32(ptr_x[2 * i + 1], (*w1r)));
1677 ptr_x[2 * i] = tmp;
1678
1679 w1r++;
1680 w1i++;
1681
1682 tmp = ixheaacd_sub32_sat(ixheaacd_mult32(ptr_x[2 * i + 2], (*w1r)),
1683 ixheaacd_mult32(ptr_x[2 * i + 3], (*w1i)));
1684 ptr_x[2 * i + 3] =
1685 ixheaacd_add32_sat(ixheaacd_mult32(ptr_x[2 * i + 2], (*w1i)),
1686 ixheaacd_mult32(ptr_x[2 * i + 3], (*w1r)));
1687 ptr_x[2 * i + 2] = tmp;
1688
1689 w1r++;
1690 w1i++;
1691
1692 tmp = ixheaacd_sub32_sat(ixheaacd_mult32(ptr_x[2 * i + 4], (*w1r)),
1693 ixheaacd_mult32(ptr_x[2 * i + 5], (*w1i)));
1694 ptr_x[2 * i + 5] =
1695 ixheaacd_add32_sat(ixheaacd_mult32(ptr_x[2 * i + 4], (*w1i)),
1696 ixheaacd_mult32(ptr_x[2 * i + 5], (*w1r)));
1697 ptr_x[2 * i + 4] = tmp;
1698
1699 w1r += 3 * (128 / mpass - 1) + 1;
1700 w1i += 3 * (128 / mpass - 1) + 1;
1701 }
1702 }
1703
1704 else {
1705 for (i = 0; i < nlength; i += 3) {
1706 tmp = ixheaacd_add32_sat(ixheaacd_mult32(ptr_x[2 * i], (*w1r)),
1707 ixheaacd_mult32(ptr_x[2 * i + 1], (*w1i)));
1708 ptr_x[2 * i + 1] =
1709 ixheaacd_sub32_sat(ixheaacd_mult32(ptr_x[2 * i + 1], (*w1r)),
1710 ixheaacd_mult32(ptr_x[2 * i], (*w1i)));
1711 ptr_x[2 * i] = tmp;
1712
1713 w1r++;
1714 w1i++;
1715
1716 tmp = ixheaacd_add32_sat(ixheaacd_mult32(ptr_x[2 * i + 2], (*w1r)),
1717 ixheaacd_mult32(ptr_x[2 * i + 3], (*w1i)));
1718 ptr_x[2 * i + 3] =
1719 ixheaacd_sub32_sat(ixheaacd_mult32(ptr_x[2 * i + 3], (*w1r)),
1720 ixheaacd_mult32(ptr_x[2 * i + 2], (*w1i)));
1721 ptr_x[2 * i + 2] = tmp;
1722
1723 w1r++;
1724 w1i++;
1725
1726 tmp = ixheaacd_add32_sat(ixheaacd_mult32(ptr_x[2 * i + 4], (*w1r)),
1727 ixheaacd_mult32(ptr_x[2 * i + 5], (*w1i)));
1728 ptr_x[2 * i + 5] =
1729 ixheaacd_sub32_sat(ixheaacd_mult32(ptr_x[2 * i + 5], (*w1r)),
1730 ixheaacd_mult32(ptr_x[2 * i + 4], (*w1i)));
1731 ptr_x[2 * i + 4] = tmp;
1732
1733 w1r += 3 * (128 / mpass - 1) + 1;
1734 w1i += 3 * (128 / mpass - 1) + 1;
1735 }
1736 }
1737 }
1738
1739 for (i = 0; i < mpass; i++) {
1740 ixheaacd_complex_3point_fft(ptr_x, ptr_y, fft_mode);
1741
1742 ptr_x = ptr_x + 6;
1743 ptr_y = ptr_y + 6;
1744 }
1745
1746 for (i = 0; i < mpass; i++) {
1747 xr[i] = y[6 * i];
1748 xi[i] = y[6 * i + 1];
1749 }
1750
1751 for (i = 0; i < mpass; i++) {
1752 xr[mpass + i] = y[6 * i + 2];
1753 xi[mpass + i] = y[6 * i + 3];
1754 }
1755
1756 for (i = 0; i < mpass; i++) {
1757 xr[2 * mpass + i] = y[6 * i + 4];
1758 xi[2 * mpass + i] = y[6 * i + 5];
1759 }
1760 return;
1761 }
1762
ixheaacd_complex_fft(WORD32 * data_r,WORD32 * data_i,WORD32 nlength,WORD32 fft_mode,WORD32 * preshift)1763 VOID ixheaacd_complex_fft(WORD32 *data_r, WORD32 *data_i, WORD32 nlength,
1764 WORD32 fft_mode, WORD32 *preshift) {
1765 if (nlength & (nlength - 1)) {
1766 if ((nlength != 24) && (nlength != 48) && (nlength != 96) &&
1767 (nlength != 192) && (nlength != 384)) {
1768 printf("%d point FFT not supported", nlength);
1769 exit(0);
1770 }
1771 ixheaacd_complex_fft_p3(data_r, data_i, nlength, fft_mode, preshift);
1772 } else
1773 (*ixheaacd_complex_fft_p2)(data_r, data_i, nlength, fft_mode, preshift);
1774
1775 return;
1776 }
1777