/******************************************************************************
 *                                                                            *
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 */
20 #include <stdlib.h>
21 #include <math.h>
22 #include "ixheaacd_type_def.h"
23 #include "ixheaacd_constants.h"
24 #include "ixheaacd_basic_ops32.h"
25 #include "ixheaacd_fft_ifft_rom.h"
26 #include "ixheaacd_dsp_fft32x32s.h"
27
/*
 * DIG_REV(i, m, j)
 *
 * Converts `i` to unsigned, reverses the order of the eight 2-bit groups
 * (base-4 digits) inside each 16-bit half of that value, shifts the result
 * right by `m` bits and stores it in `j`.
 *
 * The three steps swap, in turn: adjacent 2-bit groups within each nibble,
 * the two nibbles within each byte, and the two bytes within each 16-bit
 * half.
 *
 * `i` is evaluated exactly once. `m` must be a valid shift count for
 * `unsigned` (non-negative and smaller than its width). The scratch variable
 * is called `_`, so the arguments must not refer to an identifier of that
 * name.
 */
#define DIG_REV(i, m, j)                                    \
  do {                                                      \
    unsigned _ = (i);                                       \
    _ = ((_ & 0x33333333) << 2) | ((_ & ~0x33333333) >> 2); \
    _ = ((_ & 0x0F0F0F0F) << 4) | ((_ & ~0x0F0F0F0F) >> 4); \
    _ = ((_ & 0x00FF00FF) << 8) | ((_ & ~0x00FF00FF) >> 8); \
    (j) = _ >> (m);                                         \
  } while (0)
36
/**
 * Multiplies two FLOAT64 values.
 *
 * @param a first factor
 * @param b second factor
 * @return the product a * b
 */
FLOAT64 ixheaacd_mult32X32float(FLOAT64 a, FLOAT64 b) {
  return a * b;
}
44
/**
 * Multiply-accumulate on FLOAT64 values.
 *
 * @param a accumulator value
 * @param b first factor
 * @param c second factor
 * @return a + b * c
 */
FLOAT64 ixheaacd_mac32X32float(FLOAT64 a, FLOAT64 b, FLOAT64 c) {
  return a + b * c;
}
52
ixheaacd_hbe_apply_ifft_7(FLOAT32 * inp,FLOAT32 * op)53 VOID ixheaacd_hbe_apply_ifft_7(FLOAT32 *inp, FLOAT32 *op) {
54 FLOAT32 x0r, x1r, x2r, x3r, x4r, x5r, x6r, x7r, x8r;
55 FLOAT32 x0i, x1i, x2i, x3i, x4i, x5i, x6i, x7i, x8i;
56 FLOAT32 y0r, y1r, y2r, y3r, y4r, y5r, y6r, y7r, y8r;
57 FLOAT32 y0i, y1i, y2i, y3i, y4i, y5i, y6i, y7i, y8i;
58
59 x0r = inp[0];
60 x0i = inp[1];
61 x1r = inp[2] + inp[12];
62 x1i = inp[3] + inp[13];
63 x2r = inp[2] - inp[12];
64 x2i = inp[3] - inp[13];
65 x3r = inp[4] + inp[10];
66 x3i = inp[5] + inp[11];
67 x4r = inp[4] - inp[10];
68 x4i = inp[5] - inp[11];
69 x5r = inp[8] + inp[6];
70 x5i = inp[9] + inp[7];
71 x6r = inp[8] - inp[6];
72 x6i = inp[9] - inp[7];
73
74 y0r = x0r;
75 y0i = x0i;
76 y1r = x1r + x3r + x5r;
77 y1i = x1i + x3i + x5i;
78 y2r = x1r - x3r;
79 y2i = x1i - x3i;
80 y3r = x5r - x1r;
81 y3i = x5i - x1i;
82 y4r = x3r - x5r;
83 y4i = x3i - x5i;
84 y5r = x2r + x4r + x6r;
85 y5i = x2i + x4i + x6i;
86 y6r = x2r - x4r;
87 y6i = x2i - x4i;
88 y7r = x6r - x2r;
89 y7i = x6i - x2i;
90 y8r = x4r - x6r;
91 y8i = x4i - x6i;
92
93 x0r = y0r + y1r;
94 x0i = y0i + y1i;
95 x1r = y0r + C70 * y1r;
96 x1i = y0i + C70 * y1i;
97 x2r = C71 * y2r;
98 x2i = C71 * y2i;
99 x3r = C72 * y3r;
100 x3i = C72 * y3i;
101 x4r = C73 * y4r;
102 x4i = C73 * y4i;
103 x5r = C74 * y5i;
104 x5i = -C74 * y5r;
105 x6r = C75 * y6i;
106 x6i = -C75 * y6r;
107 x7r = C76 * y7i;
108 x7i = -C76 * y7r;
109 x8r = C77 * y8i;
110 x8i = -C77 * y8r;
111
112 y0r = x0r;
113 y0i = x0i;
114 y1r = x1r + x2r + x4r;
115 y1i = x1i + x2i + x4i;
116 y2r = x1r - x2r - x3r;
117 y2i = x1i - x2i - x3i;
118 y3r = x1r + x3r - x4r;
119 y3i = x1i + x3i - x4i;
120 y4r = x5r + x6r + x8r;
121 y4i = x5i + x6i + x8i;
122 y5r = x5r - x6r - x7r;
123 y5i = x5i - x6i - x7i;
124 y6r = x5r + x7r - x8r;
125 y6i = x5i + x7i - x8i;
126
127 x0r = y0r;
128 x0i = y0i;
129 x1r = y1r + y4r;
130 x1i = y1i + y4i;
131 x2r = y3r + y6r;
132 x2i = y3i + y6i;
133 x3r = y2r - y5r;
134 x3i = y2i - y5i;
135 x4r = y2r + y5r;
136 x4i = y2i + y5i;
137 x5r = y3r - y6r;
138 x5i = y3i - y6i;
139 x6r = y1r - y4r;
140 x6i = y1i - y4i;
141
142 op[0] = x0r;
143 op[1] = x0i;
144 op[2] = x1r;
145 op[3] = x1i;
146 op[4] = x2r;
147 op[5] = x2i;
148 op[6] = x3r;
149 op[7] = x3i;
150 op[8] = x4r;
151 op[9] = x4i;
152 op[10] = x5r;
153 op[11] = x5i;
154 op[12] = x6r;
155 op[13] = x6i;
156
157 return;
158 }
159
ixheaacd_hbe_apply_fft_3(FLOAT32 * inp,FLOAT32 * op,WORD32 i_sign)160 VOID ixheaacd_hbe_apply_fft_3(FLOAT32 *inp, FLOAT32 *op, WORD32 i_sign) {
161 FLOAT32 add_r, sub_r;
162 FLOAT32 add_i, sub_i;
163 FLOAT32 X01r, X01i, temp;
164
165 FLOAT32 p1, p2, p3, p4;
166
167 FLOAT64 sinmu;
168 sinmu = -0.866025403784439 * (FLOAT64)i_sign;
169
170 X01r = inp[0] + inp[2];
171 X01i = inp[1] + inp[3];
172
173 add_r = inp[2] + inp[4];
174 add_i = inp[3] + inp[5];
175
176 sub_r = inp[2] - inp[4];
177 sub_i = inp[3] - inp[5];
178
179 p1 = add_r / (FLOAT32)2.0;
180 p4 = add_i / (FLOAT32)2.0;
181 p2 = (FLOAT32)((FLOAT64)sub_i * sinmu);
182 p3 = (FLOAT32)((FLOAT64)sub_r * sinmu);
183
184 temp = inp[0] - p1;
185
186 op[0] = X01r + inp[4];
187 op[1] = X01i + inp[5];
188 op[2] = temp + p2;
189 op[3] = (inp[1] - p3) - p4;
190 op[4] = temp - p2;
191 op[5] = (inp[1] + p3) - p4;
192
193 return;
194 }
195
ixheaacd_hbe_apply_tw_mult_ifft(FLOAT32 * inp,FLOAT32 * op,WORD32 dim1,WORD32 dim2,const FLOAT32 * tw)196 VOID ixheaacd_hbe_apply_tw_mult_ifft(FLOAT32 *inp, FLOAT32 *op, WORD32 dim1, WORD32 dim2,
197 const FLOAT32 *tw) {
198 FLOAT32 accu1, accu2;
199 WORD32 i, j;
200 WORD32 step_val = (dim2 - 1) << 1;
201 for (i = 0; i < (dim2); i++) {
202 op[0] = inp[0];
203 op[1] = inp[1];
204 op += 2;
205 inp += 2;
206 }
207
208 for (j = 0; j < (dim1 - 1); j++) {
209 op[0] = inp[0];
210 op[1] = inp[1];
211 inp += 2;
212 op += 2;
213 for (i = 0; i < (dim2 - 1); i++) {
214 CPLX_MPY_IFFT(accu1, accu2, inp[2 * i + 0], inp[2 * i + 1], tw[2 * i + 1], tw[2 * i]);
215 op[2 * i + 0] = accu1;
216 op[2 * i + 1] = accu2;
217 }
218 inp += step_val;
219 op += step_val;
220 tw += (dim2 - 1) * 2;
221 }
222 }
223
ixheaacd_hbe_apply_tw_mult_fft(FLOAT32 * inp,FLOAT32 * op,WORD32 dim1,WORD32 dim2,const FLOAT32 * tw)224 VOID ixheaacd_hbe_apply_tw_mult_fft(FLOAT32 *inp, FLOAT32 *op, WORD32 dim1, WORD32 dim2,
225 const FLOAT32 *tw) {
226 FLOAT32 accu1, accu2;
227 WORD32 i, j;
228 WORD32 step_val = (dim2 - 1) << 1;
229 for (i = 0; i < (dim2); i++) {
230 op[0] = inp[0];
231 op[1] = inp[1];
232 op += 2;
233 inp += 2;
234 }
235
236 for (j = 0; j < (dim1 - 1); j++) {
237 op[0] = inp[0];
238 op[1] = inp[1];
239 inp += 2;
240 op += 2;
241 for (i = 0; i < (dim2 - 1); i++) {
242 CPLX_MPY_FFT(accu1, accu2, inp[2 * i + 0], inp[2 * i + 1], tw[2 * i + 1], tw[2 * i]);
243 op[2 * i + 0] = accu1;
244 op[2 * i + 1] = accu2;
245 }
246 inp += step_val;
247 op += step_val;
248 tw += (dim2 - 1) * 2;
249 }
250 }
251
ixheaacd_hbe_apply_cfftn(FLOAT32 re[],FLOAT32 * scratch,WORD32 n_pass,WORD32 i_sign)252 VOID ixheaacd_hbe_apply_cfftn(FLOAT32 re[], FLOAT32 *scratch, WORD32 n_pass, WORD32 i_sign) {
253 WORD32 i, j, k, n_stages, h2;
254 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
255 WORD32 del, nodespacing, in_loop_cnt;
256 WORD32 not_power_4;
257 WORD32 dig_rev_shift;
258 WORD32 mpass = n_pass;
259 WORD32 npoints = n_pass;
260 const FLOAT64 *ptr_w;
261 FLOAT32 *ptr_x = scratch;
262 FLOAT32 *y = scratch + (2 * n_pass);
263 FLOAT32 *ptr_y = y;
264
265 dig_rev_shift = ixheaacd_norm32(mpass) + 1 - 16;
266 n_stages = 30 - ixheaacd_norm32(mpass);
267 not_power_4 = n_stages & 1;
268
269 n_stages = n_stages >> 1;
270
271 ptr_w = ixheaacd_twid_tbl_fft_double;
272 ptr_x = re;
273
274 if (i_sign == -1) {
275 for (i = 0; i < npoints; i += 4) {
276 FLOAT32 *inp = ptr_x;
277 FLOAT32 tmk;
278
279 DIG_REV(i, dig_rev_shift, h2);
280 if (not_power_4) {
281 h2 += 1;
282 h2 &= ~1;
283 }
284 inp += (h2);
285
286 x0r = *inp;
287 x0i = *(inp + 1);
288 inp += (npoints >> 1);
289
290 x1r = *inp;
291 x1i = *(inp + 1);
292 inp += (npoints >> 1);
293
294 x2r = *inp;
295 x2i = *(inp + 1);
296 inp += (npoints >> 1);
297
298 x3r = *inp;
299 x3i = *(inp + 1);
300
301 x0r = x0r + x2r;
302 x0i = x0i + x2i;
303
304 tmk = x0r - x2r;
305 x2r = tmk - x2r;
306 tmk = x0i - x2i;
307 x2i = tmk - x2i;
308
309 x1r = x1r + x3r;
310 x1i = x1i + x3i;
311
312 tmk = x1r - x3r;
313 x3r = tmk - x3r;
314 tmk = x1i - x3i;
315 x3i = tmk - x3i;
316
317 x0r = x0r + x1r;
318 x0i = x0i + x1i;
319
320 tmk = x0r - x1r;
321 x1r = tmk - x1r;
322 tmk = x0i - x1i;
323 x1i = tmk - x1i;
324
325 x2r = x2r + x3i;
326 x2i = x2i - x3r;
327
328 tmk = x2r - x3i;
329 x3i = tmk - x3i;
330 tmk = x2i + x3r;
331 x3r = tmk + x3r;
332
333 *ptr_y++ = x0r;
334 *ptr_y++ = x0i;
335 *ptr_y++ = x2r;
336 *ptr_y++ = x2i;
337 *ptr_y++ = x1r;
338 *ptr_y++ = x1i;
339 *ptr_y++ = x3i;
340 *ptr_y++ = x3r;
341 }
342 ptr_y -= 2 * npoints;
343 del = 4;
344 nodespacing = 64;
345 in_loop_cnt = npoints >> 4;
346 for (i = n_stages - 1; i > 0; i--) {
347 const FLOAT64 *twiddles = ptr_w;
348 FLOAT32 *data = ptr_y;
349 FLOAT64 W1, W2, W3, W4, W5, W6;
350 WORD32 sec_loop_cnt;
351
352 for (k = in_loop_cnt; k != 0; k--) {
353 x0r = (*data);
354 x0i = (*(data + 1));
355 data += (del << 1);
356
357 x1r = (*data);
358 x1i = (*(data + 1));
359 data += (del << 1);
360
361 x2r = (*data);
362 x2i = (*(data + 1));
363 data += (del << 1);
364
365 x3r = (*data);
366 x3i = (*(data + 1));
367 data -= 3 * (del << 1);
368
369 x0r = x0r + x2r;
370 x0i = x0i + x2i;
371 x2r = x0r - (x2r * 2);
372 x2i = x0i - (x2i * 2);
373 x1r = x1r + x3r;
374 x1i = x1i + x3i;
375 x3r = x1r - (x3r * 2);
376 x3i = x1i - (x3i * 2);
377
378 x0r = x0r + x1r;
379 x0i = x0i + x1i;
380 x1r = x0r - (x1r * 2);
381 x1i = x0i - (x1i * 2);
382 x2r = x2r + x3i;
383 x2i = x2i - x3r;
384 x3i = x2r - (x3i * 2);
385 x3r = x2i + (x3r * 2);
386
387 *data = x0r;
388 *(data + 1) = x0i;
389 data += (del << 1);
390
391 *data = x2r;
392 *(data + 1) = x2i;
393 data += (del << 1);
394
395 *data = x1r;
396 *(data + 1) = x1i;
397 data += (del << 1);
398
399 *data = x3i;
400 *(data + 1) = x3r;
401 data += (del << 1);
402 }
403 data = ptr_y + 2;
404
405 sec_loop_cnt = (nodespacing * del);
406 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
407 (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
408 (sec_loop_cnt / 256);
409 j = nodespacing;
410
411 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
412 W1 = *(twiddles + j);
413 W4 = *(twiddles + j + 257);
414 W2 = *(twiddles + (j << 1));
415 W5 = *(twiddles + (j << 1) + 257);
416 W3 = *(twiddles + j + (j << 1));
417 W6 = *(twiddles + j + (j << 1) + 257);
418
419 for (k = in_loop_cnt; k != 0; k--) {
420 FLOAT32 tmp;
421 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
422
423 data += (del << 1);
424
425 x1r = *data;
426 x1i = *(data + 1);
427 data += (del << 1);
428
429 x2r = *data;
430 x2i = *(data + 1);
431 data += (del << 1);
432
433 x3r = *data;
434 x3i = *(data + 1);
435 data -= 3 * (del << 1);
436
437 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x1r, W1) -
438 ixheaacd_mult32X32float((FLOAT64)x1i, W4));
439 x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x1r, W4),
440 (FLOAT64)x1i, W1);
441 x1r = tmp;
442
443 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x2r, W2) -
444 ixheaacd_mult32X32float((FLOAT64)x2i, W5));
445 x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x2r, W5),
446 (FLOAT64)x2i, W2);
447 x2r = tmp;
448
449 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x3r, W3) -
450 ixheaacd_mult32X32float((FLOAT64)x3i, W6));
451 x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x3r, W6),
452 (FLOAT64)x3i, W3);
453 x3r = tmp;
454
455 x0r = (*data);
456 x0i = (*(data + 1));
457
458 x0r = x0r + (x2r);
459 x0i = x0i + (x2i);
460 x2r = x0r - (x2r * 2);
461 x2i = x0i - (x2i * 2);
462 x1r = x1r + x3r;
463 x1i = x1i + x3i;
464 x3r = x1r - (x3r * 2);
465 x3i = x1i - (x3i * 2);
466
467 x0r = x0r + (x1r);
468 x0i = x0i + (x1i);
469 x1r = x0r - (x1r * 2);
470 x1i = x0i - (x1i * 2);
471 x2r = x2r + (x3i);
472 x2i = x2i - (x3r);
473 x3i = x2r - (x3i * 2);
474 x3r = x2i + (x3r * 2);
475
476 *data = x0r;
477 *(data + 1) = x0i;
478 data += (del << 1);
479
480 *data = x2r;
481 *(data + 1) = x2i;
482 data += (del << 1);
483
484 *data = x1r;
485 *(data + 1) = x1i;
486 data += (del << 1);
487
488 *data = x3i;
489 *(data + 1) = x3r;
490 data += (del << 1);
491 }
492 data -= 2 * npoints;
493 data += 2;
494 }
495 for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
496 W1 = *(twiddles + j);
497 W4 = *(twiddles + j + 257);
498 W2 = *(twiddles + (j << 1));
499 W5 = *(twiddles + (j << 1) + 257);
500 W3 = *(twiddles + j + (j << 1) - 256);
501 W6 = *(twiddles + j + (j << 1) + 1);
502
503 for (k = in_loop_cnt; k != 0; k--) {
504 FLOAT32 tmp;
505 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
506
507 data += (del << 1);
508
509 x1r = *data;
510 x1i = *(data + 1);
511 data += (del << 1);
512
513 x2r = *data;
514 x2i = *(data + 1);
515 data += (del << 1);
516
517 x3r = *data;
518 x3i = *(data + 1);
519 data -= 3 * (del << 1);
520
521 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x1r, W1) -
522 ixheaacd_mult32X32float((FLOAT64)x1i, W4));
523 x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x1r, W4),
524 (FLOAT64)x1i, W1);
525 x1r = tmp;
526
527 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x2r, W2) -
528 ixheaacd_mult32X32float((FLOAT64)x2i, W5));
529 x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x2r, W5),
530 (FLOAT64)x2i, W2);
531 x2r = tmp;
532
533 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x3r, W6) +
534 ixheaacd_mult32X32float((FLOAT64)x3i, W3));
535 x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT64)x3r, W3) +
536 ixheaacd_mult32X32float((FLOAT64)x3i, W6));
537 x3r = tmp;
538
539 x0r = (*data);
540 x0i = (*(data + 1));
541
542 x0r = x0r + (x2r);
543 x0i = x0i + (x2i);
544 x2r = x0r - (x2r * 2);
545 x2i = x0i - (x2i * 2);
546 x1r = x1r + x3r;
547 x1i = x1i + x3i;
548 x3r = x1r - (x3r * 2);
549 x3i = x1i - (x3i * 2);
550
551 x0r = x0r + (x1r);
552 x0i = x0i + (x1i);
553 x1r = x0r - (x1r * 2);
554 x1i = x0i - (x1i * 2);
555 x2r = x2r + (x3i);
556 x2i = x2i - (x3r);
557 x3i = x2r - (x3i * 2);
558 x3r = x2i + (x3r * 2);
559
560 *data = x0r;
561 *(data + 1) = x0i;
562 data += (del << 1);
563
564 *data = x2r;
565 *(data + 1) = x2i;
566 data += (del << 1);
567
568 *data = x1r;
569 *(data + 1) = x1i;
570 data += (del << 1);
571
572 *data = x3i;
573 *(data + 1) = x3r;
574 data += (del << 1);
575 }
576 data -= 2 * npoints;
577 data += 2;
578 }
579 for (; j <= sec_loop_cnt * 2; j += nodespacing) {
580 W1 = *(twiddles + j);
581 W4 = *(twiddles + j + 257);
582 W2 = *(twiddles + (j << 1) - 256);
583 W5 = *(twiddles + (j << 1) + 1);
584 W3 = *(twiddles + j + (j << 1) - 256);
585 W6 = *(twiddles + j + (j << 1) + 1);
586
587 for (k = in_loop_cnt; k != 0; k--) {
588 FLOAT32 tmp;
589 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
590
591 data += (del << 1);
592
593 x1r = *data;
594 x1i = *(data + 1);
595 data += (del << 1);
596
597 x2r = *data;
598 x2i = *(data + 1);
599 data += (del << 1);
600
601 x3r = *data;
602 x3i = *(data + 1);
603 data -= 3 * (del << 1);
604
605 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x1r, W1) -
606 ixheaacd_mult32X32float((FLOAT64)x1i, W4));
607 x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, W4), x1i, W1);
608 x1r = tmp;
609
610 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x2r, W5) +
611 ixheaacd_mult32X32float((FLOAT64)x2i, W2));
612 x2i = (FLOAT32)(-ixheaacd_mult32X32float(x2r, W2) + ixheaacd_mult32X32float(x2i, W5));
613 x2r = tmp;
614
615 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x3r, W6) +
616 ixheaacd_mult32X32float((FLOAT64)x3i, W3));
617 x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT64)x3r, W3) +
618 ixheaacd_mult32X32float((FLOAT64)x3i, W6));
619 x3r = tmp;
620
621 x0r = (*data);
622 x0i = (*(data + 1));
623
624 x0r = x0r + (x2r);
625 x0i = x0i + (x2i);
626 x2r = x0r - (x2r * 2);
627 x2i = x0i - (x2i * 2);
628 x1r = x1r + x3r;
629 x1i = x1i + x3i;
630 x3r = x1r - (x3r * 2);
631 x3i = x1i - (x3i * 2);
632
633 x0r = x0r + (x1r);
634 x0i = x0i + (x1i);
635 x1r = x0r - (x1r * 2);
636 x1i = x0i - (x1i * 2);
637 x2r = x2r + (x3i);
638 x2i = x2i - (x3r);
639 x3i = x2r - (x3i * 2);
640 x3r = x2i + (x3r * 2);
641
642 *data = x0r;
643 *(data + 1) = x0i;
644 data += (del << 1);
645
646 *data = x2r;
647 *(data + 1) = x2i;
648 data += (del << 1);
649
650 *data = x1r;
651 *(data + 1) = x1i;
652 data += (del << 1);
653
654 *data = x3i;
655 *(data + 1) = x3r;
656 data += (del << 1);
657 }
658 data -= 2 * npoints;
659 data += 2;
660 }
661 for (; j < nodespacing * del; j += nodespacing) {
662 W1 = *(twiddles + j);
663 W4 = *(twiddles + j + 257);
664 W2 = *(twiddles + (j << 1) - 256);
665 W5 = *(twiddles + (j << 1) + 1);
666 W3 = *(twiddles + j + (j << 1) - 512);
667 W6 = *(twiddles + j + (j << 1) - 512 + 257);
668
669 for (k = in_loop_cnt; k != 0; k--) {
670 FLOAT32 tmp;
671 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
672
673 data += (del << 1);
674
675 x1r = *data;
676 x1i = *(data + 1);
677 data += (del << 1);
678
679 x2r = *data;
680 x2i = *(data + 1);
681 data += (del << 1);
682
683 x3r = *data;
684 x3i = *(data + 1);
685 data -= 3 * (del << 1);
686
687 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x1r, W1) -
688 ixheaacd_mult32X32float((FLOAT64)x1i, W4));
689 x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x1r, W4),
690 (FLOAT64)x1i, W1);
691 x1r = tmp;
692
693 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x2r, W5) +
694 ixheaacd_mult32X32float((FLOAT64)x2i, W2));
695 x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT64)x2r, W2) +
696 ixheaacd_mult32X32float((FLOAT64)x2i, W5));
697 x2r = tmp;
698
699 tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT64)x3r, W3) +
700 ixheaacd_mult32X32float((FLOAT64)x3i, W6));
701 x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x3r, W6),
702 (FLOAT64)x3i, W3);
703 x3r = tmp;
704
705 x0r = (*data);
706 x0i = (*(data + 1));
707
708 x0r = x0r + (x2r);
709 x0i = x0i + (x2i);
710 x2r = x0r - (x2r * 2);
711 x2i = x0i - (x2i * 2);
712 x1r = x1r + x3r;
713 x1i = x1i - x3i;
714 x3r = x1r - (x3r * 2);
715 x3i = x1i + (x3i * 2);
716
717 x0r = x0r + (x1r);
718 x0i = x0i + (x1i);
719 x1r = x0r - (x1r * 2);
720 x1i = x0i - (x1i * 2);
721 x2r = x2r + (x3i);
722 x2i = x2i - (x3r);
723 x3i = x2r - (x3i * 2);
724 x3r = x2i + (x3r * 2);
725
726 *data = x0r;
727 *(data + 1) = x0i;
728 data += (del << 1);
729
730 *data = x2r;
731 *(data + 1) = x2i;
732 data += (del << 1);
733
734 *data = x1r;
735 *(data + 1) = x1i;
736 data += (del << 1);
737
738 *data = x3i;
739 *(data + 1) = x3r;
740 data += (del << 1);
741 }
742 data -= 2 * npoints;
743 data += 2;
744 }
745 nodespacing >>= 2;
746 del <<= 2;
747 in_loop_cnt >>= 2;
748 }
749 if (not_power_4) {
750 const FLOAT64 *twiddles = ptr_w;
751 nodespacing <<= 1;
752
753 for (j = del / 2; j != 0; j--) {
754 FLOAT64 W1 = *twiddles;
755 FLOAT64 W4 = *(twiddles + 257);
756 FLOAT32 tmp;
757 twiddles += nodespacing;
758
759 x0r = *ptr_y;
760 x0i = *(ptr_y + 1);
761 ptr_y += (del << 1);
762
763 x1r = *ptr_y;
764 x1i = *(ptr_y + 1);
765
766 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x1r, W1) -
767 ixheaacd_mult32X32float((FLOAT64)x1i, W4));
768 x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT64)x1r, W4),
769 (FLOAT64)x1i, W1);
770 x1r = tmp;
771
772 *ptr_y = (x0r) - (x1r);
773 *(ptr_y + 1) = (x0i) - (x1i);
774 ptr_y -= (del << 1);
775
776 *ptr_y = (x0r) + (x1r);
777 *(ptr_y + 1) = (x0i) + (x1i);
778 ptr_y += 2;
779 }
780 twiddles = ptr_w;
781 for (j = del / 2; j != 0; j--) {
782 FLOAT64 W1 = *twiddles;
783 FLOAT64 W4 = *(twiddles + 257);
784 FLOAT32 tmp;
785 twiddles += nodespacing;
786
787 x0r = *ptr_y;
788 x0i = *(ptr_y + 1);
789 ptr_y += (del << 1);
790
791 x1r = *ptr_y;
792 x1i = *(ptr_y + 1);
793
794 tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT64)x1r, W4) +
795 ixheaacd_mult32X32float((FLOAT64)x1i, W1));
796 x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT64)x1r, W1) +
797 ixheaacd_mult32X32float((FLOAT64)x1i, W4));
798 x1r = tmp;
799
800 *ptr_y = (x0r) - (x1r);
801 *(ptr_y + 1) = (x0i) - (x1i);
802 ptr_y -= (del << 1);
803
804 *ptr_y = (x0r) + (x1r);
805 *(ptr_y + 1) = (x0i) + (x1i);
806 ptr_y += 2;
807 }
808 }
809 } else {
810 for (i = 0; i < npoints; i += 4) {
811 FLOAT32 *inp = ptr_x;
812
813 DIG_REV(i, dig_rev_shift, h2);
814 if (not_power_4) {
815 h2 += 1;
816 h2 &= ~1;
817 }
818 inp += (h2);
819
820 x0r = *inp;
821 x0i = *(inp + 1);
822 inp += (npoints >> 1);
823
824 x1r = *inp;
825 x1i = *(inp + 1);
826 inp += (npoints >> 1);
827
828 x2r = *inp;
829 x2i = *(inp + 1);
830 inp += (npoints >> 1);
831
832 x3r = *inp;
833 x3i = *(inp + 1);
834
835 x0r = x0r + x2r;
836 x0i = x0i + x2i;
837 x2r = x0r - (x2r * 2);
838 x2i = x0i - (x2i * 2);
839 x1r = x1r + x3r;
840 x1i = x1i + x3i;
841 x3r = x1r - (x3r * 2);
842 x3i = x1i - (x3i * 2);
843
844 x0r = x0r + x1r;
845 x0i = x0i + x1i;
846 x1r = x0r - (x1r * 2);
847 x1i = x0i - (x1i * 2);
848 x2r = x2r - x3i;
849 x2i = x2i + x3r;
850 x3i = x2r + (x3i * 2);
851 x3r = x2i - (x3r * 2);
852
853 *ptr_y++ = x0r;
854 *ptr_y++ = x0i;
855 *ptr_y++ = x2r;
856 *ptr_y++ = x2i;
857 *ptr_y++ = x1r;
858 *ptr_y++ = x1i;
859 *ptr_y++ = x3i;
860 *ptr_y++ = x3r;
861 }
862 ptr_y -= 2 * npoints;
863 del = 4;
864 nodespacing = 64;
865 in_loop_cnt = npoints >> 4;
866 for (i = n_stages - 1; i > 0; i--) {
867 const FLOAT64 *twiddles = ptr_w;
868 FLOAT32 *data = ptr_y;
869 FLOAT64 W1, W2, W3, W4, W5, W6;
870 WORD32 sec_loop_cnt;
871
872 for (k = in_loop_cnt; k != 0; k--) {
873 x0r = (*data);
874 x0i = (*(data + 1));
875 data += (del << 1);
876
877 x1r = (*data);
878 x1i = (*(data + 1));
879 data += (del << 1);
880
881 x2r = (*data);
882 x2i = (*(data + 1));
883 data += (del << 1);
884
885 x3r = (*data);
886 x3i = (*(data + 1));
887 data -= 3 * (del << 1);
888
889 x0r = x0r + x2r;
890 x0i = x0i + x2i;
891 x2r = x0r - (x2r * 2);
892 x2i = x0i - (x2i * 2);
893 x1r = x1r + x3r;
894 x1i = x1i + x3i;
895 x3r = x1r - (x3r * 2);
896 x3i = x1i - (x3i * 2);
897
898 x0r = x0r + x1r;
899 x0i = x0i + x1i;
900 x1r = x0r - (x1r * 2);
901 x1i = x0i - (x1i * 2);
902 x2r = x2r - x3i;
903 x2i = x2i + x3r;
904 x3i = x2r + (x3i * 2);
905 x3r = x2i - (x3r * 2);
906
907 *data = x0r;
908 *(data + 1) = x0i;
909 data += (del << 1);
910
911 *data = x2r;
912 *(data + 1) = x2i;
913 data += (del << 1);
914
915 *data = x1r;
916 *(data + 1) = x1i;
917 data += (del << 1);
918
919 *data = x3i;
920 *(data + 1) = x3r;
921 data += (del << 1);
922 }
923 data = ptr_y + 2;
924
925 sec_loop_cnt = (nodespacing * del);
926 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
927 (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
928 (sec_loop_cnt / 256);
929 j = nodespacing;
930
931 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
932 W1 = *(twiddles + j);
933 W4 = *(twiddles + j + 257);
934 W2 = *(twiddles + (j << 1));
935 W5 = *(twiddles + (j << 1) + 257);
936 W3 = *(twiddles + j + (j << 1));
937 W6 = *(twiddles + j + (j << 1) + 257);
938
939 for (k = in_loop_cnt; k != 0; k--) {
940 FLOAT32 tmp;
941 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
942
943 data += (del << 1);
944
945 x1r = *data;
946 x1i = *(data + 1);
947 data += (del << 1);
948
949 x2r = *data;
950 x2i = *(data + 1);
951 data += (del << 1);
952
953 x3r = *data;
954 x3i = *(data + 1);
955 data -= 3 * (del << 1);
956
957 tmp = (FLOAT32)(((FLOAT64)x1r * W1) + ((FLOAT64)x1i * W4));
958 x1i = (FLOAT32)(-((FLOAT64)x1r * W4) + (FLOAT64)x1i * W1);
959 x1r = tmp;
960
961 tmp = (FLOAT32)(((FLOAT64)x2r * W2) + ((FLOAT64)x2i * W5));
962 x2i = (FLOAT32)(-((FLOAT64)x2r * W5) + (FLOAT64)x2i * W2);
963 x2r = tmp;
964
965 tmp = (FLOAT32)(((FLOAT64)x3r * W3) + ((FLOAT64)x3i * W6));
966 x3i = (FLOAT32)(-((FLOAT64)x3r * W6) + (FLOAT64)x3i * W3);
967 x3r = tmp;
968
969 x0r = (*data);
970 x0i = (*(data + 1));
971
972 x0r = x0r + (x2r);
973 x0i = x0i + (x2i);
974 x2r = x0r - (x2r * 2);
975 x2i = x0i - (x2i * 2);
976 x1r = x1r + x3r;
977 x1i = x1i + x3i;
978 x3r = x1r - (x3r * 2);
979 x3i = x1i - (x3i * 2);
980
981 x0r = x0r + (x1r);
982 x0i = x0i + (x1i);
983 x1r = x0r - (x1r * 2);
984 x1i = x0i - (x1i * 2);
985 x2r = x2r - (x3i);
986 x2i = x2i + (x3r);
987 x3i = x2r + (x3i * 2);
988 x3r = x2i - (x3r * 2);
989
990 *data = x0r;
991 *(data + 1) = x0i;
992 data += (del << 1);
993
994 *data = x2r;
995 *(data + 1) = x2i;
996 data += (del << 1);
997
998 *data = x1r;
999 *(data + 1) = x1i;
1000 data += (del << 1);
1001
1002 *data = x3i;
1003 *(data + 1) = x3r;
1004 data += (del << 1);
1005 }
1006 data -= 2 * npoints;
1007 data += 2;
1008 }
1009 for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
1010 W1 = *(twiddles + j);
1011 W4 = *(twiddles + j + 257);
1012 W2 = *(twiddles + (j << 1));
1013 W5 = *(twiddles + (j << 1) + 257);
1014 W3 = *(twiddles + j + (j << 1) - 256);
1015 W6 = *(twiddles + j + (j << 1) + 1);
1016
1017 for (k = in_loop_cnt; k != 0; k--) {
1018 FLOAT32 tmp;
1019 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1020
1021 data += (del << 1);
1022
1023 x1r = *data;
1024 x1i = *(data + 1);
1025 data += (del << 1);
1026
1027 x2r = *data;
1028 x2i = *(data + 1);
1029 data += (del << 1);
1030
1031 x3r = *data;
1032 x3i = *(data + 1);
1033 data -= 3 * (del << 1);
1034
1035 tmp = (FLOAT32)(((FLOAT64)x1r * W1) + ((FLOAT64)x1i * W4));
1036 x1i = (FLOAT32)(-((FLOAT64)x1r * W4) + (FLOAT64)x1i * W1);
1037 x1r = tmp;
1038
1039 tmp = (FLOAT32)(((FLOAT64)x2r * W2) + ((FLOAT64)x2i * W5));
1040 x2i = (FLOAT32)(-((FLOAT64)x2r * W5) + (FLOAT64)x2i * W2);
1041 x2r = tmp;
1042
1043 tmp = (FLOAT32)(((FLOAT64)x3r * W6) - ((FLOAT64)x3i * W3));
1044 x3i = (FLOAT32)(((FLOAT64)x3r * W3) + ((FLOAT64)x3i * W6));
1045 x3r = tmp;
1046
1047 x0r = (*data);
1048 x0i = (*(data + 1));
1049
1050 x0r = x0r + (x2r);
1051 x0i = x0i + (x2i);
1052 x2r = x0r - (x2r * 2);
1053 x2i = x0i - (x2i * 2);
1054 x1r = x1r + x3r;
1055 x1i = x1i + x3i;
1056 x3r = x1r - (x3r * 2);
1057 x3i = x1i - (x3i * 2);
1058
1059 x0r = x0r + (x1r);
1060 x0i = x0i + (x1i);
1061 x1r = x0r - (x1r * 2);
1062 x1i = x0i - (x1i * 2);
1063 x2r = x2r - (x3i);
1064 x2i = x2i + (x3r);
1065 x3i = x2r + (x3i * 2);
1066 x3r = x2i - (x3r * 2);
1067
1068 *data = x0r;
1069 *(data + 1) = x0i;
1070 data += (del << 1);
1071
1072 *data = x2r;
1073 *(data + 1) = x2i;
1074 data += (del << 1);
1075
1076 *data = x1r;
1077 *(data + 1) = x1i;
1078 data += (del << 1);
1079
1080 *data = x3i;
1081 *(data + 1) = x3r;
1082 data += (del << 1);
1083 }
1084 data -= 2 * npoints;
1085 data += 2;
1086 }
1087 for (; j <= sec_loop_cnt * 2; j += nodespacing) {
1088 W1 = *(twiddles + j);
1089 W4 = *(twiddles + j + 257);
1090 W2 = *(twiddles + (j << 1) - 256);
1091 W5 = *(twiddles + (j << 1) + 1);
1092 W3 = *(twiddles + j + (j << 1) - 256);
1093 W6 = *(twiddles + j + (j << 1) + 1);
1094
1095 for (k = in_loop_cnt; k != 0; k--) {
1096 FLOAT32 tmp;
1097 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1098
1099 data += (del << 1);
1100
1101 x1r = *data;
1102 x1i = *(data + 1);
1103 data += (del << 1);
1104
1105 x2r = *data;
1106 x2i = *(data + 1);
1107 data += (del << 1);
1108
1109 x3r = *data;
1110 x3i = *(data + 1);
1111 data -= 3 * (del << 1);
1112
1113 tmp = (FLOAT32)(((FLOAT64)x1r * W1) + ((FLOAT64)x1i * W4));
1114 x1i = (FLOAT32)(-((FLOAT64)x1r * W4) + (FLOAT64)x1i * W1);
1115 x1r = tmp;
1116
1117 tmp = (FLOAT32)(((FLOAT64)x2r * W5) - ((FLOAT64)x2i * W2));
1118 x2i = (FLOAT32)(((FLOAT64)x2r * W2) + ((FLOAT64)x2i * W5));
1119 x2r = tmp;
1120
1121 tmp = (FLOAT32)(((FLOAT64)x3r * W6) - ((FLOAT64)x3i * W3));
1122 x3i = (FLOAT32)(((FLOAT64)x3r * W3) + ((FLOAT64)x3i * W6));
1123 x3r = tmp;
1124
1125 x0r = (*data);
1126 x0i = (*(data + 1));
1127
1128 x0r = x0r + (x2r);
1129 x0i = x0i + (x2i);
1130 x2r = x0r - (x2r * 2);
1131 x2i = x0i - (x2i * 2);
1132 x1r = x1r + x3r;
1133 x1i = x1i + x3i;
1134 x3r = x1r - (x3r * 2);
1135 x3i = x1i - (x3i * 2);
1136
1137 x0r = x0r + (x1r);
1138 x0i = x0i + (x1i);
1139 x1r = x0r - (x1r * 2);
1140 x1i = x0i - (x1i * 2);
1141 x2r = x2r - (x3i);
1142 x2i = x2i + (x3r);
1143 x3i = x2r + (x3i * 2);
1144 x3r = x2i - (x3r * 2);
1145
1146 *data = x0r;
1147 *(data + 1) = x0i;
1148 data += (del << 1);
1149
1150 *data = x2r;
1151 *(data + 1) = x2i;
1152 data += (del << 1);
1153
1154 *data = x1r;
1155 *(data + 1) = x1i;
1156 data += (del << 1);
1157
1158 *data = x3i;
1159 *(data + 1) = x3r;
1160 data += (del << 1);
1161 }
1162 data -= 2 * npoints;
1163 data += 2;
1164 }
1165 for (; j < nodespacing * del; j += nodespacing) {
1166 W1 = *(twiddles + j);
1167 W4 = *(twiddles + j + 257);
1168 W2 = *(twiddles + (j << 1) - 256);
1169 W5 = *(twiddles + (j << 1) + 1);
1170 W3 = *(twiddles + j + (j << 1) - 512);
1171 W6 = *(twiddles + j + (j << 1) - 512 + 257);
1172
1173 for (k = in_loop_cnt; k != 0; k--) {
1174 FLOAT32 tmp;
1175 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1176
1177 data += (del << 1);
1178
1179 x1r = *data;
1180 x1i = *(data + 1);
1181 data += (del << 1);
1182
1183 x2r = *data;
1184 x2i = *(data + 1);
1185 data += (del << 1);
1186
1187 x3r = *data;
1188 x3i = *(data + 1);
1189 data -= 3 * (del << 1);
1190
1191 tmp = (FLOAT32)(((FLOAT64)x1r * W1) + ((FLOAT64)x1i * W4));
1192 x1i = (FLOAT32)(-((FLOAT64)x1r * W4) + (FLOAT64)x1i * W1);
1193 x1r = tmp;
1194
1195 tmp = (FLOAT32)(((FLOAT64)x2r * W5) - ((FLOAT64)x2i * W2));
1196 x2i = (FLOAT32)(((FLOAT64)x2r * W2) + ((FLOAT64)x2i * W5));
1197 x2r = tmp;
1198
1199 tmp = (FLOAT32)(-((FLOAT64)x3r * W3) - ((FLOAT64)x3i * W6));
1200 x3i = (FLOAT32)(-((FLOAT64)x3r * W6) + (FLOAT64)x3i * W3);
1201 x3r = tmp;
1202
1203 x0r = (*data);
1204 x0i = (*(data + 1));
1205
1206 x0r = x0r + (x2r);
1207 x0i = x0i + (x2i);
1208 x2r = x0r - (x2r * 2);
1209 x2i = x0i - (x2i * 2);
1210 x1r = x1r + x3r;
1211 x1i = x1i - x3i;
1212 x3r = x1r - (x3r * 2);
1213 x3i = x1i + (x3i * 2);
1214
1215 x0r = x0r + (x1r);
1216 x0i = x0i + (x1i);
1217 x1r = x0r - (x1r * 2);
1218 x1i = x0i - (x1i * 2);
1219 x2r = x2r - (x3i);
1220 x2i = x2i + (x3r);
1221 x3i = x2r + (x3i * 2);
1222 x3r = x2i - (x3r * 2);
1223
1224 *data = x0r;
1225 *(data + 1) = x0i;
1226 data += (del << 1);
1227
1228 *data = x2r;
1229 *(data + 1) = x2i;
1230 data += (del << 1);
1231
1232 *data = x1r;
1233 *(data + 1) = x1i;
1234 data += (del << 1);
1235
1236 *data = x3i;
1237 *(data + 1) = x3r;
1238 data += (del << 1);
1239 }
1240 data -= 2 * npoints;
1241 data += 2;
1242 }
1243 nodespacing >>= 2;
1244 del <<= 2;
1245 in_loop_cnt >>= 2;
1246 }
1247
1248 if (not_power_4) {
1249 const FLOAT64 *twiddles = ptr_w;
1250 nodespacing <<= 1;
1251
1252 for (j = del / 2; j != 0; j--) {
1253 FLOAT64 W1 = *twiddles;
1254 FLOAT64 W4 = *(twiddles + 257);
1255 FLOAT32 tmp;
1256 twiddles += nodespacing;
1257
1258 x0r = *ptr_y;
1259 x0i = *(ptr_y + 1);
1260 ptr_y += (del << 1);
1261
1262 x1r = *ptr_y;
1263 x1i = *(ptr_y + 1);
1264
1265 tmp = (FLOAT32)(((FLOAT64)x1r * W1) + ((FLOAT64)x1i * W4));
1266 x1i = (FLOAT32)(-((FLOAT64)x1r * W4) + (FLOAT64)x1i * W1);
1267 x1r = tmp;
1268
1269 *ptr_y = (x0r) - (x1r);
1270 *(ptr_y + 1) = (x0i) - (x1i);
1271 ptr_y -= (del << 1);
1272
1273 *ptr_y = (x0r) + (x1r);
1274 *(ptr_y + 1) = (x0i) + (x1i);
1275 ptr_y += 2;
1276 }
1277 twiddles = ptr_w;
1278 for (j = del / 2; j != 0; j--) {
1279 FLOAT64 W1 = *twiddles;
1280 FLOAT64 W4 = *(twiddles + 257);
1281 FLOAT32 tmp;
1282 twiddles += nodespacing;
1283
1284 x0r = *ptr_y;
1285 x0i = *(ptr_y + 1);
1286 ptr_y += (del << 1);
1287
1288 x1r = *ptr_y;
1289 x1i = *(ptr_y + 1);
1290
1291 tmp = (FLOAT32)(((FLOAT64)x1r * W4) - ((FLOAT64)x1i * W1));
1292 x1i = (FLOAT32)(((FLOAT64)x1r * W1) + ((FLOAT64)x1i * W4));
1293 x1r = tmp;
1294
1295 *ptr_y = (x0r) - (x1r);
1296 *(ptr_y + 1) = (x0i) - (x1i);
1297 ptr_y -= (del << 1);
1298
1299 *ptr_y = (x0r) + (x1r);
1300 *(ptr_y + 1) = (x0i) + (x1i);
1301 ptr_y += 2;
1302 }
1303 }
1304 }
1305
1306 for (i = 0; i < n_pass; i++) {
1307 re[2 * i + 0] = y[2 * i + 0];
1308 re[2 * i + 1] = y[2 * i + 1];
1309 }
1310 }
1311
ixheaacd_hbe_apply_cfftn_gen(FLOAT32 re[],FLOAT32 * scratch,WORD32 n_pass,WORD32 i_sign)1312 VOID ixheaacd_hbe_apply_cfftn_gen(FLOAT32 re[], FLOAT32 *scratch, WORD32 n_pass,
1313 WORD32 i_sign) {
1314 WORD32 i, j;
1315 WORD32 m_points = n_pass;
1316 FLOAT32 *x, *y, *re3;
1317 FLOAT32 *ptr_x, *ptr_y;
1318 ptr_x = x = scratch;
1319 scratch += 2 * m_points;
1320 ptr_y = y = scratch;
1321 scratch += 4 * m_points;
1322 re3 = scratch;
1323 scratch += 2 * m_points;
1324 WORD32 cnfac;
1325 WORD32 mpass = n_pass;
1326
1327 cnfac = 0;
1328 while (mpass % 3 == 0) {
1329 mpass /= 3;
1330 cnfac++;
1331 }
1332
1333 for (i = 0; i < 3 * cnfac; i++) {
1334 for (j = 0; j < mpass; j++) {
1335 re3[2 * j + 0] = re[6 * j + 2 * i + 0];
1336 re3[2 * j + 1] = re[6 * j + 2 * i + 1];
1337 }
1338
1339 ixheaacd_hbe_apply_cfftn(re3, scratch, mpass, i_sign);
1340
1341 for (j = 0; j < mpass; j++) {
1342 re[6 * j + 2 * i + 0] = re3[2 * j + 0];
1343 re[6 * j + 2 * i + 1] = re3[2 * j + 1];
1344 }
1345 }
1346
1347 {
1348 FLOAT64 *w1r, *w1i;
1349 FLOAT32 tmp;
1350 w1r = (FLOAT64 *)ixheaacd_twid_tbl_fft_ntwt3r;
1351 w1i = (FLOAT64 *)ixheaacd_twid_tbl_fft_ntwt3i;
1352
1353 if (i_sign < 0) {
1354
1355 for (i = 0; i < n_pass; i += 3) {
1356 tmp = (FLOAT32)((FLOAT64)re[2 * i + 0] * (*w1r) - (FLOAT64)re[2 * i + 1] * (*w1i));
1357 re[2 * i + 1] =
1358 (FLOAT32)((FLOAT64)re[2 * i + 0] * (*w1i) + (FLOAT64)re[2 * i + 1] * (*w1r));
1359 re[2 * i + 0] = tmp;
1360
1361 w1r++;
1362 w1i++;
1363
1364 tmp = (FLOAT32)((FLOAT64)re[2 * i + 2] * (*w1r) - (FLOAT64)re[2 * i + 3] * (*w1i));
1365 re[2 * i + 3] =
1366 (FLOAT32)((FLOAT64)re[2 * i + 2] * (*w1i) + (FLOAT64)re[2 * i + 3] * (*w1r));
1367 re[2 * i + 2] = tmp;
1368
1369 w1r++;
1370 w1i++;
1371
1372 tmp = (FLOAT32)((FLOAT64)re[2 * i + 4] * (*w1r) - (FLOAT64)re[2 * i + 5] * (*w1i));
1373 re[2 * i + 5] =
1374 (FLOAT32)((FLOAT64)re[2 * i + 4] * (*w1i) + (FLOAT64)re[2 * i + 5] * (*w1r));
1375 re[2 * i + 4] = tmp;
1376
1377 w1r += 3 * (128 / mpass - 1) + 1;
1378 w1i += 3 * (128 / mpass - 1) + 1;
1379 }
1380 } else {
1381 for (i = 0; i < n_pass; i += 3) {
1382 tmp = (FLOAT32)((FLOAT64)re[2 * i + 0] * (*w1r) + (FLOAT64)re[2 * i + 1] * (*w1i));
1383 re[2 * i + 1] =
1384 (FLOAT32)(-(FLOAT64)re[2 * i + 0] * (*w1i) + (FLOAT64)re[2 * i + 1] * (*w1r));
1385 re[2 * i + 0] = tmp;
1386
1387 w1r++;
1388 w1i++;
1389
1390 tmp = (FLOAT32)((FLOAT64)re[2 * i + 2] * (*w1r) + (FLOAT64)re[2 * i + 3] * (*w1i));
1391 re[2 * i + 3] =
1392 (FLOAT32)(-(FLOAT64)re[2 * i + 2] * (*w1i) + (FLOAT64)re[2 * i + 3] * (*w1r));
1393 re[2 * i + 2] = tmp;
1394
1395 w1r++;
1396 w1i++;
1397
1398 tmp = (FLOAT32)((FLOAT64)re[2 * i + 4] * (*w1r) + (FLOAT64)re[2 * i + 5] * (*w1i));
1399 re[2 * i + 5] =
1400 (FLOAT32)(-(FLOAT64)re[2 * i + 4] * (*w1i) + (FLOAT64)re[2 * i + 5] * (*w1r));
1401 re[2 * i + 4] = tmp;
1402
1403 w1r += 3 * (128 / mpass - 1) + 1;
1404 w1i += 3 * (128 / mpass - 1) + 1;
1405 }
1406 }
1407 }
1408
1409 for (i = 0; i < n_pass; i++) {
1410 ptr_x[2 * i + 0] = re[2 * i + 0];
1411 ptr_x[2 * i + 1] = re[2 * i + 1];
1412 }
1413 for (i = 0; i < mpass; i++) {
1414 ixheaacd_hbe_apply_fft_3(ptr_x, ptr_y, i_sign);
1415
1416 ptr_x = ptr_x + 6;
1417 ptr_y = ptr_y + 6;
1418 }
1419
1420 for (i = 0; i < mpass; i++) {
1421 re[2 * i + 0] = y[6 * i + 0];
1422 re[2 * i + 1] = y[6 * i + 1];
1423 }
1424
1425 for (i = 0; i < mpass; i++) {
1426 re[2 * mpass + 2 * i + 0] = y[6 * i + 2];
1427 re[2 * mpass + 2 * i + 1] = y[6 * i + 3];
1428 }
1429
1430 for (i = 0; i < mpass; i++) {
1431 re[4 * mpass + 2 * i + 0] = y[6 * i + 4];
1432 re[4 * mpass + 2 * i + 1] = y[6 * i + 5];
1433 }
1434 }
1435
ixheaacd_hbe_apply_fft_288(FLOAT32 * inp,FLOAT32 * scratch,WORD32 len,WORD32 i_sign)1436 VOID ixheaacd_hbe_apply_fft_288(FLOAT32 *inp, FLOAT32 *scratch, WORD32 len, WORD32 i_sign) {
1437 FLOAT32 *op = scratch;
1438 WORD32 mpoints = len / 96;
1439 WORD32 fpoints = len / 3;
1440 WORD32 ii, jj;
1441 scratch += 2 * len;
1442
1443 for (ii = 0; ii < mpoints; ii++) {
1444 for (jj = 0; jj < fpoints; jj++) {
1445 op[2 * jj + 0] = inp[2 * mpoints * jj + 2 * ii];
1446 op[2 * jj + 1] = inp[2 * mpoints * jj + 2 * ii + 1];
1447 }
1448
1449 if (fpoints & (fpoints - 1))
1450 ixheaacd_hbe_apply_cfftn_gen(op, scratch, fpoints, i_sign);
1451 else
1452 ixheaacd_hbe_apply_cfftn(op, scratch, fpoints, i_sign);
1453
1454 for (jj = 0; jj < fpoints; jj++) {
1455 inp[mpoints * 2 * jj + 2 * ii + 0] = op[2 * jj + 0];
1456 inp[mpoints * 2 * jj + 2 * ii + 1] = op[2 * jj + 1];
1457 }
1458 }
1459
1460 ixheaacd_hbe_apply_tw_mult_fft(inp, op, fpoints, mpoints, ixheaacd_twid_tbl_fft_288);
1461
1462 for (ii = 0; ii < fpoints; ii++) {
1463 ixheaacd_hbe_apply_fft_3(op, scratch, i_sign);
1464 op = op + (mpoints * 2);
1465 scratch = scratch + (mpoints * 2);
1466 }
1467
1468 scratch -= fpoints * mpoints * 2;
1469
1470 for (jj = 0; jj < fpoints; jj++) {
1471 inp[2 * jj + 0] = scratch[6 * jj];
1472 inp[2 * jj + 1] = scratch[6 * jj + 1];
1473 }
1474 for (jj = 0; jj < fpoints; jj++) {
1475 inp[2 * fpoints + 2 * jj + 0] = scratch[6 * jj + 2];
1476 inp[2 * fpoints + 2 * jj + 1] = scratch[6 * jj + 3];
1477 }
1478 for (jj = 0; jj < fpoints; jj++) {
1479 inp[4 * fpoints + 2 * jj + 0] = scratch[6 * jj + 4];
1480 inp[4 * fpoints + 2 * jj + 1] = scratch[6 * jj + 5];
1481 }
1482 }
1483
ixheaacd_hbe_apply_ifft_224(FLOAT32 * inp,FLOAT32 * scratch,WORD32 len,WORD32 i_sign)1484 VOID ixheaacd_hbe_apply_ifft_224(FLOAT32 *inp, FLOAT32 *scratch, WORD32 len, WORD32 i_sign) {
1485 WORD32 mpoints = len / 32;
1486 WORD32 fpoints = len / 7;
1487 WORD32 ii, jj;
1488 FLOAT32 *op = scratch;
1489 scratch += 2 * len;
1490
1491 for (ii = 0; ii < mpoints; ii++) {
1492 for (jj = 0; jj < fpoints; jj++) {
1493 op[2 * jj + 0] = inp[2 * mpoints * jj + 2 * ii];
1494 op[2 * jj + 1] = inp[2 * mpoints * jj + 2 * ii + 1];
1495 }
1496
1497 if (fpoints & (fpoints - 1))
1498 ixheaacd_hbe_apply_cfftn_gen(op, scratch, fpoints, i_sign);
1499 else
1500 ixheaacd_hbe_apply_cfftn(op, scratch, fpoints, i_sign);
1501
1502 for (jj = 0; jj < fpoints; jj++) {
1503 inp[mpoints * 2 * jj + 2 * ii + 0] = op[2 * jj + 0];
1504 inp[mpoints * 2 * jj + 2 * ii + 1] = op[2 * jj + 1];
1505 }
1506 }
1507
1508 ixheaacd_hbe_apply_tw_mult_ifft(inp, op, fpoints, mpoints, ixheaacd_twid_tbl_fft_224);
1509
1510 for (ii = 0; ii < fpoints; ii++) {
1511 ixheaacd_hbe_apply_ifft_7(op, scratch);
1512 scratch += (mpoints * 2);
1513 op += (mpoints * 2);
1514 }
1515
1516 scratch -= fpoints * mpoints * 2;
1517
1518 for (jj = 0; jj < fpoints; jj++) {
1519 for (ii = 0; ii < mpoints; ii++) {
1520 inp[fpoints * ii * 2 + 2 * jj + 0] = scratch[mpoints * jj * 2 + 2 * ii + 0];
1521 inp[fpoints * ii * 2 + 2 * jj + 1] = scratch[mpoints * jj * 2 + 2 * ii + 1];
1522 }
1523 }
1524 }
1525
ixheaacd_hbe_apply_ifft_336(FLOAT32 * inp,FLOAT32 * ptr_scratch,WORD32 len,WORD32 i_sign)1526 VOID ixheaacd_hbe_apply_ifft_336(FLOAT32 *inp, FLOAT32 *ptr_scratch, WORD32 len,
1527 WORD32 i_sign) {
1528 WORD32 i, j;
1529 WORD32 m_points = len / 7;
1530 WORD32 n_points = len / 48;
1531 FLOAT32 *ptr_real, *ptr_imag, *p_real_1, *p_scratch;
1532 ptr_real = ptr_scratch;
1533 ptr_scratch += 2 * len;
1534 ptr_imag = ptr_scratch;
1535 ptr_scratch += len;
1536 p_scratch = ptr_scratch;
1537 ptr_scratch += len;
1538 p_real_1 = ptr_scratch;
1539 ptr_scratch += len;
1540
1541 for (i = 0; i < len; i++) {
1542 ptr_real[i] = inp[2 * i + 0];
1543 ptr_imag[i] = inp[2 * i + 1];
1544 }
1545
1546 for (i = 0; i < m_points; i++) {
1547 for (j = 0; j < n_points; j++) {
1548 p_real_1[2 * j + 0] = inp[m_points * 2 * j + 2 * i + 0];
1549 p_real_1[2 * j + 1] = inp[m_points * 2 * j + 2 * i + 1];
1550 }
1551
1552 ixheaacd_hbe_apply_ifft_7(p_real_1, ptr_scratch);
1553
1554 for (j = 0; j < n_points; j++) {
1555 inp[m_points * 2 * j + 2 * i + 0] = ptr_scratch[2 * j + 0];
1556 inp[m_points * 2 * j + 2 * i + 1] = ptr_scratch[2 * j + 1];
1557 }
1558 }
1559
1560 if (m_points == 48)
1561 ixheaacd_hbe_apply_tw_mult_ifft(inp, p_scratch, n_points, m_points,
1562 ixheaacd_twid_tbl_fft_336);
1563 else
1564 ixheaacd_hbe_apply_tw_mult_ifft(inp, p_scratch, n_points, m_points,
1565 ixheaacd_twid_tbl_fft_168);
1566
1567 for (i = 0; i < len; i++) {
1568 ptr_real[2 * i + 0] = p_scratch[2 * i + 0];
1569 ptr_real[2 * i + 1] = p_scratch[2 * i + 1];
1570 }
1571
1572 for (i = 0; i < n_points; i++) {
1573 ixheaacd_hbe_apply_cfftn_gen(ptr_real, ptr_scratch, m_points, i_sign);
1574 ptr_real += (2 * m_points);
1575 }
1576
1577 ptr_real -= n_points * 2 * m_points;
1578
1579 for (j = 0; j < n_points; j++) {
1580 for (i = 0; i < m_points; i++) {
1581 inp[n_points * 2 * i + 2 * j + 0] = ptr_real[2 * m_points * j + 2 * i + 0];
1582 inp[n_points * 2 * i + 2 * j + 1] = ptr_real[2 * m_points * j + 2 * i + 1];
1583 }
1584 }
1585 return;
1586 }
1587
1588