1 /******************************************************************************
2 * *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 #include <stdlib.h>
21 #include <stdio.h>
22
23 #include "ixheaacd_type_def.h"
24 #include "ixheaacd_interface.h"
25 #include "ixheaacd_constants.h"
26 #include "ixheaacd_basic_ops32.h"
27 #include "ixheaacd_basic_ops40.h"
28 #include "ixheaacd_function_selector.h"
29
30 extern const WORD32 ixheaacd_twiddle_table_fft_32x32[514];
31 extern const WORD32 ixheaacd_twiddle_table_3pr[1155];
32 extern const WORD32 ixheaacd_twiddle_table_3pi[1155];
33 extern const WORD8 ixheaacd_mps_dig_rev[16];
34
35 #define PLATFORM_INLINE __inline
36
/*
 * DIG_REV(i, m, j): permute the bits of `i` and store the result, shifted
 * right by `m`, into `j`.
 *
 * The permutation is done on an unsigned copy of `i` in three swap steps:
 *   1. swap the two 2-bit fields inside every nibble,
 *   2. swap the two nibbles inside every byte,
 *   3. swap the two bytes inside every 16-bit half.
 * Together these reverse the order of the eight 2-bit (base-4) digits within
 * each 16-bit half of the value. `i` is evaluated once; `j` must be an
 * assignable lvalue. The temporary is named `_`, so `i` must not itself
 * refer to an identifier called `_`.
 */
#define DIG_REV(i, m, j)                                    \
  do {                                                      \
    unsigned _ = (i);                                       \
    _ = ((_ & 0x33333333) << 2) | ((_ & ~0x33333333) >> 2); \
    _ = ((_ & 0x0F0F0F0F) << 4) | ((_ & ~0x0F0F0F0F) >> 4); \
    _ = ((_ & 0x00FF00FF) << 8) | ((_ & ~0x00FF00FF) >> 8); \
    (j) = _ >> (m);                                         \
  } while (0)
45
ixheaacd_mult32_sat(WORD32 a,WORD32 b)46 static PLATFORM_INLINE WORD32 ixheaacd_mult32_sat(WORD32 a, WORD32 b) {
47 WORD32 result;
48 WORD64 temp_result;
49
50 temp_result = (WORD64)a * (WORD64)b;
51 result = ixheaacd_sat64_32(temp_result >> 31);
52
53 return (result);
54 }
55
ixheaacd_mac32_sat(WORD32 a,WORD32 b,WORD32 c)56 static PLATFORM_INLINE WORD32 ixheaacd_mac32_sat(WORD32 a, WORD32 b, WORD32 c) {
57 WORD32 result;
58
59 result = ixheaacd_add32_sat(a, ixheaacd_mult32_sat(b, c));
60
61 return (result);
62 }
63
64
ixheaacd_mps_complex_fft_64_dec(WORD32 * ptr_x,WORD32 * fin_re,WORD32 * fin_im,WORD32 nlength)65 VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
66 WORD32 *fin_im, WORD32 nlength) {
67 WORD32 i, j, k, n_stages;
68 WORD32 h2, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
69 WORD32 del, nodespacing, in_loop_cnt;
70 WORD32 y[128];
71 WORD32 npoints = nlength;
72 WORD32 *ptr_y = y;
73 const WORD32 *ptr_w;
74 n_stages = 30 - ixheaacd_norm32(npoints);
75
76 n_stages = n_stages >> 1;
77
78 ptr_w = ixheaacd_twiddle_table_fft_32x32;
79
80 for (i = 0; i < npoints; i += 4) {
81 WORD32 *inp = ptr_x;
82 h2 = ixheaacd_mps_dig_rev[i >> 2];
83 inp += (h2);
84
85 x0r = *inp;
86 x0i = *(inp + 1);
87 inp += (npoints >> 1);
88
89 x1r = *inp;
90 x1i = *(inp + 1);
91 inp += (npoints >> 1);
92
93 x2r = *inp;
94 x2i = *(inp + 1);
95 inp += (npoints >> 1);
96
97 x3r = *inp;
98 x3i = *(inp + 1);
99
100 x0r = ixheaacd_add32_sat(x0r, x2r);
101 x0i = ixheaacd_add32_sat(x0i, x2i);
102 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
103 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
104 x1r = ixheaacd_add32_sat(x1r, x3r);
105 x1i = ixheaacd_add32_sat(x1i, x3i);
106 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
107 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
108
109 x0r = ixheaacd_add32_sat(x0r, x1r);
110 x0i = ixheaacd_add32_sat(x0i, x1i);
111 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
112 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
113 x2r = ixheaacd_add32_sat(x2r, x3i);
114 x2i = ixheaacd_sub32_sat(x2i, x3r);
115 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
116 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
117
118 *ptr_y++ = x0r;
119 *ptr_y++ = x0i;
120 *ptr_y++ = x2r;
121 *ptr_y++ = x2i;
122 *ptr_y++ = x1r;
123 *ptr_y++ = x1i;
124 *ptr_y++ = x3i;
125 *ptr_y++ = x3r;
126 }
127 ptr_y -= 2 * npoints;
128 del = 4;
129 nodespacing = 64;
130 in_loop_cnt = npoints >> 4;
131 for (i = n_stages - 1; i > 0; i--) {
132 const WORD32 *twiddles = ptr_w;
133 WORD32 *data = ptr_y;
134 WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
135 WORD32 sec_loop_cnt;
136
137 for (k = in_loop_cnt; k != 0; k--) {
138 x0r = (*data);
139 x0i = (*(data + 1));
140 data += (del << 1);
141
142 x1r = (*data);
143 x1i = (*(data + 1));
144 data += (del << 1);
145
146 x2r = (*data);
147 x2i = (*(data + 1));
148 data += (del << 1);
149
150 x3r = (*data);
151 x3i = (*(data + 1));
152 data -= 3 * (del << 1);
153
154 x0r = ixheaacd_add32_sat(x0r, x2r);
155 x0i = ixheaacd_add32_sat(x0i, x2i);
156 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
157 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
158 x1r = ixheaacd_add32_sat(x1r, x3r);
159 x1i = ixheaacd_add32_sat(x1i, x3i);
160 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
161 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
162
163 x0r = ixheaacd_add32_sat(x0r, x1r);
164 x0i = ixheaacd_add32_sat(x0i, x1i);
165 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
166 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
167 x2r = ixheaacd_add32_sat(x2r, x3i);
168 x2i = ixheaacd_sub32_sat(x2i, x3r);
169 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
170 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
171
172 *data = x0r;
173 *(data + 1) = x0i;
174 data += (del << 1);
175
176 *data = x2r;
177 *(data + 1) = x2i;
178 data += (del << 1);
179
180 *data = x1r;
181 *(data + 1) = x1i;
182 data += (del << 1);
183
184 *data = x3i;
185 *(data + 1) = x3r;
186 data += (del << 1);
187 }
188 data = ptr_y + 2;
189
190 sec_loop_cnt = (nodespacing * del);
191 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
192 (sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
193 (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
194 (sec_loop_cnt / 256);
195 j = nodespacing;
196
197 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
198 w1h = *(twiddles + 2 * j);
199 w1l = *(twiddles + 2 * j + 1);
200 w2h = *(twiddles + 2 * (j << 1));
201 w2l = *(twiddles + 2 * (j << 1) + 1);
202 w3h = *(twiddles + 2 * j + 2 * (j << 1));
203 w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
204
205 for (k = in_loop_cnt; k != 0; k--) {
206 WORD32 tmp;
207 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
208
209 data += (del << 1);
210
211 x1r = *data;
212 x1i = *(data + 1);
213 data += (del << 1);
214
215 x2r = *data;
216 x2i = *(data + 1);
217 data += (del << 1);
218
219 x3r = *data;
220 x3i = *(data + 1);
221 data -= 3 * (del << 1);
222
223 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
224 ixheaacd_mult32_sat(x1i, w1h));
225 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
226 x1r = tmp;
227
228 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2l),
229 ixheaacd_mult32_sat(x2i, w2h));
230 x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
231 x2r = tmp;
232
233 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3r, w3l),
234 ixheaacd_mult32_sat(x3i, w3h));
235 x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
236 x3r = tmp;
237
238 x0r = (*data);
239 x0i = (*(data + 1));
240
241 x0r = ixheaacd_add32_sat(x0r, x2r);
242 x0i = ixheaacd_add32_sat(x0i, x2i);
243 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
244 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
245 x1r = ixheaacd_add32_sat(x1r, x3r);
246 x1i = ixheaacd_add32_sat(x1i, x3i);
247 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
248 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
249
250 x0r = ixheaacd_add32_sat(x0r, x1r);
251 x0i = ixheaacd_add32_sat(x0i, x1i);
252 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
253 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
254 x2r = ixheaacd_add32_sat(x2r, x3i);
255 x2i = ixheaacd_sub32_sat(x2i, x3r);
256 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
257 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
258
259 *data = x0r;
260 *(data + 1) = x0i;
261 data += (del << 1);
262
263 *data = x2r;
264 *(data + 1) = x2i;
265 data += (del << 1);
266
267 *data = x1r;
268 *(data + 1) = x1i;
269 data += (del << 1);
270
271 *data = x3i;
272 *(data + 1) = x3r;
273 data += (del << 1);
274 }
275 data -= 2 * npoints;
276 data += 2;
277 }
278 for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
279 w1h = *(twiddles + 2 * j);
280 w2h = *(twiddles + 2 * (j << 1));
281 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
282 w1l = *(twiddles + 2 * j + 1);
283 w2l = *(twiddles + 2 * (j << 1) + 1);
284 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
285
286 for (k = in_loop_cnt; k != 0; k--) {
287 WORD32 tmp;
288 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
289
290 data += (del << 1);
291
292 x1r = *data;
293 x1i = *(data + 1);
294 data += (del << 1);
295
296 x2r = *data;
297 x2i = *(data + 1);
298 data += (del << 1);
299
300 x3r = *data;
301 x3i = *(data + 1);
302 data -= 3 * (del << 1);
303
304 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
305 ixheaacd_mult32_sat(x1i, w1h));
306 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
307 x1r = tmp;
308
309 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2l),
310 ixheaacd_mult32_sat(x2i, w2h));
311 x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
312 x2r = tmp;
313
314 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3h),
315 ixheaacd_mult32_sat(x3i, w3l));
316 x3i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
317 ixheaacd_mult32_sat(x3r, w3l));
318 x3r = tmp;
319
320 x0r = (*data);
321 x0i = (*(data + 1));
322
323 x0r = ixheaacd_add32_sat(x0r, x2r);
324 x0i = ixheaacd_add32_sat(x0i, x2i);
325 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
326 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
327 x1r = ixheaacd_add32_sat(x1r, x3r);
328 x1i = ixheaacd_add32_sat(x1i, x3i);
329 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
330 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
331
332 x0r = ixheaacd_add32_sat(x0r, x1r);
333 x0i = ixheaacd_add32_sat(x0i, x1i);
334 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
335 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
336 x2r = ixheaacd_add32_sat(x2r, x3i);
337 x2i = ixheaacd_sub32_sat(x2i, x3r);
338 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
339 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
340
341 *data = x0r;
342 *(data + 1) = x0i;
343 data += (del << 1);
344
345 *data = x2r;
346 *(data + 1) = x2i;
347 data += (del << 1);
348
349 *data = x1r;
350 *(data + 1) = x1i;
351 data += (del << 1);
352
353 *data = x3i;
354 *(data + 1) = x3r;
355 data += (del << 1);
356 }
357 data -= 2 * npoints;
358 data += 2;
359 }
360 for (; j <= sec_loop_cnt * 2; j += nodespacing) {
361 w1h = *(twiddles + 2 * j);
362 w2h = *(twiddles + 2 * (j << 1) - 512);
363 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
364 w1l = *(twiddles + 2 * j + 1);
365 w2l = *(twiddles + 2 * (j << 1) - 511);
366 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
367
368 for (k = in_loop_cnt; k != 0; k--) {
369 WORD32 tmp;
370 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
371
372 data += (del << 1);
373
374 x1r = *data;
375 x1i = *(data + 1);
376 data += (del << 1);
377
378 x2r = *data;
379 x2i = *(data + 1);
380 data += (del << 1);
381
382 x3r = *data;
383 x3i = *(data + 1);
384 data -= 3 * (del << 1);
385
386 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
387 ixheaacd_mult32_sat(x1i, w1h));
388 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
389 x1r = tmp;
390
391 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2h),
392 ixheaacd_mult32_sat(x2i, w2l));
393 x2i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2i, w2h),
394 ixheaacd_mult32_sat(x2r, w2l));
395 x2r = tmp;
396
397 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3h),
398 ixheaacd_mult32_sat(x3i, w3l));
399 x3i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
400 ixheaacd_mult32_sat(x3r, w3l));
401 x3r = tmp;
402
403 x0r = (*data);
404 x0i = (*(data + 1));
405
406 x0r = ixheaacd_add32_sat(x0r, x2r);
407 x0i = ixheaacd_add32_sat(x0i, x2i);
408 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
409 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
410 x1r = ixheaacd_add32_sat(x1r, x3r);
411 x1i = ixheaacd_add32_sat(x1i, x3i);
412 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
413 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
414
415 x0r = ixheaacd_add32_sat(x0r, x1r);
416 x0i = ixheaacd_add32_sat(x0i, x1i);
417 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
418 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
419 x2r = ixheaacd_add32_sat(x2r, x3i);
420 x2i = ixheaacd_sub32_sat(x2i, x3r);
421 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
422 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
423
424 *data = x0r;
425 *(data + 1) = x0i;
426 data += (del << 1);
427
428 *data = x2r;
429 *(data + 1) = x2i;
430 data += (del << 1);
431
432 *data = x1r;
433 *(data + 1) = x1i;
434 data += (del << 1);
435
436 *data = x3i;
437 *(data + 1) = x3r;
438 data += (del << 1);
439 }
440 data -= 2 * npoints;
441 data += 2;
442 }
443 for (; j < nodespacing * del; j += nodespacing) {
444 w1h = *(twiddles + 2 * j);
445 w2h = *(twiddles + 2 * (j << 1) - 512);
446 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
447 w1l = *(twiddles + 2 * j + 1);
448 w2l = *(twiddles + 2 * (j << 1) - 511);
449 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
450
451 for (k = in_loop_cnt; k != 0; k--) {
452 WORD32 tmp;
453 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
454
455 data += (del << 1);
456
457 x1r = *data;
458 x1i = *(data + 1);
459 data += (del << 1);
460
461 x2r = *data;
462 x2i = *(data + 1);
463 data += (del << 1);
464
465 x3r = *data;
466 x3i = *(data + 1);
467 data -= 3 * (del << 1);
468
469 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
470 ixheaacd_mult32_sat(x1i, w1h));
471 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
472 x1r = tmp;
473
474 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2h),
475 ixheaacd_mult32_sat(x2i, w2l));
476 x2i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2i, w2h),
477 ixheaacd_mult32_sat(x2r, w2l));
478 x2r = tmp;
479
480 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
481 ixheaacd_mult32_sat(x3r, w3l));
482 x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
483 x3r = tmp;
484
485 x0r = (*data);
486 x0i = (*(data + 1));
487
488 x0r = ixheaacd_add32_sat(x0r, x2r);
489 x0i = ixheaacd_add32_sat(x0i, x2i);
490 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
491 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
492 x1r = ixheaacd_add32_sat(x1r, x3r);
493 x1i = ixheaacd_sub32_sat(x1i, x3i);
494 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
495 x3i = ixheaacd_add32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
496
497 x0r = ixheaacd_add32_sat(x0r, x1r);
498 x0i = ixheaacd_add32_sat(x0i, x1i);
499 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
500 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
501 x2r = ixheaacd_add32_sat(x2r, x3i);
502 x2i = ixheaacd_sub32_sat(x2i, x3r);
503 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
504 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
505
506 *data = x0r;
507 *(data + 1) = x0i;
508 data += (del << 1);
509
510 *data = x2r;
511 *(data + 1) = x2i;
512 data += (del << 1);
513
514 *data = x1r;
515 *(data + 1) = x1i;
516 data += (del << 1);
517
518 *data = x3i;
519 *(data + 1) = x3r;
520 data += (del << 1);
521 }
522 data -= 2 * npoints;
523 data += 2;
524 }
525 nodespacing >>= 2;
526 del <<= 2;
527 in_loop_cnt >>= 2;
528 }
529
530 for (i = 0; i < 2 * nlength; i += 2) {
531 fin_re[i] = y[i];
532 fin_im[i] = y[i + 1];
533 }
534
535 return;
536 }
537
ixheaacd_complex_fft_p2_dec(WORD32 * xr,WORD32 * xi,WORD32 nlength,WORD32 fft_mode,WORD32 * preshift)538 VOID ixheaacd_complex_fft_p2_dec(WORD32 *xr, WORD32 *xi, WORD32 nlength,
539 WORD32 fft_mode, WORD32 *preshift) {
540 WORD32 i, j, k, n_stages;
541 WORD32 h2, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
542 WORD32 del, nodespacing, in_loop_cnt;
543 WORD32 not_power_4;
544 WORD32 npts, shift;
545 WORD32 dig_rev_shift;
546 WORD32 ptr_x[1024];
547 WORD32 y[1024];
548 WORD32 npoints = nlength;
549 WORD32 n = 0;
550 WORD32 *ptr_y = y;
551 const WORD32 *ptr_w;
552 dig_rev_shift = ixheaacd_norm32(npoints) + 1 - 16;
553 n_stages = 30 - ixheaacd_norm32(npoints);
554 not_power_4 = n_stages & 1;
555
556 n_stages = n_stages >> 1;
557
558 npts = npoints;
559 while (npts >> 1) {
560 n++;
561 npts = npts >> 1;
562 }
563
564 if (n % 2 == 0)
565 shift = ((n + 4)) / 2;
566 else
567 shift = ((n + 3) / 2);
568
569 for (i = 0; i < nlength; i++) {
570 ptr_x[2 * i] = (xr[i] / (1 << (shift)));
571 ptr_x[2 * i + 1] = (xi[i] / (1 << (shift)));
572 }
573
574 if (fft_mode == -1) {
575 ptr_w = ixheaacd_twiddle_table_fft_32x32;
576
577 for (i = 0; i < npoints; i += 4) {
578 WORD32 *inp = ptr_x;
579
580 DIG_REV(i, dig_rev_shift, h2);
581 if (not_power_4) {
582 h2 += 1;
583 h2 &= ~1;
584 }
585 inp += (h2);
586
587 x0r = *inp;
588 x0i = *(inp + 1);
589 inp += (npoints >> 1);
590
591 x1r = *inp;
592 x1i = *(inp + 1);
593 inp += (npoints >> 1);
594
595 x2r = *inp;
596 x2i = *(inp + 1);
597 inp += (npoints >> 1);
598
599 x3r = *inp;
600 x3i = *(inp + 1);
601
602 x0r = ixheaacd_add32_sat(x0r, x2r);
603 x0i = ixheaacd_add32_sat(x0i, x2i);
604 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
605 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
606 x1r = ixheaacd_add32_sat(x1r, x3r);
607 x1i = ixheaacd_add32_sat(x1i, x3i);
608 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
609 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
610
611 x0r = ixheaacd_add32_sat(x0r, x1r);
612 x0i = ixheaacd_add32_sat(x0i, x1i);
613 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
614 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
615 x2r = ixheaacd_add32_sat(x2r, x3i);
616 x2i = ixheaacd_sub32_sat(x2i, x3r);
617 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
618 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
619
620 *ptr_y++ = x0r;
621 *ptr_y++ = x0i;
622 *ptr_y++ = x2r;
623 *ptr_y++ = x2i;
624 *ptr_y++ = x1r;
625 *ptr_y++ = x1i;
626 *ptr_y++ = x3i;
627 *ptr_y++ = x3r;
628 }
629 ptr_y -= 2 * npoints;
630 del = 4;
631 nodespacing = 64;
632 in_loop_cnt = npoints >> 4;
633 for (i = n_stages - 1; i > 0; i--) {
634 const WORD32 *twiddles = ptr_w;
635 WORD32 *data = ptr_y;
636 WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
637 WORD32 sec_loop_cnt;
638
639 for (k = in_loop_cnt; k != 0; k--) {
640 x0r = (*data);
641 x0i = (*(data + 1));
642 data += (del << 1);
643
644 x1r = (*data);
645 x1i = (*(data + 1));
646 data += (del << 1);
647
648 x2r = (*data);
649 x2i = (*(data + 1));
650 data += (del << 1);
651
652 x3r = (*data);
653 x3i = (*(data + 1));
654 data -= 3 * (del << 1);
655
656 x0r = ixheaacd_add32_sat(x0r, x2r);
657 x0i = ixheaacd_add32_sat(x0i, x2i);
658 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
659 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
660 x1r = ixheaacd_add32_sat(x1r, x3r);
661 x1i = ixheaacd_add32_sat(x1i, x3i);
662 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
663 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
664
665 x0r = ixheaacd_add32_sat(x0r, x1r);
666 x0i = ixheaacd_add32_sat(x0i, x1i);
667 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
668 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
669 x2r = ixheaacd_add32_sat(x2r, x3i);
670 x2i = ixheaacd_sub32_sat(x2i, x3r);
671 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
672 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
673
674 *data = x0r;
675 *(data + 1) = x0i;
676 data += (del << 1);
677
678 *data = x2r;
679 *(data + 1) = x2i;
680 data += (del << 1);
681
682 *data = x1r;
683 *(data + 1) = x1i;
684 data += (del << 1);
685
686 *data = x3i;
687 *(data + 1) = x3r;
688 data += (del << 1);
689 }
690 data = ptr_y + 2;
691
692 sec_loop_cnt = (nodespacing * del);
693 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
694 (sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
695 (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
696 (sec_loop_cnt / 256);
697 j = nodespacing;
698
699 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
700 w1h = *(twiddles + 2 * j);
701 w1l = *(twiddles + 2 * j + 1);
702 w2h = *(twiddles + 2 * (j << 1));
703 w2l = *(twiddles + 2 * (j << 1) + 1);
704 w3h = *(twiddles + 2 * j + 2 * (j << 1));
705 w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
706
707 for (k = in_loop_cnt; k != 0; k--) {
708 WORD32 tmp;
709 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
710
711 data += (del << 1);
712
713 x1r = *data;
714 x1i = *(data + 1);
715 data += (del << 1);
716
717 x2r = *data;
718 x2i = *(data + 1);
719 data += (del << 1);
720
721 x3r = *data;
722 x3i = *(data + 1);
723 data -= 3 * (del << 1);
724
725 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
726 ixheaacd_mult32_sat(x1i, w1h));
727 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
728 x1r = tmp;
729
730 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2l),
731 ixheaacd_mult32_sat(x2i, w2h));
732 x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
733 x2r = tmp;
734
735 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3r, w3l),
736 ixheaacd_mult32_sat(x3i, w3h));
737 x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
738 x3r = tmp;
739
740 x0r = (*data);
741 x0i = (*(data + 1));
742
743 x0r = ixheaacd_add32_sat(x0r, x2r);
744 x0i = ixheaacd_add32_sat(x0i, x2i);
745 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
746 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
747 x1r = ixheaacd_add32_sat(x1r, x3r);
748 x1i = ixheaacd_add32_sat(x1i, x3i);
749 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
750 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
751
752 x0r = ixheaacd_add32_sat(x0r, x1r);
753 x0i = ixheaacd_add32_sat(x0i, x1i);
754 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
755 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
756 x2r = ixheaacd_add32_sat(x2r, x3i);
757 x2i = ixheaacd_sub32_sat(x2i, x3r);
758 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
759 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
760
761 *data = x0r;
762 *(data + 1) = x0i;
763 data += (del << 1);
764
765 *data = x2r;
766 *(data + 1) = x2i;
767 data += (del << 1);
768
769 *data = x1r;
770 *(data + 1) = x1i;
771 data += (del << 1);
772
773 *data = x3i;
774 *(data + 1) = x3r;
775 data += (del << 1);
776 }
777 data -= 2 * npoints;
778 data += 2;
779 }
780 for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
781 w1h = *(twiddles + 2 * j);
782 w2h = *(twiddles + 2 * (j << 1));
783 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
784 w1l = *(twiddles + 2 * j + 1);
785 w2l = *(twiddles + 2 * (j << 1) + 1);
786 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
787
788 for (k = in_loop_cnt; k != 0; k--) {
789 WORD32 tmp;
790 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
791 data += (del << 1);
792
793 x1r = *data;
794 x1i = *(data + 1);
795 data += (del << 1);
796
797 x2r = *data;
798 x2i = *(data + 1);
799 data += (del << 1);
800
801 x3r = *data;
802 x3i = *(data + 1);
803 data -= 3 * (del << 1);
804
805 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
806 ixheaacd_mult32_sat(x1i, w1h));
807 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
808 x1r = tmp;
809
810 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2l),
811 ixheaacd_mult32_sat(x2i, w2h));
812 x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
813 x2r = tmp;
814
815 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3h),
816 ixheaacd_mult32_sat(x3i, w3l));
817 x3i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
818 ixheaacd_mult32_sat(x3r, w3l));
819 x3r = tmp;
820
821 x0r = (*data);
822 x0i = (*(data + 1));
823
824 x0r = ixheaacd_add32_sat(x0r, x2r);
825 x0i = ixheaacd_add32_sat(x0i, x2i);
826 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
827 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
828 x1r = ixheaacd_add32_sat(x1r, x3r);
829 x1i = ixheaacd_add32_sat(x1i, x3i);
830 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
831 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
832
833 x0r = ixheaacd_add32_sat(x0r, x1r);
834 x0i = ixheaacd_add32_sat(x0i, x1i);
835 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
836 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
837 x2r = ixheaacd_add32_sat(x2r, x3i);
838 x2i = ixheaacd_sub32_sat(x2i, x3r);
839 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
840 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
841
842 *data = x0r;
843 *(data + 1) = x0i;
844 data += (del << 1);
845
846 *data = x2r;
847 *(data + 1) = x2i;
848 data += (del << 1);
849
850 *data = x1r;
851 *(data + 1) = x1i;
852 data += (del << 1);
853
854 *data = x3i;
855 *(data + 1) = x3r;
856 data += (del << 1);
857 }
858 data -= 2 * npoints;
859 data += 2;
860 }
861 for (; j <= sec_loop_cnt * 2; j += nodespacing) {
862 w1h = *(twiddles + 2 * j);
863 w2h = *(twiddles + 2 * (j << 1) - 512);
864 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
865 w1l = *(twiddles + 2 * j + 1);
866 w2l = *(twiddles + 2 * (j << 1) - 511);
867 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
868
869 for (k = in_loop_cnt; k != 0; k--) {
870 WORD32 tmp;
871 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
872
873 data += (del << 1);
874
875 x1r = *data;
876 x1i = *(data + 1);
877 data += (del << 1);
878
879 x2r = *data;
880 x2i = *(data + 1);
881 data += (del << 1);
882
883 x3r = *data;
884 x3i = *(data + 1);
885 data -= 3 * (del << 1);
886
887 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
888 ixheaacd_mult32_sat(x1i, w1h));
889 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
890 x1r = tmp;
891
892 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2h),
893 ixheaacd_mult32_sat(x2i, w2l));
894 x2i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2i, w2h),
895 ixheaacd_mult32_sat(x2r, w2l));
896 x2r = tmp;
897
898 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3h),
899 ixheaacd_mult32_sat(x3i, w3l));
900 x3i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
901 ixheaacd_mult32_sat(x3r, w3l));
902 x3r = tmp;
903
904 x0r = (*data);
905 x0i = (*(data + 1));
906
907 x0r = ixheaacd_add32_sat(x0r, x2r);
908 x0i = ixheaacd_add32_sat(x0i, x2i);
909 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
910 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
911 x1r = ixheaacd_add32_sat(x1r, x3r);
912 x1i = ixheaacd_add32_sat(x1i, x3i);
913 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
914 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
915
916 x0r = ixheaacd_add32_sat(x0r, x1r);
917 x0i = ixheaacd_add32_sat(x0i, x1i);
918 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
919 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
920 x2r = ixheaacd_add32_sat(x2r, x3i);
921 x2i = ixheaacd_sub32_sat(x2i, x3r);
922 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
923 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
924
925 *data = x0r;
926 *(data + 1) = x0i;
927 data += (del << 1);
928
929 *data = x2r;
930 *(data + 1) = x2i;
931 data += (del << 1);
932
933 *data = x1r;
934 *(data + 1) = x1i;
935 data += (del << 1);
936
937 *data = x3i;
938 *(data + 1) = x3r;
939 data += (del << 1);
940 }
941 data -= 2 * npoints;
942 data += 2;
943 }
944 for (; j < nodespacing * del; j += nodespacing) {
945 w1h = *(twiddles + 2 * j);
946 w2h = *(twiddles + 2 * (j << 1) - 512);
947 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
948 w1l = *(twiddles + 2 * j + 1);
949 w2l = *(twiddles + 2 * (j << 1) - 511);
950 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
951
952 for (k = in_loop_cnt; k != 0; k--) {
953 WORD32 tmp;
954 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
955
956 data += (del << 1);
957
958 x1r = *data;
959 x1i = *(data + 1);
960 data += (del << 1);
961
962 x2r = *data;
963 x2i = *(data + 1);
964 data += (del << 1);
965
966 x3r = *data;
967 x3i = *(data + 1);
968 data -= 3 * (del << 1);
969
970 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
971 ixheaacd_mult32_sat(x1i, w1h));
972 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
973 x1r = tmp;
974
975 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2h),
976 ixheaacd_mult32_sat(x2i, w2l));
977 x2i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2i, w2h),
978 ixheaacd_mult32_sat(x2r, w2l));
979 x2r = tmp;
980
981 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
982 ixheaacd_mult32_sat(x3r, w3l));
983 x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
984 x3r = tmp;
985
986 x0r = (*data);
987 x0i = (*(data + 1));
988
989 x0r = ixheaacd_add32_sat(x0r, x2r);
990 x0i = ixheaacd_add32_sat(x0i, x2i);
991 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
992 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
993 x1r = ixheaacd_add32_sat(x1r, x3r);
994 x1i = ixheaacd_sub32_sat(x1i, x3i);
995 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
996 x3i = ixheaacd_add32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
997
998 x0r = ixheaacd_add32_sat(x0r, x1r);
999 x0i = ixheaacd_add32_sat(x0i, x1i);
1000 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1001 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1002 x2r = ixheaacd_add32_sat(x2r, x3i);
1003 x2i = ixheaacd_sub32_sat(x2i, x3r);
1004 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1005 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1006
1007 *data = x0r;
1008 *(data + 1) = x0i;
1009 data += (del << 1);
1010
1011 *data = x2r;
1012 *(data + 1) = x2i;
1013 data += (del << 1);
1014
1015 *data = x1r;
1016 *(data + 1) = x1i;
1017 data += (del << 1);
1018
1019 *data = x3i;
1020 *(data + 1) = x3r;
1021 data += (del << 1);
1022 }
1023 data -= 2 * npoints;
1024 data += 2;
1025 }
1026 nodespacing >>= 2;
1027 del <<= 2;
1028 in_loop_cnt >>= 2;
1029 }
1030 if (not_power_4) {
1031 const WORD32 *twiddles = ptr_w;
1032 nodespacing <<= 1;
1033 shift += 1;
1034
1035 for (j = del / 2; j != 0; j--) {
1036 WORD32 w1h = *twiddles;
1037 WORD32 w1l = *(twiddles + 1);
1038 WORD32 tmp;
1039 twiddles += nodespacing * 2;
1040
1041 x0r = *ptr_y;
1042 x0i = *(ptr_y + 1);
1043 ptr_y += (del << 1);
1044
1045 x1r = *ptr_y;
1046 x1i = *(ptr_y + 1);
1047
1048 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
1049 ixheaacd_mult32_sat(x1i, w1h));
1050 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1051 x1r = tmp;
1052
1053 *ptr_y = (x0r) / 2 - (x1r) / 2;
1054 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
1055 ptr_y -= (del << 1);
1056
1057 *ptr_y = (x0r) / 2 + (x1r) / 2;
1058 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
1059 ptr_y += 2;
1060 }
1061 twiddles = ptr_w;
1062 for (j = del / 2; j != 0; j--) {
1063 WORD32 w1h = *twiddles;
1064 WORD32 w1l = *(twiddles + 1);
1065 WORD32 tmp;
1066 twiddles += nodespacing * 2;
1067
1068 x0r = *ptr_y;
1069 x0i = *(ptr_y + 1);
1070 ptr_y += (del << 1);
1071
1072 x1r = *ptr_y;
1073 x1i = *(ptr_y + 1);
1074
1075 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1h),
1076 ixheaacd_mult32_sat(x1i, w1l));
1077 x1i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1i, w1h),
1078 ixheaacd_mult32_sat(x1r, w1l));
1079 x1r = tmp;
1080
1081 *ptr_y = (x0r) / 2 - (x1r) / 2;
1082 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
1083 ptr_y -= (del << 1);
1084
1085 *ptr_y = (x0r) / 2 + (x1r) / 2;
1086 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
1087 ptr_y += 2;
1088 }
1089 }
1090
1091 }
1092
1093 else {
1094 ptr_w = ixheaacd_twiddle_table_fft_32x32;
1095
1096 for (i = 0; i < npoints; i += 4) {
1097 WORD32 *inp = ptr_x;
1098
1099 DIG_REV(i, dig_rev_shift, h2);
1100 if (not_power_4) {
1101 h2 += 1;
1102 h2 &= ~1;
1103 }
1104 inp += (h2);
1105
1106 x0r = *inp;
1107 x0i = *(inp + 1);
1108 inp += (npoints >> 1);
1109
1110 x1r = *inp;
1111 x1i = *(inp + 1);
1112 inp += (npoints >> 1);
1113
1114 x2r = *inp;
1115 x2i = *(inp + 1);
1116 inp += (npoints >> 1);
1117
1118 x3r = *inp;
1119 x3i = *(inp + 1);
1120
1121 x0r = ixheaacd_add32_sat(x0r, x2r);
1122 x0i = ixheaacd_add32_sat(x0i, x2i);
1123 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1124 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1125 x1r = ixheaacd_add32_sat(x1r, x3r);
1126 x1i = ixheaacd_add32_sat(x1i, x3i);
1127 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
1128 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
1129
1130 x0r = ixheaacd_add32_sat(x0r, x1r);
1131 x0i = ixheaacd_add32_sat(x0i, x1i);
1132 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1133 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1134 x2r = ixheaacd_sub32_sat(x2r, x3i);
1135 x2i = ixheaacd_add32_sat(x2i, x3r);
1136 x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1137 x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1138
1139 *ptr_y++ = x0r;
1140 *ptr_y++ = x0i;
1141 *ptr_y++ = x2r;
1142 *ptr_y++ = x2i;
1143 *ptr_y++ = x1r;
1144 *ptr_y++ = x1i;
1145 *ptr_y++ = x3i;
1146 *ptr_y++ = x3r;
1147 }
1148 ptr_y -= 2 * npoints;
1149 del = 4;
1150 nodespacing = 64;
1151 in_loop_cnt = npoints >> 4;
1152 for (i = n_stages - 1; i > 0; i--) {
1153 const WORD32 *twiddles = ptr_w;
1154 WORD32 *data = ptr_y;
1155 WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
1156 WORD32 sec_loop_cnt;
1157
1158 for (k = in_loop_cnt; k != 0; k--) {
1159 x0r = (*data);
1160 x0i = (*(data + 1));
1161 data += (del << 1);
1162
1163 x1r = (*data);
1164 x1i = (*(data + 1));
1165 data += (del << 1);
1166
1167 x2r = (*data);
1168 x2i = (*(data + 1));
1169 data += (del << 1);
1170
1171 x3r = (*data);
1172 x3i = (*(data + 1));
1173 data -= 3 * (del << 1);
1174
1175 x0r = ixheaacd_add32_sat(x0r, x2r);
1176 x0i = ixheaacd_add32_sat(x0i, x2i);
1177 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1178 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1179 x1r = ixheaacd_add32_sat(x1r, x3r);
1180 x1i = ixheaacd_add32_sat(x1i, x3i);
1181 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
1182 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
1183
1184 x0r = ixheaacd_add32_sat(x0r, x1r);
1185 x0i = ixheaacd_add32_sat(x0i, x1i);
1186 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1187 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1188 x2r = ixheaacd_sub32_sat(x2r, x3i);
1189 x2i = ixheaacd_add32_sat(x2i, x3r);
1190 x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1191 x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1192
1193 *data = x0r;
1194 *(data + 1) = x0i;
1195 data += (del << 1);
1196
1197 *data = x2r;
1198 *(data + 1) = x2i;
1199 data += (del << 1);
1200
1201 *data = x1r;
1202 *(data + 1) = x1i;
1203 data += (del << 1);
1204
1205 *data = x3i;
1206 *(data + 1) = x3r;
1207 data += (del << 1);
1208 }
1209 data = ptr_y + 2;
1210
1211 sec_loop_cnt = (nodespacing * del);
1212 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
1213 (sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
1214 (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
1215 (sec_loop_cnt / 256);
1216 j = nodespacing;
1217
1218 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
1219 w1h = *(twiddles + 2 * j);
1220 w2h = *(twiddles + 2 * (j << 1));
1221 w3h = *(twiddles + 2 * j + 2 * (j << 1));
1222 w1l = *(twiddles + 2 * j + 1);
1223 w2l = *(twiddles + 2 * (j << 1) + 1);
1224 w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
1225
1226 for (k = in_loop_cnt; k != 0; k--) {
1227 WORD32 tmp;
1228 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1229
1230 data += (del << 1);
1231
1232 x1r = *data;
1233 x1i = *(data + 1);
1234 data += (del << 1);
1235
1236 x2r = *data;
1237 x2i = *(data + 1);
1238 data += (del << 1);
1239
1240 x3r = *data;
1241 x3i = *(data + 1);
1242 data -= 3 * (del << 1);
1243
1244 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
1245 ixheaacd_mult32_sat(x1i, w1h));
1246 x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1247 x1r = tmp;
1248
1249 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
1250 ixheaacd_mult32_sat(x2i, w2h));
1251 x2i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
1252 x2r = tmp;
1253
1254 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
1255 ixheaacd_mult32_sat(x3i, w3h));
1256 x3i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
1257 x3r = tmp;
1258
1259 x0r = (*data);
1260 x0i = (*(data + 1));
1261
1262 x0r = ixheaacd_add32_sat(x0r, x2r);
1263 x0i = ixheaacd_add32_sat(x0i, x2i);
1264 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1265 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1266 x1r = ixheaacd_add32_sat(x1r, x3r);
1267 x1i = ixheaacd_add32_sat(x1i, x3i);
1268 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
1269 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
1270
1271 x0r = ixheaacd_add32_sat(x0r, x1r);
1272 x0i = ixheaacd_add32_sat(x0i, x1i);
1273 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1274 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1275 x2r = ixheaacd_sub32_sat(x2r, x3i);
1276 x2i = ixheaacd_add32_sat(x2i, x3r);
1277 x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1278 x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1279
1280 *data = x0r;
1281 *(data + 1) = x0i;
1282 data += (del << 1);
1283
1284 *data = x2r;
1285 *(data + 1) = x2i;
1286 data += (del << 1);
1287
1288 *data = x1r;
1289 *(data + 1) = x1i;
1290 data += (del << 1);
1291
1292 *data = x3i;
1293 *(data + 1) = x3r;
1294 data += (del << 1);
1295 }
1296 data -= 2 * npoints;
1297 data += 2;
1298 }
1299 for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
1300 w1h = *(twiddles + 2 * j);
1301 w2h = *(twiddles + 2 * (j << 1));
1302 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1303 w1l = *(twiddles + 2 * j + 1);
1304 w2l = *(twiddles + 2 * (j << 1) + 1);
1305 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1306
1307 for (k = in_loop_cnt; k != 0; k--) {
1308 WORD32 tmp;
1309 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1310
1311 data += (del << 1);
1312
1313 x1r = *data;
1314 x1i = *(data + 1);
1315 data += (del << 1);
1316
1317 x2r = *data;
1318 x2i = *(data + 1);
1319 data += (del << 1);
1320
1321 x3r = *data;
1322 x3i = *(data + 1);
1323 data -= 3 * (del << 1);
1324
1325 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
1326 ixheaacd_mult32_sat(x1i, w1h));
1327 x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1328 x1r = tmp;
1329
1330 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
1331 ixheaacd_mult32_sat(x2i, w2h));
1332 x2i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
1333 x2r = tmp;
1334
1335 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3r, w3h),
1336 ixheaacd_mult32_sat(x3i, w3l));
1337 x3i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
1338 ixheaacd_mult32_sat(x3i, w3h));
1339 x3r = tmp;
1340
1341 x0r = (*data);
1342 x0i = (*(data + 1));
1343
1344 x0r = ixheaacd_add32_sat(x0r, x2r);
1345 x0i = ixheaacd_add32_sat(x0i, x2i);
1346 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1347 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1348 x1r = ixheaacd_add32_sat(x1r, x3r);
1349 x1i = ixheaacd_add32_sat(x1i, x3i);
1350 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
1351 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
1352
1353 x0r = ixheaacd_add32_sat(x0r, x1r);
1354 x0i = ixheaacd_add32_sat(x0i, x1i);
1355 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1356 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1357 x2r = ixheaacd_sub32_sat(x2r, x3i);
1358 x2i = ixheaacd_add32_sat(x2i, x3r);
1359 x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1360 x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1361
1362 *data = x0r;
1363 *(data + 1) = x0i;
1364 data += (del << 1);
1365
1366 *data = x2r;
1367 *(data + 1) = x2i;
1368 data += (del << 1);
1369
1370 *data = x1r;
1371 *(data + 1) = x1i;
1372 data += (del << 1);
1373
1374 *data = x3i;
1375 *(data + 1) = x3r;
1376 data += (del << 1);
1377 }
1378 data -= 2 * npoints;
1379 data += 2;
1380 }
1381 for (; j <= sec_loop_cnt * 2; j += nodespacing) {
1382 w1h = *(twiddles + 2 * j);
1383 w2h = *(twiddles + 2 * (j << 1) - 512);
1384 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
1385 w1l = *(twiddles + 2 * j + 1);
1386 w2l = *(twiddles + 2 * (j << 1) - 511);
1387 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
1388
1389 for (k = in_loop_cnt; k != 0; k--) {
1390 WORD32 tmp;
1391 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1392
1393 data += (del << 1);
1394
1395 x1r = *data;
1396 x1i = *(data + 1);
1397 data += (del << 1);
1398
1399 x2r = *data;
1400 x2i = *(data + 1);
1401 data += (del << 1);
1402
1403 x3r = *data;
1404 x3i = *(data + 1);
1405 data -= 3 * (del << 1);
1406
1407 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
1408 ixheaacd_mult32_sat(x1i, w1h));
1409 x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1410 x1r = tmp;
1411
1412 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2h),
1413 ixheaacd_mult32_sat(x2i, w2l));
1414 x2i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
1415 ixheaacd_mult32_sat(x2i, w2h));
1416 x2r = tmp;
1417
1418 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3r, w3h),
1419 ixheaacd_mult32_sat(x3i, w3l));
1420 x3i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
1421 ixheaacd_mult32_sat(x3i, w3h));
1422 x3r = tmp;
1423
1424 x0r = (*data);
1425 x0i = (*(data + 1));
1426
1427 x0r = ixheaacd_add32_sat(x0r, x2r);
1428 x0i = ixheaacd_add32_sat(x0i, x2i);
1429 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1430 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1431 x1r = ixheaacd_add32_sat(x1r, x3r);
1432 x1i = ixheaacd_add32_sat(x1i, x3i);
1433 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
1434 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
1435
1436 x0r = ixheaacd_add32_sat(x0r, x1r);
1437 x0i = ixheaacd_add32_sat(x0i, x1i);
1438 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1439 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1440 x2r = ixheaacd_sub32_sat(x2r, x3i);
1441 x2i = ixheaacd_add32_sat(x2i, x3r);
1442 x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1443 x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1444
1445 *data = x0r;
1446 *(data + 1) = x0i;
1447 data += (del << 1);
1448
1449 *data = x2r;
1450 *(data + 1) = x2i;
1451 data += (del << 1);
1452
1453 *data = x1r;
1454 *(data + 1) = x1i;
1455 data += (del << 1);
1456
1457 *data = x3i;
1458 *(data + 1) = x3r;
1459 data += (del << 1);
1460 }
1461 data -= 2 * npoints;
1462 data += 2;
1463 }
1464 for (; j < nodespacing * del; j += nodespacing) {
1465 w1h = *(twiddles + 2 * j);
1466 w2h = *(twiddles + 2 * (j << 1) - 512);
1467 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
1468 w1l = *(twiddles + 2 * j + 1);
1469 w2l = *(twiddles + 2 * (j << 1) - 511);
1470 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
1471
1472 for (k = in_loop_cnt; k != 0; k--) {
1473 WORD32 tmp;
1474 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1475
1476 data += (del << 1);
1477
1478 x1r = *data;
1479 x1i = *(data + 1);
1480 data += (del << 1);
1481
1482 x2r = *data;
1483 x2i = *(data + 1);
1484 data += (del << 1);
1485
1486 x3r = *data;
1487 x3i = *(data + 1);
1488 data -= 3 * (del << 1);
1489
1490 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
1491 ixheaacd_mult32_sat(x1i, w1h));
1492 x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1493 x1r = tmp;
1494
1495 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2h),
1496 ixheaacd_mult32_sat(x2i, w2l));
1497 x2i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
1498 ixheaacd_mult32_sat(x2i, w2h));
1499 x2r = tmp;
1500
1501 tmp = -ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
1502 ixheaacd_mult32_sat(x3i, w3h));
1503 x3i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
1504 x3r = tmp;
1505
1506 x0r = (*data);
1507 x0i = (*(data + 1));
1508
1509 x0r = ixheaacd_add32_sat(x0r, x2r);
1510 x0i = ixheaacd_add32_sat(x0i, x2i);
1511 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
1512 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
1513 x1r = ixheaacd_add32_sat(x1r, x3r);
1514 x1i = ixheaacd_sub32_sat(x1i, x3i);
1515 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
1516 x3i = ixheaacd_add32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
1517
1518 x0r = ixheaacd_add32_sat(x0r, x1r);
1519 x0i = ixheaacd_add32_sat(x0i, x1i);
1520 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
1521 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
1522 x2r = ixheaacd_sub32_sat(x2r, x3i);
1523 x2i = ixheaacd_add32_sat(x2i, x3r);
1524 x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
1525 x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
1526
1527 *data = x0r;
1528 *(data + 1) = x0i;
1529 data += (del << 1);
1530
1531 *data = x2r;
1532 *(data + 1) = x2i;
1533 data += (del << 1);
1534
1535 *data = x1r;
1536 *(data + 1) = x1i;
1537 data += (del << 1);
1538
1539 *data = x3i;
1540 *(data + 1) = x3r;
1541 data += (del << 1);
1542 }
1543 data -= 2 * npoints;
1544 data += 2;
1545 }
1546 nodespacing >>= 2;
1547 del <<= 2;
1548 in_loop_cnt >>= 2;
1549 }
1550 if (not_power_4) {
1551 const WORD32 *twiddles = ptr_w;
1552 nodespacing <<= 1;
1553 shift += 1;
1554 for (j = del / 2; j != 0; j--) {
1555 WORD32 w1h = *twiddles;
1556 WORD32 w1l = *(twiddles + 1);
1557
1558 WORD32 tmp;
1559 twiddles += nodespacing * 2;
1560
1561 x0r = *ptr_y;
1562 x0i = *(ptr_y + 1);
1563 ptr_y += (del << 1);
1564
1565 x1r = *ptr_y;
1566 x1i = *(ptr_y + 1);
1567
1568 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
1569 ixheaacd_mult32_sat(x1i, w1h));
1570 x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
1571 x1r = tmp;
1572
1573 *ptr_y = (x0r) / 2 - (x1r) / 2;
1574 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
1575 ptr_y -= (del << 1);
1576
1577 *ptr_y = (x0r) / 2 + (x1r) / 2;
1578 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
1579 ptr_y += 2;
1580 }
1581 twiddles = ptr_w;
1582 for (j = del / 2; j != 0; j--) {
1583 WORD32 w1h = *twiddles;
1584 WORD32 w1l = *(twiddles + 1);
1585 WORD32 tmp;
1586 twiddles += nodespacing * 2;
1587
1588 x0r = *ptr_y;
1589 x0i = *(ptr_y + 1);
1590 ptr_y += (del << 1);
1591
1592 x1r = *ptr_y;
1593 x1i = *(ptr_y + 1);
1594
1595 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1h),
1596 ixheaacd_mult32_sat(x1i, w1l));
1597 x1i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
1598 ixheaacd_mult32_sat(x1i, w1h));
1599 x1r = tmp;
1600
1601 *ptr_y = (x0r) / 2 - (x1r) / 2;
1602 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
1603 ptr_y -= (del << 1);
1604
1605 *ptr_y = (x0r) / 2 + (x1r) / 2;
1606 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
1607 ptr_y += 2;
1608 }
1609 }
1610 }
1611
1612 for (i = 0; i < nlength; i++) {
1613 xr[i] = y[2 * i];
1614 xi[i] = y[2 * i + 1];
1615 }
1616
1617 *preshift = shift - *preshift;
1618 return;
1619 }
1620
ixheaacd_complex_3point_fft(WORD32 * inp,WORD32 * op,WORD32 sign_dir)1621 static PLATFORM_INLINE void ixheaacd_complex_3point_fft(WORD32 *inp, WORD32 *op,
1622 WORD32 sign_dir) {
1623 WORD32 add_r, sub_r;
1624 WORD32 add_i, sub_i;
1625 WORD32 temp_real, temp_imag, temp;
1626
1627 WORD32 p1, p2, p3, p4;
1628
1629 WORD32 sinmu;
1630 sinmu = -1859775393 * sign_dir;
1631
1632 temp_real = ixheaacd_add32_sat(inp[0], inp[2]);
1633 temp_imag = ixheaacd_add32_sat(inp[1], inp[3]);
1634
1635 add_r = ixheaacd_add32_sat(inp[2], inp[4]);
1636 add_i = ixheaacd_add32_sat(inp[3], inp[5]);
1637
1638 sub_r = ixheaacd_sub32_sat(inp[2], inp[4]);
1639 sub_i = ixheaacd_sub32_sat(inp[3], inp[5]);
1640
1641 p1 = add_r >> 1;
1642 p4 = add_i >> 1;
1643 p2 = ixheaacd_mult32_shl(sub_i, sinmu);
1644 p3 = ixheaacd_mult32_shl(sub_r, sinmu);
1645
1646 temp = ixheaacd_sub32(inp[0], p1);
1647
1648 op[0] = ixheaacd_add32_sat(temp_real, inp[4]);
1649 op[1] = ixheaacd_add32_sat(temp_imag, inp[5]);
1650 op[2] = ixheaacd_add32_sat(temp, p2);
1651 op[3] = ixheaacd_sub32_sat(ixheaacd_sub32_sat(inp[1], p3), p4);
1652 op[4] = ixheaacd_sub32_sat(temp, p2);
1653 op[5] = ixheaacd_sub32_sat(ixheaacd_add32_sat(inp[1], p3), p4);
1654
1655 return;
1656 }
1657
ixheaacd_complex_fft_p3(WORD32 * xr,WORD32 * xi,WORD32 nlength,WORD32 fft_mode,WORD32 * preshift)1658 VOID ixheaacd_complex_fft_p3(WORD32 *xr, WORD32 *xi, WORD32 nlength,
1659 WORD32 fft_mode, WORD32 *preshift) {
1660 WORD32 i, j;
1661 WORD32 shift = 0;
1662 WORD32 xr_3[384];
1663 WORD32 xi_3[384];
1664 WORD32 x[1024];
1665 WORD32 y[1024];
1666 WORD32 cnfac, npts;
1667 WORD32 mpass = nlength;
1668 WORD32 n = 0;
1669 WORD32 *ptr_x = x;
1670 WORD32 *ptr_y = y;
1671
1672 cnfac = 0;
1673 while (mpass % 3 == 0) {
1674 mpass /= 3;
1675 cnfac++;
1676 }
1677 npts = mpass;
1678
1679 for (i = 0; i < 3 * cnfac; i++) {
1680 for (j = 0; j < mpass; j++) {
1681 xr_3[j] = xr[3 * j + i];
1682 xi_3[j] = xi[3 * j + i];
1683 }
1684
1685 (*ixheaacd_complex_fft_p2)(xr_3, xi_3, mpass, fft_mode, &shift);
1686
1687 for (j = 0; j < mpass; j++) {
1688 xr[3 * j + i] = xr_3[j];
1689 xi[3 * j + i] = xi_3[j];
1690 }
1691 }
1692
1693 while (npts >> 1) {
1694 n++;
1695 npts = npts >> 1;
1696 }
1697
1698 if (n % 2 == 0)
1699 shift = ((n + 4)) / 2;
1700 else
1701 shift = ((n + 5) / 2);
1702
1703 *preshift = shift - *preshift + 1;
1704
1705 for (i = 0; i < nlength; i++) {
1706 ptr_x[2 * i] = (xr[i] >> 1);
1707 ptr_x[2 * i + 1] = (xi[i] >> 1);
1708 }
1709
1710 {
1711 const WORD32 *w1r, *w1i;
1712 WORD32 tmp;
1713 w1r = ixheaacd_twiddle_table_3pr;
1714 w1i = ixheaacd_twiddle_table_3pi;
1715
1716 if (fft_mode < 0) {
1717 for (i = 0; i < nlength; i += 3) {
1718 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i], (*w1r)),
1719 ixheaacd_mult32_sat(ptr_x[2 * i + 1], (*w1i)));
1720 ptr_x[2 * i + 1] =
1721 ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i], (*w1i)),
1722 ixheaacd_mult32_sat(ptr_x[2 * i + 1], (*w1r)));
1723 ptr_x[2 * i] = tmp;
1724
1725 w1r++;
1726 w1i++;
1727
1728 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1r)),
1729 ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1i)));
1730 ptr_x[2 * i + 3] =
1731 ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1i)),
1732 ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1r)));
1733 ptr_x[2 * i + 2] = tmp;
1734
1735 w1r++;
1736 w1i++;
1737
1738 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1r)),
1739 ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1i)));
1740 ptr_x[2 * i + 5] =
1741 ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1i)),
1742 ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1r)));
1743 ptr_x[2 * i + 4] = tmp;
1744
1745 w1r += 3 * (128 / mpass - 1) + 1;
1746 w1i += 3 * (128 / mpass - 1) + 1;
1747 }
1748 }
1749
1750 else {
1751 for (i = 0; i < nlength; i += 3) {
1752 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i], (*w1r)),
1753 ixheaacd_mult32_sat(ptr_x[2 * i + 1], (*w1i)));
1754 ptr_x[2 * i + 1] =
1755 ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 1], (*w1r)),
1756 ixheaacd_mult32_sat(ptr_x[2 * i], (*w1i)));
1757 ptr_x[2 * i] = tmp;
1758
1759 w1r++;
1760 w1i++;
1761
1762 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1r)),
1763 ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1i)));
1764 ptr_x[2 * i + 3] =
1765 ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1r)),
1766 ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1i)));
1767 ptr_x[2 * i + 2] = tmp;
1768
1769 w1r++;
1770 w1i++;
1771
1772 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1r)),
1773 ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1i)));
1774 ptr_x[2 * i + 5] =
1775 ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1r)),
1776 ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1i)));
1777 ptr_x[2 * i + 4] = tmp;
1778
1779 w1r += 3 * (128 / mpass - 1) + 1;
1780 w1i += 3 * (128 / mpass - 1) + 1;
1781 }
1782 }
1783 }
1784
1785 for (i = 0; i < mpass; i++) {
1786 ixheaacd_complex_3point_fft(ptr_x, ptr_y, fft_mode);
1787
1788 ptr_x = ptr_x + 6;
1789 ptr_y = ptr_y + 6;
1790 }
1791
1792 for (i = 0; i < mpass; i++) {
1793 xr[i] = y[6 * i];
1794 xi[i] = y[6 * i + 1];
1795 }
1796
1797 for (i = 0; i < mpass; i++) {
1798 xr[mpass + i] = y[6 * i + 2];
1799 xi[mpass + i] = y[6 * i + 3];
1800 }
1801
1802 for (i = 0; i < mpass; i++) {
1803 xr[2 * mpass + i] = y[6 * i + 4];
1804 xi[2 * mpass + i] = y[6 * i + 5];
1805 }
1806 return;
1807 }
1808
/*
 * Dispatches an in-place complex FFT according to the transform length.
 *
 * Lengths for which (nlength & (nlength - 1)) is zero go to the routine
 * behind the ixheaacd_complex_fft_p2 function pointer; 24, 48, 96, 192 and
 * 384 go to ixheaacd_complex_fft_p3. Any other length is reported on stdout
 * and rejected.
 *
 * Returns 0 after a transform was run, IA_FATAL_ERROR for an unsupported
 * length.
 */
WORD32 ixheaacd_complex_fft(WORD32 *data_r, WORD32 *data_i, WORD32 nlength,
                            WORD32 fft_mode, WORD32 *preshift) {
  /* No bit is left after clearing the lowest set bit. */
  if ((nlength & (nlength - 1)) == 0) {
    (*ixheaacd_complex_fft_p2)(data_r, data_i, nlength, fft_mode, preshift);
    return 0;
  }

  switch (nlength) {
    case 24:
    case 48:
    case 96:
    case 192:
    case 384:
      ixheaacd_complex_fft_p3(data_r, data_i, nlength, fft_mode, preshift);
      return 0;

    default:
      printf("%d point FFT not supported", nlength);
      return IA_FATAL_ERROR;
  }
}
1823