1 /* -----------------------------------------------------------------------------
2 Software License for The Fraunhofer FDK AAC Codec Library for Android
3
4 © Copyright 1995 - 2018 Fraunhofer-Gesellschaft zur Förderung der angewandten
5 Forschung e.V. All rights reserved.
6
7 1. INTRODUCTION
8 The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software
9 that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding
10 scheme for digital audio. This FDK AAC Codec software is intended to be used on
11 a wide variety of Android devices.
12
13 AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient
14 general perceptual audio codecs. AAC-ELD is considered the best-performing
15 full-bandwidth communications codec by independent studies and is widely
16 deployed. AAC has been standardized by ISO and IEC as part of the MPEG
17 specifications.
18
19 Patent licenses for necessary patent claims for the FDK AAC Codec (including
20 those of Fraunhofer) may be obtained through Via Licensing
21 (www.vialicensing.com) or through the respective patent owners individually for
22 the purpose of encoding or decoding bit streams in products that are compliant
23 with the ISO/IEC MPEG audio standards. Please note that most manufacturers of
24 Android devices already license these patent claims through Via Licensing or
25 directly from the patent owners, and therefore FDK AAC Codec software may
26 already be covered under those patent licenses when it is used for those
27 licensed purposes only.
28
29 Commercially-licensed AAC software libraries, including floating-point versions
30 with enhanced sound quality, are also available from Fraunhofer. Users are
31 encouraged to check the Fraunhofer website for additional applications
32 information and documentation.
33
34 2. COPYRIGHT LICENSE
35
36 Redistribution and use in source and binary forms, with or without modification,
37 are permitted without payment of copyright license fees provided that you
38 satisfy the following conditions:
39
40 You must retain the complete text of this software license in redistributions of
41 the FDK AAC Codec or your modifications thereto in source code form.
42
43 You must retain the complete text of this software license in the documentation
44 and/or other materials provided with redistributions of the FDK AAC Codec or
45 your modifications thereto in binary form. You must make available free of
46 charge copies of the complete source code of the FDK AAC Codec and your
47 modifications thereto to recipients of copies in binary form.
48
49 The name of Fraunhofer may not be used to endorse or promote products derived
50 from this library without prior written permission.
51
52 You may not charge copyright license fees for anyone to use, copy or distribute
53 the FDK AAC Codec software or your modifications thereto.
54
55 Your modified versions of the FDK AAC Codec must carry prominent notices stating
56 that you changed the software and the date of any change. For modified versions
57 of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android"
58 must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK
59 AAC Codec Library for Android."
60
61 3. NO PATENT LICENSE
62
63 NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without
64 limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE.
65 Fraunhofer provides no warranty of patent non-infringement with respect to this
66 software.
67
68 You may use this FDK AAC Codec software or modifications thereto only for
69 purposes that are authorized by appropriate patent licenses.
70
71 4. DISCLAIMER
72
73 This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright
74 holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES,
75 including but not limited to the implied warranties of merchantability and
76 fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
77 CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary,
78 or consequential damages, including but not limited to procurement of substitute
79 goods or services; loss of use, data, or profits, or business interruption,
80 however caused and on any theory of liability, whether in contract, strict
81 liability, or tort (including negligence), arising in any way out of the use of
82 this software, even if advised of the possibility of such damage.
83
84 5. CONTACT INFORMATION
85
86 Fraunhofer Institute for Integrated Circuits IIS
87 Attention: Audio and Multimedia Departments - FDK AAC LL
88 Am Wolfsmantel 33
89 91058 Erlangen, Germany
90
91 www.iis.fraunhofer.de/amm
92 amm-info@iis.fraunhofer.de
93 ----------------------------------------------------------------------------- */
94
95 /******************* Library for basic calculation routines ********************
96
97 Author(s): Josef Hoepfl, DSP Solutions
98
99 Description: Fix point FFT
100
101 *******************************************************************************/
102
103 #include "fft_rad2.h"
104 #include "FDK_tools_rom.h"
105
/* cos(pi/4) == sin(pi/4) as a fixed-point fractional constant (~0.7071). */
#define W_PiFOURTH STC(0x5a82799a)
//#define W_PiFOURTH ((FIXP_DBL)(0x5a82799a))
#ifndef SUMDIFF_PIFOURTH
/* Multiply a and b by cos(pi/4)/2 and return both their difference and sum;
   used to apply the 45-degree twiddle factors in the FFT butterflies.
   Results carry the implicit divide-by-2 from fMultDiv2. */
#define SUMDIFF_PIFOURTH(diff, sum, a, b) \
  {                                       \
    FIXP_DBL wa, wb;                      \
    wa = fMultDiv2(a, W_PiFOURTH);        \
    wb = fMultDiv2(b, W_PiFOURTH);        \
    diff = wb - wa;                       \
    sum = wb + wa;                        \
  }
/* 16-bit result variant of SUMDIFF_PIFOURTH (outputs FIXP_SGL). */
#define SUMDIFF_PIFOURTH16(diff, sum, a, b)           \
  {                                                   \
    FIXP_SGL wa, wb;                                  \
    wa = FX_DBL2FX_SGL(fMultDiv2(a, W_PiFOURTH));     \
    wb = FX_DBL2FX_SGL(fMultDiv2(b, W_PiFOURTH));     \
    diff = wb - wa;                                   \
    sum = wb + wa;                                    \
  }
#endif
126
/* Output scale (in bits) applied by each FFT length. Power-of-two lengths
   use a fixed scale; composite lengths add the scales of their radix
   factors plus 2 guard bits for the combination stage. */
#define SCALEFACTOR2048 10
#define SCALEFACTOR1024 9
#define SCALEFACTOR512 8
#define SCALEFACTOR256 7
#define SCALEFACTOR128 6
#define SCALEFACTOR64 5
#define SCALEFACTOR32 4
#define SCALEFACTOR16 3
#define SCALEFACTOR8 2
#define SCALEFACTOR4 1
#define SCALEFACTOR2 1

#define SCALEFACTOR3 1
#define SCALEFACTOR5 1
#define SCALEFACTOR6 (SCALEFACTOR2 + SCALEFACTOR3 + 2)
#define SCALEFACTOR7 2
#define SCALEFACTOR9 2
#define SCALEFACTOR10 5
#define SCALEFACTOR12 3
#define SCALEFACTOR15 3
#define SCALEFACTOR18 (SCALEFACTOR2 + SCALEFACTOR9 + 2)
#define SCALEFACTOR20 (SCALEFACTOR4 + SCALEFACTOR5 + 2)
#define SCALEFACTOR21 (SCALEFACTOR3 + SCALEFACTOR7 + 2)
#define SCALEFACTOR24 (SCALEFACTOR2 + SCALEFACTOR12 + 2)
#define SCALEFACTOR30 (SCALEFACTOR2 + SCALEFACTOR15 + 2)
#define SCALEFACTOR40 (SCALEFACTOR5 + SCALEFACTOR8 + 2)
#define SCALEFACTOR48 (SCALEFACTOR4 + SCALEFACTOR12 + 2)
#define SCALEFACTOR60 (SCALEFACTOR4 + SCALEFACTOR15 + 2)
#define SCALEFACTOR80 (SCALEFACTOR5 + SCALEFACTOR16 + 2)
#define SCALEFACTOR96 (SCALEFACTOR3 + SCALEFACTOR32 + 2)
#define SCALEFACTOR120 (SCALEFACTOR8 + SCALEFACTOR15 + 2)
#define SCALEFACTOR160 (SCALEFACTOR10 + SCALEFACTOR16 + 2)
#define SCALEFACTOR168 (SCALEFACTOR21 + SCALEFACTOR8 + 2)
#define SCALEFACTOR192 (SCALEFACTOR12 + SCALEFACTOR16 + 2)
#define SCALEFACTOR240 (SCALEFACTOR16 + SCALEFACTOR15 + 2)
#define SCALEFACTOR320 (SCALEFACTOR10 + SCALEFACTOR32 + 2)
#define SCALEFACTOR336 (SCALEFACTOR21 + SCALEFACTOR16 + 2)
#define SCALEFACTOR384 (SCALEFACTOR12 + SCALEFACTOR32 + 2)
#define SCALEFACTOR480 (SCALEFACTOR32 + SCALEFACTOR15 + 2)
166
167 #include "fft.h"
168
169 #ifndef FUNCTION_fft2
170
171 /* Performs the FFT of length 2. Input vector unscaled, output vector scaled
172 * with factor 0.5 */
fft2(FIXP_DBL * RESTRICT pDat)173 static FDK_FORCEINLINE void fft2(FIXP_DBL *RESTRICT pDat) {
174 FIXP_DBL r1, i1;
175 FIXP_DBL r2, i2;
176
177 /* real part */
178 r1 = pDat[2];
179 r2 = pDat[0];
180
181 /* imaginary part */
182 i1 = pDat[3];
183 i2 = pDat[1];
184
185 /* real part */
186 pDat[0] = (r2 + r1) >> 1;
187 pDat[2] = (r2 - r1) >> 1;
188
189 /* imaginary part */
190 pDat[1] = (i2 + i1) >> 1;
191 pDat[3] = (i2 - i1) >> 1;
192 }
193 #endif /* FUNCTION_fft2 */
194
195 #define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2 */
196
197 #ifndef FUNCTION_fft3
198 /* Performs the FFT of length 3 according to the algorithm after winograd. */
fft3(FIXP_DBL * RESTRICT pDat)199 static FDK_FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat) {
200 FIXP_DBL r1, r2;
201 FIXP_DBL s1, s2;
202 FIXP_DBL pD;
203
204 /* real part */
205 r1 = pDat[2] + pDat[4];
206 r2 = fMultDiv2((pDat[2] - pDat[4]), C31);
207 pD = pDat[0] >> 1;
208 pDat[0] = pD + (r1 >> 1);
209 r1 = pD - (r1 >> 2);
210
211 /* imaginary part */
212 s1 = pDat[3] + pDat[5];
213 s2 = fMultDiv2((pDat[3] - pDat[5]), C31);
214 pD = pDat[1] >> 1;
215 pDat[1] = pD + (s1 >> 1);
216 s1 = pD - (s1 >> 2);
217
218 /* combination */
219 pDat[2] = r1 - s2;
220 pDat[4] = r1 + s2;
221 pDat[3] = s1 + r2;
222 pDat[5] = s1 - r2;
223 }
224 #endif /* #ifndef FUNCTION_fft3 */
225
#define F5C(x) STC(x)

/* Winograd length-5 DFT constants. C52 and C55 are stored at half their
   mathematical magnitude (see the compensating left-shifts in fft5). */
#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652) */
#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126) */
#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699) */
#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2) */
233
234 /* performs the FFT of length 5 according to the algorithm after winograd */
235 /* This version works with a prescale of 2 instead of 3 */
fft5(FIXP_DBL * RESTRICT pDat)236 static FDK_FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat) {
237 FIXP_DBL r1, r2, r3, r4;
238 FIXP_DBL s1, s2, s3, s4;
239 FIXP_DBL t;
240
241 /* real part */
242 r1 = (pDat[2] + pDat[8]) >> 1;
243 r4 = (pDat[2] - pDat[8]) >> 1;
244 r3 = (pDat[4] + pDat[6]) >> 1;
245 r2 = (pDat[4] - pDat[6]) >> 1;
246 t = fMult((r1 - r3), C54);
247 r1 = r1 + r3;
248 pDat[0] = (pDat[0] >> 1) + r1;
249 /* Bit shift left because of the constant C55 which was scaled with the factor
250 0.5 because of the representation of the values as fracts */
251 r1 = pDat[0] + (fMultDiv2(r1, C55) << (2));
252 r3 = r1 - t;
253 r1 = r1 + t;
254 t = fMult((r4 + r2), C51);
255 /* Bit shift left because of the constant C55 which was scaled with the factor
256 0.5 because of the representation of the values as fracts */
257 r4 = t + (fMultDiv2(r4, C52) << (2));
258 r2 = t + fMult(r2, C53);
259
260 /* imaginary part */
261 s1 = (pDat[3] + pDat[9]) >> 1;
262 s4 = (pDat[3] - pDat[9]) >> 1;
263 s3 = (pDat[5] + pDat[7]) >> 1;
264 s2 = (pDat[5] - pDat[7]) >> 1;
265 t = fMult((s1 - s3), C54);
266 s1 = s1 + s3;
267 pDat[1] = (pDat[1] >> 1) + s1;
268 /* Bit shift left because of the constant C55 which was scaled with the factor
269 0.5 because of the representation of the values as fracts */
270 s1 = pDat[1] + (fMultDiv2(s1, C55) << (2));
271 s3 = s1 - t;
272 s1 = s1 + t;
273 t = fMult((s4 + s2), C51);
274 /* Bit shift left because of the constant C55 which was scaled with the factor
275 0.5 because of the representation of the values as fracts */
276 s4 = t + (fMultDiv2(s4, C52) << (2));
277 s2 = t + fMult(s2, C53);
278
279 /* combination */
280 pDat[2] = r1 + s2;
281 pDat[8] = r1 - s2;
282 pDat[4] = r3 - s4;
283 pDat[6] = r3 + s4;
284
285 pDat[3] = s1 - r2;
286 pDat[9] = s1 + r2;
287 pDat[5] = s3 + r4;
288 pDat[7] = s3 - r4;
289 }
290
/* NOTE(review): identical redefinition of F5C and C51..C55 from above.
   Benign in C/C++ (identical macro redefinition is allowed), kept so the
   fft10 section below is self-contained. */
#define F5C(x) STC(x)

#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652) */
#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126) */
#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699) */
#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2) */
298 /**
299 * \brief Function performs a complex 10-point FFT
300 * The FFT is performed inplace. The result of the FFT
301 * is scaled by SCALEFACTOR10 bits.
302 *
303 * WOPS FLC version: 1093 cycles
304 * WOPS with 32x16 bit multiplications: 196 cycles
305 *
306 * \param [i/o] re real input / output
307 * \param [i/o] im imag input / output
308 * \param [i ] s stride real and imag input / output
309 *
310 * \return void
311 */
fft10(FIXP_DBL * x)312 static void fft10(FIXP_DBL *x) // FIXP_DBL *re, FIXP_DBL *im, FIXP_SGL s)
313 {
314 FIXP_DBL t;
315 FIXP_DBL x0, x1, x2, x3, x4;
316 FIXP_DBL r1, r2, r3, r4;
317 FIXP_DBL s1, s2, s3, s4;
318 FIXP_DBL y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
319 FIXP_DBL y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
320
321 const int s = 1; // stride factor
322
323 /* 2 fft5 stages */
324
325 /* real part */
326 x0 = (x[s * 0] >> SCALEFACTOR10);
327 x1 = (x[s * 4] >> SCALEFACTOR10);
328 x2 = (x[s * 8] >> SCALEFACTOR10);
329 x3 = (x[s * 12] >> SCALEFACTOR10);
330 x4 = (x[s * 16] >> SCALEFACTOR10);
331
332 r1 = (x3 + x2);
333 r4 = (x3 - x2);
334 r3 = (x1 + x4);
335 r2 = (x1 - x4);
336 t = fMult((r1 - r3), C54);
337 r1 = (r1 + r3);
338 y00 = (x0 + r1);
339 r1 = (y00 + ((fMult(r1, C55) << 1)));
340 r3 = (r1 - t);
341 r1 = (r1 + t);
342 t = fMult((r4 + r2), C51);
343 r4 = (t + (fMult(r4, C52) << 1));
344 r2 = (t + fMult(r2, C53));
345
346 /* imaginary part */
347 x0 = (x[s * 0 + 1] >> SCALEFACTOR10);
348 x1 = (x[s * 4 + 1] >> SCALEFACTOR10);
349 x2 = (x[s * 8 + 1] >> SCALEFACTOR10);
350 x3 = (x[s * 12 + 1] >> SCALEFACTOR10);
351 x4 = (x[s * 16 + 1] >> SCALEFACTOR10);
352
353 s1 = (x3 + x2);
354 s4 = (x3 - x2);
355 s3 = (x1 + x4);
356 s2 = (x1 - x4);
357 t = fMult((s1 - s3), C54);
358 s1 = (s1 + s3);
359 y01 = (x0 + s1);
360 s1 = (y01 + (fMult(s1, C55) << 1));
361 s3 = (s1 - t);
362 s1 = (s1 + t);
363 t = fMult((s4 + s2), C51);
364 s4 = (t + (fMult(s4, C52) << 1));
365 s2 = (t + fMult(s2, C53));
366
367 /* combination */
368 y04 = (r1 + s2);
369 y16 = (r1 - s2);
370 y08 = (r3 - s4);
371 y12 = (r3 + s4);
372
373 y05 = (s1 - r2);
374 y17 = (s1 + r2);
375 y09 = (s3 + r4);
376 y13 = (s3 - r4);
377
378 /* real part */
379 x0 = (x[s * 10] >> SCALEFACTOR10);
380 x1 = (x[s * 2] >> SCALEFACTOR10);
381 x2 = (x[s * 6] >> SCALEFACTOR10);
382 x3 = (x[s * 14] >> SCALEFACTOR10);
383 x4 = (x[s * 18] >> SCALEFACTOR10);
384
385 r1 = (x1 + x4);
386 r4 = (x1 - x4);
387 r3 = (x3 + x2);
388 r2 = (x3 - x2);
389 t = fMult((r1 - r3), C54);
390 r1 = (r1 + r3);
391 y02 = (x0 + r1);
392 r1 = (y02 + ((fMult(r1, C55) << 1)));
393 r3 = (r1 - t);
394 r1 = (r1 + t);
395 t = fMult(((r4 + r2)), C51);
396 r4 = (t + (fMult(r4, C52) << 1));
397 r2 = (t + fMult(r2, C53));
398
399 /* imaginary part */
400 x0 = (x[s * 10 + 1] >> SCALEFACTOR10);
401 x1 = (x[s * 2 + 1] >> SCALEFACTOR10);
402 x2 = (x[s * 6 + 1] >> SCALEFACTOR10);
403 x3 = (x[s * 14 + 1] >> SCALEFACTOR10);
404 x4 = (x[s * 18 + 1] >> SCALEFACTOR10);
405
406 s1 = (x1 + x4);
407 s4 = (x1 - x4);
408 s3 = (x3 + x2);
409 s2 = (x3 - x2);
410 t = fMult((s1 - s3), C54);
411 s1 = (s1 + s3);
412 y03 = (x0 + s1);
413 s1 = (y03 + (fMult(s1, C55) << 1));
414 s3 = (s1 - t);
415 s1 = (s1 + t);
416 t = fMult((s4 + s2), C51);
417 s4 = (t + (fMult(s4, C52) << 1));
418 s2 = (t + fMult(s2, C53));
419
420 /* combination */
421 y06 = (r1 + s2);
422 y18 = (r1 - s2);
423 y10 = (r3 - s4);
424 y14 = (r3 + s4);
425
426 y07 = (s1 - r2);
427 y19 = (s1 + r2);
428 y11 = (s3 + r4);
429 y15 = (s3 - r4);
430
431 /* 5 fft2 stages */
432 x[s * 0] = (y00 + y02);
433 x[s * 0 + 1] = (y01 + y03);
434 x[s * 10] = (y00 - y02);
435 x[s * 10 + 1] = (y01 - y03);
436
437 x[s * 4] = (y04 + y06);
438 x[s * 4 + 1] = (y05 + y07);
439 x[s * 14] = (y04 - y06);
440 x[s * 14 + 1] = (y05 - y07);
441
442 x[s * 8] = (y08 + y10);
443 x[s * 8 + 1] = (y09 + y11);
444 x[s * 18] = (y08 - y10);
445 x[s * 18 + 1] = (y09 - y11);
446
447 x[s * 12] = (y12 + y14);
448 x[s * 12 + 1] = (y13 + y15);
449 x[s * 2] = (y12 - y14);
450 x[s * 2 + 1] = (y13 - y15);
451
452 x[s * 16] = (y16 + y18);
453 x[s * 16 + 1] = (y17 + y19);
454 x[s * 6] = (y16 - y18);
455 x[s * 6 + 1] = (y17 - y19);
456 }
457
458 #ifndef FUNCTION_fft12
459 #define FUNCTION_fft12
460
461 #undef C31
462 #define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2 */
463
/* Performs the FFT of length 12 in place on 12 interleaved complex samples.
   Structure: three merged length-3 DFT passes (with twiddle rotation via
   RotVectorReal12/RotVectorImag12) into a temporary buffer, followed by
   three inlined length-4 FFTs writing back to pInput.
   Output is scaled by SCALEFACTOR12 bits in total. */
static inline void fft12(FIXP_DBL *pInput) {
  FIXP_DBL aDst[24]; /* temporary interleaved re/im work buffer */
  FIXP_DBL *pSrc, *pDst;
  int i;

  pSrc = pInput;
  pDst = aDst;
  FIXP_DBL r1, r2, s1, s2, pD;

  /* First 3*2 samples are shifted right by 2 before output */
  r1 = pSrc[8] + pSrc[16];
  r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
  pD = pSrc[0] >> 1;
  pDst[0] = (pD + (r1 >> 1)) >> 1;
  r1 = pD - (r1 >> 2);

  /* imaginary part */
  s1 = pSrc[9] + pSrc[17];
  s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
  pD = pSrc[1] >> 1;
  pDst[1] = (pD + (s1 >> 1)) >> 1;
  s1 = pD - (s1 >> 2);

  /* combination */
  pDst[2] = (r1 - s2) >> 1;
  pDst[3] = (s1 + r2) >> 1;
  pDst[4] = (r1 + s2) >> 1;
  pDst[5] = (s1 - r2) >> 1;
  pSrc += 2;
  pDst += 6;

  /* Middle two length-3 passes: outputs 2..5 are rotated by the
     precomputed twiddle vectors before being stored. */
  const FIXP_STB *pVecRe = RotVectorReal12;
  const FIXP_STB *pVecIm = RotVectorImag12;
  FIXP_DBL re, im;
  FIXP_STB vre, vim;
  for (i = 0; i < 2; i++) {
    /* sample 0,1 are shifted right by 2 before output */
    /* sample 2,3 4,5 are shifted right by 1 and complex multiplied before
     * output */

    r1 = pSrc[8] + pSrc[16];
    r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
    pD = pSrc[0] >> 1;
    pDst[0] = (pD + (r1 >> 1)) >> 1;
    r1 = pD - (r1 >> 2);

    /* imaginary part */
    s1 = pSrc[9] + pSrc[17];
    s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
    pD = pSrc[1] >> 1;
    pDst[1] = (pD + (s1 >> 1)) >> 1;
    s1 = pD - (s1 >> 2);

    /* combination */
    re = (r1 - s2) >> 0;
    im = (s1 + r2) >> 0;
    vre = *pVecRe++;
    vim = *pVecIm++;
    cplxMultDiv2(&pDst[3], &pDst[2], im, re, vre, vim);

    re = (r1 + s2) >> 0;
    im = (s1 - r2) >> 0;
    vre = *pVecRe++;
    vim = *pVecIm++;
    cplxMultDiv2(&pDst[5], &pDst[4], im, re, vre, vim);

    pDst += 6;
    pSrc += 2;
  }
  /* Last length-3 pass: the twiddles degenerate to +/-j and -1, so the
     rotation is done by sign/swap instead of a complex multiply. */
  /* sample 0,1 are shifted right by 2 before output */
  /* sample 2,3 is shifted right by 1 and complex multiplied with (0.0,+1.0) */
  /* sample 4,5 is shifted right by 1 and complex multiplied with (-1.0,0.0) */
  r1 = pSrc[8] + pSrc[16];
  r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
  pD = pSrc[0] >> 1;
  pDst[0] = (pD + (r1 >> 1)) >> 1;
  r1 = pD - (r1 >> 2);

  /* imaginary part */
  s1 = pSrc[9] + pSrc[17];
  s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
  pD = pSrc[1] >> 1;
  pDst[1] = (pD + (s1 >> 1)) >> 1;
  s1 = pD - (s1 >> 2);

  /* combination */
  pDst[2] = (s1 + r2) >> 1;
  pDst[3] = (s2 - r1) >> 1;
  pDst[4] = -((r1 + s2) >> 1);
  pDst[5] = (r2 - s1) >> 1;

  /* Perform 3 times the fft of length 4. The input samples are at the address
  of aDst and the output samples are at the address of pInput. The input vector
  for the fft of length 4 is built of the interleaved samples in aDst, the
  output samples are stored consecutively at the address of pInput.
  */
  pSrc = aDst;
  pDst = pInput;
  for (i = 0; i < 3; i++) {
    /* inline FFT4 merged with incoming resorting loop */
    FIXP_DBL a00, a10, a20, a30, tmp0, tmp1;

    a00 = (pSrc[0] + pSrc[12]) >> 1; /* Re A + Re B */
    a10 = (pSrc[6] + pSrc[18]) >> 1; /* Re C + Re D */
    a20 = (pSrc[1] + pSrc[13]) >> 1; /* Im A + Im B */
    a30 = (pSrc[7] + pSrc[19]) >> 1; /* Im C + Im D */

    pDst[0] = a00 + a10; /* Re A' = Re A + Re B + Re C + Re D */
    pDst[1] = a20 + a30; /* Im A' = Im A + Im B + Im C + Im D */

    tmp0 = a00 - pSrc[12]; /* Re A - Re B */
    tmp1 = a20 - pSrc[13]; /* Im A - Im B */

    pDst[12] = a00 - a10; /* Re C' = Re A + Re B - Re C - Re D */
    pDst[13] = a20 - a30; /* Im C' = Im A + Im B - Im C - Im D */

    a10 = a10 - pSrc[18]; /* Re C - Re D */
    a30 = a30 - pSrc[19]; /* Im C - Im D */

    pDst[6] = tmp0 + a30;  /* Re B' = Re A - Re B + Im C - Im D */
    pDst[18] = tmp0 - a30; /* Re D' = Re A - Re B - Im C + Im D */
    pDst[7] = tmp1 - a10;  /* Im B' = Im A - Im B - Re C + Re D */
    pDst[19] = tmp1 + a10; /* Im D' = Im A - Im B + Re C - Re D */

    pSrc += 2;
    pDst += 2;
  }
}
592 #endif /* FUNCTION_fft12 */
593
594 #ifndef FUNCTION_fft15
595
596 #define N3 3
597 #define N5 5
598 #define N6 6
599 #define N15 15
600
/* Performs the FFT of length 15 in place on 15 interleaved complex samples.
 * It is split into FFTs of length 3 and length 5 (prime-factor
 * decomposition 15 = 3 * 5); the three reorder loops below implement the
 * input/output index mappings between the stages. */
static inline void fft15(FIXP_DBL *pInput) {
  FIXP_DBL aDst[2 * N15];  /* output of the five length-3 DFTs */
  FIXP_DBL aDst1[2 * N15]; /* output of the three length-5 DFTs */
  int i, k, l;

  /* Sort input vector for fft's of length 3
  input3(0:2) = [input(0) input(5) input(10)];
  input3(3:5) = [input(3) input(8) input(13)];
  input3(6:8) = [input(6) input(11) input(1)];
  input3(9:11) = [input(9) input(14) input(4)];
  input3(12:14) = [input(12) input(2) input(7)]; */
  {
    const FIXP_DBL *pSrc = pInput;
    FIXP_DBL *RESTRICT pDst = aDst;
    /* Merge 3 loops into one, skip call of fft3 */
    for (i = 0, l = 0, k = 0; i < N5; i++, k += 6) {
      /* l walks the input in steps of 5 complex samples, modulo 15 */
      pDst[k + 0] = pSrc[l];
      pDst[k + 1] = pSrc[l + 1];
      l += 2 * N5;
      if (l >= (2 * N15)) l -= (2 * N15);

      pDst[k + 2] = pSrc[l];
      pDst[k + 3] = pSrc[l + 1];
      l += 2 * N5;
      if (l >= (2 * N15)) l -= (2 * N15);
      pDst[k + 4] = pSrc[l];
      pDst[k + 5] = pSrc[l + 1];
      l += (2 * N5) + (2 * N3);
      if (l >= (2 * N15)) l -= (2 * N15);

      /* fft3 merged with shift right by 2 loop */
      FIXP_DBL r1, r2, r3;
      FIXP_DBL s1, s2;
      /* real part */
      r1 = pDst[k + 2] + pDst[k + 4];
      r2 = fMult((pDst[k + 2] - pDst[k + 4]), C31);
      s1 = pDst[k + 0];
      pDst[k + 0] = (s1 + r1) >> 2;
      r1 = s1 - (r1 >> 1);

      /* imaginary part */
      s1 = pDst[k + 3] + pDst[k + 5];
      s2 = fMult((pDst[k + 3] - pDst[k + 5]), C31);
      r3 = pDst[k + 1];
      pDst[k + 1] = (r3 + s1) >> 2;
      s1 = r3 - (s1 >> 1);

      /* combination */
      pDst[k + 2] = (r1 - s2) >> 2;
      pDst[k + 4] = (r1 + s2) >> 2;
      pDst[k + 3] = (s1 + r2) >> 2;
      pDst[k + 5] = (s1 - r2) >> 2;
    }
  }
  /* Sort input vector for fft's of length 5
  input5(0:4) = [output3(0) output3(3) output3(6) output3(9) output3(12)];
  input5(5:9) = [output3(1) output3(4) output3(7) output3(10) output3(13)];
  input5(10:14) = [output3(2) output3(5) output3(8) output3(11) output3(14)]; */
  /* Merge 2 loops into one, brings about 10% */
  {
    const FIXP_DBL *pSrc = aDst;
    FIXP_DBL *RESTRICT pDst = aDst1;
    for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
      l = 2 * i;
      pDst[k + 0] = pSrc[l + 0];
      pDst[k + 1] = pSrc[l + 1];
      pDst[k + 2] = pSrc[l + 0 + (2 * N3)];
      pDst[k + 3] = pSrc[l + 1 + (2 * N3)];
      pDst[k + 4] = pSrc[l + 0 + (4 * N3)];
      pDst[k + 5] = pSrc[l + 1 + (4 * N3)];
      pDst[k + 6] = pSrc[l + 0 + (6 * N3)];
      pDst[k + 7] = pSrc[l + 1 + (6 * N3)];
      pDst[k + 8] = pSrc[l + 0 + (8 * N3)];
      pDst[k + 9] = pSrc[l + 1 + (8 * N3)];
      fft5(&pDst[k]);
    }
  }
  /* Sort output vector of length 15
  output = [out5(0) out5(6) out5(12) out5(3) out5(9)
  out5(10) out5(1) out5(7) out5(13) out5(4)
  out5(5) out5(11) out5(2) out5(8) out5(14)]; */
  /* optimize clumsy loop, brings about 5% */
  {
    const FIXP_DBL *pSrc = aDst1;
    FIXP_DBL *RESTRICT pDst = pInput;
    for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
      /* l advances by 6 complex samples, modulo 15 */
      pDst[k + 0] = pSrc[l];
      pDst[k + 1] = pSrc[l + 1];
      l += (2 * N6);
      if (l >= (2 * N15)) l -= (2 * N15);
      pDst[k + 2] = pSrc[l];
      pDst[k + 3] = pSrc[l + 1];
      l += (2 * N6);
      if (l >= (2 * N15)) l -= (2 * N15);
      pDst[k + 4] = pSrc[l];
      pDst[k + 5] = pSrc[l + 1];
      l += (2 * N6);
      if (l >= (2 * N15)) l -= (2 * N15);
      pDst[k + 6] = pSrc[l];
      pDst[k + 7] = pSrc[l + 1];
      l += (2 * N6);
      if (l >= (2 * N15)) l -= (2 * N15);
      pDst[k + 8] = pSrc[l];
      pDst[k + 9] = pSrc[l + 1];
      l += 2; /* no modulo check needed, it cannot occur */
    }
  }
}
711 #endif /* FUNCTION_fft15 */
712
713 /*
714 Select shift placement.
715 Some processors like ARM may shift "for free" in combination with an addition
716 or substraction, but others don't so either combining shift with +/- or reduce
717 the total amount or shift operations is optimal
718 */
719 #if !defined(__arm__)
720 #define SHIFT_A >> 1
721 #define SHIFT_B
722 #else
723 #define SHIFT_A
724 #define SHIFT_B >> 1
725 #endif
726
727 #ifndef FUNCTION_fft_16 /* we check, if fft_16 (FIXP_DBL *) is not yet defined \
728 */
729
/* This defines prevents this array to be declared twice, if 16-bit fft is
 * enabled too */
#define FUNCTION_DATA_fft_16_w16
/* Twiddle-factor pairs used by fft_16; the raw values correspond to
   cos/sin of pi/8 (~0.9239 / ~0.3827) in both orderings — presumably
   W_16^1 and W_16^3. */
static const FIXP_STP fft16_w16[2] = {STCP(0x7641af3d, 0x30fbc54d),
                                      STCP(0x30fbc54d, 0x7641af3d)};
735
LNK_SECTION_CODE_L1
/* Complex 16-point FFT, in place. x holds 16 interleaved (re, im) pairs
   (32 values). The result is scaled by SCALEFACTOR16 bits overall: the
   first double radix-4 stage scales by >>2 and the two final radix-2
   stages by >>1 each. Twiddles are applied via SUMDIFF_PIFOURTH (45
   degree cases) and cplxMultDiv2 with fft16_w16 (pi/8 cases). */
inline void fft_16(FIXP_DBL *RESTRICT x) {
  FIXP_DBL vr, ur;
  FIXP_DBL vr2, ur2;
  FIXP_DBL vr3, ur3;
  FIXP_DBL vr4, ur4;
  FIXP_DBL vi, ui;
  FIXP_DBL vi2, ui2;
  FIXP_DBL vi3, ui3;

  /* Stage 1: four interleaved radix-4 butterflies over stride-8 samples */
  vr = (x[0] >> 1) + (x[16] >> 1);       /* Re A + Re B */
  ur = (x[1] >> 1) + (x[17] >> 1);       /* Im A + Im B */
  vi = (x[8] SHIFT_A) + (x[24] SHIFT_A); /* Re C + Re D */
  ui = (x[9] SHIFT_A) + (x[25] SHIFT_A); /* Im C + Im D */
  x[0] = vr + (vi SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
  x[1] = ur + (ui SHIFT_B);              /* Im A' = sum of imag values */

  vr2 = (x[4] >> 1) + (x[20] >> 1); /* Re A + Re B */
  ur2 = (x[5] >> 1) + (x[21] >> 1); /* Im A + Im B */

  x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
  x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
  vr -= x[16];              /* Re A - Re B */
  vi = (vi SHIFT_B)-x[24];  /* Re C - Re D */
  ur -= x[17];              /* Im A - Im B */
  ui = (ui SHIFT_B)-x[25];  /* Im C - Im D */

  vr3 = (x[2] >> 1) + (x[18] >> 1); /* Re A + Re B */
  ur3 = (x[3] >> 1) + (x[19] >> 1); /* Im A + Im B */

  x[2] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */
  x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */

  vr4 = (x[6] >> 1) + (x[22] >> 1); /* Re A + Re B */
  ur4 = (x[7] >> 1) + (x[23] >> 1); /* Im A + Im B */

  x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
  x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */

  vi2 = (x[12] SHIFT_A) + (x[28] SHIFT_A); /* Re C + Re D */
  ui2 = (x[13] SHIFT_A) + (x[29] SHIFT_A); /* Im C + Im D */
  x[8] = vr2 + (vi2 SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
  x[9] = ur2 + (ui2 SHIFT_B);              /* Im A' = sum of imag values */
  x[12] = vr2 - (vi2 SHIFT_B);             /* Re C' = -(ReC+ReD) + (ReA+ReB) */
  x[13] = ur2 - (ui2 SHIFT_B);             /* Im C' = -Im C -Im D +Im A +Im B */
  vr2 -= x[20];                            /* Re A - Re B */
  ur2 -= x[21];                            /* Im A - Im B */
  vi2 = (vi2 SHIFT_B)-x[28];               /* Re C - Re D */
  ui2 = (ui2 SHIFT_B)-x[29];               /* Im C - Im D */

  vi = (x[10] SHIFT_A) + (x[26] SHIFT_A); /* Re C + Re D */
  ui = (x[11] SHIFT_A) + (x[27] SHIFT_A); /* Im C + Im D */

  x[10] = ui2 + vr2; /* Re B' = Im C - Im D + Re A - Re B */
  x[11] = ur2 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */

  vi3 = (x[14] SHIFT_A) + (x[30] SHIFT_A); /* Re C + Re D */
  ui3 = (x[15] SHIFT_A) + (x[31] SHIFT_A); /* Im C + Im D */

  x[14] = vr2 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
  x[15] = vi2 + ur2; /* Im D'= Re C - Re D + Im A - Im B */

  x[16] = vr3 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
  x[17] = ur3 + (ui SHIFT_B); /* Im A' = sum of imag values */
  x[20] = vr3 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
  x[21] = ur3 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
  vr3 -= x[18];               /* Re A - Re B */
  ur3 -= x[19];               /* Im A - Im B */
  vi = (vi SHIFT_B)-x[26];    /* Re C - Re D */
  ui = (ui SHIFT_B)-x[27];    /* Im C - Im D */
  x[18] = ui + vr3;           /* Re B' = Im C - Im D + Re A - Re B */
  x[19] = ur3 - vi;           /* Im B'= -Re C + Re D + Im A - Im B */

  x[24] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
  x[28] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
  x[25] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
  x[29] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
  vr4 -= x[22];                /* Re A - Re B */
  ur4 -= x[23];                /* Im A - Im B */

  x[22] = vr3 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
  x[23] = vi + ur3; /* Im D'= Re C - Re D + Im A - Im B */

  vi3 = (vi3 SHIFT_B)-x[30]; /* Re C - Re D */
  ui3 = (ui3 SHIFT_B)-x[31]; /* Im C - Im D */
  x[26] = ui3 + vr4;         /* Re B' = Im C - Im D + Re A - Re B */
  x[30] = vr4 - ui3;         /* Re D' = -Im C + Im D + Re A - Re B */
  x[27] = ur4 - vi3;         /* Im B'= -Re C + Re D + Im A - Im B */
  x[31] = vi3 + ur4;         /* Im D'= Re C - Re D + Im A - Im B */

  /* Stage 2: radix-2 butterflies with twiddle factors 1, -j and W_16^2 */
  // xt1 = 0
  // xt2 = 8
  vr = x[8];
  vi = x[9];
  ur = x[0] >> 1;
  ui = x[1] >> 1;
  x[0] = ur + (vr >> 1);
  x[1] = ui + (vi >> 1);
  x[8] = ur - (vr >> 1);
  x[9] = ui - (vi >> 1);

  // xt1 = 4
  // xt2 = 12
  vr = x[13];
  vi = x[12];
  ur = x[4] >> 1;
  ui = x[5] >> 1;
  x[4] = ur + (vr >> 1);
  x[5] = ui - (vi >> 1);
  x[12] = ur - (vr >> 1);
  x[13] = ui + (vi >> 1);

  // xt1 = 16
  // xt2 = 24
  vr = x[24];
  vi = x[25];
  ur = x[16] >> 1;
  ui = x[17] >> 1;
  x[16] = ur + (vr >> 1);
  x[17] = ui + (vi >> 1);
  x[24] = ur - (vr >> 1);
  x[25] = ui - (vi >> 1);

  // xt1 = 20
  // xt2 = 28
  vr = x[29];
  vi = x[28];
  ur = x[20] >> 1;
  ui = x[21] >> 1;
  x[20] = ur + (vr >> 1);
  x[21] = ui - (vi >> 1);
  x[28] = ur - (vr >> 1);
  x[29] = ui + (vi >> 1);

  // xt1 = 2
  // xt2 = 10
  SUMDIFF_PIFOURTH(vi, vr, x[10], x[11])
  // vr = fMultDiv2((x[11] + x[10]),W_PiFOURTH);
  // vi = fMultDiv2((x[11] - x[10]),W_PiFOURTH);
  ur = x[2];
  ui = x[3];
  x[2] = (ur >> 1) + vr;
  x[3] = (ui >> 1) + vi;
  x[10] = (ur >> 1) - vr;
  x[11] = (ui >> 1) - vi;

  // xt1 = 6
  // xt2 = 14
  SUMDIFF_PIFOURTH(vr, vi, x[14], x[15])
  ur = x[6];
  ui = x[7];
  x[6] = (ur >> 1) + vr;
  x[7] = (ui >> 1) - vi;
  x[14] = (ur >> 1) - vr;
  x[15] = (ui >> 1) + vi;

  // xt1 = 18
  // xt2 = 26
  SUMDIFF_PIFOURTH(vi, vr, x[26], x[27])
  ur = x[18];
  ui = x[19];
  x[18] = (ur >> 1) + vr;
  x[19] = (ui >> 1) + vi;
  x[26] = (ur >> 1) - vr;
  x[27] = (ui >> 1) - vi;

  // xt1 = 22
  // xt2 = 30
  SUMDIFF_PIFOURTH(vr, vi, x[30], x[31])
  ur = x[22];
  ui = x[23];
  x[22] = (ur >> 1) + vr;
  x[23] = (ui >> 1) - vi;
  x[30] = (ur >> 1) - vr;
  x[31] = (ui >> 1) + vi;

  /* Stage 3: final radix-2 butterflies with fft16_w16 twiddles */
  // xt1 = 0
  // xt2 = 16
  vr = x[16];
  vi = x[17];
  ur = x[0] >> 1;
  ui = x[1] >> 1;
  x[0] = ur + (vr >> 1);
  x[1] = ui + (vi >> 1);
  x[16] = ur - (vr >> 1);
  x[17] = ui - (vi >> 1);

  // xt1 = 8
  // xt2 = 24
  vi = x[24];
  vr = x[25];
  ur = x[8] >> 1;
  ui = x[9] >> 1;
  x[8] = ur + (vr >> 1);
  x[9] = ui - (vi >> 1);
  x[24] = ur - (vr >> 1);
  x[25] = ui + (vi >> 1);

  // xt1 = 2
  // xt2 = 18
  cplxMultDiv2(&vi, &vr, x[19], x[18], fft16_w16[0]);
  ur = x[2];
  ui = x[3];
  x[2] = (ur >> 1) + vr;
  x[3] = (ui >> 1) + vi;
  x[18] = (ur >> 1) - vr;
  x[19] = (ui >> 1) - vi;

  // xt1 = 10
  // xt2 = 26
  cplxMultDiv2(&vr, &vi, x[27], x[26], fft16_w16[0]);
  ur = x[10];
  ui = x[11];
  x[10] = (ur >> 1) + vr;
  x[11] = (ui >> 1) - vi;
  x[26] = (ur >> 1) - vr;
  x[27] = (ui >> 1) + vi;

  // xt1 = 4
  // xt2 = 20
  SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
  ur = x[4];
  ui = x[5];
  x[4] = (ur >> 1) + vr;
  x[5] = (ui >> 1) + vi;
  x[20] = (ur >> 1) - vr;
  x[21] = (ui >> 1) - vi;

  // xt1 = 12
  // xt2 = 28
  SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
  ur = x[12];
  ui = x[13];
  x[12] = (ur >> 1) + vr;
  x[13] = (ui >> 1) - vi;
  x[28] = (ur >> 1) - vr;
  x[29] = (ui >> 1) + vi;

  // xt1 = 6
  // xt2 = 22
  cplxMultDiv2(&vi, &vr, x[23], x[22], fft16_w16[1]);
  ur = x[6];
  ui = x[7];
  x[6] = (ur >> 1) + vr;
  x[7] = (ui >> 1) + vi;
  x[22] = (ur >> 1) - vr;
  x[23] = (ui >> 1) - vi;

  // xt1 = 14
  // xt2 = 30
  cplxMultDiv2(&vr, &vi, x[31], x[30], fft16_w16[1]);
  ur = x[14];
  ui = x[15];
  x[14] = (ur >> 1) + vr;
  x[15] = (ui >> 1) - vi;
  x[30] = (ur >> 1) - vr;
  x[31] = (ui >> 1) + vi;
}
994 #endif /* FUNCTION_fft_16 */
995
996 #ifndef FUNCTION_fft_32
/* Twiddle factors for fft_32, stored as packed (cos, sin) pairs in Q31.
   Numerically: 0x7641af3d ~= 0.92388 and 0x30fbc54d ~= 0.38268 (cos/sin pi/8),
   0x7d8a5f40 ~= 0.98079, 0x18f8b83c ~= 0.19509, 0x6a6d98a4 ~= 0.83147,
   0x471cece7 ~= 0.55557 — i.e. rotations by multiples of 2*pi/32. */
static const FIXP_STP fft32_w32[6] = {
    STCP(0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d),
    STCP(0x7d8a5f40, 0x18f8b83c), STCP(0x6a6d98a4, 0x471cece7),
    STCP(0x471cece7, 0x6a6d98a4), STCP(0x18f8b83c, 0x7d8a5f40)};
/* W_PiFOURTH = 0x5a82799a ~= 0.70711 (1/sqrt(2)) in Q31, used by
   SUMDIFF_PIFOURTH for the 45-degree rotations. */
#define W_PiFOURTH STC(0x5a82799a)
1002
LNK_SECTION_CODE_L1
/**
 * \brief In-place complex FFT of length 32.
 *
 * \param _x buffer of 64 FIXP_DBL values: interleaved re/im pairs,
 *           x[2k] = Re(k), x[2k+1] = Im(k).
 *
 * The data is scaled down internally via the >>1 shifts and the /2 of
 * cplxMultDiv2; the caller compensates by adding SCALEFACTOR32 to the
 * scale factor (see fft()). The statement order below is deliberate
 * (values are consumed before their slots are overwritten) — do not
 * reorder.
 */
inline void fft_32(FIXP_DBL *const _x) {
  /*
   * 1+2 stage radix 4
   */

  /////////////////////////////////////////////////////////////////////////////////////////
  {
    FIXP_DBL *const x = _x;
    FIXP_DBL vi, ui;
    FIXP_DBL vi2, ui2;
    FIXP_DBL vi3, ui3;
    FIXP_DBL vr, ur;
    FIXP_DBL vr2, ur2;
    FIXP_DBL vr3, ur3;
    FIXP_DBL vr4, ur4;

    /* First radix-4 pass: each group combines inputs A,B,C,D taken at a
       stride of 16 complex samples (32 array slots). */
    // i = 0
    vr = (x[0] + x[32]) >> 1;     /* Re A + Re B */
    ur = (x[1] + x[33]) >> 1;     /* Im A + Im B */
    vi = (x[16] + x[48]) SHIFT_A; /* Re C + Re D */
    ui = (x[17] + x[49]) SHIFT_A; /* Im C + Im D */

    x[0] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    x[1] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */

    vr2 = (x[4] + x[36]) >> 1; /* Re A + Re B */
    ur2 = (x[5] + x[37]) >> 1; /* Im A + Im B */

    x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */

    vr -= x[32];             /* Re A - Re B */
    ur -= x[33];             /* Im A - Im B */
    vi = (vi SHIFT_B) - x[48]; /* Re C - Re D */
    ui = (ui SHIFT_B) - x[49]; /* Im C - Im D */

    vr3 = (x[2] + x[34]) >> 1; /* Re A + Re B */
    ur3 = (x[3] + x[35]) >> 1; /* Im A + Im B */

    x[2] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */
    x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */

    vr4 = (x[6] + x[38]) >> 1; /* Re A + Re B */
    ur4 = (x[7] + x[39]) >> 1; /* Im A + Im B */

    x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
    x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */

    // i=16
    vi = (x[20] + x[52]) SHIFT_A; /* Re C + Re D */
    ui = (x[21] + x[53]) SHIFT_A; /* Im C + Im D */

    x[16] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    x[17] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */
    x[20] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    x[21] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */

    vr2 -= x[36];              /* Re A - Re B */
    ur2 -= x[37];              /* Im A - Im B */
    vi = (vi SHIFT_B) - x[52]; /* Re C - Re D */
    ui = (ui SHIFT_B) - x[53]; /* Im C - Im D */

    vi2 = (x[18] + x[50]) SHIFT_A; /* Re C + Re D */
    ui2 = (x[19] + x[51]) SHIFT_A; /* Im C + Im D */

    x[18] = ui + vr2; /* Re B' = Im C - Im D + Re A - Re B */
    x[19] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */

    vi3 = (x[22] + x[54]) SHIFT_A; /* Re C + Re D */
    ui3 = (x[23] + x[55]) SHIFT_A; /* Im C + Im D */

    x[22] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
    x[23] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */

    // i = 32

    x[32] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    x[33] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */
    x[36] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    x[37] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */

    vr3 -= x[34];                /* Re A - Re B */
    ur3 -= x[35];                /* Im A - Im B */
    vi2 = (vi2 SHIFT_B) - x[50]; /* Re C - Re D */
    ui2 = (ui2 SHIFT_B) - x[51]; /* Im C - Im D */

    x[34] = ui2 + vr3; /* Re B' = Im C - Im D + Re A - Re B */
    x[35] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */

    // i=48

    x[48] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    x[52] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    x[49] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
    x[53] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */

    vr4 -= x[38]; /* Re A - Re B */
    ur4 -= x[39]; /* Im A - Im B */

    x[38] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
    x[39] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */

    vi3 = (vi3 SHIFT_B) - x[54]; /* Re C - Re D */
    ui3 = (ui3 SHIFT_B) - x[55]; /* Im C - Im D */

    x[50] = ui3 + vr4; /* Re B' = Im C - Im D + Re A - Re B */
    x[54] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
    x[51] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
    x[55] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */

    // i=8
    vr = (x[8] + x[40]) >> 1;     /* Re A + Re B */
    ur = (x[9] + x[41]) >> 1;     /* Im A + Im B */
    vi = (x[24] + x[56]) SHIFT_A; /* Re C + Re D */
    ui = (x[25] + x[57]) SHIFT_A; /* Im C + Im D */

    x[8] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    x[9] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */

    vr2 = (x[12] + x[44]) >> 1; /* Re A + Re B */
    ur2 = (x[13] + x[45]) >> 1; /* Im A + Im B */

    x[12] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    x[13] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */

    vr -= x[40];               /* Re A - Re B */
    ur -= x[41];               /* Im A - Im B */
    vi = (vi SHIFT_B) - x[56]; /* Re C - Re D */
    ui = (ui SHIFT_B) - x[57]; /* Im C - Im D */

    vr3 = (x[10] + x[42]) >> 1; /* Re A + Re B */
    ur3 = (x[11] + x[43]) >> 1; /* Im A + Im B */

    x[10] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */
    x[11] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */

    vr4 = (x[14] + x[46]) >> 1; /* Re A + Re B */
    ur4 = (x[15] + x[47]) >> 1; /* Im A + Im B */

    x[14] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
    x[15] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */

    // i=24
    vi = (x[28] + x[60]) SHIFT_A; /* Re C + Re D */
    ui = (x[29] + x[61]) SHIFT_A; /* Im C + Im D */

    x[24] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    x[28] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    x[25] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */
    x[29] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */

    vr2 -= x[44];              /* Re A - Re B */
    ur2 -= x[45];              /* Im A - Im B */
    vi = (vi SHIFT_B) - x[60]; /* Re C - Re D */
    ui = (ui SHIFT_B) - x[61]; /* Im C - Im D */

    vi2 = (x[26] + x[58]) SHIFT_A; /* Re C + Re D */
    ui2 = (x[27] + x[59]) SHIFT_A; /* Im C + Im D */

    x[26] = ui + vr2; /* Re B' = Im C - Im D + Re A - Re B */
    x[27] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */

    vi3 = (x[30] + x[62]) SHIFT_A; /* Re C + Re D */
    ui3 = (x[31] + x[63]) SHIFT_A; /* Im C + Im D */

    x[30] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
    x[31] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */

    // i=40

    x[40] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    x[44] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    x[41] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */
    x[45] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */

    vr3 -= x[42];                /* Re A - Re B */
    ur3 -= x[43];                /* Im A - Im B */
    vi2 = (vi2 SHIFT_B) - x[58]; /* Re C - Re D */
    ui2 = (ui2 SHIFT_B) - x[59]; /* Im C - Im D */

    x[42] = ui2 + vr3; /* Re B' = Im C - Im D + Re A - Re B */
    x[43] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */

    // i=56

    x[56] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    x[60] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    x[57] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
    x[61] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */

    vr4 -= x[46]; /* Re A - Re B */
    ur4 -= x[47]; /* Im A - Im B */

    x[46] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
    x[47] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */

    vi3 = (vi3 SHIFT_B) - x[62]; /* Re C - Re D */
    ui3 = (ui3 SHIFT_B) - x[63]; /* Im C - Im D */

    x[58] = ui3 + vr4; /* Re B' = Im C - Im D + Re A - Re B */
    x[62] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
    x[59] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
    x[63] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */
  }

  /* Radix-2 butterflies within each block of 8 complex samples, including
     the +/- pi/4 rotations (SUMDIFF_PIFOURTH). */
  {
    FIXP_DBL *xt = _x;

    int j = 4;
    do {
      FIXP_DBL vi, ui, vr, ur;

      vr = xt[8];
      vi = xt[9];
      ur = xt[0] >> 1;
      ui = xt[1] >> 1;
      xt[0] = ur + (vr >> 1);
      xt[1] = ui + (vi >> 1);
      xt[8] = ur - (vr >> 1);
      xt[9] = ui - (vi >> 1);

      vr = xt[13];
      vi = xt[12];
      ur = xt[4] >> 1;
      ui = xt[5] >> 1;
      xt[4] = ur + (vr >> 1);
      xt[5] = ui - (vi >> 1);
      xt[12] = ur - (vr >> 1);
      xt[13] = ui + (vi >> 1);

      SUMDIFF_PIFOURTH(vi, vr, xt[10], xt[11])
      ur = xt[2];
      ui = xt[3];
      xt[2] = (ur >> 1) + vr;
      xt[3] = (ui >> 1) + vi;
      xt[10] = (ur >> 1) - vr;
      xt[11] = (ui >> 1) - vi;

      SUMDIFF_PIFOURTH(vr, vi, xt[14], xt[15])
      ur = xt[6];
      ui = xt[7];

      xt[6] = (ur >> 1) + vr;
      xt[7] = (ui >> 1) - vi;
      xt[14] = (ur >> 1) - vr;
      xt[15] = (ui >> 1) + vi;
      xt += 16;
    } while (--j != 0);
  }

  /* Final two combination stages: butterflies at stride 8 and stride 16
     complex samples, applying the fft32_w32 twiddles via cplxMultDiv2. */
  {
    FIXP_DBL *const x = _x;
    FIXP_DBL vi, ui, vr, ur;

    vr = x[16];
    vi = x[17];
    ur = x[0] >> 1;
    ui = x[1] >> 1;
    x[0] = ur + (vr >> 1);
    x[1] = ui + (vi >> 1);
    x[16] = ur - (vr >> 1);
    x[17] = ui - (vi >> 1);

    vi = x[24];
    vr = x[25];
    ur = x[8] >> 1;
    ui = x[9] >> 1;
    x[8] = ur + (vr >> 1);
    x[9] = ui - (vi >> 1);
    x[24] = ur - (vr >> 1);
    x[25] = ui + (vi >> 1);

    vr = x[48];
    vi = x[49];
    ur = x[32] >> 1;
    ui = x[33] >> 1;
    x[32] = ur + (vr >> 1);
    x[33] = ui + (vi >> 1);
    x[48] = ur - (vr >> 1);
    x[49] = ui - (vi >> 1);

    vi = x[56];
    vr = x[57];
    ur = x[40] >> 1;
    ui = x[41] >> 1;
    x[40] = ur + (vr >> 1);
    x[41] = ui - (vi >> 1);
    x[56] = ur - (vr >> 1);
    x[57] = ui + (vi >> 1);

    cplxMultDiv2(&vi, &vr, x[19], x[18], fft32_w32[0]);
    ur = x[2];
    ui = x[3];
    x[2] = (ur >> 1) + vr;
    x[3] = (ui >> 1) + vi;
    x[18] = (ur >> 1) - vr;
    x[19] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[27], x[26], fft32_w32[0]);
    ur = x[10];
    ui = x[11];
    x[10] = (ur >> 1) + vr;
    x[11] = (ui >> 1) - vi;
    x[26] = (ur >> 1) - vr;
    x[27] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[51], x[50], fft32_w32[0]);
    ur = x[34];
    ui = x[35];
    x[34] = (ur >> 1) + vr;
    x[35] = (ui >> 1) + vi;
    x[50] = (ur >> 1) - vr;
    x[51] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[0]);
    ur = x[42];
    ui = x[43];
    x[42] = (ur >> 1) + vr;
    x[43] = (ui >> 1) - vi;
    x[58] = (ur >> 1) - vr;
    x[59] = (ui >> 1) + vi;

    SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
    ur = x[4];
    ui = x[5];
    x[4] = (ur >> 1) + vr;
    x[5] = (ui >> 1) + vi;
    x[20] = (ur >> 1) - vr;
    x[21] = (ui >> 1) - vi;

    SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
    ur = x[12];
    ui = x[13];
    x[12] = (ur >> 1) + vr;
    x[13] = (ui >> 1) - vi;
    x[28] = (ur >> 1) - vr;
    x[29] = (ui >> 1) + vi;

    SUMDIFF_PIFOURTH(vi, vr, x[52], x[53])
    ur = x[36];
    ui = x[37];
    x[36] = (ur >> 1) + vr;
    x[37] = (ui >> 1) + vi;
    x[52] = (ur >> 1) - vr;
    x[53] = (ui >> 1) - vi;

    SUMDIFF_PIFOURTH(vr, vi, x[60], x[61])
    ur = x[44];
    ui = x[45];
    x[44] = (ur >> 1) + vr;
    x[45] = (ui >> 1) - vi;
    x[60] = (ur >> 1) - vr;
    x[61] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[23], x[22], fft32_w32[1]);
    ur = x[6];
    ui = x[7];
    x[6] = (ur >> 1) + vr;
    x[7] = (ui >> 1) + vi;
    x[22] = (ur >> 1) - vr;
    x[23] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[31], x[30], fft32_w32[1]);
    ur = x[14];
    ui = x[15];
    x[14] = (ur >> 1) + vr;
    x[15] = (ui >> 1) - vi;
    x[30] = (ur >> 1) - vr;
    x[31] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[55], x[54], fft32_w32[1]);
    ur = x[38];
    ui = x[39];
    x[38] = (ur >> 1) + vr;
    x[39] = (ui >> 1) + vi;
    x[54] = (ur >> 1) - vr;
    x[55] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[1]);
    ur = x[46];
    ui = x[47];

    x[46] = (ur >> 1) + vr;
    x[47] = (ui >> 1) - vi;
    x[62] = (ur >> 1) - vr;
    x[63] = (ui >> 1) + vi;

    vr = x[32];
    vi = x[33];
    ur = x[0] >> 1;
    ui = x[1] >> 1;
    x[0] = ur + (vr >> 1);
    x[1] = ui + (vi >> 1);
    x[32] = ur - (vr >> 1);
    x[33] = ui - (vi >> 1);

    vi = x[48];
    vr = x[49];
    ur = x[16] >> 1;
    ui = x[17] >> 1;
    x[16] = ur + (vr >> 1);
    x[17] = ui - (vi >> 1);
    x[48] = ur - (vr >> 1);
    x[49] = ui + (vi >> 1);

    cplxMultDiv2(&vi, &vr, x[35], x[34], fft32_w32[2]);
    ur = x[2];
    ui = x[3];
    x[2] = (ur >> 1) + vr;
    x[3] = (ui >> 1) + vi;
    x[34] = (ur >> 1) - vr;
    x[35] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[51], x[50], fft32_w32[2]);
    ur = x[18];
    ui = x[19];
    x[18] = (ur >> 1) + vr;
    x[19] = (ui >> 1) - vi;
    x[50] = (ur >> 1) - vr;
    x[51] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[37], x[36], fft32_w32[0]);
    ur = x[4];
    ui = x[5];
    x[4] = (ur >> 1) + vr;
    x[5] = (ui >> 1) + vi;
    x[36] = (ur >> 1) - vr;
    x[37] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[53], x[52], fft32_w32[0]);
    ur = x[20];
    ui = x[21];
    x[20] = (ur >> 1) + vr;
    x[21] = (ui >> 1) - vi;
    x[52] = (ur >> 1) - vr;
    x[53] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[39], x[38], fft32_w32[3]);
    ur = x[6];
    ui = x[7];
    x[6] = (ur >> 1) + vr;
    x[7] = (ui >> 1) + vi;
    x[38] = (ur >> 1) - vr;
    x[39] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[55], x[54], fft32_w32[3]);
    ur = x[22];
    ui = x[23];
    x[22] = (ur >> 1) + vr;
    x[23] = (ui >> 1) - vi;
    x[54] = (ur >> 1) - vr;
    x[55] = (ui >> 1) + vi;

    SUMDIFF_PIFOURTH(vi, vr, x[40], x[41])
    ur = x[8];
    ui = x[9];
    x[8] = (ur >> 1) + vr;
    x[9] = (ui >> 1) + vi;
    x[40] = (ur >> 1) - vr;
    x[41] = (ui >> 1) - vi;

    SUMDIFF_PIFOURTH(vr, vi, x[56], x[57])
    ur = x[24];
    ui = x[25];
    x[24] = (ur >> 1) + vr;
    x[25] = (ui >> 1) - vi;
    x[56] = (ur >> 1) - vr;
    x[57] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[43], x[42], fft32_w32[4]);
    ur = x[10];
    ui = x[11];

    x[10] = (ur >> 1) + vr;
    x[11] = (ui >> 1) + vi;
    x[42] = (ur >> 1) - vr;
    x[43] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[4]);
    ur = x[26];
    ui = x[27];
    x[26] = (ur >> 1) + vr;
    x[27] = (ui >> 1) - vi;
    x[58] = (ur >> 1) - vr;
    x[59] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[45], x[44], fft32_w32[1]);
    ur = x[12];
    ui = x[13];
    x[12] = (ur >> 1) + vr;
    x[13] = (ui >> 1) + vi;
    x[44] = (ur >> 1) - vr;
    x[45] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[61], x[60], fft32_w32[1]);
    ur = x[28];
    ui = x[29];
    x[28] = (ur >> 1) + vr;
    x[29] = (ui >> 1) - vi;
    x[60] = (ur >> 1) - vr;
    x[61] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[47], x[46], fft32_w32[5]);
    ur = x[14];
    ui = x[15];
    x[14] = (ur >> 1) + vr;
    x[15] = (ui >> 1) + vi;
    x[46] = (ur >> 1) - vr;
    x[47] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[5]);
    ur = x[30];
    ui = x[31];
    x[30] = (ur >> 1) + vr;
    x[31] = (ui >> 1) - vi;
    x[62] = (ur >> 1) - vr;
    x[63] = (ui >> 1) + vi;
  }
}
1523 #endif /* #ifndef FUNCTION_fft_32 */
1524
/**
 * \brief Apply rotation (twiddle) coefficients to an interleaved complex
 *        data buffer, in place, between the two FFT passes of fftN2_func.
 * \param pData interleaved re/im data buffer; modified in place.
 * \param cl length of each row of input data (complex samples).
 * \param l total length of input data (complex samples).
 * \param pVecRe real part of rotation coefficient vector.
 * \param pVecIm imaginary part of rotation coefficient vector.
 */
1532
1533 /*
1534 This defines patches each inaccurate 0x7FFF i.e. 0.9999 and uses 0x8000
1535 (-1.0) instead. At the end, the sign of the result is inverted
1536 */
1537 #define noFFT_APPLY_ROT_VECTOR_HQ
1538
1539 #ifndef FUNCTION_fft_apply_rot_vector__FIXP_DBL
fft_apply_rot_vector(FIXP_DBL * RESTRICT pData,const int cl,const int l,const FIXP_STB * pVecRe,const FIXP_STB * pVecIm)1540 static inline void fft_apply_rot_vector(FIXP_DBL *RESTRICT pData, const int cl,
1541 const int l, const FIXP_STB *pVecRe,
1542 const FIXP_STB *pVecIm) {
1543 FIXP_DBL re, im;
1544 FIXP_STB vre, vim;
1545
1546 int i, c;
1547
1548 for (i = 0; i < cl; i++) {
1549 re = pData[2 * i];
1550 im = pData[2 * i + 1];
1551
1552 pData[2 * i] = re >> 2; /* * 0.25 */
1553 pData[2 * i + 1] = im >> 2; /* * 0.25 */
1554 }
1555 for (; i < l; i += cl) {
1556 re = pData[2 * i];
1557 im = pData[2 * i + 1];
1558
1559 pData[2 * i] = re >> 2; /* * 0.25 */
1560 pData[2 * i + 1] = im >> 2; /* * 0.25 */
1561
1562 for (c = i + 1; c < i + cl; c++) {
1563 re = pData[2 * c] >> 1;
1564 im = pData[2 * c + 1] >> 1;
1565 vre = *pVecRe++;
1566 vim = *pVecIm++;
1567
1568 cplxMultDiv2(&pData[2 * c + 1], &pData[2 * c], im, re, vre, vim);
1569 }
1570 }
1571 }
1572 #endif /* FUNCTION_fft_apply_rot_vector__FIXP_DBL */
1573
1574 /* select either switch case of function pointer. */
1575 //#define FFT_TWO_STAGE_SWITCH_CASE
1576 #ifndef FUNCTION_fftN2_func
fftN2_func(FIXP_DBL * pInput,const int length,const int dim1,const int dim2,void (* const fft1)(FIXP_DBL *),void (* const fft2)(FIXP_DBL *),const FIXP_STB * RotVectorReal,const FIXP_STB * RotVectorImag,FIXP_DBL * aDst,FIXP_DBL * aDst2)1577 static inline void fftN2_func(FIXP_DBL *pInput, const int length,
1578 const int dim1, const int dim2,
1579 void (*const fft1)(FIXP_DBL *),
1580 void (*const fft2)(FIXP_DBL *),
1581 const FIXP_STB *RotVectorReal,
1582 const FIXP_STB *RotVectorImag, FIXP_DBL *aDst,
1583 FIXP_DBL *aDst2) {
1584 /* The real part of the input samples are at the addresses with even indices
1585 and the imaginary part of the input samples are at the addresses with odd
1586 indices. The output samples are stored at the address of pInput
1587 */
1588 FIXP_DBL *pSrc, *pDst, *pDstOut;
1589 int i;
1590
1591 FDK_ASSERT(length == dim1 * dim2);
1592
1593 /* Perform dim2 times the fft of length dim1. The input samples are at the
1594 address of pSrc and the output samples are at the address of pDst. The input
1595 vector for the fft of length dim1 is built of the interleaved samples in pSrc,
1596 the output samples are stored consecutively.
1597 */
1598 pSrc = pInput;
1599 pDst = aDst;
1600 for (i = 0; i < dim2; i++) {
1601 for (int j = 0; j < dim1; j++) {
1602 pDst[2 * j] = pSrc[2 * j * dim2];
1603 pDst[2 * j + 1] = pSrc[2 * j * dim2 + 1];
1604 }
1605
1606 /* fft of size dim1 */
1607 #ifndef FFT_TWO_STAGE_SWITCH_CASE
1608 fft1(pDst);
1609 #else
1610 switch (dim1) {
1611 case 2:
1612 fft2(pDst);
1613 break;
1614 case 3:
1615 fft3(pDst);
1616 break;
1617 case 4:
1618 fft_4(pDst);
1619 break;
1620 /* case 5: fft5(pDst); break; */
1621 /* case 8: fft_8(pDst); break; */
1622 case 12:
1623 fft12(pDst);
1624 break;
1625 /* case 15: fft15(pDst); break; */
1626 case 16:
1627 fft_16(pDst);
1628 break;
1629 case 32:
1630 fft_32(pDst);
1631 break;
1632 /*case 64: fft_64(pDst); break;*/
1633 /* case 128: fft_128(pDst); break; */
1634 }
1635 #endif
1636 pSrc += 2;
1637 pDst = pDst + 2 * dim1;
1638 }
1639
1640 /* Perform the modulation of the output of the fft of length dim1 */
1641 pSrc = aDst;
1642 fft_apply_rot_vector(pSrc, dim1, length, RotVectorReal, RotVectorImag);
1643
1644 /* Perform dim1 times the fft of length dim2. The input samples are at the
1645 address of aDst and the output samples are at the address of pInput. The input
1646 vector for the fft of length dim2 is built of the interleaved samples in aDst,
1647 the output samples are stored consecutively at the address of pInput.
1648 */
1649 pSrc = aDst;
1650 pDst = aDst2;
1651 pDstOut = pInput;
1652 for (i = 0; i < dim1; i++) {
1653 for (int j = 0; j < dim2; j++) {
1654 pDst[2 * j] = pSrc[2 * j * dim1];
1655 pDst[2 * j + 1] = pSrc[2 * j * dim1 + 1];
1656 }
1657
1658 #ifndef FFT_TWO_STAGE_SWITCH_CASE
1659 fft2(pDst);
1660 #else
1661 switch (dim2) {
1662 case 4:
1663 fft_4(pDst);
1664 break;
1665 case 9:
1666 fft9(pDst);
1667 break;
1668 case 12:
1669 fft12(pDst);
1670 break;
1671 case 15:
1672 fft15(pDst);
1673 break;
1674 case 16:
1675 fft_16(pDst);
1676 break;
1677 case 32:
1678 fft_32(pDst);
1679 break;
1680 }
1681 #endif
1682
1683 for (int j = 0; j < dim2; j++) {
1684 pDstOut[2 * j * dim1] = pDst[2 * j];
1685 pDstOut[2 * j * dim1 + 1] = pDst[2 * j + 1];
1686 }
1687 pSrc += 2;
1688 pDstOut += 2;
1689 }
1690 }
1691 #endif /* FUNCTION_fftN2_function */
1692
/* Convenience wrapper around fftN2_func: allocates the two scratch buffers
   (2*length and 2*dim2 elements) on the FDK scratch stack, runs the
   two-stage FFT, and releases the scratch in reverse allocation order. */
#define fftN2(DATA_TYPE, pInput, length, dim1, dim2, fft_func1, fft_func2, \
              RotVectorReal, RotVectorImag)                                \
  {                                                                        \
    C_AALLOC_SCRATCH_START(aDst, DATA_TYPE, 2 * length)                    \
    C_AALLOC_SCRATCH_START(aDst2, DATA_TYPE, 2 * dim2)                     \
    fftN2_func(pInput, length, dim1, dim2, fft_func1, fft_func2,           \
               RotVectorReal, RotVectorImag, aDst, aDst2);                 \
    C_AALLOC_SCRATCH_END(aDst2, DATA_TYPE, 2 * dim2)                       \
    C_AALLOC_SCRATCH_END(aDst, DATA_TYPE, 2 * length)                      \
  }
1703
1704 /*!
1705 *
1706 * \brief complex FFT of length 12,18,24,30,48,60,96, 192, 240, 384, 480
1707 * \param pInput contains the input signal prescaled right by 2
1708 * pInput contains the output signal scaled by SCALEFACTOR<#length>
1709 * The output signal does not have any fixed headroom
1710 * \return void
1711 *
1712 */
1713
1714 #ifndef FUNCTION_fft6
fft6(FIXP_DBL * pInput)1715 static inline void fft6(FIXP_DBL *pInput) {
1716 fftN2(FIXP_DBL, pInput, 6, 2, 3, fft2, fft3, RotVectorReal6, RotVectorImag6);
1717 }
1718 #endif /* #ifndef FUNCTION_fft6 */
1719
1720 #ifndef FUNCTION_fft12
fft12(FIXP_DBL * pInput)1721 static inline void fft12(FIXP_DBL *pInput) {
1722 fftN2(FIXP_DBL, pInput, 12, 3, 4, fft3, fft_4, RotVectorReal12,
1723 RotVectorImag12); /* 16,58 */
1724 }
1725 #endif /* #ifndef FUNCTION_fft12 */
1726
1727 #ifndef FUNCTION_fft20
fft20(FIXP_DBL * pInput)1728 static inline void fft20(FIXP_DBL *pInput) {
1729 fftN2(FIXP_DBL, pInput, 20, 4, 5, fft_4, fft5, RotVectorReal20,
1730 RotVectorImag20);
1731 }
1732 #endif /* FUNCTION_fft20 */
1733
fft24(FIXP_DBL * pInput)1734 static inline void fft24(FIXP_DBL *pInput) {
1735 fftN2(FIXP_DBL, pInput, 24, 2, 12, fft2, fft12, RotVectorReal24,
1736 RotVectorImag24); /* 16,73 */
1737 }
1738
fft48(FIXP_DBL * pInput)1739 static inline void fft48(FIXP_DBL *pInput) {
1740 fftN2(FIXP_DBL, pInput, 48, 4, 12, fft_4, fft12, RotVectorReal48,
1741 RotVectorImag48); /* 16,32 */
1742 }
1743
1744 #ifndef FUNCTION_fft60
fft60(FIXP_DBL * pInput)1745 static inline void fft60(FIXP_DBL *pInput) {
1746 fftN2(FIXP_DBL, pInput, 60, 4, 15, fft_4, fft15, RotVectorReal60,
1747 RotVectorImag60); /* 15,51 */
1748 }
1749 #endif /* FUNCTION_fft60 */
1750
1751 #ifndef FUNCTION_fft80
fft80(FIXP_DBL * pInput)1752 static inline void fft80(FIXP_DBL *pInput) {
1753 fftN2(FIXP_DBL, pInput, 80, 5, 16, fft5, fft_16, RotVectorReal80,
1754 RotVectorImag80); /* */
1755 }
1756 #endif
1757
1758 #ifndef FUNCTION_fft96
fft96(FIXP_DBL * pInput)1759 static inline void fft96(FIXP_DBL *pInput) {
1760 fftN2(FIXP_DBL, pInput, 96, 3, 32, fft3, fft_32, RotVectorReal96,
1761 RotVectorImag96); /* 15,47 */
1762 }
1763 #endif /* FUNCTION_fft96*/
1764
1765 #ifndef FUNCTION_fft120
fft120(FIXP_DBL * pInput)1766 static inline void fft120(FIXP_DBL *pInput) {
1767 fftN2(FIXP_DBL, pInput, 120, 8, 15, fft_8, fft15, RotVectorReal120,
1768 RotVectorImag120);
1769 }
1770 #endif /* FUNCTION_fft120 */
1771
1772 #ifndef FUNCTION_fft192
fft192(FIXP_DBL * pInput)1773 static inline void fft192(FIXP_DBL *pInput) {
1774 fftN2(FIXP_DBL, pInput, 192, 16, 12, fft_16, fft12, RotVectorReal192,
1775 RotVectorImag192); /* 15,50 */
1776 }
1777 #endif
1778
1779 #ifndef FUNCTION_fft240
fft240(FIXP_DBL * pInput)1780 static inline void fft240(FIXP_DBL *pInput) {
1781 fftN2(FIXP_DBL, pInput, 240, 16, 15, fft_16, fft15, RotVectorReal240,
1782 RotVectorImag240); /* 15.44 */
1783 }
1784 #endif
1785
1786 #ifndef FUNCTION_fft384
fft384(FIXP_DBL * pInput)1787 static inline void fft384(FIXP_DBL *pInput) {
1788 fftN2(FIXP_DBL, pInput, 384, 12, 32, fft12, fft_32, RotVectorReal384,
1789 RotVectorImag384); /* 16.02 */
1790 }
1791 #endif /* FUNCTION_fft384 */
1792
1793 #ifndef FUNCTION_fft480
fft480(FIXP_DBL * pInput)1794 static inline void fft480(FIXP_DBL *pInput) {
1795 fftN2(FIXP_DBL, pInput, 480, 32, 15, fft_32, fft15, RotVectorReal480,
1796 RotVectorImag480); /* 15.84 */
1797 }
1798 #endif /* FUNCTION_fft480 */
1799
fft(int length,FIXP_DBL * pInput,INT * pScalefactor)1800 void fft(int length, FIXP_DBL *pInput, INT *pScalefactor) {
1801 /* Ensure, that the io-ptr is always (at least 8-byte) aligned */
1802 C_ALLOC_ALIGNED_CHECK(pInput);
1803
1804 if (length == 32) {
1805 fft_32(pInput);
1806 *pScalefactor += SCALEFACTOR32;
1807 } else {
1808 switch (length) {
1809 case 16:
1810 fft_16(pInput);
1811 *pScalefactor += SCALEFACTOR16;
1812 break;
1813 case 8:
1814 fft_8(pInput);
1815 *pScalefactor += SCALEFACTOR8;
1816 break;
1817 case 2:
1818 fft2(pInput);
1819 *pScalefactor += SCALEFACTOR2;
1820 break;
1821 case 3:
1822 fft3(pInput);
1823 *pScalefactor += SCALEFACTOR3;
1824 break;
1825 case 4:
1826 fft_4(pInput);
1827 *pScalefactor += SCALEFACTOR4;
1828 break;
1829 case 5:
1830 fft5(pInput);
1831 *pScalefactor += SCALEFACTOR5;
1832 break;
1833 case 6:
1834 fft6(pInput);
1835 *pScalefactor += SCALEFACTOR6;
1836 break;
1837 case 10:
1838 fft10(pInput);
1839 *pScalefactor += SCALEFACTOR10;
1840 break;
1841 case 12:
1842 fft12(pInput);
1843 *pScalefactor += SCALEFACTOR12;
1844 break;
1845 case 15:
1846 fft15(pInput);
1847 *pScalefactor += SCALEFACTOR15;
1848 break;
1849 case 20:
1850 fft20(pInput);
1851 *pScalefactor += SCALEFACTOR20;
1852 break;
1853 case 24:
1854 fft24(pInput);
1855 *pScalefactor += SCALEFACTOR24;
1856 break;
1857 case 48:
1858 fft48(pInput);
1859 *pScalefactor += SCALEFACTOR48;
1860 break;
1861 case 60:
1862 fft60(pInput);
1863 *pScalefactor += SCALEFACTOR60;
1864 break;
1865 case 64:
1866 dit_fft(pInput, 6, SineTable512, 512);
1867 *pScalefactor += SCALEFACTOR64;
1868 break;
1869 case 80:
1870 fft80(pInput);
1871 *pScalefactor += SCALEFACTOR80;
1872 break;
1873 case 96:
1874 fft96(pInput);
1875 *pScalefactor += SCALEFACTOR96;
1876 break;
1877 case 120:
1878 fft120(pInput);
1879 *pScalefactor += SCALEFACTOR120;
1880 break;
1881 case 128:
1882 dit_fft(pInput, 7, SineTable512, 512);
1883 *pScalefactor += SCALEFACTOR128;
1884 break;
1885 case 192:
1886 fft192(pInput);
1887 *pScalefactor += SCALEFACTOR192;
1888 break;
1889 case 240:
1890 fft240(pInput);
1891 *pScalefactor += SCALEFACTOR240;
1892 break;
1893 case 256:
1894 dit_fft(pInput, 8, SineTable512, 512);
1895 *pScalefactor += SCALEFACTOR256;
1896 break;
1897 case 384:
1898 fft384(pInput);
1899 *pScalefactor += SCALEFACTOR384;
1900 break;
1901 case 480:
1902 fft480(pInput);
1903 *pScalefactor += SCALEFACTOR480;
1904 break;
1905 case 512:
1906 dit_fft(pInput, 9, SineTable512, 512);
1907 *pScalefactor += SCALEFACTOR512;
1908 break;
1909 default:
1910 FDK_ASSERT(0); /* FFT length not supported! */
1911 break;
1912 }
1913 }
1914 }
1915
ifft(int length,FIXP_DBL * pInput,INT * scalefactor)1916 void ifft(int length, FIXP_DBL *pInput, INT *scalefactor) {
1917 switch (length) {
1918 default:
1919 FDK_ASSERT(0); /* IFFT length not supported! */
1920 break;
1921 }
1922 }
1923