1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cfft_radix4_q15.c
4 * Description: This file has function definition of Radix-4 FFT & IFFT function and
5 * In-place bit reversal using bit reversal table
6 *
7 * $Date: 23 April 2021
8 * $Revision: V1.9.0
9 *
10 * Target Processor: Cortex-M and Cortex-A cores
11 * -------------------------------------------------------------------- */
12 /*
13 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14 *
15 * SPDX-License-Identifier: Apache-2.0
16 *
17 * Licensed under the Apache License, Version 2.0 (the License); you may
18 * not use this file except in compliance with the License.
19 * You may obtain a copy of the License at
20 *
21 * www.apache.org/licenses/LICENSE-2.0
22 *
23 * Unless required by applicable law or agreed to in writing, software
24 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26 * See the License for the specific language governing permissions and
27 * limitations under the License.
28 */
29
30 #include "dsp/transform_functions.h"
31
32
/* Forward radix-4 butterfly core (defined later in this file); operates in place on pSrc16. */
void arm_radix4_butterfly_q15(
        q15_t * pSrc16,
        uint32_t fftLen,
  const q15_t * pCoef16,
        uint32_t twidCoefModifier);

/* Inverse radix-4 butterfly core (defined later in this file); same parameter list as the forward core. */
void arm_radix4_butterfly_inverse_q15(
        q15_t * pSrc16,
        uint32_t fftLen,
  const q15_t * pCoef16,
        uint32_t twidCoefModifier);

/* In-place reordering step called by arm_cfft_radix4_q15 when bitReverseFlag is 1.
   Only declared here; its definition is not in the part of the file shown. */
void arm_bitreversal_q15(
        q15_t * pSrc,
        uint32_t fftLen,
        uint16_t bitRevFactor,
  const uint16_t * pBitRevTab);
50
51 /**
52 @addtogroup ComplexFFTDeprecated
53 @{
54 */
55
56
57 /**
58 @brief Processing function for the Q15 CFFT/CIFFT.
59 @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future.
60 @param[in] S points to an instance of the Q15 CFFT/CIFFT structure.
61 @param[in,out] pSrc points to the complex data buffer. Processing occurs in-place.
62 @return none
63
64 @par Input and output formats:
65 Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
66 Hence the output format is different for different FFT sizes.
67 The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
68 @par
69
70 | CFFT Size | Input format | Output format | Number of bits to upscale |
71 | --------: | ------------: | ------------: | ------------------------: |
72 | 16 | 1.15 | 5.11 | 4 |
73 | 64 | 1.15 | 7.9 | 6 |
74 | 256 | 1.15 | 9.7 | 8 |
75 | 1024 | 1.15 | 11.5 | 10 |
76
77 | CIFFT Size | Input format | Output format | Number of bits to upscale |
78 | ---------: | ------------: | ------------: | ------------------------: |
79 | 16 | 1.15 | 5.11 | 0 |
80 | 64 | 1.15 | 7.9 | 0 |
81 | 256 | 1.15 | 9.7 | 0 |
82 | 1024 | 1.15 | 11.5 | 0 |
83
84 */
85
void arm_cfft_radix4_q15(
  const arm_cfft_radix4_instance_q15 * S,
        q15_t * pSrc)
{
  /* Step 1: run the butterfly passes in place.
     Only an ifftFlag of exactly 1 selects the inverse transform;
     every other value runs the forward transform. */
  if (S->ifftFlag != 1U)
  {
    /* Complex FFT radix-4 */
    arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  }
  else
  {
    /* Complex IFFT radix-4 */
    arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  }

  /* Step 2: reorder the output, but only when the instance asks for it
     (bitReverseFlag exactly 1). */
  if (S->bitReverseFlag != 1U)
  {
    return;
  }

  /* Bit Reversal */
  arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
}
108
109 /**
110 @} end of ComplexFFTDeprecated group
111 */
112
113 /*
114 * Radix-4 FFT algorithm used is :
115 *
116 * Input real and imaginary data:
117 * x(n) = xa + j * ya
118 * x(n+N/4 ) = xb + j * yb
119 * x(n+N/2 ) = xc + j * yc
 * x(n+3N/4) = xd + j * yd
121 *
122 *
123 * Output real and imaginary data:
124 * x(4r) = xa'+ j * ya'
125 * x(4r+1) = xb'+ j * yb'
126 * x(4r+2) = xc'+ j * yc'
127 * x(4r+3) = xd'+ j * yd'
128 *
129 *
130 * Twiddle factors for radix-4 FFT:
131 * Wn = co1 + j * (- si1)
132 * W2n = co2 + j * (- si2)
133 * W3n = co3 + j * (- si3)
134
135 * The real and imaginary output values for the radix-4 butterfly are
136 * xa' = xa + xb + xc + xd
137 * ya' = ya + yb + yc + yd
138 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
139 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
140 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
141 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
142 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
143 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
144 *
145 */
146
147 /**
148 @brief Core function for the Q15 CFFT butterfly process.
149 @param[in,out] pSrc16 points to the in-place buffer of Q15 data type
150 @param[in] fftLen length of the FFT
151 @param[in] pCoef16 points to twiddle coefficient buffer
152 @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
153 @return none
154 */
155
void arm_radix4_butterfly_q15(
        q15_t * pSrc16,
        uint32_t fftLen,
  const q15_t * pCoef16,
        uint32_t twidCoefModifier)
{
  /*
   * Overview
   * --------
   * The transform is computed in place on pSrc16 as a sequence of radix-4
   * butterfly passes, split into three phases:
   *
   *   1. first stage  - every input is scaled down by 4 (>> 2) before use and
   *                     all three twiddle multiplications are applied;
   *   2. middle stages - executed while k > 4 (k starts at fftLen/4 and is
   *                     divided by 4 each pass); twiddles are loaded once per j
   *                     and shared by all butterflies using the same j;
   *   3. last stage   - add/subtract only, no twiddle multiplications.
   *
   * Two implementations are provided and selected at compile time:
   *   - ARM_MATH_DSP defined: two q15 values are handled per 32-bit word using
   *     the packed 16-bit intrinsics (__QADD16, __SHADD16, __SMUAD, ...);
   *   - otherwise: a scalar version that indexes pSrc16 directly and saturates
   *     with __SSAT.
   *
   * Twiddle indexing: pCoef16 is read as consecutive (co, si) pairs. For a
   * twiddle index ic the pairs at ic, 2*ic and 3*ic are used (q15 offsets
   * 2*ic, 4*ic and 6*ic). ic advances by twidCoefModifier per butterfly and
   * twidCoefModifier is multiplied by 4 after each stage.
   *
   * Output placement: within each butterfly the result labelled (xc', yc') is
   * stored in the second input slot and (xb', yb') in the third, so the buffer
   * is left in a permuted order. arm_cfft_radix4_q15 optionally calls
   * arm_bitreversal_q15 afterwards.
   *
   * NOTE(review): many inline comments below call the first element of a pair
   * "y" and the second "x". Judging from the output formulas (the value stored
   * at the even index is the one labelled x'), the even-index element plays
   * the x (real) role and the odd-index element the y (imaginary) role -
   * confirm against the buffer layout expected by callers.
   */

#if defined (ARM_MATH_DSP)

        q31_t R, S, T, U;
        q31_t C1, C2, C3, out1, out2;
        uint32_t n1, n2, ic, i0, j, k;

        q15_t *ptr1;
        q15_t *pSi0;
        q15_t *pSi1;
        q15_t *pSi2;
        q15_t *pSi3;

        q31_t xaya, xbyb, xcyc, xdyd;

  /* Total process is divided into three stages */

  /* process first stage, middle stages, & last stage */

  /* Initializations for the first stage */
  n2 = fftLen;
  n1 = n2;

  /* n2 = fftLen/4 */
  n2 >>= 2U;

  /* Index for twiddle coefficient */
  ic = 0U;

  /* Number of first-stage butterflies (loop down-counter) */
  j = n2;

  /* Four read/write cursors, one per butterfly input, spaced n2 complex
     samples (2 * n2 q15 values) apart */
  pSi0 = pSrc16;
  pSi1 = pSi0 + 2 * n2;
  pSi2 = pSi1 + 2 * n2;
  pSi3 = pSi2 + 2 * n2;

  /* Input is in 1.15(q15) format */

  /* start of first stage process */
  do
  {
    /* Butterfly implementation */

    /* Reading i0, i0+fftLen/2 inputs */
    /* Read ya (real), xa(imag) input */
    T = read_q15x2 (pSi0);
    T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */
    T = __SHADD16(T, 0); /* it turns out doing this twice is 2 cycles, the alternative takes 3 cycles */
    /*
       in = ((int16_t) (T & 0xFFFF)) >> 2;       // alternative code that takes 3 cycles
       T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
     */

    /* Read yc (real), xc(imag) input, scaled down by 4 as above */
    S = read_q15x2 (pSi2);
    S = __SHADD16(S, 0);
    S = __SHADD16(S, 0);

    /* R = packed((ya + yc), (xa + xc) ) */
    R = __QADD16(T, S);

    /* S = packed((ya - yc), (xa - xc) ) */
    S = __QSUB16(T, S);

    /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
    /* Read yb (real), xb(imag) input, scaled down by 4 */
    T = read_q15x2 (pSi1);
    T = __SHADD16(T, 0);
    T = __SHADD16(T, 0);

    /* Read yd (real), xd(imag) input, scaled down by 4 */
    U = read_q15x2 (pSi3);
    U = __SHADD16(U, 0);
    U = __SHADD16(U, 0);

    /* T = packed((yb + yd), (xb + xd) ) */
    T = __QADD16(T, U);

    /* writing the butterfly processed i0 sample (halving add), then advance pSi0 */
    /* xa' = xa + xb + xc + xd */
    /* ya' = ya + yb + yc + yd */
    write_q15x2_ia (&pSi0, __SHADD16(R, T));

    /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
    R = __QSUB16(R, T);

    /* co2 & si2 are read from SIMD Coefficient pointer (pair index 2*ic) */
    C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));

#ifndef ARM_MATH_BIG_ENDIAN
    /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
    out1 = __SMUAD(C2, R) >> 16U;
    /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
    /* out2 is left unshifted: presumably __PKHBT below keeps only its upper
       halfword - confirm against the intrinsic's documentation */
    out2 = __SMUSDX(C2, R);
#else
    /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
    out1 = __SMUSDX(R, C2) >> 16U;
    /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
    out2 = __SMUAD(C2, R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* Re-reading i0+fftLen/4: T was overwritten with the sum (b + d) above and
       the slot is about to be overwritten with the (xc', yc') result */
    /* T = packed(yb, xb) */
    T = read_q15x2 (pSi1);
    T = __SHADD16(T, 0);
    T = __SHADD16(T, 0);

    /* writing the butterfly processed i0 + fftLen/4 sample */
    /* writing output(xc', yc') in little endian format */
    write_q15x2_ia (&pSi1, (q31_t) __PKHBT( out1, out2, 0 ));

    /* Butterfly calculations */
    /* Re-read the i0+3fftLen/4 sample (still unmodified at this point) */
    /* U = packed(yd, xd) */
    U = read_q15x2 (pSi3);
    U = __SHADD16(U, 0);
    U = __SHADD16(U, 0);

    /* T = packed(yb-yd, xb-xd) */
    T = __QSUB16(T, U);

#ifndef ARM_MATH_BIG_ENDIAN
    /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
    R = __QASX(S, T);
    /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
    S = __QSAX(S, T);
#else
    /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
    R = __QSAX(S, T);
    /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
    S = __QASX(S, T);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* co1 & si1 are read from SIMD Coefficient pointer (pair index ic) */
    C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
    /* Butterfly process for the i0+fftLen/2 sample */

#ifndef ARM_MATH_BIG_ENDIAN
    /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
    out1 = __SMUAD(C1, S) >> 16U;
    /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
    out2 = __SMUSDX(C1, S);
#else
    /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
    out1 = __SMUSDX(S, C1) >> 16U;
    /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
    out2 = __SMUAD(C1, S);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* writing output(xb', yb') in little endian format, into the i0+fftLen/2 slot */
    write_q15x2_ia (&pSi2, __PKHBT( out1, out2, 0 ));

    /* co3 & si3 are read from SIMD Coefficient pointer (pair index 3*ic) */
    C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));
    /* Butterfly process for the i0+3fftLen/4 sample */

#ifndef ARM_MATH_BIG_ENDIAN
    /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
    out1 = __SMUAD(C3, R) >> 16U;
    /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
    out2 = __SMUSDX(C3, R);
#else
    /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
    out1 = __SMUSDX(R, C3) >> 16U;
    /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
    out2 = __SMUAD(C3, R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* writing output(xd', yd') in little endian format */
    write_q15x2_ia (&pSi3, __PKHBT( out1, out2, 0 ));

    /* Twiddle coefficients index modifier */
    ic = ic + twidCoefModifier;

  } while (--j);
  /* data is in 4.11(q11) format */

  /* end of first stage process */


  /* start of middle stage process */

  /* Twiddle coefficients index modifier */
  twidCoefModifier <<= 2U;

  /* Calculation of Middle stage */
  for (k = fftLen / 4U; k > 4U; k >>= 2U)
  {
    /* Initializations for the middle stage */
    /* n1 = distance between successive butterflies sharing a twiddle,
       n2 = distance between the four inputs of one butterfly */
    n1 = n2;
    n2 >>= 2U;
    ic = 0U;

    for (j = 0U; j <= (n2 - 1U); j++)
    {
      /* index calculation for the coefficients: loaded once per j and reused
         by every butterfly of the inner loop */
      C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
      C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
      C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));

      /* Twiddle coefficients index modifier */
      ic = ic + twidCoefModifier;

      /* Cursors for the first butterfly of this j; each is advanced by
         2 * n1 q15 values after its write inside the inner loop */
      pSi0 = pSrc16 + 2 * j;
      pSi1 = pSi0 + 2 * n2;
      pSi2 = pSi1 + 2 * n2;
      pSi3 = pSi2 + 2 * n2;

      /* Butterfly implementation */
      /* i0 only counts butterflies here; addressing is done through pSi0..pSi3 */
      for (i0 = j; i0 < fftLen; i0 += n1)
      {
        /* Reading i0, i0 + 2*n2 inputs */
        /* Read ya (real), xa(imag) input */
        T = read_q15x2 (pSi0);

        /* Read yc (real), xc(imag) input */
        S = read_q15x2 (pSi2);

        /* R = packed( (ya + yc), (xa + xc)) */
        R = __QADD16(T, S);

        /* S = packed((ya - yc), (xa - xc)) */
        S = __QSUB16(T, S);

        /* Reading i0 + n2 , i0 + 3*n2 inputs */
        /* Read yb (real), xb(imag) input */
        T = read_q15x2 (pSi1);

        /* Read yd (real), xd(imag) input */
        U = read_q15x2 (pSi3);

        /* T = packed( (yb + yd), (xb + xd)) */
        T = __QADD16(T, U);

        /* writing the butterfly processed i0 sample */

        /* xa' = xa + xb + xc + xd */
        /* ya' = ya + yb + yc + yd */
        /* two halving steps: overall scale down by 4 */
        out1 = __SHADD16(R, T);
        out1 = __SHADD16(out1, 0);
        write_q15x2 (pSi0, out1);
        pSi0 += 2 * n1;

        /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)), halved */
        R = __SHSUB16(R, T);

#ifndef ARM_MATH_BIG_ENDIAN
        /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
        out1 = __SMUAD(C2, R) >> 16U;

        /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
        out2 = __SMUSDX(C2, R);
#else
        /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
        out1 = __SMUSDX(R, C2) >> 16U;

        /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
        out2 = __SMUAD(C2, R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* Re-reading the i0 + n2 sample: T was overwritten with (b + d) above
           and this slot is overwritten just below */
        /* Read yb (real), xb(imag) input */
        T = read_q15x2 (pSi1);

        /* writing the butterfly processed i0 + n2 sample */
        /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
        /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
        write_q15x2 (pSi1, __PKHBT( out1, out2, 0 ));
        pSi1 += 2 * n1;

        /* Butterfly calculations */

        /* Read yd (real), xd(imag) input */
        U = read_q15x2 (pSi3);

        /* T = packed(yb-yd, xb-xd) */
        T = __QSUB16(T, U);

#ifndef ARM_MATH_BIG_ENDIAN
        /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)), halved */
        R = __SHASX(S, T);

        /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)), halved */
        S = __SHSAX(S, T);


        /* Butterfly process for the i0 + 2*n2 sample */
        out1 = __SMUAD(C1, S) >> 16U;
        out2 = __SMUSDX(C1, S);
#else
        /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)), halved */
        R = __SHSAX(S, T);

        /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)), halved */
        S = __SHASX(S, T);


        /* Butterfly process for the i0 + 2*n2 sample */
        out1 = __SMUSDX(S, C1) >> 16U;
        out2 = __SMUAD(C1, S);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
        /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
        write_q15x2 (pSi2, __PKHBT( out1, out2, 0 ));
        pSi2 += 2 * n1;

        /* Butterfly process for the i0 + 3*n2 sample */

#ifndef ARM_MATH_BIG_ENDIAN
        out1 = __SMUAD(C3, R) >> 16U;
        out2 = __SMUSDX(C3, R);
#else
        out1 = __SMUSDX(R, C3) >> 16U;
        out2 = __SMUAD(C3, R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
        /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
        write_q15x2 (pSi3, __PKHBT( out1, out2, 0 ));
        pSi3 += 2 * n1;
      }
    }
    /* Twiddle coefficients index modifier */
    twidCoefModifier <<= 2U;
  }
  /* end of middle stage process */


  /* data is in 10.6(q6) format for the 1024 point */
  /* data is in 8.8(q8) format for the 256 point */
  /* data is in 6.10(q10) format for the 64 point */
  /* data is in 4.12(q12) format for the 16 point */

  /* Initializations for the last stage */
  /* One iteration per group of four consecutive complex samples */
  j = fftLen >> 2;

  ptr1 = &pSrc16[0];

  /* start of last stage process */

  /* Butterfly implementation */
  do
  {
    /* Read xa (real), ya(imag) input */
    xaya = read_q15x2_ia (&ptr1);

    /* Read xb (real), yb(imag) input */
    xbyb = read_q15x2_ia (&ptr1);

    /* Read xc (real), yc(imag) input */
    xcyc = read_q15x2_ia (&ptr1);

    /* Read xd (real), yd(imag) input */
    xdyd = read_q15x2_ia (&ptr1);

    /* R = packed((ya + yc), (xa + xc)) */
    R = __QADD16(xaya, xcyc);

    /* T = packed((yb + yd), (xb + xd)) */
    T = __QADD16(xbyb, xdyd);

    /* Rewind to the first of the four samples just read (4 pairs = 8 q15
       values) so that the results below overwrite them in place */
    ptr1 = ptr1 - 8U;


    /* xa' = xa + xb + xc + xd */
    /* ya' = ya + yb + yc + yd */
    write_q15x2_ia (&ptr1, __SHADD16(R, T));

    /* T = packed((yb + yd), (xb + xd)) - same value as computed above */
    T = __QADD16(xbyb, xdyd);

    /* xc' = (xa-xb+xc-xd) */
    /* yc' = (ya-yb+yc-yd) */
    write_q15x2_ia (&ptr1, __SHSUB16(R, T));

    /* S = packed((ya - yc), (xa - xc)) */
    S = __QSUB16(xaya, xcyc);

    /* U = packed( (yb - yd), (xb - xd)), from the values already held in registers */
    U = __QSUB16(xbyb, xdyd);

#ifndef ARM_MATH_BIG_ENDIAN
    /* xb' = (xa+yb-xc-yd) */
    /* yb' = (ya-xb-yc+xd) */
    write_q15x2_ia (&ptr1, __SHSAX(S, U));

    /* xd' = (xa-yb-xc+yd) */
    /* yd' = (ya+xb-yc-xd) */
    write_q15x2_ia (&ptr1, __SHASX(S, U));
#else
    /* xb' = (xa+yb-xc-yd) */
    /* yb' = (ya-xb-yc+xd) */
    write_q15x2_ia (&ptr1, __SHASX(S, U));

    /* xd' = (xa-yb-xc+yd) */
    /* yd' = (ya+xb-yc-xd) */
    write_q15x2_ia (&ptr1, __SHSAX(S, U));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

  } while (--j);

  /* end of last stage process */

  /* output is in 11.5(q5) format for the 1024 point */
  /* output is in 9.7(q7) format for the 256 point   */
  /* output is in 7.9(q9) format for the 64 point  */
  /* output is in 5.11(q11) format for the 16 point  */


#else /* #if defined (ARM_MATH_DSP) */

        q15_t R0, R1, S0, S1, T0, T1, U0, U1;
        q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
        uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;

  /* Total process is divided into three stages */

  /* process first stage, middle stages, & last stage */

  /* Initializations for the first stage */
  n2 = fftLen;
  n1 = n2;

  /* n2 = fftLen/4 */
  n2 >>= 2U;

  /* Index for twiddle coefficient */
  ic = 0U;

  /* Index for input read and output write */
  i0 = 0U;
  j = n2;

  /* Input is in 1.15(q15) format */

  /* start of first stage process */
  do
  {
    /* Butterfly implementation */

    /* index calculation for the input as, */
    /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
    /* (indices count complex samples; the q15 offset is twice the index) */
    i1 = i0 + n2;
    i2 = i1 + n2;
    i3 = i2 + n2;

    /* Reading i0, i0+fftLen/2 inputs */

    /* input is down scale by 4 to avoid overflow */
    /* Read ya (real), xa(imag) input */
    T0 = pSrc16[i0 * 2U] >> 2U;
    T1 = pSrc16[(i0 * 2U) + 1U] >> 2U;

    /* input is down scale by 4 to avoid overflow */
    /* Read yc (real), xc(imag) input */
    S0 = pSrc16[i2 * 2U] >> 2U;
    S1 = pSrc16[(i2 * 2U) + 1U] >> 2U;

    /* R0 = (ya + yc) */
    R0 = __SSAT(T0 + S0, 16U);
    /* R1 = (xa + xc) */
    R1 = __SSAT(T1 + S1, 16U);

    /* S0 = (ya - yc) */
    S0 = __SSAT(T0 - S0, 16);
    /* S1 = (xa - xc) */
    S1 = __SSAT(T1 - S1, 16);

    /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
    /* input is down scale by 4 to avoid overflow */
    /* Read yb (real), xb(imag) input */
    T0 = pSrc16[i1 * 2U] >> 2U;
    T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;

    /* input is down scale by 4 to avoid overflow */
    /* Read yd (real), xd(imag) input */
    U0 = pSrc16[i3 * 2U] >> 2U;
    U1 = pSrc16[(i3 * 2U) + 1] >> 2U;

    /* T0 = (yb + yd) */
    T0 = __SSAT(T0 + U0, 16U);
    /* T1 = (xb + xd) */
    T1 = __SSAT(T1 + U1, 16U);

    /* writing the butterfly processed i0 sample */
    /* ya' = ya + yb + yc + yd */
    /* xa' = xa + xb + xc + xd */
    /* each operand is halved before the add */
    pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
    pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);

    /* R0 = (ya + yc) - (yb + yd) */
    /* R1 = (xa + xc) - (xb + xd) */
    R0 = __SSAT(R0 - T0, 16U);
    R1 = __SSAT(R1 - T1, 16U);

    /* co2 & si2 are read from Coefficient pointer (pair index 2*ic) */
    Co2 = pCoef16[2U * ic * 2U];
    Si2 = pCoef16[(2U * ic * 2U) + 1];

    /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
    out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U);
    /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
    out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U);

    /* Re-reading i0+fftLen/4: T0/T1 were overwritten with the sums above and
       this slot is overwritten just below */
    /* input is down scale by 4 to avoid overflow */
    /* T0 = yb, T1 = xb */
    T0 = pSrc16[i1 * 2U] >> 2;
    T1 = pSrc16[(i1 * 2U) + 1] >> 2;

    /* writing the butterfly processed i0 + fftLen/4 sample */
    /* writing output(xc', yc') in little endian format */
    pSrc16[i1 * 2U] = out1;
    pSrc16[(i1 * 2U) + 1] = out2;

    /* Butterfly calculations */
    /* input is down scale by 4 to avoid overflow */
    /* U0 = yd, U1 = xd */
    U0 = pSrc16[i3 * 2U] >> 2;
    U1 = pSrc16[(i3 * 2U) + 1] >> 2;
    /* T0 = yb-yd */
    T0 = __SSAT(T0 - U0, 16);
    /* T1 = xb-xd */
    T1 = __SSAT(T1 - U1, 16);

    /* R1 = (ya-yc) + (xb- xd),  R0 = (xa-xc) - (yb-yd)) */
    R0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16);
    R1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16);

    /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */
    S0 = (q15_t) __SSAT(((q31_t) S0 + T1), 16U);
    S1 = (q15_t) __SSAT(((q31_t) S1 - T0), 16U);

    /* co1 & si1 are read from Coefficient pointer (pair index ic) */
    Co1 = pCoef16[ic * 2U];
    Si1 = pCoef16[(ic * 2U) + 1];
    /*  Butterfly process for the i0+fftLen/2 sample */
    /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
    out1 = (q15_t) ((Si1 * S1 + Co1 * S0) >> 16);
    /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
    out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16);

    /* writing output(xb', yb') in little endian format */
    pSrc16[i2 * 2U] = out1;
    pSrc16[(i2 * 2U) + 1] = out2;

    /* Co3 & si3 are read from Coefficient pointer (pair index 3*ic) */
    Co3 = pCoef16[3U * (ic * 2U)];
    Si3 = pCoef16[(3U * (ic * 2U)) + 1];
    /*  Butterfly process for the i0+3fftLen/4 sample */
    /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
    out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U);
    /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
    out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U);
    /* writing output(xd', yd') in little endian format */
    pSrc16[i3 * 2U] = out1;
    pSrc16[(i3 * 2U) + 1] = out2;

    /* Twiddle coefficients index modifier */
    ic = ic + twidCoefModifier;

    /* Updating input index */
    i0 = i0 + 1U;

  } while (--j);
  /* data is in 4.11(q11) format */

  /* end of first stage process */


  /* start of middle stage process */

  /* Twiddle coefficients index modifier */
  twidCoefModifier <<= 2U;

  /* Calculation of Middle stage */
  for (k = fftLen / 4U; k > 4U; k >>= 2U)
  {
    /* Initializations for the middle stage */
    /* n1 = distance between successive butterflies sharing a twiddle,
       n2 = distance between the four inputs of one butterfly */
    n1 = n2;
    n2 >>= 2U;
    ic = 0U;

    for (j = 0U; j <= (n2 - 1U); j++)
    {
      /* index calculation for the coefficients: loaded once per j and reused
         by every butterfly of the inner loop */
      Co1 = pCoef16[ic * 2U];
      Si1 = pCoef16[(ic * 2U) + 1U];
      Co2 = pCoef16[2U * (ic * 2U)];
      Si2 = pCoef16[(2U * (ic * 2U)) + 1U];
      Co3 = pCoef16[3U * (ic * 2U)];
      Si3 = pCoef16[(3U * (ic * 2U)) + 1U];

      /* Twiddle coefficients index modifier */
      ic = ic + twidCoefModifier;

      /* Butterfly implementation */
      for (i0 = j; i0 < fftLen; i0 += n1)
      {
        /* index calculation for the input as, */
        /* pSrc16[i0 + 0], pSrc16[i0 + n2], pSrc16[i0 + 2*n2], pSrc16[i0 + 3*n2] */
        i1 = i0 + n2;
        i2 = i1 + n2;
        i3 = i2 + n2;

        /* Reading i0, i0 + 2*n2 inputs */
        /* Read ya (real), xa(imag) input */
        T0 = pSrc16[i0 * 2U];
        T1 = pSrc16[(i0 * 2U) + 1U];

        /* Read yc (real), xc(imag) input */
        S0 = pSrc16[i2 * 2U];
        S1 = pSrc16[(i2 * 2U) + 1U];

        /* R0 = (ya + yc), R1 = (xa + xc) */
        R0 = __SSAT(T0 + S0, 16);
        R1 = __SSAT(T1 + S1, 16);

        /* S0 = (ya - yc), S1 =(xa - xc) */
        S0 = __SSAT(T0 - S0, 16);
        S1 = __SSAT(T1 - S1, 16);

        /* Reading i0 + n2 , i0 + 3*n2 inputs */
        /* Read yb (real), xb(imag) input */
        T0 = pSrc16[i1 * 2U];
        T1 = pSrc16[(i1 * 2U) + 1U];

        /* Read yd (real), xd(imag) input */
        U0 = pSrc16[i3 * 2U];
        U1 = pSrc16[(i3 * 2U) + 1U];


        /* T0 = (yb + yd), T1 = (xb + xd) */
        T0 = __SSAT(T0 + U0, 16);
        T1 = __SSAT(T1 + U1, 16);

        /* writing the butterfly processed i0 sample */

        /* xa' = xa + xb + xc + xd */
        /* ya' = ya + yb + yc + yd */
        /* operands halved, then the sum halved again: overall scale down by 4 */
        out1 = ((R0 >> 1U) + (T0 >> 1U)) >> 1U;
        out2 = ((R1 >> 1U) + (T1 >> 1U)) >> 1U;

        pSrc16[i0 * 2U] = out1;
        pSrc16[(2U * i0) + 1U] = out2;

        /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd), operands halved first */
        R0 = (R0 >> 1U) - (T0 >> 1U);
        R1 = (R1 >> 1U) - (T1 >> 1U);

        /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
        out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U);

        /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
        out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U);

        /* Re-reading the i0 + n2 sample: T0/T1 were overwritten with the sums
           above and this slot is overwritten just below */
        /* Read yb (real), xb(imag) input */
        T0 = pSrc16[i1 * 2U];
        T1 = pSrc16[(i1 * 2U) + 1U];

        /* writing the butterfly processed i0 + n2 sample */
        /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
        /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
        pSrc16[i1 * 2U] = out1;
        pSrc16[(i1 * 2U) + 1U] = out2;

        /* Butterfly calculations */

        /* Read yd (real), xd(imag) input */
        U0 = pSrc16[i3 * 2U];
        U1 = pSrc16[(i3 * 2U) + 1U];

        /* T0 = yb-yd, T1 = xb-xd */
        T0 = __SSAT(T0 - U0, 16);
        T1 = __SSAT(T1 - U1, 16);

        /* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)), operands halved first */
        R0 = (S0 >> 1U) - (T1 >> 1U);
        R1 = (S1 >> 1U) + (T0 >> 1U);

        /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)), operands halved first */
        S0 = (S0 >> 1U) + (T1 >> 1U);
        S1 = (S1 >> 1U) - (T0 >> 1U);

        /* Butterfly process for the i0 + 2*n2 sample */
        out1 = (q15_t) ((Co1 * S0 + Si1 * S1) >> 16U);

        out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16U);

        /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
        /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
        pSrc16[i2 * 2U] = out1;
        pSrc16[(i2 * 2U) + 1U] = out2;

        /* Butterfly process for the i0 + 3*n2 sample */
        out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U);

        out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U);
        /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
        /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
        pSrc16[i3 * 2U] = out1;
        pSrc16[(i3 * 2U) + 1U] = out2;
      }
    }
    /* Twiddle coefficients index modifier */
    twidCoefModifier <<= 2U;
  }
  /* end of middle stage process */


  /* data is in 10.6(q6) format for the 1024 point */
  /* data is in 8.8(q8) format for the 256 point */
  /* data is in 6.10(q10) format for the 64 point */
  /* data is in 4.12(q12) format for the 16 point */

  /* Initializations for the last stage */
  n1 = n2;
  n2 >>= 2U;

  /* start of last stage process */

  /* Butterfly implementation */
  /* No twiddle multiplications in this stage: outputs are halved sums and differences only */
  for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
  {
    /* index calculation for the input as, */
    /* pSrc16[i0 + 0], pSrc16[i0 + n2], pSrc16[i0 + 2*n2], pSrc16[i0 + 3*n2] */
    i1 = i0 + n2;
    i2 = i1 + n2;
    i3 = i2 + n2;

    /* Reading i0, i0 + 2*n2 inputs */
    /* Read ya (real), xa(imag) input */
    T0 = pSrc16[i0 * 2U];
    T1 = pSrc16[(i0 * 2U) + 1U];

    /* Read yc (real), xc(imag) input */
    S0 = pSrc16[i2 * 2U];
    S1 = pSrc16[(i2 * 2U) + 1U];

    /* R0 = (ya + yc), R1 = (xa + xc) */
    R0 = __SSAT(T0 + S0, 16U);
    R1 = __SSAT(T1 + S1, 16U);

    /* S0 = (ya - yc), S1 = (xa - xc) */
    S0 = __SSAT(T0 - S0, 16U);
    S1 = __SSAT(T1 - S1, 16U);

    /* Reading i0 + n2 , i0 + 3*n2 inputs */
    /* Read yb (real), xb(imag) input */
    T0 = pSrc16[i1 * 2U];
    T1 = pSrc16[(i1 * 2U) + 1U];
    /* Read yd (real), xd(imag) input */
    U0 = pSrc16[i3 * 2U];
    U1 = pSrc16[(i3 * 2U) + 1U];

    /* T0 = (yb + yd), T1 = (xb + xd)) */
    T0 = __SSAT(T0 + U0, 16U);
    T1 = __SSAT(T1 + U1, 16U);

    /*  writing the butterfly processed i0 sample */
    /* xa' = xa + xb + xc + xd */
    /* ya' = ya + yb + yc + yd */
    pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
    pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);

    /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd), operands halved first */
    R0 = (R0 >> 1U) - (T0 >> 1U);
    R1 = (R1 >> 1U) - (T1 >> 1U);
    /* Re-read the i0 + n2 sample before its slot is overwritten below */
    /* Read yb (real), xb(imag) input */
    T0 = pSrc16[i1 * 2U];
    T1 = pSrc16[(i1 * 2U) + 1U];

    /*  writing the butterfly processed i0 + n2 sample */
    /* xc' = (xa-xb+xc-xd) */
    /* yc' = (ya-yb+yc-yd) */
    pSrc16[i1 * 2U] = R0;
    pSrc16[(i1 * 2U) + 1U] = R1;

    /* Read yd (real), xd(imag) input */
    U0 = pSrc16[i3 * 2U];
    U1 = pSrc16[(i3 * 2U) + 1U];
    /* T0 = (yb - yd), T1 = (xb - xd)  */
    T0 = __SSAT(T0 - U0, 16U);
    T1 = __SSAT(T1 - U1, 16U);

    /*  writing the butterfly processed i0 + 2*n2 sample */
    /* xb' = (xa+yb-xc-yd) */
    /* yb' = (ya-xb-yc+xd) */
    pSrc16[i2 * 2U] = (S0 >> 1U) + (T1 >> 1U);
    pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U);

    /*  writing the butterfly processed i0 + 3*n2 sample */
    /* xd' = (xa-yb-xc+yd) */
    /* yd' = (ya+xb-yc-xd) */
    pSrc16[i3 * 2U] = (S0 >> 1U) - (T1 >> 1U);
    pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U);

  }

  /* end of last stage process */

  /* output is in 11.5(q5) format for the 1024 point */
  /* output is in 9.7(q7) format for the 256 point   */
  /* output is in 7.9(q9) format for the 64 point  */
  /* output is in 5.11(q11) format for the 16 point  */

#endif /* #if defined (ARM_MATH_DSP) */

}
975
976
977 /**
978 @brief Core function for the Q15 CIFFT butterfly process.
979 @param[in,out] pSrc16 points to the in-place buffer of Q15 data type
980 @param[in] fftLen length of the FFT
981 @param[in] pCoef16 points to twiddle coefficient buffer
982 @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
983 @return none
984 */
985
986 /*
987 * Radix-4 IFFT algorithm used is :
988 *
989 * CIFFT uses same twiddle coefficients as CFFT function
 * x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n + fftLen/2] + (-j)^k * x[n + 3*fftLen/4]
991 *
992 *
993 * IFFT is implemented with following changes in equations from FFT
994 *
995 * Input real and imaginary data:
996 * x(n) = xa + j * ya
997 * x(n+N/4 ) = xb + j * yb
998 * x(n+N/2 ) = xc + j * yc
 * x(n+3N/4) = xd + j * yd
1000 *
1001 *
1002 * Output real and imaginary data:
1003 * x(4r) = xa'+ j * ya'
1004 * x(4r+1) = xb'+ j * yb'
1005 * x(4r+2) = xc'+ j * yc'
1006 * x(4r+3) = xd'+ j * yd'
1007 *
1008 *
1009 * Twiddle factors for radix-4 IFFT:
1010 * Wn = co1 + j * (si1)
1011 * W2n = co2 + j * (si2)
1012 * W3n = co3 + j * (si3)
1013
1014 * The real and imaginary output values for the radix-4 butterfly are
1015 * xa' = xa + xb + xc + xd
1016 * ya' = ya + yb + yc + yd
1017 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
1018 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
1019 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
1020 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
1021 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
1022 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
1023 *
1024 */
1025
/**
  @brief         Radix-4 butterfly core of the Q15 complex inverse FFT (in place).

  @param[in,out] pSrc16            interleaved complex buffer {re0, im0, re1, im1, ...}
                                   holding fftLen complex samples; overwritten with the result
  @param[in]     fftLen            number of complex samples. The stage loops divide the
                                   span by 4 on every pass, so a power of 4 is expected
                                   (the format notes below cover 16, 64, 256 and 1024).
                                   Lengths below 4 are rejected and leave the buffer untouched.
  @param[in]     pCoef16           twiddle table stored as interleaved (co, si) pairs
  @param[in]     twidCoefModifier  step applied to the twiddle index in the first stage;
                                   it is multiplied by 4 before every following stage
  @return        none

  Notation used in the comments (same as the equations in the comment block
  preceding this function):
    a = x(n), b = x(n + N/4), c = x(n + N/2), d = x(n + 3N/4),
    xa/ya = real/imaginary part of a, and so on.
  Each butterfly stores a' at the slot of a, c' (twiddle W2n) at the slot of b,
  b' (twiddle Wn) at the slot of c and d' (twiddle W3n) at the slot of d.
  Two implementations are provided: a packed 2 x 16-bit SIMD version when
  ARM_MATH_DSP is defined and a scalar version otherwise.
 */
void arm_radix4_butterfly_inverse_q15(
        q15_t * pSrc16,
        uint32_t fftLen,
  const q15_t * pCoef16,
        uint32_t twidCoefModifier)
{

#if defined (ARM_MATH_DSP)

  q31_t R, S, T, U;                     /* packed (re, im) work registers        */
  q31_t C1, C2, C3, out1, out2;         /* packed twiddles and product results   */
  uint32_t n1, n2, ic, i0, j, k;

  q15_t *ptr1;
  q15_t *pSi0;                          /* cursor on a = x(n)                    */
  q15_t *pSi1;                          /* cursor on b = x(n + N/4)              */
  q15_t *pSi2;                          /* cursor on c = x(n + N/2)              */
  q15_t *pSi3;                          /* cursor on d = x(n + 3N/4)             */

  q31_t xaya, xbyb, xcyc, xdyd;

  /* The do/while loops below pre-decrement their counter; with fftLen < 4
     the counter would start at 0 and wrap around, running far past the
     end of the buffer. */
  if (fftLen < 4U)
  {
    return;
  }

  /* The work is split into a first stage (input pre-scaling),
     the middle stages and a twiddle-free last stage. */

  /* Initializations for the first stage */
  n2 = fftLen;
  n1 = n2;

  /* n2 = fftLen/4: distance, in complex samples, between a, b, c and d */
  n2 >>= 2U;

  /* Index for twiddle coefficient */
  ic = 0U;

  /* Number of butterflies in the first stage */
  j = n2;

  pSi0 = pSrc16;
  pSi1 = pSi0 + 2 * n2;
  pSi2 = pSi1 + 2 * n2;
  pSi3 = pSi2 + 2 * n2;

  /* Input is in 1.15(q15) format */

  /* start of first stage process */
  do
  {
    /* Load a; two halving adds with 0 scale both halfwords by 1/4
       to leave headroom for the additions below */
    T = read_q15x2 (pSi0);
    T = __SHADD16(T, 0);
    T = __SHADD16(T, 0);

    /* Load c, scaled by 1/4 */
    S = read_q15x2 (pSi2);
    S = __SHADD16(S, 0);
    S = __SHADD16(S, 0);

    /* R = a + c (saturating, per halfword) */
    R = __QADD16(T, S);

    /* S = a - c */
    S = __QSUB16(T, S);

    /* Load b, scaled by 1/4 */
    T = read_q15x2 (pSi1);
    T = __SHADD16(T, 0);
    T = __SHADD16(T, 0);

    /* Load d, scaled by 1/4 */
    U = read_q15x2 (pSi3);
    U = __SHADD16(U, 0);
    U = __SHADD16(U, 0);

    /* T = b + d */
    T = __QADD16(T, U);

    /* a' = (a + c) + (b + d), halved once more by the halving add:
       xa' = xa + xb + xc + xd
       ya' = ya + yb + yc + yd */
    write_q15x2_ia (&pSi0, __SHADD16(R, T));

    /* R = (a + c) - (b + d) */
    R = __QSUB16(R, T);

    /* W2n = co2 + j * si2, read as one packed pair */
    C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));

#ifndef ARM_MATH_BIG_ENDIAN
    /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
    out1 = __SMUSD(C2, R) >> 16U;
    /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
       (left unshifted: only its upper halfword is packed below) */
    out2 = __SMUADX(C2, R);
#else
    /* Big-endian variant: same products, with the operands arranged
       for the reversed halfword order of the packed pairs */
    out1 = __SMUADX(C2, R) >> 16U;
    out2 = __SMUSD(__QSUB16(0, C2), R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* T was overwritten with b + d above: reload b (scaled by 1/4)
       before its slot is overwritten with c' */
    T = read_q15x2 (pSi1);
    T = __SHADD16(T, 0);
    T = __SHADD16(T, 0);

    /* Store c' in the slot of b: low halfword from out1,
       high halfword from the top half of out2 */
    write_q15x2_ia (&pSi1, (q31_t) __PKHBT( out1, out2, 0 ));

    /* Reload d, scaled by 1/4 */
    U = read_q15x2 (pSi3);
    U = __SHADD16(U, 0);
    U = __SHADD16(U, 0);

    /* T = b - d */
    T = __QSUB16(T, U);

#ifndef ARM_MATH_BIG_ENDIAN
    /* R = (a - c) - j * (b - d)
         = (xa+yb-xc-yd) + j * (ya-xb-yc+xd) */
    R = __QSAX(S, T);
    /* S = (a - c) + j * (b - d)
         = (xa-yb-xc+yd) + j * (ya+xb-yc-xd) */
    S = __QASX(S, T);
#else
    /* Big-endian variant: exchange intrinsics swapped for the
       reversed halfword order */
    R = __QASX(S, T);
    S = __QSAX(S, T);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* Wn = co1 + j * si1 */
    C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));

#ifndef ARM_MATH_BIG_ENDIAN
    /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
    out1 = __SMUSD(C1, S) >> 16U;
    /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
    out2 = __SMUADX(C1, S);
#else
    out1 = __SMUADX(C1, S) >> 16U;
    out2 = __SMUSD(__QSUB16(0, C1), S);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* Store b' in the slot of c */
    write_q15x2_ia (&pSi2, __PKHBT( out1, out2, 0 ));

    /* W3n = co3 + j * si3 */
    C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));

#ifndef ARM_MATH_BIG_ENDIAN
    /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
    out1 = __SMUSD(C3, R) >> 16U;
    /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
    out2 = __SMUADX(C3, R);
#else
    out1 = __SMUADX(C3, R) >> 16U;
    out2 = __SMUSD(__QSUB16(0, C3), R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* Store d' in the slot of d */
    write_q15x2_ia (&pSi3, __PKHBT( out1, out2, 0 ));

    /* Advance to the twiddles of the next butterfly */
    ic = ic + twidCoefModifier;

  } while (--j);
  /* data is in 4.11(q11) format */

  /* end of first stage process */


  /* start of middle stage process */

  /* Every later stage steps through the twiddle table 4x faster */
  twidCoefModifier <<= 2U;

  /* Calculation of Middle stage */
  for (k = fftLen / 4U; k > 4U; k >>= 2U)
  {
    /* n1 = span of one butterfly group, n2 = distance between its inputs */
    n1 = n2;
    n2 >>= 2U;
    ic = 0U;

    /* n2 is at least 1 here because k > 4 */
    for (j = 0U; j < n2; j++)
    {
      /* One twiddle triple is shared by every group at offset j */
      C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
      C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
      C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));

      /* Twiddle coefficients index modifier */
      ic = ic + twidCoefModifier;

      pSi0 = pSrc16 + 2 * j;
      pSi1 = pSi0 + 2 * n2;
      pSi2 = pSi1 + 2 * n2;
      pSi3 = pSi2 + 2 * n2;

      /* Same butterfly in every group; groups are n1 samples apart */
      for (i0 = j; i0 < fftLen; i0 += n1)
      {
        /* Load a and c (no pre-scaling in the middle stages) */
        T = read_q15x2 (pSi0);
        S = read_q15x2 (pSi2);

        /* R = a + c */
        R = __QADD16(T, S);

        /* S = a - c */
        S = __QSUB16(T, S);

        /* Load b and d */
        T = read_q15x2 (pSi1);
        U = read_q15x2 (pSi3);

        /* T = b + d */
        T = __QADD16(T, U);

        /* a' = (a + b + c + d), halved twice:
           xa' = xa + xb + xc + xd
           ya' = ya + yb + yc + yd */
        out1 = __SHADD16(R, T);
        out1 = __SHADD16(out1, 0);
        write_q15x2 (pSi0, out1);
        pSi0 += 2 * n1;

        /* R = ((a + c) - (b + d)) / 2 */
        R = __SHSUB16(R, T);

#ifndef ARM_MATH_BIG_ENDIAN
        /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
        out1 = __SMUSD(C2, R) >> 16U;

        /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
        out2 = __SMUADX(C2, R);
#else
        /* Big-endian variant of the same products */
        out1 = __SMUADX(R, C2) >> 16U;
        out2 = __SMUSD(__QSUB16(0, C2), R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* Reload b (T currently holds b + d) before its slot is overwritten */
        T = read_q15x2 (pSi1);

        /* Store c' in the slot of b */
        write_q15x2 (pSi1, __PKHBT( out1, out2, 0 ));
        pSi1 += 2 * n1;

        /* Reload d */
        U = read_q15x2 (pSi3);

        /* T = b - d */
        T = __QSUB16(T, U);

#ifndef ARM_MATH_BIG_ENDIAN
        /* R = ((a - c) - j * (b - d)) / 2
             = (xa+yb-xc-yd) + j * (ya-xb-yc+xd), halved */
        R = __SHSAX(S, T);

        /* S = ((a - c) + j * (b - d)) / 2
             = (xa-yb-xc+yd) + j * (ya+xb-yc-xd), halved */
        S = __SHASX(S, T);

        /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
        out1 = __SMUSD(C1, S) >> 16U;
        /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
        out2 = __SMUADX(C1, S);
#else
        /* Big-endian variant: exchange intrinsics swapped */
        R = __SHASX(S, T);
        S = __SHSAX(S, T);

        out1 = __SMUADX(S, C1) >> 16U;
        out2 = __SMUSD(__QSUB16(0, C1), S);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* Store b' in the slot of c */
        write_q15x2 (pSi2, __PKHBT( out1, out2, 0 ));
        pSi2 += 2 * n1;

#ifndef ARM_MATH_BIG_ENDIAN
        /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
        out1 = __SMUSD(C3, R) >> 16U;
        /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
        out2 = __SMUADX(C3, R);
#else
        out1 = __SMUADX(C3, R) >> 16U;
        out2 = __SMUSD(__QSUB16(0, C3), R);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* Store d' in the slot of d */
        write_q15x2 (pSi3, __PKHBT( out1, out2, 0 ));
        pSi3 += 2 * n1;
      }
    }
    /* Twiddle coefficients index modifier */
    twidCoefModifier <<= 2U;
  }
  /* end of middle stage process */

  /* data is in 10.6(q6) format for the 1024 point */
  /* data is in 8.8(q8) format for the 256 point */
  /* data is in 6.10(q10) format for the 64 point */
  /* data is in 4.12(q12) format for the 16 point */

  /* Initializations for the last stage: one butterfly per 4 adjacent samples */
  j = fftLen >> 2;

  ptr1 = &pSrc16[0];

  /* start of last stage process (no twiddle multiplication) */
  do
  {
    /* Load a, b, c, d from four consecutive complex samples */
    xaya = read_q15x2_ia (&ptr1);
    xbyb = read_q15x2_ia (&ptr1);
    xcyc = read_q15x2_ia (&ptr1);
    xdyd = read_q15x2_ia (&ptr1);

    /* R = a + c */
    R = __QADD16(xaya, xcyc);

    /* T = b + d */
    T = __QADD16(xbyb, xdyd);

    /* Rewind over the 8 halfwords just read so the results land in place */
    ptr1 = ptr1 - 8U;

    /* xa' = xa + xb + xc + xd */
    /* ya' = ya + yb + yc + yd */
    write_q15x2_ia (&ptr1, __SHADD16(R, T));

    /* xc' = (xa-xb+xc-xd) */
    /* yc' = (ya-yb+yc-yd) */
    write_q15x2_ia (&ptr1, __SHSUB16(R, T));

    /* S = a - c */
    S = __QSUB16(xaya, xcyc);

    /* U = b - d */
    U = __QSUB16(xbyb, xdyd);

#ifndef ARM_MATH_BIG_ENDIAN
    /* xb' = (xa-yb-xc+yd) */
    /* yb' = (ya+xb-yc-xd) */
    write_q15x2_ia (&ptr1, __SHASX(S, U));

    /* xd' = (xa+yb-xc-yd) */
    /* yd' = (ya-xb-yc+xd) */
    write_q15x2_ia (&ptr1, __SHSAX(S, U));
#else
    /* Big-endian variant: exchange intrinsics swapped */
    write_q15x2_ia (&ptr1, __SHSAX(S, U));
    write_q15x2_ia (&ptr1, __SHASX(S, U));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

  } while (--j);

  /* end of last stage process */

  /* output is in 11.5(q5) format for the 1024 point */
  /* output is in 9.7(q7) format for the 256 point */
  /* output is in 7.9(q9) format for the 64 point */
  /* output is in 5.11(q11) format for the 16 point */


#else /* #if defined (ARM_MATH_DSP) */

  /* Scalar implementation: suffix 0 = real part, suffix 1 = imaginary part */
  q15_t R0, R1, S0, S1, T0, T1, U0, U1;
  q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
  uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;

  /* The first-stage do/while pre-decrements its counter; with fftLen < 4
     the counter would start at 0 and wrap around, running far past the
     end of the buffer. */
  if (fftLen < 4U)
  {
    return;
  }

  /* The work is split into a first stage (input pre-scaling),
     the middle stages and a twiddle-free last stage. */

  /* Initializations for the first stage */
  n2 = fftLen;
  n1 = n2;

  /* n2 = fftLen/4: distance, in complex samples, between a, b, c and d */
  n2 >>= 2U;

  /* Index for twiddle coefficient */
  ic = 0U;

  /* Index of a for the current butterfly */
  i0 = 0U;

  /* Number of butterflies in the first stage */
  j = n2;

  /* Input is in 1.15(q15) format */

  /* Start of first stage process */
  do
  {
    /* Complex indices of b, c and d */
    i1 = i0 + n2;
    i2 = i1 + n2;
    i3 = i2 + n2;

    /* Load a, scaled down by 4 to leave headroom: T0 = xa, T1 = ya */
    T0 = pSrc16[i0 * 2U] >> 2U;
    T1 = pSrc16[(i0 * 2U) + 1U] >> 2U;

    /* Load c, scaled down by 4: S0 = xc, S1 = yc */
    S0 = pSrc16[i2 * 2U] >> 2U;
    S1 = pSrc16[(i2 * 2U) + 1U] >> 2U;

    /* R0 = (xa + xc), R1 = (ya + yc) */
    R0 = __SSAT(T0 + S0, 16U);
    R1 = __SSAT(T1 + S1, 16U);

    /* S0 = (xa - xc), S1 = (ya - yc) */
    S0 = __SSAT(T0 - S0, 16U);
    S1 = __SSAT(T1 - S1, 16U);

    /* Load b, scaled down by 4: T0 = xb, T1 = yb */
    T0 = pSrc16[i1 * 2U] >> 2U;
    T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;

    /* Load d, scaled down by 4: U0 = xd, U1 = yd */
    U0 = pSrc16[i3 * 2U] >> 2U;
    U1 = pSrc16[(i3 * 2U) + 1U] >> 2U;

    /* T0 = (xb + xd), T1 = (yb + yd) */
    T0 = __SSAT(T0 + U0, 16U);
    T1 = __SSAT(T1 + U1, 16U);

    /* Store a' (each half-sum halved before adding):
       xa' = xa + xb + xc + xd
       ya' = ya + yb + yc + yd */
    pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
    pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);

    /* R0 = (xa + xc) - (xb + xd), R1 = (ya + yc) - (yb + yd) */
    R0 = __SSAT(R0 - T0, 16U);
    R1 = __SSAT(R1 - T1, 16U);

    /* W2n = co2 + j * si2 */
    Co2 = pCoef16[2U * ic * 2U];
    Si2 = pCoef16[(2U * ic * 2U) + 1U];

    /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
    out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16U);
    /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
    out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16U);

    /* T0/T1 were overwritten with b + d: reload b (scaled down by 4)
       before its slot is overwritten with c' */
    T0 = pSrc16[i1 * 2U] >> 2U;
    T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;

    /* Store c' in the slot of b */
    pSrc16[i1 * 2U] = out1;
    pSrc16[(i1 * 2U) + 1U] = out2;

    /* Reload d, scaled down by 4 */
    U0 = pSrc16[i3 * 2U] >> 2U;
    U1 = pSrc16[(i3 * 2U) + 1U] >> 2U;

    /* T0 = (xb - xd), T1 = (yb - yd) */
    T0 = __SSAT(T0 - U0, 16U);
    T1 = __SSAT(T1 - U1, 16U);

    /* R = (a - c) - j * (b - d):
       R0 = (xa - xc) + (yb - yd), R1 = (ya - yc) - (xb - xd) */
    R0 = (q15_t) __SSAT((q31_t) (S0 + T1), 16);
    R1 = (q15_t) __SSAT((q31_t) (S1 - T0), 16);

    /* S = (a - c) + j * (b - d):
       S0 = (xa - xc) - (yb - yd), S1 = (ya - yc) + (xb - xd) */
    S0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16);
    S1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16);

    /* Wn = co1 + j * si1 */
    Co1 = pCoef16[ic * 2U];
    Si1 = pCoef16[(ic * 2U) + 1U];

    /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
    out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U);
    /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
    out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U);

    /* Store b' in the slot of c */
    pSrc16[i2 * 2U] = out1;
    pSrc16[(i2 * 2U) + 1U] = out2;

    /* W3n = co3 + j * si3 */
    Co3 = pCoef16[3U * ic * 2U];
    Si3 = pCoef16[(3U * ic * 2U) + 1U];

    /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
    out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U);
    /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
    out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U);

    /* Store d' in the slot of d */
    pSrc16[i3 * 2U] = out1;
    pSrc16[(i3 * 2U) + 1U] = out2;

    /* Advance to the twiddles of the next butterfly */
    ic = ic + twidCoefModifier;

    /* Next butterfly */
    i0 = i0 + 1U;

  } while (--j);

  /* End of first stage process */

  /* data is in 4.11(q11) format */


  /* Start of Middle stage process */

  /* Every later stage steps through the twiddle table 4x faster */
  twidCoefModifier <<= 2U;

  /* Calculation of Middle stage */
  for (k = fftLen / 4U; k > 4U; k >>= 2U)
  {
    /* n1 = span of one butterfly group, n2 = distance between its inputs */
    n1 = n2;
    n2 >>= 2U;
    ic = 0U;

    /* n2 is at least 1 here because k > 4 */
    for (j = 0U; j < n2; j++)
    {
      /* One twiddle triple is shared by every group at offset j */
      Co1 = pCoef16[ic * 2U];
      Si1 = pCoef16[(ic * 2U) + 1U];
      Co2 = pCoef16[2U * ic * 2U];
      Si2 = pCoef16[2U * ic * 2U + 1U];
      Co3 = pCoef16[3U * ic * 2U];
      Si3 = pCoef16[(3U * ic * 2U) + 1U];

      /* Twiddle coefficients index modifier */
      ic = ic + twidCoefModifier;

      /* Same butterfly in every group; groups are n1 samples apart */
      for (i0 = j; i0 < fftLen; i0 += n1)
      {
        /* Complex indices of b, c and d */
        i1 = i0 + n2;
        i2 = i1 + n2;
        i3 = i2 + n2;

        /* Load a: T0 = xa, T1 = ya (no pre-scaling in the middle stages) */
        T0 = pSrc16[i0 * 2U];
        T1 = pSrc16[(i0 * 2U) + 1U];

        /* Load c: S0 = xc, S1 = yc */
        S0 = pSrc16[i2 * 2U];
        S1 = pSrc16[(i2 * 2U) + 1U];

        /* R0 = (xa + xc), R1 = (ya + yc) */
        R0 = __SSAT(T0 + S0, 16U);
        R1 = __SSAT(T1 + S1, 16U);

        /* S0 = (xa - xc), S1 = (ya - yc) */
        S0 = __SSAT(T0 - S0, 16U);
        S1 = __SSAT(T1 - S1, 16U);

        /* Load b: T0 = xb, T1 = yb */
        T0 = pSrc16[i1 * 2U];
        T1 = pSrc16[(i1 * 2U) + 1U];

        /* Load d: U0 = xd, U1 = yd */
        U0 = pSrc16[i3 * 2U];
        U1 = pSrc16[(i3 * 2U) + 1U];

        /* T0 = (xb + xd), T1 = (yb + yd) */
        T0 = __SSAT(T0 + U0, 16U);
        T1 = __SSAT(T1 + U1, 16U);

        /* Store a', scaled down by 4 in total:
           xa' = xa + xb + xc + xd
           ya' = ya + yb + yc + yd */
        pSrc16[i0 * 2U] = ((R0 >> 1U) + (T0 >> 1U)) >> 1U;
        pSrc16[(i0 * 2U) + 1U] = ((R1 >> 1U) + (T1 >> 1U)) >> 1U;

        /* R0 = ((xa + xc) - (xb + xd)) / 2, R1 = ((ya + yc) - (yb + yd)) / 2 */
        R0 = (R0 >> 1U) - (T0 >> 1U);
        R1 = (R1 >> 1U) - (T1 >> 1U);

        /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
        out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16);
        /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
        out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16);

        /* Reload b (T0/T1 currently hold b + d) before its slot is overwritten */
        T0 = pSrc16[i1 * 2U];
        T1 = pSrc16[(i1 * 2U) + 1U];

        /* Store c' in the slot of b */
        pSrc16[i1 * 2U] = out1;
        pSrc16[(i1 * 2U) + 1U] = out2;

        /* Reload d */
        U0 = pSrc16[i3 * 2U];
        U1 = pSrc16[(i3 * 2U) + 1U];

        /* T0 = (xb - xd), T1 = (yb - yd) */
        T0 = __SSAT(T0 - U0, 16U);
        T1 = __SSAT(T1 - U1, 16U);

        /* R = ((a - c) - j * (b - d)) / 2:
           R0 = ((xa - xc) + (yb - yd)) / 2, R1 = ((ya - yc) - (xb - xd)) / 2 */
        R0 = (S0 >> 1U) + (T1 >> 1U);
        R1 = (S1 >> 1U) - (T0 >> 1U);

        /* S = ((a - c) + j * (b - d)) / 2:
           S0 = ((xa - xc) - (yb - yd)) / 2, S1 = ((ya - yc) + (xb - xd)) / 2 */
        S0 = (S0 >> 1U) - (T1 >> 1U);
        S1 = (S1 >> 1U) + (T0 >> 1U);

        /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
        out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U);
        /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
        out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U);

        /* Store b' in the slot of c */
        pSrc16[i2 * 2U] = out1;
        pSrc16[(i2 * 2U) + 1U] = out2;

        /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
        out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U);
        /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
        out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U);

        /* Store d' in the slot of d */
        pSrc16[i3 * 2U] = out1;
        pSrc16[(i3 * 2U) + 1U] = out2;
      }
    }
    /* Twiddle coefficients index modifier */
    twidCoefModifier <<= 2U;
  }
  /* End of Middle stages process */


  /* data is in 10.6(q6) format for the 1024 point */
  /* data is in 8.8(q8) format for the 256 point */
  /* data is in 6.10(q10) format for the 64 point */
  /* data is in 4.12(q12) format for the 16 point */

  /* start of last stage process (no twiddle multiplication) */

  /* Initializations for the last stage */
  n1 = n2;
  n2 >>= 2U;

  /* One butterfly per group of n1 samples */
  for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
  {
    /* Complex indices of b, c and d */
    i1 = i0 + n2;
    i2 = i1 + n2;
    i3 = i2 + n2;

    /* Load a: T0 = xa, T1 = ya */
    T0 = pSrc16[i0 * 2U];
    T1 = pSrc16[(i0 * 2U) + 1U];

    /* Load c: S0 = xc, S1 = yc */
    S0 = pSrc16[i2 * 2U];
    S1 = pSrc16[(i2 * 2U) + 1U];

    /* R0 = (xa + xc), R1 = (ya + yc) */
    R0 = __SSAT(T0 + S0, 16U);
    R1 = __SSAT(T1 + S1, 16U);

    /* S0 = (xa - xc), S1 = (ya - yc) */
    S0 = __SSAT(T0 - S0, 16U);
    S1 = __SSAT(T1 - S1, 16U);

    /* Load b: T0 = xb, T1 = yb */
    T0 = pSrc16[i1 * 2U];
    T1 = pSrc16[(i1 * 2U) + 1U];

    /* Load d: U0 = xd, U1 = yd */
    U0 = pSrc16[i3 * 2U];
    U1 = pSrc16[(i3 * 2U) + 1U];

    /* T0 = (xb + xd), T1 = (yb + yd) */
    T0 = __SSAT(T0 + U0, 16U);
    T1 = __SSAT(T1 + U1, 16U);

    /* Store a' (halved):
       xa' = xa + xb + xc + xd
       ya' = ya + yb + yc + yd */
    pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
    pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);

    /* R0 = ((xa + xc) - (xb + xd)) / 2, R1 = ((ya + yc) - (yb + yd)) / 2 */
    R0 = (R0 >> 1U) - (T0 >> 1U);
    R1 = (R1 >> 1U) - (T1 >> 1U);

    /* Reload b (T0/T1 currently hold b + d) before its slot is overwritten */
    T0 = pSrc16[i1 * 2U];
    T1 = pSrc16[(i1 * 2U) + 1U];

    /* Store c' in the slot of b:
       xc' = (xa-xb+xc-xd)
       yc' = (ya-yb+yc-yd) */
    pSrc16[i1 * 2U] = R0;
    pSrc16[(i1 * 2U) + 1U] = R1;

    /* Reload d */
    U0 = pSrc16[i3 * 2U];
    U1 = pSrc16[(i3 * 2U) + 1U];

    /* T0 = (xb - xd), T1 = (yb - yd) */
    T0 = __SSAT(T0 - U0, 16U);
    T1 = __SSAT(T1 - U1, 16U);

    /* Store b' in the slot of c (halved):
       xb' = (xa-yb-xc+yd)
       yb' = (ya+xb-yc-xd) */
    pSrc16[i2 * 2U] = (S0 >> 1U) - (T1 >> 1U);
    pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U);

    /* Store d' in the slot of d (halved):
       xd' = (xa+yb-xc-yd)
       yd' = (ya-xb-yc+xd) */
    pSrc16[i3 * 2U] = (S0 >> 1U) + (T1 >> 1U);
    pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U);
  }
  /* end of last stage process */

  /* output is in 11.5(q5) format for the 1024 point */
  /* output is in 9.7(q7) format for the 256 point */
  /* output is in 7.9(q9) format for the 64 point */
  /* output is in 5.11(q11) format for the 16 point */

#endif /* #if defined (ARM_MATH_DSP) */

}
1819