1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cfft_radix4_f16.c
4 * Description: Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/transform_functions_f16.h"
30
31 #if defined(ARM_FLOAT16_SUPPORTED)
32
33 extern void arm_bitreversal_f16(
34 float16_t * pSrc,
35 uint16_t fftSize,
36 uint16_t bitRevFactor,
37 const uint16_t * pBitRevTab);
38
39 void arm_radix4_butterfly_f16(
40 float16_t * pSrc,
41 uint16_t fftLen,
42 const float16_t * pCoef,
43 uint16_t twidCoefModifier);
44
45 void arm_radix4_butterfly_inverse_f16(
46 float16_t * pSrc,
47 uint16_t fftLen,
48 const float16_t * pCoef,
49 uint16_t twidCoefModifier,
50 float16_t onebyfftLen);
51
52
53 void arm_cfft_radix4by2_f16(
54 float16_t * pSrc,
55 uint32_t fftLen,
56 const float16_t * pCoef);
57
58
59 /**
60 @ingroup groupTransforms
61 */
62
63 /**
64 @addtogroup ComplexFFT
65 @{
66 */
67
68 /*
69 * @brief Core function for the floating-point CFFT butterfly process.
70 * @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
71 * @param[in] fftLen length of the FFT.
72 * @param[in] *pCoef points to the twiddle coefficient buffer.
73 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
74 * @return none.
75 */
76
arm_cfft_radix4by2_f16(float16_t * pSrc,uint32_t fftLen,const float16_t * pCoef)77 void arm_cfft_radix4by2_f16(
78 float16_t * pSrc,
79 uint32_t fftLen,
80 const float16_t * pCoef)
81 {
82 uint32_t i, l;
83 uint32_t n2, ia;
84 float16_t xt, yt, cosVal, sinVal;
85 float16_t p0, p1,p2,p3,a0,a1;
86
87 n2 = fftLen >> 1;
88 ia = 0;
89 for (i = 0; i < n2; i++)
90 {
91 cosVal = pCoef[2*ia];
92 sinVal = pCoef[2*ia + 1];
93 ia++;
94
95 l = i + n2;
96
97 /* Butterfly implementation */
98 a0 = pSrc[2 * i] + pSrc[2 * l];
99 xt = pSrc[2 * i] - pSrc[2 * l];
100
101 yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
102 a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
103
104 p0 = xt * cosVal;
105 p1 = yt * sinVal;
106 p2 = yt * cosVal;
107 p3 = xt * sinVal;
108
109 pSrc[2 * i] = a0;
110 pSrc[2 * i + 1] = a1;
111
112 pSrc[2 * l] = p0 + p1;
113 pSrc[2 * l + 1] = p2 - p3;
114
115 }
116
117 // first col
118 arm_radix4_butterfly_f16( pSrc, n2, (float16_t*)pCoef, 2U);
119 // second col
120 arm_radix4_butterfly_f16( pSrc + fftLen, n2, (float16_t*)pCoef, 2U);
121
122 }
123
124
125 /**
126 @brief Processing function for the floating-point Radix-4 CFFT/CIFFT.
127 @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f16 and will be removed in the future.
128 @param[in] S points to an instance of the floating-point Radix-4 CFFT/CIFFT structure
129 @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
130 @return none
131 */
132
arm_cfft_radix4_f16(const arm_cfft_radix4_instance_f16 * S,float16_t * pSrc)133 void arm_cfft_radix4_f16(
134 const arm_cfft_radix4_instance_f16 * S,
135 float16_t * pSrc)
136 {
137 if (S->ifftFlag == 1U)
138 {
139 /* Complex IFFT radix-4 */
140 arm_radix4_butterfly_inverse_f16(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
141 }
142 else
143 {
144 /* Complex FFT radix-4 */
145 arm_radix4_butterfly_f16(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
146 }
147
148 if (S->bitReverseFlag == 1U)
149 {
150 /* Bit Reversal */
151 arm_bitreversal_f16(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
152 }
153
154 }
155
156 /**
157 @} end of ComplexFFT group
158 */
159
160 /* ----------------------------------------------------------------------
161 * Internal helper function used by the FFTs
162 * ---------------------------------------------------------------------- */
163
164 /*
165 * @brief Core function for the floating-point CFFT butterfly process.
166 * @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
167 * @param[in] fftLen length of the FFT.
168 * @param[in] *pCoef points to the twiddle coefficient buffer.
169 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
170 * @return none.
171 */
172
arm_radix4_butterfly_f16(float16_t * pSrc,uint16_t fftLen,const float16_t * pCoef,uint16_t twidCoefModifier)173 void arm_radix4_butterfly_f16(
174 float16_t * pSrc,
175 uint16_t fftLen,
176 const float16_t * pCoef,
177 uint16_t twidCoefModifier)
178 {
179
180 float16_t co1, co2, co3, si1, si2, si3;
181 uint32_t ia1, ia2, ia3;
182 uint32_t i0, i1, i2, i3;
183 uint32_t n1, n2, j, k;
184
185 #if defined (ARM_MATH_DSP)
186
187 /* Run the below code for Cortex-M4 and Cortex-M3 */
188
189 float16_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
190 float16_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
191 Ybminusd;
192 float16_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
193 float16_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
194 float16_t *ptr1;
195 float16_t p0,p1,p2,p3,p4,p5;
196 float16_t a0,a1,a2,a3,a4,a5,a6,a7;
197
198 /* Initializations for the first stage */
199 n2 = fftLen;
200 n1 = n2;
201
202 /* n2 = fftLen/4 */
203 n2 >>= 2U;
204 i0 = 0U;
205 ia1 = 0U;
206
207 j = n2;
208
209 /* Calculation of first stage */
210 do
211 {
212 /* index calculation for the input as, */
213 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
214 i1 = i0 + n2;
215 i2 = i1 + n2;
216 i3 = i2 + n2;
217
218 xaIn = pSrc[(2U * i0)];
219 yaIn = pSrc[(2U * i0) + 1U];
220
221 xbIn = pSrc[(2U * i1)];
222 ybIn = pSrc[(2U * i1) + 1U];
223
224 xcIn = pSrc[(2U * i2)];
225 ycIn = pSrc[(2U * i2) + 1U];
226
227 xdIn = pSrc[(2U * i3)];
228 ydIn = pSrc[(2U * i3) + 1U];
229
230 /* xa + xc */
231 Xaplusc = xaIn + xcIn;
232 /* xb + xd */
233 Xbplusd = xbIn + xdIn;
234 /* ya + yc */
235 Yaplusc = yaIn + ycIn;
236 /* yb + yd */
237 Ybplusd = ybIn + ydIn;
238
239 /* index calculation for the coefficients */
240 ia2 = ia1 + ia1;
241 co2 = pCoef[ia2 * 2U];
242 si2 = pCoef[(ia2 * 2U) + 1U];
243
244 /* xa - xc */
245 Xaminusc = xaIn - xcIn;
246 /* xb - xd */
247 Xbminusd = xbIn - xdIn;
248 /* ya - yc */
249 Yaminusc = yaIn - ycIn;
250 /* yb - yd */
251 Ybminusd = ybIn - ydIn;
252
253 /* xa' = xa + xb + xc + xd */
254 pSrc[(2U * i0)] = Xaplusc + Xbplusd;
255 /* ya' = ya + yb + yc + yd */
256 pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
257
258 /* (xa - xc) + (yb - yd) */
259 Xb12C_out = (Xaminusc + Ybminusd);
260 /* (ya - yc) + (xb - xd) */
261 Yb12C_out = (Yaminusc - Xbminusd);
262 /* (xa + xc) - (xb + xd) */
263 Xc12C_out = (Xaplusc - Xbplusd);
264 /* (ya + yc) - (yb + yd) */
265 Yc12C_out = (Yaplusc - Ybplusd);
266 /* (xa - xc) - (yb - yd) */
267 Xd12C_out = (Xaminusc - Ybminusd);
268 /* (ya - yc) + (xb - xd) */
269 Yd12C_out = (Xbminusd + Yaminusc);
270
271 co1 = pCoef[ia1 * 2U];
272 si1 = pCoef[(ia1 * 2U) + 1U];
273
274 /* index calculation for the coefficients */
275 ia3 = ia2 + ia1;
276 co3 = pCoef[ia3 * 2U];
277 si3 = pCoef[(ia3 * 2U) + 1U];
278
279 Xb12_out = Xb12C_out * co1;
280 Yb12_out = Yb12C_out * co1;
281 Xc12_out = Xc12C_out * co2;
282 Yc12_out = Yc12C_out * co2;
283 Xd12_out = Xd12C_out * co3;
284 Yd12_out = Yd12C_out * co3;
285
286 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
287 //Xb12_out -= Yb12C_out * si1;
288 p0 = Yb12C_out * si1;
289 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
290 //Yb12_out += Xb12C_out * si1;
291 p1 = Xb12C_out * si1;
292 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
293 //Xc12_out -= Yc12C_out * si2;
294 p2 = Yc12C_out * si2;
295 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
296 //Yc12_out += Xc12C_out * si2;
297 p3 = Xc12C_out * si2;
298 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
299 //Xd12_out -= Yd12C_out * si3;
300 p4 = Yd12C_out * si3;
301 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
302 //Yd12_out += Xd12C_out * si3;
303 p5 = Xd12C_out * si3;
304
305 Xb12_out += p0;
306 Yb12_out -= p1;
307 Xc12_out += p2;
308 Yc12_out -= p3;
309 Xd12_out += p4;
310 Yd12_out -= p5;
311
312 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
313 pSrc[2U * i1] = Xc12_out;
314
315 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
316 pSrc[(2U * i1) + 1U] = Yc12_out;
317
318 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
319 pSrc[2U * i2] = Xb12_out;
320
321 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
322 pSrc[(2U * i2) + 1U] = Yb12_out;
323
324 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
325 pSrc[2U * i3] = Xd12_out;
326
327 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
328 pSrc[(2U * i3) + 1U] = Yd12_out;
329
330 /* Twiddle coefficients index modifier */
331 ia1 += twidCoefModifier;
332
333 /* Updating input index */
334 i0++;
335
336 }
337 while (--j);
338
339 twidCoefModifier <<= 2U;
340
341 /* Calculation of second stage to excluding last stage */
342 for (k = fftLen >> 2U; k > 4U; k >>= 2U)
343 {
344 /* Initializations for the first stage */
345 n1 = n2;
346 n2 >>= 2U;
347 ia1 = 0U;
348
349 /* Calculation of first stage */
350 j = 0;
351 do
352 {
353 /* index calculation for the coefficients */
354 ia2 = ia1 + ia1;
355 ia3 = ia2 + ia1;
356 co1 = pCoef[ia1 * 2U];
357 si1 = pCoef[(ia1 * 2U) + 1U];
358 co2 = pCoef[ia2 * 2U];
359 si2 = pCoef[(ia2 * 2U) + 1U];
360 co3 = pCoef[ia3 * 2U];
361 si3 = pCoef[(ia3 * 2U) + 1U];
362
363 /* Twiddle coefficients index modifier */
364 ia1 += twidCoefModifier;
365
366 i0 = j;
367 do
368 {
369 /* index calculation for the input as, */
370 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
371 i1 = i0 + n2;
372 i2 = i1 + n2;
373 i3 = i2 + n2;
374
375 xaIn = pSrc[(2U * i0)];
376 yaIn = pSrc[(2U * i0) + 1U];
377
378 xbIn = pSrc[(2U * i1)];
379 ybIn = pSrc[(2U * i1) + 1U];
380
381 xcIn = pSrc[(2U * i2)];
382 ycIn = pSrc[(2U * i2) + 1U];
383
384 xdIn = pSrc[(2U * i3)];
385 ydIn = pSrc[(2U * i3) + 1U];
386
387 /* xa - xc */
388 Xaminusc = xaIn - xcIn;
389 /* (xb - xd) */
390 Xbminusd = xbIn - xdIn;
391 /* ya - yc */
392 Yaminusc = yaIn - ycIn;
393 /* (yb - yd) */
394 Ybminusd = ybIn - ydIn;
395
396 /* xa + xc */
397 Xaplusc = xaIn + xcIn;
398 /* xb + xd */
399 Xbplusd = xbIn + xdIn;
400 /* ya + yc */
401 Yaplusc = yaIn + ycIn;
402 /* yb + yd */
403 Ybplusd = ybIn + ydIn;
404
405 /* (xa - xc) + (yb - yd) */
406 Xb12C_out = (Xaminusc + Ybminusd);
407 /* (ya - yc) - (xb - xd) */
408 Yb12C_out = (Yaminusc - Xbminusd);
409 /* xa + xc -(xb + xd) */
410 Xc12C_out = (Xaplusc - Xbplusd);
411 /* (ya + yc) - (yb + yd) */
412 Yc12C_out = (Yaplusc - Ybplusd);
413 /* (xa - xc) - (yb - yd) */
414 Xd12C_out = (Xaminusc - Ybminusd);
415 /* (ya - yc) + (xb - xd) */
416 Yd12C_out = (Xbminusd + Yaminusc);
417
418 pSrc[(2U * i0)] = Xaplusc + Xbplusd;
419 pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
420
421 Xb12_out = Xb12C_out * co1;
422 Yb12_out = Yb12C_out * co1;
423 Xc12_out = Xc12C_out * co2;
424 Yc12_out = Yc12C_out * co2;
425 Xd12_out = Xd12C_out * co3;
426 Yd12_out = Yd12C_out * co3;
427
428 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
429 //Xb12_out -= Yb12C_out * si1;
430 p0 = Yb12C_out * si1;
431 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
432 //Yb12_out += Xb12C_out * si1;
433 p1 = Xb12C_out * si1;
434 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
435 //Xc12_out -= Yc12C_out * si2;
436 p2 = Yc12C_out * si2;
437 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
438 //Yc12_out += Xc12C_out * si2;
439 p3 = Xc12C_out * si2;
440 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
441 //Xd12_out -= Yd12C_out * si3;
442 p4 = Yd12C_out * si3;
443 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
444 //Yd12_out += Xd12C_out * si3;
445 p5 = Xd12C_out * si3;
446
447 Xb12_out += p0;
448 Yb12_out -= p1;
449 Xc12_out += p2;
450 Yc12_out -= p3;
451 Xd12_out += p4;
452 Yd12_out -= p5;
453
454 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
455 pSrc[2U * i1] = Xc12_out;
456
457 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
458 pSrc[(2U * i1) + 1U] = Yc12_out;
459
460 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
461 pSrc[2U * i2] = Xb12_out;
462
463 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
464 pSrc[(2U * i2) + 1U] = Yb12_out;
465
466 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
467 pSrc[2U * i3] = Xd12_out;
468
469 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
470 pSrc[(2U * i3) + 1U] = Yd12_out;
471
472 i0 += n1;
473 } while (i0 < fftLen);
474 j++;
475 } while (j <= (n2 - 1U));
476 twidCoefModifier <<= 2U;
477 }
478
479 j = fftLen >> 2;
480 ptr1 = &pSrc[0];
481
482 /* Calculations of last stage */
483 do
484 {
485 xaIn = ptr1[0];
486 yaIn = ptr1[1];
487 xbIn = ptr1[2];
488 ybIn = ptr1[3];
489 xcIn = ptr1[4];
490 ycIn = ptr1[5];
491 xdIn = ptr1[6];
492 ydIn = ptr1[7];
493
494 /* xa + xc */
495 Xaplusc = xaIn + xcIn;
496
497 /* xa - xc */
498 Xaminusc = xaIn - xcIn;
499
500 /* ya + yc */
501 Yaplusc = yaIn + ycIn;
502
503 /* ya - yc */
504 Yaminusc = yaIn - ycIn;
505
506 /* xb + xd */
507 Xbplusd = xbIn + xdIn;
508
509 /* yb + yd */
510 Ybplusd = ybIn + ydIn;
511
512 /* (xb-xd) */
513 Xbminusd = xbIn - xdIn;
514
515 /* (yb-yd) */
516 Ybminusd = ybIn - ydIn;
517
518 /* xa' = xa + xb + xc + xd */
519 a0 = (Xaplusc + Xbplusd);
520 /* ya' = ya + yb + yc + yd */
521 a1 = (Yaplusc + Ybplusd);
522 /* xc' = (xa-xb+xc-xd) */
523 a2 = (Xaplusc - Xbplusd);
524 /* yc' = (ya-yb+yc-yd) */
525 a3 = (Yaplusc - Ybplusd);
526 /* xb' = (xa+yb-xc-yd) */
527 a4 = (Xaminusc + Ybminusd);
528 /* yb' = (ya-xb-yc+xd) */
529 a5 = (Yaminusc - Xbminusd);
530 /* xd' = (xa-yb-xc+yd)) */
531 a6 = (Xaminusc - Ybminusd);
532 /* yd' = (ya+xb-yc-xd) */
533 a7 = (Xbminusd + Yaminusc);
534
535 ptr1[0] = a0;
536 ptr1[1] = a1;
537 ptr1[2] = a2;
538 ptr1[3] = a3;
539 ptr1[4] = a4;
540 ptr1[5] = a5;
541 ptr1[6] = a6;
542 ptr1[7] = a7;
543
544 /* increment pointer by 8 */
545 ptr1 += 8U;
546 } while (--j);
547
548 #else
549
550 float16_t t1, t2, r1, r2, s1, s2;
551
552 /* Run the below code for Cortex-M0 */
553
554 /* Initializations for the fft calculation */
555 n2 = fftLen;
556 n1 = n2;
557 for (k = fftLen; k > 1U; k >>= 2U)
558 {
559 /* Initializations for the fft calculation */
560 n1 = n2;
561 n2 >>= 2U;
562 ia1 = 0U;
563
564 /* FFT Calculation */
565 j = 0;
566 do
567 {
568 /* index calculation for the coefficients */
569 ia2 = ia1 + ia1;
570 ia3 = ia2 + ia1;
571 co1 = pCoef[ia1 * 2U];
572 si1 = pCoef[(ia1 * 2U) + 1U];
573 co2 = pCoef[ia2 * 2U];
574 si2 = pCoef[(ia2 * 2U) + 1U];
575 co3 = pCoef[ia3 * 2U];
576 si3 = pCoef[(ia3 * 2U) + 1U];
577
578 /* Twiddle coefficients index modifier */
579 ia1 = ia1 + twidCoefModifier;
580
581 i0 = j;
582 do
583 {
584 /* index calculation for the input as, */
585 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
586 i1 = i0 + n2;
587 i2 = i1 + n2;
588 i3 = i2 + n2;
589
590 /* xa + xc */
591 r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
592
593 /* xa - xc */
594 r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
595
596 /* ya + yc */
597 s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
598
599 /* ya - yc */
600 s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
601
602 /* xb + xd */
603 t1 = pSrc[2U * i1] + pSrc[2U * i3];
604
605 /* xa' = xa + xb + xc + xd */
606 pSrc[2U * i0] = r1 + t1;
607
608 /* xa + xc -(xb + xd) */
609 r1 = r1 - t1;
610
611 /* yb + yd */
612 t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
613
614 /* ya' = ya + yb + yc + yd */
615 pSrc[(2U * i0) + 1U] = s1 + t2;
616
617 /* (ya + yc) - (yb + yd) */
618 s1 = s1 - t2;
619
620 /* (yb - yd) */
621 t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
622
623 /* (xb - xd) */
624 t2 = pSrc[2U * i1] - pSrc[2U * i3];
625
626 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
627 pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
628
629 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
630 pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
631
632 /* (xa - xc) + (yb - yd) */
633 r1 = r2 + t1;
634
635 /* (xa - xc) - (yb - yd) */
636 r2 = r2 - t1;
637
638 /* (ya - yc) - (xb - xd) */
639 s1 = s2 - t2;
640
641 /* (ya - yc) + (xb - xd) */
642 s2 = s2 + t2;
643
644 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
645 pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
646
647 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
648 pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
649
650 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
651 pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
652
653 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
654 pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
655
656 i0 += n1;
657 } while ( i0 < fftLen);
658 j++;
659 } while (j <= (n2 - 1U));
660 twidCoefModifier <<= 2U;
661 }
662
663 #endif /* #if defined (ARM_MATH_DSP) */
664
665 }
666
667 /*
668 * @brief Core function for the floating-point CIFFT butterfly process.
669 * @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
670 * @param[in] fftLen length of the FFT.
671 * @param[in] *pCoef points to twiddle coefficient buffer.
672 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
673 * @param[in] onebyfftLen value of 1/fftLen.
674 * @return none.
675 */
676
arm_radix4_butterfly_inverse_f16(float16_t * pSrc,uint16_t fftLen,const float16_t * pCoef,uint16_t twidCoefModifier,float16_t onebyfftLen)677 void arm_radix4_butterfly_inverse_f16(
678 float16_t * pSrc,
679 uint16_t fftLen,
680 const float16_t * pCoef,
681 uint16_t twidCoefModifier,
682 float16_t onebyfftLen)
683 {
684 float16_t co1, co2, co3, si1, si2, si3;
685 uint32_t ia1, ia2, ia3;
686 uint32_t i0, i1, i2, i3;
687 uint32_t n1, n2, j, k;
688
689 #if defined (ARM_MATH_DSP)
690
691 float16_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
692 float16_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
693 Ybminusd;
694 float16_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
695 float16_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
696 float16_t *ptr1;
697 float16_t p0,p1,p2,p3,p4,p5,p6,p7;
698 float16_t a0,a1,a2,a3,a4,a5,a6,a7;
699
700
701 /* Initializations for the first stage */
702 n2 = fftLen;
703 n1 = n2;
704
705 /* n2 = fftLen/4 */
706 n2 >>= 2U;
707 i0 = 0U;
708 ia1 = 0U;
709
710 j = n2;
711
712 /* Calculation of first stage */
713 do
714 {
715 /* index calculation for the input as, */
716 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
717 i1 = i0 + n2;
718 i2 = i1 + n2;
719 i3 = i2 + n2;
720
721 /* Butterfly implementation */
722 xaIn = pSrc[(2U * i0)];
723 yaIn = pSrc[(2U * i0) + 1U];
724
725 xcIn = pSrc[(2U * i2)];
726 ycIn = pSrc[(2U * i2) + 1U];
727
728 xbIn = pSrc[(2U * i1)];
729 ybIn = pSrc[(2U * i1) + 1U];
730
731 xdIn = pSrc[(2U * i3)];
732 ydIn = pSrc[(2U * i3) + 1U];
733
734 /* xa + xc */
735 Xaplusc = xaIn + xcIn;
736 /* xb + xd */
737 Xbplusd = xbIn + xdIn;
738 /* ya + yc */
739 Yaplusc = yaIn + ycIn;
740 /* yb + yd */
741 Ybplusd = ybIn + ydIn;
742
743 /* index calculation for the coefficients */
744 ia2 = ia1 + ia1;
745 co2 = pCoef[ia2 * 2U];
746 si2 = pCoef[(ia2 * 2U) + 1U];
747
748 /* xa - xc */
749 Xaminusc = xaIn - xcIn;
750 /* xb - xd */
751 Xbminusd = xbIn - xdIn;
752 /* ya - yc */
753 Yaminusc = yaIn - ycIn;
754 /* yb - yd */
755 Ybminusd = ybIn - ydIn;
756
757 /* xa' = xa + xb + xc + xd */
758 pSrc[(2U * i0)] = Xaplusc + Xbplusd;
759
760 /* ya' = ya + yb + yc + yd */
761 pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
762
763 /* (xa - xc) - (yb - yd) */
764 Xb12C_out = (Xaminusc - Ybminusd);
765 /* (ya - yc) + (xb - xd) */
766 Yb12C_out = (Yaminusc + Xbminusd);
767 /* (xa + xc) - (xb + xd) */
768 Xc12C_out = (Xaplusc - Xbplusd);
769 /* (ya + yc) - (yb + yd) */
770 Yc12C_out = (Yaplusc - Ybplusd);
771 /* (xa - xc) + (yb - yd) */
772 Xd12C_out = (Xaminusc + Ybminusd);
773 /* (ya - yc) - (xb - xd) */
774 Yd12C_out = (Yaminusc - Xbminusd);
775
776 co1 = pCoef[ia1 * 2U];
777 si1 = pCoef[(ia1 * 2U) + 1U];
778
779 /* index calculation for the coefficients */
780 ia3 = ia2 + ia1;
781 co3 = pCoef[ia3 * 2U];
782 si3 = pCoef[(ia3 * 2U) + 1U];
783
784 Xb12_out = Xb12C_out * co1;
785 Yb12_out = Yb12C_out * co1;
786 Xc12_out = Xc12C_out * co2;
787 Yc12_out = Yc12C_out * co2;
788 Xd12_out = Xd12C_out * co3;
789 Yd12_out = Yd12C_out * co3;
790
791 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
792 //Xb12_out -= Yb12C_out * si1;
793 p0 = Yb12C_out * si1;
794 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
795 //Yb12_out += Xb12C_out * si1;
796 p1 = Xb12C_out * si1;
797 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
798 //Xc12_out -= Yc12C_out * si2;
799 p2 = Yc12C_out * si2;
800 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
801 //Yc12_out += Xc12C_out * si2;
802 p3 = Xc12C_out * si2;
803 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
804 //Xd12_out -= Yd12C_out * si3;
805 p4 = Yd12C_out * si3;
806 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
807 //Yd12_out += Xd12C_out * si3;
808 p5 = Xd12C_out * si3;
809
810 Xb12_out -= p0;
811 Yb12_out += p1;
812 Xc12_out -= p2;
813 Yc12_out += p3;
814 Xd12_out -= p4;
815 Yd12_out += p5;
816
817 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
818 pSrc[2U * i1] = Xc12_out;
819
820 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
821 pSrc[(2U * i1) + 1U] = Yc12_out;
822
823 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
824 pSrc[2U * i2] = Xb12_out;
825
826 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
827 pSrc[(2U * i2) + 1U] = Yb12_out;
828
829 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
830 pSrc[2U * i3] = Xd12_out;
831
832 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
833 pSrc[(2U * i3) + 1U] = Yd12_out;
834
835 /* Twiddle coefficients index modifier */
836 ia1 = ia1 + twidCoefModifier;
837
838 /* Updating input index */
839 i0 = i0 + 1U;
840
841 } while (--j);
842
843 twidCoefModifier <<= 2U;
844
845 /* Calculation of second stage to excluding last stage */
846 for (k = fftLen >> 2U; k > 4U; k >>= 2U)
847 {
848 /* Initializations for the first stage */
849 n1 = n2;
850 n2 >>= 2U;
851 ia1 = 0U;
852
853 /* Calculation of first stage */
854 j = 0;
855 do
856 {
857 /* index calculation for the coefficients */
858 ia2 = ia1 + ia1;
859 ia3 = ia2 + ia1;
860 co1 = pCoef[ia1 * 2U];
861 si1 = pCoef[(ia1 * 2U) + 1U];
862 co2 = pCoef[ia2 * 2U];
863 si2 = pCoef[(ia2 * 2U) + 1U];
864 co3 = pCoef[ia3 * 2U];
865 si3 = pCoef[(ia3 * 2U) + 1U];
866
867 /* Twiddle coefficients index modifier */
868 ia1 = ia1 + twidCoefModifier;
869
870 i0 = j;
871 do
872 {
873 /* index calculation for the input as, */
874 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
875 i1 = i0 + n2;
876 i2 = i1 + n2;
877 i3 = i2 + n2;
878
879 xaIn = pSrc[(2U * i0)];
880 yaIn = pSrc[(2U * i0) + 1U];
881
882 xbIn = pSrc[(2U * i1)];
883 ybIn = pSrc[(2U * i1) + 1U];
884
885 xcIn = pSrc[(2U * i2)];
886 ycIn = pSrc[(2U * i2) + 1U];
887
888 xdIn = pSrc[(2U * i3)];
889 ydIn = pSrc[(2U * i3) + 1U];
890
891 /* xa - xc */
892 Xaminusc = xaIn - xcIn;
893 /* (xb - xd) */
894 Xbminusd = xbIn - xdIn;
895 /* ya - yc */
896 Yaminusc = yaIn - ycIn;
897 /* (yb - yd) */
898 Ybminusd = ybIn - ydIn;
899
900 /* xa + xc */
901 Xaplusc = xaIn + xcIn;
902 /* xb + xd */
903 Xbplusd = xbIn + xdIn;
904 /* ya + yc */
905 Yaplusc = yaIn + ycIn;
906 /* yb + yd */
907 Ybplusd = ybIn + ydIn;
908
909 /* (xa - xc) - (yb - yd) */
910 Xb12C_out = (Xaminusc - Ybminusd);
911 /* (ya - yc) + (xb - xd) */
912 Yb12C_out = (Yaminusc + Xbminusd);
913 /* xa + xc -(xb + xd) */
914 Xc12C_out = (Xaplusc - Xbplusd);
915 /* (ya + yc) - (yb + yd) */
916 Yc12C_out = (Yaplusc - Ybplusd);
917 /* (xa - xc) + (yb - yd) */
918 Xd12C_out = (Xaminusc + Ybminusd);
919 /* (ya - yc) - (xb - xd) */
920 Yd12C_out = (Yaminusc - Xbminusd);
921
922 pSrc[(2U * i0)] = Xaplusc + Xbplusd;
923 pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
924
925 Xb12_out = Xb12C_out * co1;
926 Yb12_out = Yb12C_out * co1;
927 Xc12_out = Xc12C_out * co2;
928 Yc12_out = Yc12C_out * co2;
929 Xd12_out = Xd12C_out * co3;
930 Yd12_out = Yd12C_out * co3;
931
932 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
933 //Xb12_out -= Yb12C_out * si1;
934 p0 = Yb12C_out * si1;
935 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
936 //Yb12_out += Xb12C_out * si1;
937 p1 = Xb12C_out * si1;
938 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
939 //Xc12_out -= Yc12C_out * si2;
940 p2 = Yc12C_out * si2;
941 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
942 //Yc12_out += Xc12C_out * si2;
943 p3 = Xc12C_out * si2;
944 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
945 //Xd12_out -= Yd12C_out * si3;
946 p4 = Yd12C_out * si3;
947 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
948 //Yd12_out += Xd12C_out * si3;
949 p5 = Xd12C_out * si3;
950
951 Xb12_out -= p0;
952 Yb12_out += p1;
953 Xc12_out -= p2;
954 Yc12_out += p3;
955 Xd12_out -= p4;
956 Yd12_out += p5;
957
958 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
959 pSrc[2U * i1] = Xc12_out;
960
961 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
962 pSrc[(2U * i1) + 1U] = Yc12_out;
963
964 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
965 pSrc[2U * i2] = Xb12_out;
966
967 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
968 pSrc[(2U * i2) + 1U] = Yb12_out;
969
970 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
971 pSrc[2U * i3] = Xd12_out;
972
973 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
974 pSrc[(2U * i3) + 1U] = Yd12_out;
975
976 i0 += n1;
977 } while (i0 < fftLen);
978 j++;
979 } while (j <= (n2 - 1U));
980 twidCoefModifier <<= 2U;
981 }
982 /* Initializations of last stage */
983
984 j = fftLen >> 2;
985 ptr1 = &pSrc[0];
986
987 /* Calculations of last stage */
988 do
989 {
990 xaIn = ptr1[0];
991 yaIn = ptr1[1];
992 xbIn = ptr1[2];
993 ybIn = ptr1[3];
994 xcIn = ptr1[4];
995 ycIn = ptr1[5];
996 xdIn = ptr1[6];
997 ydIn = ptr1[7];
998
999 /* Butterfly implementation */
1000 /* xa + xc */
1001 Xaplusc = xaIn + xcIn;
1002
1003 /* xa - xc */
1004 Xaminusc = xaIn - xcIn;
1005
1006 /* ya + yc */
1007 Yaplusc = yaIn + ycIn;
1008
1009 /* ya - yc */
1010 Yaminusc = yaIn - ycIn;
1011
1012 /* xb + xd */
1013 Xbplusd = xbIn + xdIn;
1014
1015 /* yb + yd */
1016 Ybplusd = ybIn + ydIn;
1017
1018 /* (xb-xd) */
1019 Xbminusd = xbIn - xdIn;
1020
1021 /* (yb-yd) */
1022 Ybminusd = ybIn - ydIn;
1023
1024 /* xa' = (xa+xb+xc+xd) * onebyfftLen */
1025 a0 = (Xaplusc + Xbplusd);
1026 /* ya' = (ya+yb+yc+yd) * onebyfftLen */
1027 a1 = (Yaplusc + Ybplusd);
1028 /* xc' = (xa-xb+xc-xd) * onebyfftLen */
1029 a2 = (Xaplusc - Xbplusd);
1030 /* yc' = (ya-yb+yc-yd) * onebyfftLen */
1031 a3 = (Yaplusc - Ybplusd);
1032 /* xb' = (xa-yb-xc+yd) * onebyfftLen */
1033 a4 = (Xaminusc - Ybminusd);
1034 /* yb' = (ya+xb-yc-xd) * onebyfftLen */
1035 a5 = (Yaminusc + Xbminusd);
1036 /* xd' = (xa-yb-xc+yd) * onebyfftLen */
1037 a6 = (Xaminusc + Ybminusd);
1038 /* yd' = (ya-xb-yc+xd) * onebyfftLen */
1039 a7 = (Yaminusc - Xbminusd);
1040
1041 p0 = a0 * onebyfftLen;
1042 p1 = a1 * onebyfftLen;
1043 p2 = a2 * onebyfftLen;
1044 p3 = a3 * onebyfftLen;
1045 p4 = a4 * onebyfftLen;
1046 p5 = a5 * onebyfftLen;
1047 p6 = a6 * onebyfftLen;
1048 p7 = a7 * onebyfftLen;
1049
1050 /* xa' = (xa+xb+xc+xd) * onebyfftLen */
1051 ptr1[0] = p0;
1052 /* ya' = (ya+yb+yc+yd) * onebyfftLen */
1053 ptr1[1] = p1;
1054 /* xc' = (xa-xb+xc-xd) * onebyfftLen */
1055 ptr1[2] = p2;
1056 /* yc' = (ya-yb+yc-yd) * onebyfftLen */
1057 ptr1[3] = p3;
1058 /* xb' = (xa-yb-xc+yd) * onebyfftLen */
1059 ptr1[4] = p4;
1060 /* yb' = (ya+xb-yc-xd) * onebyfftLen */
1061 ptr1[5] = p5;
1062 /* xd' = (xa-yb-xc+yd) * onebyfftLen */
1063 ptr1[6] = p6;
1064 /* yd' = (ya-xb-yc+xd) * onebyfftLen */
1065 ptr1[7] = p7;
1066
1067 /* increment source pointer by 8 for next calculations */
1068 ptr1 = ptr1 + 8U;
1069
1070 } while (--j);
1071
1072 #else
1073
1074 float16_t t1, t2, r1, r2, s1, s2;
1075
1076 /* Run the below code for Cortex-M0 */
1077
1078 /* Initializations for the first stage */
1079 n2 = fftLen;
1080 n1 = n2;
1081
1082 /* Calculation of first stage */
1083 for (k = fftLen; k > 4U; k >>= 2U)
1084 {
1085 /* Initializations for the first stage */
1086 n1 = n2;
1087 n2 >>= 2U;
1088 ia1 = 0U;
1089
1090 /* Calculation of first stage */
1091 j = 0;
1092 do
1093 {
1094 /* index calculation for the coefficients */
1095 ia2 = ia1 + ia1;
1096 ia3 = ia2 + ia1;
1097 co1 = pCoef[ia1 * 2U];
1098 si1 = pCoef[(ia1 * 2U) + 1U];
1099 co2 = pCoef[ia2 * 2U];
1100 si2 = pCoef[(ia2 * 2U) + 1U];
1101 co3 = pCoef[ia3 * 2U];
1102 si3 = pCoef[(ia3 * 2U) + 1U];
1103
1104 /* Twiddle coefficients index modifier */
1105 ia1 = ia1 + twidCoefModifier;
1106
1107 i0 = j;
1108 do
1109 {
1110 /* index calculation for the input as, */
1111 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1112 i1 = i0 + n2;
1113 i2 = i1 + n2;
1114 i3 = i2 + n2;
1115
1116 /* xa + xc */
1117 r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
1118
1119 /* xa - xc */
1120 r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
1121
1122 /* ya + yc */
1123 s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
1124
1125 /* ya - yc */
1126 s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
1127
1128 /* xb + xd */
1129 t1 = pSrc[2U * i1] + pSrc[2U * i3];
1130
1131 /* xa' = xa + xb + xc + xd */
1132 pSrc[2U * i0] = r1 + t1;
1133
1134 /* xa + xc -(xb + xd) */
1135 r1 = r1 - t1;
1136
1137 /* yb + yd */
1138 t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
1139
1140 /* ya' = ya + yb + yc + yd */
1141 pSrc[(2U * i0) + 1U] = s1 + t2;
1142
1143 /* (ya + yc) - (yb + yd) */
1144 s1 = s1 - t2;
1145
1146 /* (yb - yd) */
1147 t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
1148
1149 /* (xb - xd) */
1150 t2 = pSrc[2U * i1] - pSrc[2U * i3];
1151
1152 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1153 pSrc[2U * i1] = (r1 * co2) - (s1 * si2);
1154
1155 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1156 pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2);
1157
1158 /* (xa - xc) - (yb - yd) */
1159 r1 = r2 - t1;
1160
1161 /* (xa - xc) + (yb - yd) */
1162 r2 = r2 + t1;
1163
1164 /* (ya - yc) + (xb - xd) */
1165 s1 = s2 + t2;
1166
1167 /* (ya - yc) - (xb - xd) */
1168 s2 = s2 - t2;
1169
1170 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
1171 pSrc[2U * i2] = (r1 * co1) - (s1 * si1);
1172
1173 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
1174 pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1);
1175
1176 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
1177 pSrc[2U * i3] = (r2 * co3) - (s2 * si3);
1178
1179 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
1180 pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3);
1181
1182 i0 += n1;
1183 } while ( i0 < fftLen);
1184 j++;
1185 } while (j <= (n2 - 1U));
1186 twidCoefModifier <<= 2U;
1187 }
1188 /* Initializations of last stage */
1189 n1 = n2;
1190 n2 >>= 2U;
1191
1192 /* Calculations of last stage */
1193 for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
1194 {
1195 /* index calculation for the input as, */
1196 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1197 i1 = i0 + n2;
1198 i2 = i1 + n2;
1199 i3 = i2 + n2;
1200
1201 /* Butterfly implementation */
1202 /* xa + xc */
1203 r1 = pSrc[2U * i0] + pSrc[2U * i2];
1204
1205 /* xa - xc */
1206 r2 = pSrc[2U * i0] - pSrc[2U * i2];
1207
1208 /* ya + yc */
1209 s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
1210
1211 /* ya - yc */
1212 s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
1213
1214 /* xc + xd */
1215 t1 = pSrc[2U * i1] + pSrc[2U * i3];
1216
1217 /* xa' = xa + xb + xc + xd */
1218 pSrc[2U * i0] = (r1 + t1) * onebyfftLen;
1219
1220 /* (xa + xb) - (xc + xd) */
1221 r1 = r1 - t1;
1222
1223 /* yb + yd */
1224 t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
1225
1226 /* ya' = ya + yb + yc + yd */
1227 pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen;
1228
1229 /* (ya + yc) - (yb + yd) */
1230 s1 = s1 - t2;
1231
1232 /* (yb-yd) */
1233 t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
1234
1235 /* (xb-xd) */
1236 t2 = pSrc[2U * i1] - pSrc[2U * i3];
1237
1238 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1239 pSrc[2U * i1] = r1 * onebyfftLen;
1240
1241 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1242 pSrc[(2U * i1) + 1U] = s1 * onebyfftLen;
1243
1244 /* (xa - xc) - (yb-yd) */
1245 r1 = r2 - t1;
1246
1247 /* (xa - xc) + (yb-yd) */
1248 r2 = r2 + t1;
1249
1250 /* (ya - yc) + (xb-xd) */
1251 s1 = s2 + t2;
1252
1253 /* (ya - yc) - (xb-xd) */
1254 s2 = s2 - t2;
1255
1256 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
1257 pSrc[2U * i2] = r1 * onebyfftLen;
1258
1259 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
1260 pSrc[(2U * i2) + 1U] = s1 * onebyfftLen;
1261
1262 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
1263 pSrc[2U * i3] = r2 * onebyfftLen;
1264
1265 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
1266 pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
1267 }
1268
1269 #endif /* #if defined (ARM_MATH_DSP) */
1270 }
1271
1272 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */