• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_cfft_radix4_f32.c
4  * Description:  Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/transform_functions.h"
30 
31 extern void arm_bitreversal_f32(
32         float32_t * pSrc,
33         uint16_t fftSize,
34         uint16_t bitRevFactor,
35   const uint16_t * pBitRevTab);
36 
37 void arm_radix4_butterfly_f32(
38         float32_t * pSrc,
39         uint16_t fftLen,
40   const float32_t * pCoef,
41         uint16_t twidCoefModifier);
42 
43 void arm_radix4_butterfly_inverse_f32(
44         float32_t * pSrc,
45         uint16_t fftLen,
46   const float32_t * pCoef,
47         uint16_t twidCoefModifier,
48         float32_t onebyfftLen);
49 
50 
51 
52 
53 /**
54   @ingroup groupTransforms
55  */
56 
57 /**
58   @addtogroup ComplexFFT
59   @{
60  */
61 
62 
63 /**
64   @brief         Processing function for the floating-point Radix-4 CFFT/CIFFT.
65   @deprecated    Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future.
66   @param[in]     S    points to an instance of the floating-point Radix-4 CFFT/CIFFT structure
67   @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
68   @return        none
69  */
70 
arm_cfft_radix4_f32(const arm_cfft_radix4_instance_f32 * S,float32_t * pSrc)71 void arm_cfft_radix4_f32(
72   const arm_cfft_radix4_instance_f32 * S,
73         float32_t * pSrc)
74 {
75    if (S->ifftFlag == 1U)
76    {
77       /*  Complex IFFT radix-4  */
78       arm_radix4_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
79    }
80    else
81    {
82       /*  Complex FFT radix-4  */
83       arm_radix4_butterfly_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
84    }
85 
86    if (S->bitReverseFlag == 1U)
87    {
88       /*  Bit Reversal */
89       arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
90    }
91 
92 }
93 
94 /**
95   @} end of ComplexFFT group
96  */
97 
98 /* ----------------------------------------------------------------------
99  * Internal helper function used by the FFTs
100  * ---------------------------------------------------------------------- */
101 
102 /**
103   brief         Core function for the floating-point CFFT butterfly process.
104   param[in,out] pSrc             points to the in-place buffer of floating-point data type
105   param[in]     fftLen           length of the FFT
106   param[in]     pCoef            points to the twiddle coefficient buffer
107   param[in]     twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
108   return        none
109  */
110 
arm_radix4_butterfly_f32(float32_t * pSrc,uint16_t fftLen,const float32_t * pCoef,uint16_t twidCoefModifier)111 void arm_radix4_butterfly_f32(
112         float32_t * pSrc,
113         uint16_t fftLen,
114   const float32_t * pCoef,
115         uint16_t twidCoefModifier)
116 {
117         float32_t co1, co2, co3, si1, si2, si3;
118         uint32_t ia1, ia2, ia3;
119         uint32_t i0, i1, i2, i3;
120         uint32_t n1, n2, j, k;
121 
122 #if defined (ARM_MATH_LOOPUNROLL)
123 
124         float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
125         float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
126         Ybminusd;
127         float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
128         float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
129         float32_t *ptr1;
130         float32_t p0,p1,p2,p3,p4,p5;
131         float32_t a0,a1,a2,a3,a4,a5,a6,a7;
132 
133    /*  Initializations for the first stage */
134    n2 = fftLen;
135    n1 = n2;
136 
137    /* n2 = fftLen/4 */
138    n2 >>= 2U;
139    i0 = 0U;
140    ia1 = 0U;
141 
142    j = n2;
143 
144    /*  Calculation of first stage */
145    do
146    {
147       /*  index calculation for the input as, */
148       /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
149       i1 = i0 + n2;
150       i2 = i1 + n2;
151       i3 = i2 + n2;
152 
153       xaIn = pSrc[(2U * i0)];
154       yaIn = pSrc[(2U * i0) + 1U];
155 
156       xbIn = pSrc[(2U * i1)];
157       ybIn = pSrc[(2U * i1) + 1U];
158 
159       xcIn = pSrc[(2U * i2)];
160       ycIn = pSrc[(2U * i2) + 1U];
161 
162       xdIn = pSrc[(2U * i3)];
163       ydIn = pSrc[(2U * i3) + 1U];
164 
165       /* xa + xc */
166       Xaplusc = xaIn + xcIn;
167       /* xb + xd */
168       Xbplusd = xbIn + xdIn;
169       /* ya + yc */
170       Yaplusc = yaIn + ycIn;
171       /* yb + yd */
172       Ybplusd = ybIn + ydIn;
173 
174       /*  index calculation for the coefficients */
175       ia2 = ia1 + ia1;
176       co2 = pCoef[ia2 * 2U];
177       si2 = pCoef[(ia2 * 2U) + 1U];
178 
179       /* xa - xc */
180       Xaminusc = xaIn - xcIn;
181       /* xb - xd */
182       Xbminusd = xbIn - xdIn;
183       /* ya - yc */
184       Yaminusc = yaIn - ycIn;
185       /* yb - yd */
186       Ybminusd = ybIn - ydIn;
187 
188       /* xa' = xa + xb + xc + xd */
189       pSrc[(2U * i0)] = Xaplusc + Xbplusd;
190       /* ya' = ya + yb + yc + yd */
191       pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
192 
193       /* (xa - xc) + (yb - yd) */
194       Xb12C_out = (Xaminusc + Ybminusd);
195       /* (ya - yc) + (xb - xd) */
196       Yb12C_out = (Yaminusc - Xbminusd);
197       /* (xa + xc) - (xb + xd) */
198       Xc12C_out = (Xaplusc - Xbplusd);
199       /* (ya + yc) - (yb + yd) */
200       Yc12C_out = (Yaplusc - Ybplusd);
201       /* (xa - xc) - (yb - yd) */
202       Xd12C_out = (Xaminusc - Ybminusd);
203       /* (ya - yc) + (xb - xd) */
204       Yd12C_out = (Xbminusd + Yaminusc);
205 
206       co1 = pCoef[ia1 * 2U];
207       si1 = pCoef[(ia1 * 2U) + 1U];
208 
209       /*  index calculation for the coefficients */
210       ia3 = ia2 + ia1;
211       co3 = pCoef[ia3 * 2U];
212       si3 = pCoef[(ia3 * 2U) + 1U];
213 
214       Xb12_out = Xb12C_out * co1;
215       Yb12_out = Yb12C_out * co1;
216       Xc12_out = Xc12C_out * co2;
217       Yc12_out = Yc12C_out * co2;
218       Xd12_out = Xd12C_out * co3;
219       Yd12_out = Yd12C_out * co3;
220 
221       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
222       //Xb12_out -= Yb12C_out * si1;
223       p0 = Yb12C_out * si1;
224       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
225       //Yb12_out += Xb12C_out * si1;
226       p1 = Xb12C_out * si1;
227       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
228       //Xc12_out -= Yc12C_out * si2;
229       p2 = Yc12C_out * si2;
230       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
231       //Yc12_out += Xc12C_out * si2;
232       p3 = Xc12C_out * si2;
233       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
234       //Xd12_out -= Yd12C_out * si3;
235       p4 = Yd12C_out * si3;
236       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
237       //Yd12_out += Xd12C_out * si3;
238       p5 = Xd12C_out * si3;
239 
240       Xb12_out += p0;
241       Yb12_out -= p1;
242       Xc12_out += p2;
243       Yc12_out -= p3;
244       Xd12_out += p4;
245       Yd12_out -= p5;
246 
247       /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
248       pSrc[2U * i1] = Xc12_out;
249 
250       /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
251       pSrc[(2U * i1) + 1U] = Yc12_out;
252 
253       /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
254       pSrc[2U * i2] = Xb12_out;
255 
256       /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
257       pSrc[(2U * i2) + 1U] = Yb12_out;
258 
259       /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
260       pSrc[2U * i3] = Xd12_out;
261 
262       /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
263       pSrc[(2U * i3) + 1U] = Yd12_out;
264 
265       /*  Twiddle coefficients index modifier */
266       ia1 += twidCoefModifier;
267 
268       /*  Updating input index */
269       i0++;
270 
271    }
272    while (--j);
273 
274    twidCoefModifier <<= 2U;
275 
276    /*  Calculation of second stage to excluding last stage */
277    for (k = fftLen >> 2U; k > 4U; k >>= 2U)
278    {
279       /*  Initializations for the first stage */
280       n1 = n2;
281       n2 >>= 2U;
282       ia1 = 0U;
283 
284       /*  Calculation of first stage */
285       j = 0;
286       do
287       {
288          /*  index calculation for the coefficients */
289          ia2 = ia1 + ia1;
290          ia3 = ia2 + ia1;
291          co1 = pCoef[(ia1 * 2U)];
292          si1 = pCoef[(ia1 * 2U) + 1U];
293          co2 = pCoef[(ia2 * 2U)];
294          si2 = pCoef[(ia2 * 2U) + 1U];
295          co3 = pCoef[(ia3 * 2U)];
296          si3 = pCoef[(ia3 * 2U) + 1U];
297 
298          /*  Twiddle coefficients index modifier */
299          ia1 += twidCoefModifier;
300 
301          i0 = j;
302          do
303          {
304             /*  index calculation for the input as, */
305             /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
306             i1 = i0 + n2;
307             i2 = i1 + n2;
308             i3 = i2 + n2;
309 
310             xaIn = pSrc[(2U * i0)];
311             yaIn = pSrc[(2U * i0) + 1U];
312 
313             xbIn = pSrc[(2U * i1)];
314             ybIn = pSrc[(2U * i1) + 1U];
315 
316             xcIn = pSrc[(2U * i2)];
317             ycIn = pSrc[(2U * i2) + 1U];
318 
319             xdIn = pSrc[(2U * i3)];
320             ydIn = pSrc[(2U * i3) + 1U];
321 
322             /* xa - xc */
323             Xaminusc = xaIn - xcIn;
324             /* (xb - xd) */
325             Xbminusd = xbIn - xdIn;
326             /* ya - yc */
327             Yaminusc = yaIn - ycIn;
328             /* (yb - yd) */
329             Ybminusd = ybIn - ydIn;
330 
331             /* xa + xc */
332             Xaplusc = xaIn + xcIn;
333             /* xb + xd */
334             Xbplusd = xbIn + xdIn;
335             /* ya + yc */
336             Yaplusc = yaIn + ycIn;
337             /* yb + yd */
338             Ybplusd = ybIn + ydIn;
339 
340             /* (xa - xc) + (yb - yd) */
341             Xb12C_out = (Xaminusc + Ybminusd);
342             /* (ya - yc) -  (xb - xd) */
343             Yb12C_out = (Yaminusc - Xbminusd);
344             /* xa + xc -(xb + xd) */
345             Xc12C_out = (Xaplusc - Xbplusd);
346             /* (ya + yc) - (yb + yd) */
347             Yc12C_out = (Yaplusc - Ybplusd);
348             /* (xa - xc) - (yb - yd) */
349             Xd12C_out = (Xaminusc - Ybminusd);
350             /* (ya - yc) +  (xb - xd) */
351             Yd12C_out = (Xbminusd + Yaminusc);
352 
353             pSrc[(2U * i0)] = Xaplusc + Xbplusd;
354             pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
355 
356             Xb12_out = Xb12C_out * co1;
357             Yb12_out = Yb12C_out * co1;
358             Xc12_out = Xc12C_out * co2;
359             Yc12_out = Yc12C_out * co2;
360             Xd12_out = Xd12C_out * co3;
361             Yd12_out = Yd12C_out * co3;
362 
363             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
364             //Xb12_out -= Yb12C_out * si1;
365             p0 = Yb12C_out * si1;
366             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
367             //Yb12_out += Xb12C_out * si1;
368             p1 = Xb12C_out * si1;
369             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
370             //Xc12_out -= Yc12C_out * si2;
371             p2 = Yc12C_out * si2;
372             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
373             //Yc12_out += Xc12C_out * si2;
374             p3 = Xc12C_out * si2;
375             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
376             //Xd12_out -= Yd12C_out * si3;
377             p4 = Yd12C_out * si3;
378             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
379             //Yd12_out += Xd12C_out * si3;
380             p5 = Xd12C_out * si3;
381 
382             Xb12_out += p0;
383             Yb12_out -= p1;
384             Xc12_out += p2;
385             Yc12_out -= p3;
386             Xd12_out += p4;
387             Yd12_out -= p5;
388 
389             /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
390             pSrc[2U * i1] = Xc12_out;
391 
392             /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
393             pSrc[(2U * i1) + 1U] = Yc12_out;
394 
395             /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
396             pSrc[2U * i2] = Xb12_out;
397 
398             /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
399             pSrc[(2U * i2) + 1U] = Yb12_out;
400 
401             /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
402             pSrc[2U * i3] = Xd12_out;
403 
404             /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
405             pSrc[(2U * i3) + 1U] = Yd12_out;
406 
407             i0 += n1;
408          } while (i0 < fftLen);
409          j++;
410       } while (j <= (n2 - 1U));
411       twidCoefModifier <<= 2U;
412    }
413 
414    j = fftLen >> 2;
415    ptr1 = &pSrc[0];
416 
417    /*  Calculations of last stage */
418    do
419    {
420       xaIn = ptr1[0];
421       yaIn = ptr1[1];
422       xbIn = ptr1[2];
423       ybIn = ptr1[3];
424       xcIn = ptr1[4];
425       ycIn = ptr1[5];
426       xdIn = ptr1[6];
427       ydIn = ptr1[7];
428 
429       /* xa + xc */
430       Xaplusc = xaIn + xcIn;
431 
432       /* xa - xc */
433       Xaminusc = xaIn - xcIn;
434 
435       /* ya + yc */
436       Yaplusc = yaIn + ycIn;
437 
438       /* ya - yc */
439       Yaminusc = yaIn - ycIn;
440 
441       /* xb + xd */
442       Xbplusd = xbIn + xdIn;
443 
444       /* yb + yd */
445       Ybplusd = ybIn + ydIn;
446 
447       /* (xb-xd) */
448       Xbminusd = xbIn - xdIn;
449 
450       /* (yb-yd) */
451       Ybminusd = ybIn - ydIn;
452 
453       /* xa' = xa + xb + xc + xd */
454       a0 = (Xaplusc + Xbplusd);
455       /* ya' = ya + yb + yc + yd */
456       a1 = (Yaplusc + Ybplusd);
457       /* xc' = (xa-xb+xc-xd) */
458       a2 = (Xaplusc - Xbplusd);
459       /* yc' = (ya-yb+yc-yd) */
460       a3 = (Yaplusc - Ybplusd);
461       /* xb' = (xa+yb-xc-yd) */
462       a4 = (Xaminusc + Ybminusd);
463       /* yb' = (ya-xb-yc+xd) */
464       a5 = (Yaminusc - Xbminusd);
465       /* xd' = (xa-yb-xc+yd)) */
466       a6 = (Xaminusc - Ybminusd);
467       /* yd' = (ya+xb-yc-xd) */
468       a7 = (Xbminusd + Yaminusc);
469 
470       ptr1[0] = a0;
471       ptr1[1] = a1;
472       ptr1[2] = a2;
473       ptr1[3] = a3;
474       ptr1[4] = a4;
475       ptr1[5] = a5;
476       ptr1[6] = a6;
477       ptr1[7] = a7;
478 
479       /* increment pointer by 8 */
480       ptr1 += 8U;
481    } while (--j);
482 
483 #else
484 
485         float32_t t1, t2, r1, r2, s1, s2;
486 
487    /* Initializations for the fft calculation */
488    n2 = fftLen;
489    n1 = n2;
490    for (k = fftLen; k > 1U; k >>= 2U)
491    {
492       /*  Initializations for the fft calculation */
493       n1 = n2;
494       n2 >>= 2U;
495       ia1 = 0U;
496 
497       /*  FFT Calculation */
498       j = 0;
499       do
500       {
501          /*  index calculation for the coefficients */
502          ia2 = ia1 + ia1;
503          ia3 = ia2 + ia1;
504          co1 = pCoef[ia1 * 2U];
505          si1 = pCoef[(ia1 * 2U) + 1U];
506          co2 = pCoef[ia2 * 2U];
507          si2 = pCoef[(ia2 * 2U) + 1U];
508          co3 = pCoef[ia3 * 2U];
509          si3 = pCoef[(ia3 * 2U) + 1U];
510 
511          /*  Twiddle coefficients index modifier */
512          ia1 = ia1 + twidCoefModifier;
513 
514          i0 = j;
515          do
516          {
517             /*  index calculation for the input as, */
518             /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
519             i1 = i0 + n2;
520             i2 = i1 + n2;
521             i3 = i2 + n2;
522 
523             /* xa + xc */
524             r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
525 
526             /* xa - xc */
527             r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
528 
529             /* ya + yc */
530             s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
531 
532             /* ya - yc */
533             s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
534 
535             /* xb + xd */
536             t1 = pSrc[2U * i1] + pSrc[2U * i3];
537 
538             /* xa' = xa + xb + xc + xd */
539             pSrc[2U * i0] = r1 + t1;
540 
541             /* xa + xc -(xb + xd) */
542             r1 = r1 - t1;
543 
544             /* yb + yd */
545             t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
546 
547             /* ya' = ya + yb + yc + yd */
548             pSrc[(2U * i0) + 1U] = s1 + t2;
549 
550             /* (ya + yc) - (yb + yd) */
551             s1 = s1 - t2;
552 
553             /* (yb - yd) */
554             t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
555 
556             /* (xb - xd) */
557             t2 = pSrc[2U * i1] - pSrc[2U * i3];
558 
559             /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
560             pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
561 
562             /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
563             pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
564 
565             /* (xa - xc) + (yb - yd) */
566             r1 = r2 + t1;
567 
568             /* (xa - xc) - (yb - yd) */
569             r2 = r2 - t1;
570 
571             /* (ya - yc) -  (xb - xd) */
572             s1 = s2 - t2;
573 
574             /* (ya - yc) +  (xb - xd) */
575             s2 = s2 + t2;
576 
577             /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
578             pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
579 
580             /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
581             pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
582 
583             /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
584             pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
585 
586             /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
587             pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
588 
589             i0 += n1;
590          } while ( i0 < fftLen);
591          j++;
592       } while (j <= (n2 - 1U));
593       twidCoefModifier <<= 2U;
594    }
595 
596 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
597 
598 }
599 
600 /**
601   brief         Core function for the floating-point CIFFT butterfly process.
602   param[in,out] pSrc             points to the in-place buffer of floating-point data type
603   param[in]     fftLen           length of the FFT
604   param[in]     pCoef            points to twiddle coefficient buffer
605   param[in]     twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
606   param[in]     onebyfftLen      value of 1/fftLen
607   return        none
608  */
609 
arm_radix4_butterfly_inverse_f32(float32_t * pSrc,uint16_t fftLen,const float32_t * pCoef,uint16_t twidCoefModifier,float32_t onebyfftLen)610 void arm_radix4_butterfly_inverse_f32(
611         float32_t * pSrc,
612         uint16_t fftLen,
613   const float32_t * pCoef,
614         uint16_t twidCoefModifier,
615         float32_t onebyfftLen)
616 {
617         float32_t co1, co2, co3, si1, si2, si3;
618         uint32_t ia1, ia2, ia3;
619         uint32_t i0, i1, i2, i3;
620         uint32_t n1, n2, j, k;
621 
622 #if defined (ARM_MATH_LOOPUNROLL)
623 
624         float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
625         float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
626         Ybminusd;
627         float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
628         float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
629         float32_t *ptr1;
630         float32_t p0,p1,p2,p3,p4,p5,p6,p7;
631         float32_t a0,a1,a2,a3,a4,a5,a6,a7;
632 
633 
634    /*  Initializations for the first stage */
635    n2 = fftLen;
636    n1 = n2;
637 
638    /* n2 = fftLen/4 */
639    n2 >>= 2U;
640    i0 = 0U;
641    ia1 = 0U;
642 
643    j = n2;
644 
645    /*  Calculation of first stage */
646    do
647    {
648       /*  index calculation for the input as, */
649       /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
650       i1 = i0 + n2;
651       i2 = i1 + n2;
652       i3 = i2 + n2;
653 
654       /*  Butterfly implementation */
655       xaIn = pSrc[(2U * i0)];
656       yaIn = pSrc[(2U * i0) + 1U];
657 
658       xcIn = pSrc[(2U * i2)];
659       ycIn = pSrc[(2U * i2) + 1U];
660 
661       xbIn = pSrc[(2U * i1)];
662       ybIn = pSrc[(2U * i1) + 1U];
663 
664       xdIn = pSrc[(2U * i3)];
665       ydIn = pSrc[(2U * i3) + 1U];
666 
667       /* xa + xc */
668       Xaplusc = xaIn + xcIn;
669       /* xb + xd */
670       Xbplusd = xbIn + xdIn;
671       /* ya + yc */
672       Yaplusc = yaIn + ycIn;
673       /* yb + yd */
674       Ybplusd = ybIn + ydIn;
675 
676       /*  index calculation for the coefficients */
677       ia2 = ia1 + ia1;
678       co2 = pCoef[ia2 * 2U];
679       si2 = pCoef[(ia2 * 2U) + 1U];
680 
681       /* xa - xc */
682       Xaminusc = xaIn - xcIn;
683       /* xb - xd */
684       Xbminusd = xbIn - xdIn;
685       /* ya - yc */
686       Yaminusc = yaIn - ycIn;
687       /* yb - yd */
688       Ybminusd = ybIn - ydIn;
689 
690       /* xa' = xa + xb + xc + xd */
691       pSrc[(2U * i0)] = Xaplusc + Xbplusd;
692 
693       /* ya' = ya + yb + yc + yd */
694       pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
695 
696       /* (xa - xc) - (yb - yd) */
697       Xb12C_out = (Xaminusc - Ybminusd);
698       /* (ya - yc) + (xb - xd) */
699       Yb12C_out = (Yaminusc + Xbminusd);
700       /* (xa + xc) - (xb + xd) */
701       Xc12C_out = (Xaplusc - Xbplusd);
702       /* (ya + yc) - (yb + yd) */
703       Yc12C_out = (Yaplusc - Ybplusd);
704       /* (xa - xc) + (yb - yd) */
705       Xd12C_out = (Xaminusc + Ybminusd);
706       /* (ya - yc) - (xb - xd) */
707       Yd12C_out = (Yaminusc - Xbminusd);
708 
709       co1 = pCoef[ia1 * 2U];
710       si1 = pCoef[(ia1 * 2U) + 1U];
711 
712       /*  index calculation for the coefficients */
713       ia3 = ia2 + ia1;
714       co3 = pCoef[ia3 * 2U];
715       si3 = pCoef[(ia3 * 2U) + 1U];
716 
717       Xb12_out = Xb12C_out * co1;
718       Yb12_out = Yb12C_out * co1;
719       Xc12_out = Xc12C_out * co2;
720       Yc12_out = Yc12C_out * co2;
721       Xd12_out = Xd12C_out * co3;
722       Yd12_out = Yd12C_out * co3;
723 
724       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
725       //Xb12_out -= Yb12C_out * si1;
726       p0 = Yb12C_out * si1;
727       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
728       //Yb12_out += Xb12C_out * si1;
729       p1 = Xb12C_out * si1;
730       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
731       //Xc12_out -= Yc12C_out * si2;
732       p2 = Yc12C_out * si2;
733       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
734       //Yc12_out += Xc12C_out * si2;
735       p3 = Xc12C_out * si2;
736       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
737       //Xd12_out -= Yd12C_out * si3;
738       p4 = Yd12C_out * si3;
739       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
740       //Yd12_out += Xd12C_out * si3;
741       p5 = Xd12C_out * si3;
742 
743       Xb12_out -= p0;
744       Yb12_out += p1;
745       Xc12_out -= p2;
746       Yc12_out += p3;
747       Xd12_out -= p4;
748       Yd12_out += p5;
749 
750       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
751       pSrc[2U * i1] = Xc12_out;
752 
753       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
754       pSrc[(2U * i1) + 1U] = Yc12_out;
755 
756       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
757       pSrc[2U * i2] = Xb12_out;
758 
759       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
760       pSrc[(2U * i2) + 1U] = Yb12_out;
761 
762       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
763       pSrc[2U * i3] = Xd12_out;
764 
765       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
766       pSrc[(2U * i3) + 1U] = Yd12_out;
767 
768       /*  Twiddle coefficients index modifier */
769       ia1 = ia1 + twidCoefModifier;
770 
771       /*  Updating input index */
772       i0 = i0 + 1U;
773 
774    } while (--j);
775 
776    twidCoefModifier <<= 2U;
777 
778    /*  Calculation of second stage to excluding last stage */
779    for (k = fftLen >> 2U; k > 4U; k >>= 2U)
780    {
781       /*  Initializations for the first stage */
782       n1 = n2;
783       n2 >>= 2U;
784       ia1 = 0U;
785 
786       /*  Calculation of first stage */
787       j = 0;
788       do
789       {
790          /*  index calculation for the coefficients */
791          ia2 = ia1 + ia1;
792          ia3 = ia2 + ia1;
793          co1 = pCoef[ia1 * 2U];
794          si1 = pCoef[(ia1 * 2U) + 1U];
795          co2 = pCoef[ia2 * 2U];
796          si2 = pCoef[(ia2 * 2U) + 1U];
797          co3 = pCoef[ia3 * 2U];
798          si3 = pCoef[(ia3 * 2U) + 1U];
799 
800          /*  Twiddle coefficients index modifier */
801          ia1 = ia1 + twidCoefModifier;
802 
803          i0 = j;
804          do
805          {
806             /*  index calculation for the input as, */
807             /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
808             i1 = i0 + n2;
809             i2 = i1 + n2;
810             i3 = i2 + n2;
811 
812             xaIn = pSrc[(2U * i0)];
813             yaIn = pSrc[(2U * i0) + 1U];
814 
815             xbIn = pSrc[(2U * i1)];
816             ybIn = pSrc[(2U * i1) + 1U];
817 
818             xcIn = pSrc[(2U * i2)];
819             ycIn = pSrc[(2U * i2) + 1U];
820 
821             xdIn = pSrc[(2U * i3)];
822             ydIn = pSrc[(2U * i3) + 1U];
823 
824             /* xa - xc */
825             Xaminusc = xaIn - xcIn;
826             /* (xb - xd) */
827             Xbminusd = xbIn - xdIn;
828             /* ya - yc */
829             Yaminusc = yaIn - ycIn;
830             /* (yb - yd) */
831             Ybminusd = ybIn - ydIn;
832 
833             /* xa + xc */
834             Xaplusc = xaIn + xcIn;
835             /* xb + xd */
836             Xbplusd = xbIn + xdIn;
837             /* ya + yc */
838             Yaplusc = yaIn + ycIn;
839             /* yb + yd */
840             Ybplusd = ybIn + ydIn;
841 
842             /* (xa - xc) - (yb - yd) */
843             Xb12C_out = (Xaminusc - Ybminusd);
844             /* (ya - yc) +  (xb - xd) */
845             Yb12C_out = (Yaminusc + Xbminusd);
846             /* xa + xc -(xb + xd) */
847             Xc12C_out = (Xaplusc - Xbplusd);
848             /* (ya + yc) - (yb + yd) */
849             Yc12C_out = (Yaplusc - Ybplusd);
850             /* (xa - xc) + (yb - yd) */
851             Xd12C_out = (Xaminusc + Ybminusd);
852             /* (ya - yc) -  (xb - xd) */
853             Yd12C_out = (Yaminusc - Xbminusd);
854 
855             pSrc[(2U * i0)] = Xaplusc + Xbplusd;
856             pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
857 
858             Xb12_out = Xb12C_out * co1;
859             Yb12_out = Yb12C_out * co1;
860             Xc12_out = Xc12C_out * co2;
861             Yc12_out = Yc12C_out * co2;
862             Xd12_out = Xd12C_out * co3;
863             Yd12_out = Yd12C_out * co3;
864 
865             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
866             //Xb12_out -= Yb12C_out * si1;
867             p0 = Yb12C_out * si1;
868             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
869             //Yb12_out += Xb12C_out * si1;
870             p1 = Xb12C_out * si1;
871             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
872             //Xc12_out -= Yc12C_out * si2;
873             p2 = Yc12C_out * si2;
874             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
875             //Yc12_out += Xc12C_out * si2;
876             p3 = Xc12C_out * si2;
877             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
878             //Xd12_out -= Yd12C_out * si3;
879             p4 = Yd12C_out * si3;
880             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
881             //Yd12_out += Xd12C_out * si3;
882             p5 = Xd12C_out * si3;
883 
884             Xb12_out -= p0;
885             Yb12_out += p1;
886             Xc12_out -= p2;
887             Yc12_out += p3;
888             Xd12_out -= p4;
889             Yd12_out += p5;
890 
891             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
892             pSrc[2U * i1] = Xc12_out;
893 
894             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
895             pSrc[(2U * i1) + 1U] = Yc12_out;
896 
897             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
898             pSrc[2U * i2] = Xb12_out;
899 
900             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
901             pSrc[(2U * i2) + 1U] = Yb12_out;
902 
903             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
904             pSrc[2U * i3] = Xd12_out;
905 
906             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
907             pSrc[(2U * i3) + 1U] = Yd12_out;
908 
909             i0 += n1;
910          } while (i0 < fftLen);
911          j++;
912       } while (j <= (n2 - 1U));
913       twidCoefModifier <<= 2U;
914    }
915    /*  Initializations of last stage */
916 
917    j = fftLen >> 2;
918    ptr1 = &pSrc[0];
919 
920    /*  Calculations of last stage */
921    do
922    {
923       xaIn = ptr1[0];
924       yaIn = ptr1[1];
925       xbIn = ptr1[2];
926       ybIn = ptr1[3];
927       xcIn = ptr1[4];
928       ycIn = ptr1[5];
929       xdIn = ptr1[6];
930       ydIn = ptr1[7];
931 
932       /*  Butterfly implementation */
933       /* xa + xc */
934       Xaplusc = xaIn + xcIn;
935 
936       /* xa - xc */
937       Xaminusc = xaIn - xcIn;
938 
939       /* ya + yc */
940       Yaplusc = yaIn + ycIn;
941 
942       /* ya - yc */
943       Yaminusc = yaIn - ycIn;
944 
945       /* xb + xd */
946       Xbplusd = xbIn + xdIn;
947 
948       /* yb + yd */
949       Ybplusd = ybIn + ydIn;
950 
951       /* (xb-xd) */
952       Xbminusd = xbIn - xdIn;
953 
954       /* (yb-yd) */
955       Ybminusd = ybIn - ydIn;
956 
957       /* xa' = (xa+xb+xc+xd) * onebyfftLen */
958       a0 = (Xaplusc + Xbplusd);
959       /* ya' = (ya+yb+yc+yd) * onebyfftLen */
960       a1 = (Yaplusc + Ybplusd);
961       /* xc' = (xa-xb+xc-xd) * onebyfftLen */
962       a2 = (Xaplusc - Xbplusd);
963       /* yc' = (ya-yb+yc-yd) * onebyfftLen  */
964       a3 = (Yaplusc - Ybplusd);
965       /* xb' = (xa-yb-xc+yd) * onebyfftLen */
966       a4 = (Xaminusc - Ybminusd);
967       /* yb' = (ya+xb-yc-xd) * onebyfftLen */
968       a5 = (Yaminusc + Xbminusd);
969       /* xd' = (xa-yb-xc+yd) * onebyfftLen */
970       a6 = (Xaminusc + Ybminusd);
971       /* yd' = (ya-xb-yc+xd) * onebyfftLen */
972       a7 = (Yaminusc - Xbminusd);
973 
974       p0 = a0 * onebyfftLen;
975       p1 = a1 * onebyfftLen;
976       p2 = a2 * onebyfftLen;
977       p3 = a3 * onebyfftLen;
978       p4 = a4 * onebyfftLen;
979       p5 = a5 * onebyfftLen;
980       p6 = a6 * onebyfftLen;
981       p7 = a7 * onebyfftLen;
982 
983       /* xa' = (xa+xb+xc+xd) * onebyfftLen */
984       ptr1[0] = p0;
985       /* ya' = (ya+yb+yc+yd) * onebyfftLen */
986       ptr1[1] = p1;
987       /* xc' = (xa-xb+xc-xd) * onebyfftLen */
988       ptr1[2] = p2;
989       /* yc' = (ya-yb+yc-yd) * onebyfftLen  */
990       ptr1[3] = p3;
991       /* xb' = (xa-yb-xc+yd) * onebyfftLen */
992       ptr1[4] = p4;
993       /* yb' = (ya+xb-yc-xd) * onebyfftLen */
994       ptr1[5] = p5;
995       /* xd' = (xa-yb-xc+yd) * onebyfftLen */
996       ptr1[6] = p6;
997       /* yd' = (ya-xb-yc+xd) * onebyfftLen */
998       ptr1[7] = p7;
999 
1000       /* increment source pointer by 8 for next calculations */
1001       ptr1 = ptr1 + 8U;
1002 
1003    } while (--j);
1004 
1005 #else
1006 
1007         float32_t t1, t2, r1, r2, s1, s2;
1008 
1009    /*  Initializations for the first stage */
1010    n2 = fftLen;
1011    n1 = n2;
1012 
1013    /*  Calculation of first stage */
1014    for (k = fftLen; k > 4U; k >>= 2U)
1015    {
1016       /*  Initializations for the first stage */
1017       n1 = n2;
1018       n2 >>= 2U;
1019       ia1 = 0U;
1020 
1021       /*  Calculation of first stage */
1022       j = 0;
1023       do
1024       {
1025          /*  index calculation for the coefficients */
1026          ia2 = ia1 + ia1;
1027          ia3 = ia2 + ia1;
1028          co1 = pCoef[ia1 * 2U];
1029          si1 = pCoef[(ia1 * 2U) + 1U];
1030          co2 = pCoef[ia2 * 2U];
1031          si2 = pCoef[(ia2 * 2U) + 1U];
1032          co3 = pCoef[ia3 * 2U];
1033          si3 = pCoef[(ia3 * 2U) + 1U];
1034 
1035          /*  Twiddle coefficients index modifier */
1036          ia1 = ia1 + twidCoefModifier;
1037 
1038          i0 = j;
1039          do
1040          {
1041             /*  index calculation for the input as, */
1042             /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1043             i1 = i0 + n2;
1044             i2 = i1 + n2;
1045             i3 = i2 + n2;
1046 
1047             /* xa + xc */
1048             r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
1049 
1050             /* xa - xc */
1051             r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
1052 
1053             /* ya + yc */
1054             s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
1055 
1056             /* ya - yc */
1057             s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
1058 
1059             /* xb + xd */
1060             t1 = pSrc[2U * i1] + pSrc[2U * i3];
1061 
1062             /* xa' = xa + xb + xc + xd */
1063             pSrc[2U * i0] = r1 + t1;
1064 
1065             /* xa + xc -(xb + xd) */
1066             r1 = r1 - t1;
1067 
1068             /* yb + yd */
1069             t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
1070 
1071             /* ya' = ya + yb + yc + yd */
1072             pSrc[(2U * i0) + 1U] = s1 + t2;
1073 
1074             /* (ya + yc) - (yb + yd) */
1075             s1 = s1 - t2;
1076 
1077             /* (yb - yd) */
1078             t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
1079 
1080             /* (xb - xd) */
1081             t2 = pSrc[2U * i1] - pSrc[2U * i3];
1082 
1083             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1084             pSrc[2U * i1] = (r1 * co2) - (s1 * si2);
1085 
1086             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1087             pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2);
1088 
1089             /* (xa - xc) - (yb - yd) */
1090             r1 = r2 - t1;
1091 
1092             /* (xa - xc) + (yb - yd) */
1093             r2 = r2 + t1;
1094 
1095             /* (ya - yc) +  (xb - xd) */
1096             s1 = s2 + t2;
1097 
1098             /* (ya - yc) -  (xb - xd) */
1099             s2 = s2 - t2;
1100 
1101             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
1102             pSrc[2U * i2] = (r1 * co1) - (s1 * si1);
1103 
1104             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
1105             pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1);
1106 
1107             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
1108             pSrc[2U * i3] = (r2 * co3) - (s2 * si3);
1109 
1110             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
1111             pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3);
1112 
1113             i0 += n1;
1114          } while ( i0 < fftLen);
1115          j++;
1116       } while (j <= (n2 - 1U));
1117       twidCoefModifier <<= 2U;
1118    }
1119    /*  Initializations of last stage */
1120    n1 = n2;
1121    n2 >>= 2U;
1122 
1123    /*  Calculations of last stage */
1124    for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
1125    {
1126       /*  index calculation for the input as, */
1127       /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1128       i1 = i0 + n2;
1129       i2 = i1 + n2;
1130       i3 = i2 + n2;
1131 
1132       /*  Butterfly implementation */
1133       /* xa + xc */
1134       r1 = pSrc[2U * i0] + pSrc[2U * i2];
1135 
1136       /* xa - xc */
1137       r2 = pSrc[2U * i0] - pSrc[2U * i2];
1138 
1139       /* ya + yc */
1140       s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
1141 
1142       /* ya - yc */
1143       s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
1144 
1145       /* xc + xd */
1146       t1 = pSrc[2U * i1] + pSrc[2U * i3];
1147 
1148       /* xa' = xa + xb + xc + xd */
1149       pSrc[2U * i0] = (r1 + t1) * onebyfftLen;
1150 
1151       /* (xa + xb) - (xc + xd) */
1152       r1 = r1 - t1;
1153 
1154       /* yb + yd */
1155       t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
1156 
1157       /* ya' = ya + yb + yc + yd */
1158       pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen;
1159 
1160       /* (ya + yc) - (yb + yd) */
1161       s1 = s1 - t2;
1162 
1163       /* (yb-yd) */
1164       t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
1165 
1166       /* (xb-xd) */
1167       t2 = pSrc[2U * i1] - pSrc[2U * i3];
1168 
1169       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1170       pSrc[2U * i1] = r1 * onebyfftLen;
1171 
1172       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1173       pSrc[(2U * i1) + 1U] = s1 * onebyfftLen;
1174 
1175       /* (xa - xc) - (yb-yd) */
1176       r1 = r2 - t1;
1177 
1178       /* (xa - xc) + (yb-yd) */
1179       r2 = r2 + t1;
1180 
1181       /* (ya - yc) + (xb-xd) */
1182       s1 = s2 + t2;
1183 
1184       /* (ya - yc) - (xb-xd) */
1185       s2 = s2 - t2;
1186 
1187       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
1188       pSrc[2U * i2] = r1 * onebyfftLen;
1189 
1190       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
1191       pSrc[(2U * i2) + 1U] = s1 * onebyfftLen;
1192 
1193       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
1194       pSrc[2U * i3] = r2 * onebyfftLen;
1195 
1196       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
1197       pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
1198    }
1199 
1200 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
1201 }
1202 
1203 
1204