1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cfft_radix4_f32.c
4 * Description: Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/transform_functions.h"
30
31 extern void arm_bitreversal_f32(
32 float32_t * pSrc,
33 uint16_t fftSize,
34 uint16_t bitRevFactor,
35 const uint16_t * pBitRevTab);
36
37 void arm_radix4_butterfly_f32(
38 float32_t * pSrc,
39 uint16_t fftLen,
40 const float32_t * pCoef,
41 uint16_t twidCoefModifier);
42
43 void arm_radix4_butterfly_inverse_f32(
44 float32_t * pSrc,
45 uint16_t fftLen,
46 const float32_t * pCoef,
47 uint16_t twidCoefModifier,
48 float32_t onebyfftLen);
49
50
51
52
53 /**
54 @ingroup groupTransforms
55 */
56
57 /**
58 @addtogroup ComplexFFT
59 @{
60 */
61
62
63 /**
64 @brief Processing function for the floating-point Radix-4 CFFT/CIFFT.
65 @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future.
66 @param[in] S points to an instance of the floating-point Radix-4 CFFT/CIFFT structure
67 @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
68 @return none
69 */
70
arm_cfft_radix4_f32(const arm_cfft_radix4_instance_f32 * S,float32_t * pSrc)71 void arm_cfft_radix4_f32(
72 const arm_cfft_radix4_instance_f32 * S,
73 float32_t * pSrc)
74 {
75 if (S->ifftFlag == 1U)
76 {
77 /* Complex IFFT radix-4 */
78 arm_radix4_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
79 }
80 else
81 {
82 /* Complex FFT radix-4 */
83 arm_radix4_butterfly_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
84 }
85
86 if (S->bitReverseFlag == 1U)
87 {
88 /* Bit Reversal */
89 arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
90 }
91
92 }
93
94 /**
95 @} end of ComplexFFT group
96 */
97
98 /* ----------------------------------------------------------------------
99 * Internal helper function used by the FFTs
100 * ---------------------------------------------------------------------- */
101
/**
  brief         Core function for the floating-point CFFT butterfly process.
  param[in,out] pSrc              points to the in-place buffer of floating-point data type
                                  (interleaved real/imaginary, 2*fftLen values)
  param[in]     fftLen            length of the FFT; must be a power of 4 and at least 4
  param[in]     pCoef             points to the twiddle coefficient buffer (interleaved cos/sin pairs)
  param[in]     twidCoefModifier  twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
  return        none

  Radix-4 decimation-in-frequency stages computed in place. Every butterfly
  reads four complex points a, b, c, d spaced n2 apart and writes back
      a' = a + b + c + d                       at i0
      c' = (a - b + c - d)     * (co2 - j*si2) at i1
      b' = (a - j*b - c + j*d) * (co1 - j*si1) at i2
      d' = (a + j*b - c - j*d) * (co3 - j*si3) at i3
  Because c' and b' are stored swapped, the result is not in natural order;
  arm_cfft_radix4_f32() reorders it with arm_bitreversal_f32() when requested.

  A length that is not a power of 4 (or is below 4) would drive the stride n2
  to zero and let the unsigned loop bounds wrap, indexing far outside pSrc and
  pCoef. Such lengths are rejected up front and the buffer is left untouched.
 */

void arm_radix4_butterfly_f32(
        float32_t * pSrc,
        uint16_t fftLen,
  const float32_t * pCoef,
        uint16_t twidCoefModifier)
{
  float32_t co1, co2, co3, si1, si2, si3;
  uint32_t ia1, ia2, ia3;
  uint32_t i0, i1, i2, i3;
  uint32_t n1, n2, j, k;

#if defined (ARM_MATH_LOOPUNROLL)

  float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
  float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd;
  float32_t Xaminusc, Xbminusd, Yaminusc, Ybminusd;
  float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
  float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
  float32_t *ptr1;
  float32_t p0, p1, p2, p3, p4, p5;
  float32_t a0, a1, a2, a3, a4, a5, a6, a7;

  /* Reject anything that is not 4, 16, 64, ... (single bit set at an even position) */
  if ((fftLen < 4U) || ((fftLen & (fftLen - 1U)) != 0U) || ((fftLen & 0x5555U) == 0U))
  {
    return;
  }

  /* n2 = fftLen/4 : distance between the four inputs of a first-stage butterfly */
  n2 = fftLen >> 2U;

  /* A 4-point transform consists of the twiddle-free last stage only; running
     the first stage as well would transform the same four points twice. */
  if (fftLen > 4U)
  {
    i0 = 0U;
    ia1 = 0U;
    j = n2;

    /* First stage: one butterfly per twiddle index */
    do
    {
      /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
      i1 = i0 + n2;
      i2 = i1 + n2;
      i3 = i2 + n2;

      xaIn = pSrc[(2U * i0)];
      yaIn = pSrc[(2U * i0) + 1U];

      xbIn = pSrc[(2U * i1)];
      ybIn = pSrc[(2U * i1) + 1U];

      xcIn = pSrc[(2U * i2)];
      ycIn = pSrc[(2U * i2) + 1U];

      xdIn = pSrc[(2U * i3)];
      ydIn = pSrc[(2U * i3) + 1U];

      /* Sums of opposite points */
      Xaplusc = xaIn + xcIn;
      Xbplusd = xbIn + xdIn;
      Yaplusc = yaIn + ycIn;
      Ybplusd = ybIn + ydIn;

      /* Twiddle for the c' output */
      ia2 = ia1 + ia1;
      co2 = pCoef[ia2 * 2U];
      si2 = pCoef[(ia2 * 2U) + 1U];

      /* Differences of opposite points */
      Xaminusc = xaIn - xcIn;
      Xbminusd = xbIn - xdIn;
      Yaminusc = yaIn - ycIn;
      Ybminusd = ybIn - ydIn;

      /* a' = a + b + c + d (no twiddle) */
      pSrc[(2U * i0)]      = Xaplusc + Xbplusd;
      pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;

      /* Untwiddled b', c', d' */
      Xb12C_out = (Xaminusc + Ybminusd);   /* (xa - xc) + (yb - yd) */
      Yb12C_out = (Yaminusc - Xbminusd);   /* (ya - yc) - (xb - xd) */
      Xc12C_out = (Xaplusc - Xbplusd);     /* (xa + xc) - (xb + xd) */
      Yc12C_out = (Yaplusc - Ybplusd);     /* (ya + yc) - (yb + yd) */
      Xd12C_out = (Xaminusc - Ybminusd);   /* (xa - xc) - (yb - yd) */
      Yd12C_out = (Xbminusd + Yaminusc);   /* (ya - yc) + (xb - xd) */

      /* Twiddles for the b' and d' outputs */
      co1 = pCoef[ia1 * 2U];
      si1 = pCoef[(ia1 * 2U) + 1U];

      ia3 = ia2 + ia1;
      co3 = pCoef[ia3 * 2U];
      si3 = pCoef[(ia3 * 2U) + 1U];

      /* Multiply by (co - j*si): x' = X*co + Y*si, y' = Y*co - X*si.
         The cosine and sine products are formed separately, then combined. */
      Xb12_out = Xb12C_out * co1;
      Yb12_out = Yb12C_out * co1;
      Xc12_out = Xc12C_out * co2;
      Yc12_out = Yc12C_out * co2;
      Xd12_out = Xd12C_out * co3;
      Yd12_out = Yd12C_out * co3;

      p0 = Yb12C_out * si1;
      p1 = Xb12C_out * si1;
      p2 = Yc12C_out * si2;
      p3 = Xc12C_out * si2;
      p4 = Yd12C_out * si3;
      p5 = Xd12C_out * si3;

      Xb12_out += p0;
      Yb12_out -= p1;
      Xc12_out += p2;
      Yc12_out -= p3;
      Xd12_out += p4;
      Yd12_out -= p5;

      /* c' goes to i1 and b' to i2 (swapped on purpose) */
      pSrc[2U * i1]        = Xc12_out;
      pSrc[(2U * i1) + 1U] = Yc12_out;

      pSrc[2U * i2]        = Xb12_out;
      pSrc[(2U * i2) + 1U] = Yb12_out;

      pSrc[2U * i3]        = Xd12_out;
      pSrc[(2U * i3) + 1U] = Yd12_out;

      /* Next twiddle and next input point */
      ia1 += twidCoefModifier;
      i0++;
    }
    while (--j);

    twidCoefModifier <<= 2U;
  }

  /* Second stage up to, but excluding, the last stage */
  for (k = fftLen >> 2U; k > 4U; k >>= 2U)
  {
    /* n1: butterfly group size of this stage, n2: spacing inside a butterfly */
    n1 = n2;
    n2 >>= 2U;
    ia1 = 0U;

    j = 0;
    do
    {
      /* One twiddle triple is shared by all groups at offset j */
      ia2 = ia1 + ia1;
      ia3 = ia2 + ia1;
      co1 = pCoef[(ia1 * 2U)];
      si1 = pCoef[(ia1 * 2U) + 1U];
      co2 = pCoef[(ia2 * 2U)];
      si2 = pCoef[(ia2 * 2U) + 1U];
      co3 = pCoef[(ia3 * 2U)];
      si3 = pCoef[(ia3 * 2U) + 1U];

      ia1 += twidCoefModifier;

      i0 = j;
      do
      {
        i1 = i0 + n2;
        i2 = i1 + n2;
        i3 = i2 + n2;

        xaIn = pSrc[(2U * i0)];
        yaIn = pSrc[(2U * i0) + 1U];

        xbIn = pSrc[(2U * i1)];
        ybIn = pSrc[(2U * i1) + 1U];

        xcIn = pSrc[(2U * i2)];
        ycIn = pSrc[(2U * i2) + 1U];

        xdIn = pSrc[(2U * i3)];
        ydIn = pSrc[(2U * i3) + 1U];

        Xaminusc = xaIn - xcIn;
        Xbminusd = xbIn - xdIn;
        Yaminusc = yaIn - ycIn;
        Ybminusd = ybIn - ydIn;

        Xaplusc = xaIn + xcIn;
        Xbplusd = xbIn + xdIn;
        Yaplusc = yaIn + ycIn;
        Ybplusd = ybIn + ydIn;

        Xb12C_out = (Xaminusc + Ybminusd);   /* (xa - xc) + (yb - yd) */
        Yb12C_out = (Yaminusc - Xbminusd);   /* (ya - yc) - (xb - xd) */
        Xc12C_out = (Xaplusc - Xbplusd);     /* (xa + xc) - (xb + xd) */
        Yc12C_out = (Yaplusc - Ybplusd);     /* (ya + yc) - (yb + yd) */
        Xd12C_out = (Xaminusc - Ybminusd);   /* (xa - xc) - (yb - yd) */
        Yd12C_out = (Xbminusd + Yaminusc);   /* (ya - yc) + (xb - xd) */

        /* a' = a + b + c + d */
        pSrc[(2U * i0)]      = Xaplusc + Xbplusd;
        pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;

        Xb12_out = Xb12C_out * co1;
        Yb12_out = Yb12C_out * co1;
        Xc12_out = Xc12C_out * co2;
        Yc12_out = Yc12C_out * co2;
        Xd12_out = Xd12C_out * co3;
        Yd12_out = Yd12C_out * co3;

        p0 = Yb12C_out * si1;
        p1 = Xb12C_out * si1;
        p2 = Yc12C_out * si2;
        p3 = Xc12C_out * si2;
        p4 = Yd12C_out * si3;
        p5 = Xd12C_out * si3;

        /* x' = X*co + Y*si, y' = Y*co - X*si */
        Xb12_out += p0;
        Yb12_out -= p1;
        Xc12_out += p2;
        Yc12_out -= p3;
        Xd12_out += p4;
        Yd12_out -= p5;

        pSrc[2U * i1]        = Xc12_out;
        pSrc[(2U * i1) + 1U] = Yc12_out;

        pSrc[2U * i2]        = Xb12_out;
        pSrc[(2U * i2) + 1U] = Yb12_out;

        pSrc[2U * i3]        = Xd12_out;
        pSrc[(2U * i3) + 1U] = Yd12_out;

        i0 += n1;
      } while (i0 < fftLen);
      j++;
    } while (j <= (n2 - 1U));
    twidCoefModifier <<= 2U;
  }

  /* Last stage: fftLen/4 butterflies over four adjacent points, no twiddles */
  j = fftLen >> 2;
  ptr1 = &pSrc[0];

  do
  {
    xaIn = ptr1[0];
    yaIn = ptr1[1];
    xbIn = ptr1[2];
    ybIn = ptr1[3];
    xcIn = ptr1[4];
    ycIn = ptr1[5];
    xdIn = ptr1[6];
    ydIn = ptr1[7];

    Xaplusc  = xaIn + xcIn;
    Xaminusc = xaIn - xcIn;
    Yaplusc  = yaIn + ycIn;
    Yaminusc = yaIn - ycIn;
    Xbplusd  = xbIn + xdIn;
    Ybplusd  = ybIn + ydIn;
    Xbminusd = xbIn - xdIn;
    Ybminusd = ybIn - ydIn;

    a0 = (Xaplusc + Xbplusd);     /* xa' = xa + xb + xc + xd */
    a1 = (Yaplusc + Ybplusd);     /* ya' = ya + yb + yc + yd */
    a2 = (Xaplusc - Xbplusd);     /* xc' = xa - xb + xc - xd */
    a3 = (Yaplusc - Ybplusd);     /* yc' = ya - yb + yc - yd */
    a4 = (Xaminusc + Ybminusd);   /* xb' = xa + yb - xc - yd */
    a5 = (Yaminusc - Xbminusd);   /* yb' = ya - xb - yc + xd */
    a6 = (Xaminusc - Ybminusd);   /* xd' = xa - yb - xc + yd */
    a7 = (Xbminusd + Yaminusc);   /* yd' = ya + xb - yc - xd */

    ptr1[0] = a0;
    ptr1[1] = a1;
    ptr1[2] = a2;
    ptr1[3] = a3;
    ptr1[4] = a4;
    ptr1[5] = a5;
    ptr1[6] = a6;
    ptr1[7] = a7;

    /* Advance by four complex points */
    ptr1 += 8U;
  } while (--j);

#else

  float32_t t1, t2, r1, r2, s1, s2;

  /* Reject anything that is not 4, 16, 64, ... (single bit set at an even position) */
  if ((fftLen < 4U) || ((fftLen & (fftLen - 1U)) != 0U) || ((fftLen & 0x5555U) == 0U))
  {
    return;
  }

  n2 = fftLen;

  /* One pass per radix-4 stage; the last stage simply uses twiddle index 0 */
  for (k = fftLen; k > 1U; k >>= 2U)
  {
    /* n1: butterfly group size of this stage, n2: spacing inside a butterfly */
    n1 = n2;
    n2 >>= 2U;
    ia1 = 0U;

    j = 0;
    do
    {
      /* One twiddle triple is shared by all groups at offset j */
      ia2 = ia1 + ia1;
      ia3 = ia2 + ia1;
      co1 = pCoef[ia1 * 2U];
      si1 = pCoef[(ia1 * 2U) + 1U];
      co2 = pCoef[ia2 * 2U];
      si2 = pCoef[(ia2 * 2U) + 1U];
      co3 = pCoef[ia3 * 2U];
      si3 = pCoef[(ia3 * 2U) + 1U];

      ia1 = ia1 + twidCoefModifier;

      i0 = j;
      do
      {
        i1 = i0 + n2;
        i2 = i1 + n2;
        i3 = i2 + n2;

        /* xa + xc, xa - xc */
        r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
        r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];

        /* ya + yc, ya - yc */
        s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
        s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];

        /* xb + xd */
        t1 = pSrc[2U * i1] + pSrc[2U * i3];

        /* xa' = xa + xb + xc + xd */
        pSrc[2U * i0] = r1 + t1;

        /* (xa + xc) - (xb + xd) */
        r1 = r1 - t1;

        /* yb + yd */
        t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];

        /* ya' = ya + yb + yc + yd */
        pSrc[(2U * i0) + 1U] = s1 + t2;

        /* (ya + yc) - (yb + yd) */
        s1 = s1 - t2;

        /* (yb - yd), (xb - xd): must be read before pSrc[i1] is overwritten */
        t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
        t2 = pSrc[2U * i1] - pSrc[2U * i3];

        /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)si2 */
        pSrc[2U * i1] = (r1 * co2) + (s1 * si2);

        /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)si2 */
        pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);

        /* (xa - xc) + (yb - yd) */
        r1 = r2 + t1;

        /* (xa - xc) - (yb - yd) */
        r2 = r2 - t1;

        /* (ya - yc) - (xb - xd) */
        s1 = s2 - t2;

        /* (ya - yc) + (xb - xd) */
        s2 = s2 + t2;

        /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)si1 */
        pSrc[2U * i2] = (r1 * co1) + (s1 * si1);

        /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)si1 */
        pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);

        /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)si3 */
        pSrc[2U * i3] = (r2 * co3) + (s2 * si3);

        /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)si3 */
        pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);

        i0 += n1;
      } while (i0 < fftLen);
      j++;
    } while (j <= (n2 - 1U));
    twidCoefModifier <<= 2U;
  }

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

}
599
/**
  brief         Core function for the floating-point CIFFT butterfly process.
  param[in,out] pSrc              points to the in-place buffer of floating-point data type
                                  (interleaved real/imaginary, 2*fftLen values)
  param[in]     fftLen            length of the FFT; must be a power of 4 and at least 4
  param[in]     pCoef             points to twiddle coefficient buffer (interleaved cos/sin pairs)
  param[in]     twidCoefModifier  twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
  param[in]     onebyfftLen       value of 1/fftLen
  return        none

  Radix-4 decimation-in-frequency stages computed in place. Every butterfly
  reads four complex points a, b, c, d spaced n2 apart and writes back
      a' = a + b + c + d                       at i0
      c' = (a - b + c - d)     * (co2 + j*si2) at i1
      b' = (a + j*b - c - j*d) * (co1 + j*si1) at i2
      d' = (a - j*b - c + j*d) * (co3 + j*si3) at i3
  The last stage applies no twiddles and scales every output by onebyfftLen.
  Because c' and b' are stored swapped, the result is not in natural order;
  arm_cfft_radix4_f32() reorders it with arm_bitreversal_f32() when requested.

  A length that is not a power of 4 (or is below 4) would drive the stride n2
  to zero, so that loop bounds wrap or (for fftLen == 0) the last-stage loop
  never advances. Such lengths are rejected up front and the buffer is left
  untouched.
 */

void arm_radix4_butterfly_inverse_f32(
        float32_t * pSrc,
        uint16_t fftLen,
  const float32_t * pCoef,
        uint16_t twidCoefModifier,
        float32_t onebyfftLen)
{
  float32_t co1, co2, co3, si1, si2, si3;
  uint32_t ia1, ia2, ia3;
  uint32_t i0, i1, i2, i3;
  uint32_t n1, n2, j, k;

#if defined (ARM_MATH_LOOPUNROLL)

  float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
  float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd;
  float32_t Xaminusc, Xbminusd, Yaminusc, Ybminusd;
  float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
  float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
  float32_t *ptr1;
  float32_t p0, p1, p2, p3, p4, p5, p6, p7;
  float32_t a0, a1, a2, a3, a4, a5, a6, a7;

  /* Reject anything that is not 4, 16, 64, ... (single bit set at an even position) */
  if ((fftLen < 4U) || ((fftLen & (fftLen - 1U)) != 0U) || ((fftLen & 0x5555U) == 0U))
  {
    return;
  }

  /* n2 = fftLen/4 : distance between the four inputs of a first-stage butterfly */
  n2 = fftLen >> 2U;

  /* A 4-point transform consists of the twiddle-free last stage only; running
     the first stage as well would transform the same four points twice. */
  if (fftLen > 4U)
  {
    i0 = 0U;
    ia1 = 0U;
    j = n2;

    /* First stage: one butterfly per twiddle index */
    do
    {
      /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
      i1 = i0 + n2;
      i2 = i1 + n2;
      i3 = i2 + n2;

      xaIn = pSrc[(2U * i0)];
      yaIn = pSrc[(2U * i0) + 1U];

      xcIn = pSrc[(2U * i2)];
      ycIn = pSrc[(2U * i2) + 1U];

      xbIn = pSrc[(2U * i1)];
      ybIn = pSrc[(2U * i1) + 1U];

      xdIn = pSrc[(2U * i3)];
      ydIn = pSrc[(2U * i3) + 1U];

      /* Sums of opposite points */
      Xaplusc = xaIn + xcIn;
      Xbplusd = xbIn + xdIn;
      Yaplusc = yaIn + ycIn;
      Ybplusd = ybIn + ydIn;

      /* Twiddle for the c' output */
      ia2 = ia1 + ia1;
      co2 = pCoef[ia2 * 2U];
      si2 = pCoef[(ia2 * 2U) + 1U];

      /* Differences of opposite points */
      Xaminusc = xaIn - xcIn;
      Xbminusd = xbIn - xdIn;
      Yaminusc = yaIn - ycIn;
      Ybminusd = ybIn - ydIn;

      /* a' = a + b + c + d (no twiddle) */
      pSrc[(2U * i0)]      = Xaplusc + Xbplusd;
      pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;

      /* Untwiddled b', c', d' */
      Xb12C_out = (Xaminusc - Ybminusd);   /* (xa - xc) - (yb - yd) */
      Yb12C_out = (Yaminusc + Xbminusd);   /* (ya - yc) + (xb - xd) */
      Xc12C_out = (Xaplusc - Xbplusd);     /* (xa + xc) - (xb + xd) */
      Yc12C_out = (Yaplusc - Ybplusd);     /* (ya + yc) - (yb + yd) */
      Xd12C_out = (Xaminusc + Ybminusd);   /* (xa - xc) + (yb - yd) */
      Yd12C_out = (Yaminusc - Xbminusd);   /* (ya - yc) - (xb - xd) */

      /* Twiddles for the b' and d' outputs */
      co1 = pCoef[ia1 * 2U];
      si1 = pCoef[(ia1 * 2U) + 1U];

      ia3 = ia2 + ia1;
      co3 = pCoef[ia3 * 2U];
      si3 = pCoef[(ia3 * 2U) + 1U];

      /* Multiply by (co + j*si): x' = X*co - Y*si, y' = Y*co + X*si.
         The cosine and sine products are formed separately, then combined. */
      Xb12_out = Xb12C_out * co1;
      Yb12_out = Yb12C_out * co1;
      Xc12_out = Xc12C_out * co2;
      Yc12_out = Yc12C_out * co2;
      Xd12_out = Xd12C_out * co3;
      Yd12_out = Yd12C_out * co3;

      p0 = Yb12C_out * si1;
      p1 = Xb12C_out * si1;
      p2 = Yc12C_out * si2;
      p3 = Xc12C_out * si2;
      p4 = Yd12C_out * si3;
      p5 = Xd12C_out * si3;

      Xb12_out -= p0;
      Yb12_out += p1;
      Xc12_out -= p2;
      Yc12_out += p3;
      Xd12_out -= p4;
      Yd12_out += p5;

      /* c' goes to i1 and b' to i2 (swapped on purpose) */
      pSrc[2U * i1]        = Xc12_out;
      pSrc[(2U * i1) + 1U] = Yc12_out;

      pSrc[2U * i2]        = Xb12_out;
      pSrc[(2U * i2) + 1U] = Yb12_out;

      pSrc[2U * i3]        = Xd12_out;
      pSrc[(2U * i3) + 1U] = Yd12_out;

      /* Next twiddle and next input point */
      ia1 = ia1 + twidCoefModifier;
      i0 = i0 + 1U;

    } while (--j);

    twidCoefModifier <<= 2U;
  }

  /* Second stage up to, but excluding, the last stage */
  for (k = fftLen >> 2U; k > 4U; k >>= 2U)
  {
    /* n1: butterfly group size of this stage, n2: spacing inside a butterfly */
    n1 = n2;
    n2 >>= 2U;
    ia1 = 0U;

    j = 0;
    do
    {
      /* One twiddle triple is shared by all groups at offset j */
      ia2 = ia1 + ia1;
      ia3 = ia2 + ia1;
      co1 = pCoef[ia1 * 2U];
      si1 = pCoef[(ia1 * 2U) + 1U];
      co2 = pCoef[ia2 * 2U];
      si2 = pCoef[(ia2 * 2U) + 1U];
      co3 = pCoef[ia3 * 2U];
      si3 = pCoef[(ia3 * 2U) + 1U];

      ia1 = ia1 + twidCoefModifier;

      i0 = j;
      do
      {
        i1 = i0 + n2;
        i2 = i1 + n2;
        i3 = i2 + n2;

        xaIn = pSrc[(2U * i0)];
        yaIn = pSrc[(2U * i0) + 1U];

        xbIn = pSrc[(2U * i1)];
        ybIn = pSrc[(2U * i1) + 1U];

        xcIn = pSrc[(2U * i2)];
        ycIn = pSrc[(2U * i2) + 1U];

        xdIn = pSrc[(2U * i3)];
        ydIn = pSrc[(2U * i3) + 1U];

        Xaminusc = xaIn - xcIn;
        Xbminusd = xbIn - xdIn;
        Yaminusc = yaIn - ycIn;
        Ybminusd = ybIn - ydIn;

        Xaplusc = xaIn + xcIn;
        Xbplusd = xbIn + xdIn;
        Yaplusc = yaIn + ycIn;
        Ybplusd = ybIn + ydIn;

        Xb12C_out = (Xaminusc - Ybminusd);   /* (xa - xc) - (yb - yd) */
        Yb12C_out = (Yaminusc + Xbminusd);   /* (ya - yc) + (xb - xd) */
        Xc12C_out = (Xaplusc - Xbplusd);     /* (xa + xc) - (xb + xd) */
        Yc12C_out = (Yaplusc - Ybplusd);     /* (ya + yc) - (yb + yd) */
        Xd12C_out = (Xaminusc + Ybminusd);   /* (xa - xc) + (yb - yd) */
        Yd12C_out = (Yaminusc - Xbminusd);   /* (ya - yc) - (xb - xd) */

        /* a' = a + b + c + d */
        pSrc[(2U * i0)]      = Xaplusc + Xbplusd;
        pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;

        Xb12_out = Xb12C_out * co1;
        Yb12_out = Yb12C_out * co1;
        Xc12_out = Xc12C_out * co2;
        Yc12_out = Yc12C_out * co2;
        Xd12_out = Xd12C_out * co3;
        Yd12_out = Yd12C_out * co3;

        p0 = Yb12C_out * si1;
        p1 = Xb12C_out * si1;
        p2 = Yc12C_out * si2;
        p3 = Xc12C_out * si2;
        p4 = Yd12C_out * si3;
        p5 = Xd12C_out * si3;

        /* x' = X*co - Y*si, y' = Y*co + X*si */
        Xb12_out -= p0;
        Yb12_out += p1;
        Xc12_out -= p2;
        Yc12_out += p3;
        Xd12_out -= p4;
        Yd12_out += p5;

        pSrc[2U * i1]        = Xc12_out;
        pSrc[(2U * i1) + 1U] = Yc12_out;

        pSrc[2U * i2]        = Xb12_out;
        pSrc[(2U * i2) + 1U] = Yb12_out;

        pSrc[2U * i3]        = Xd12_out;
        pSrc[(2U * i3) + 1U] = Yd12_out;

        i0 += n1;
      } while (i0 < fftLen);
      j++;
    } while (j <= (n2 - 1U));
    twidCoefModifier <<= 2U;
  }

  /* Last stage: fftLen/4 butterflies over four adjacent points, no twiddles,
     every output scaled by onebyfftLen */
  j = fftLen >> 2;
  ptr1 = &pSrc[0];

  do
  {
    xaIn = ptr1[0];
    yaIn = ptr1[1];
    xbIn = ptr1[2];
    ybIn = ptr1[3];
    xcIn = ptr1[4];
    ycIn = ptr1[5];
    xdIn = ptr1[6];
    ydIn = ptr1[7];

    Xaplusc  = xaIn + xcIn;
    Xaminusc = xaIn - xcIn;
    Yaplusc  = yaIn + ycIn;
    Yaminusc = yaIn - ycIn;
    Xbplusd  = xbIn + xdIn;
    Ybplusd  = ybIn + ydIn;
    Xbminusd = xbIn - xdIn;
    Ybminusd = ybIn - ydIn;

    a0 = (Xaplusc + Xbplusd);     /* xa + xb + xc + xd */
    a1 = (Yaplusc + Ybplusd);     /* ya + yb + yc + yd */
    a2 = (Xaplusc - Xbplusd);     /* xa - xb + xc - xd */
    a3 = (Yaplusc - Ybplusd);     /* ya - yb + yc - yd */
    a4 = (Xaminusc - Ybminusd);   /* xa - yb - xc + yd */
    a5 = (Yaminusc + Xbminusd);   /* ya + xb - yc - xd */
    a6 = (Xaminusc + Ybminusd);   /* xa + yb - xc - yd */
    a7 = (Yaminusc - Xbminusd);   /* ya - xb - yc + xd */

    p0 = a0 * onebyfftLen;
    p1 = a1 * onebyfftLen;
    p2 = a2 * onebyfftLen;
    p3 = a3 * onebyfftLen;
    p4 = a4 * onebyfftLen;
    p5 = a5 * onebyfftLen;
    p6 = a6 * onebyfftLen;
    p7 = a7 * onebyfftLen;

    ptr1[0] = p0;   /* xa' */
    ptr1[1] = p1;   /* ya' */
    ptr1[2] = p2;   /* xc' */
    ptr1[3] = p3;   /* yc' */
    ptr1[4] = p4;   /* xb' */
    ptr1[5] = p5;   /* yb' */
    ptr1[6] = p6;   /* xd' */
    ptr1[7] = p7;   /* yd' */

    /* Advance by four complex points */
    ptr1 = ptr1 + 8U;

  } while (--j);

#else

  float32_t t1, t2, r1, r2, s1, s2;

  /* Reject anything that is not 4, 16, 64, ... (single bit set at an even position) */
  if ((fftLen < 4U) || ((fftLen & (fftLen - 1U)) != 0U) || ((fftLen & 0x5555U) == 0U))
  {
    return;
  }

  n2 = fftLen;

  /* All stages except the last one */
  for (k = fftLen; k > 4U; k >>= 2U)
  {
    /* n1: butterfly group size of this stage, n2: spacing inside a butterfly */
    n1 = n2;
    n2 >>= 2U;
    ia1 = 0U;

    j = 0;
    do
    {
      /* One twiddle triple is shared by all groups at offset j */
      ia2 = ia1 + ia1;
      ia3 = ia2 + ia1;
      co1 = pCoef[ia1 * 2U];
      si1 = pCoef[(ia1 * 2U) + 1U];
      co2 = pCoef[ia2 * 2U];
      si2 = pCoef[(ia2 * 2U) + 1U];
      co3 = pCoef[ia3 * 2U];
      si3 = pCoef[(ia3 * 2U) + 1U];

      ia1 = ia1 + twidCoefModifier;

      i0 = j;
      do
      {
        i1 = i0 + n2;
        i2 = i1 + n2;
        i3 = i2 + n2;

        /* xa + xc, xa - xc */
        r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
        r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];

        /* ya + yc, ya - yc */
        s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
        s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];

        /* xb + xd */
        t1 = pSrc[2U * i1] + pSrc[2U * i3];

        /* xa' = xa + xb + xc + xd */
        pSrc[2U * i0] = r1 + t1;

        /* (xa + xc) - (xb + xd) */
        r1 = r1 - t1;

        /* yb + yd */
        t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];

        /* ya' = ya + yb + yc + yd */
        pSrc[(2U * i0) + 1U] = s1 + t2;

        /* (ya + yc) - (yb + yd) */
        s1 = s1 - t2;

        /* (yb - yd), (xb - xd): must be read before pSrc[i1] is overwritten */
        t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
        t2 = pSrc[2U * i1] - pSrc[2U * i3];

        /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)si2 */
        pSrc[2U * i1] = (r1 * co2) - (s1 * si2);

        /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)si2 */
        pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2);

        /* (xa - xc) - (yb - yd) */
        r1 = r2 - t1;

        /* (xa - xc) + (yb - yd) */
        r2 = r2 + t1;

        /* (ya - yc) + (xb - xd) */
        s1 = s2 + t2;

        /* (ya - yc) - (xb - xd) */
        s2 = s2 - t2;

        /* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)si1 */
        pSrc[2U * i2] = (r1 * co1) - (s1 * si1);

        /* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)si1 */
        pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1);

        /* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)si3 */
        pSrc[2U * i3] = (r2 * co3) - (s2 * si3);

        /* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)si3 */
        pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3);

        i0 += n1;
      } while (i0 < fftLen);
      j++;
    } while (j <= (n2 - 1U));
    twidCoefModifier <<= 2U;
  }

  /* Last stage: no twiddles, every output scaled by onebyfftLen */
  n1 = n2;
  n2 >>= 2U;

  for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
  {
    i1 = i0 + n2;
    i2 = i1 + n2;
    i3 = i2 + n2;

    /* xa + xc, xa - xc */
    r1 = pSrc[2U * i0] + pSrc[2U * i2];
    r2 = pSrc[2U * i0] - pSrc[2U * i2];

    /* ya + yc, ya - yc */
    s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
    s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];

    /* xb + xd */
    t1 = pSrc[2U * i1] + pSrc[2U * i3];

    /* xa' = (xa + xb + xc + xd) * onebyfftLen */
    pSrc[2U * i0] = (r1 + t1) * onebyfftLen;

    /* (xa + xc) - (xb + xd) */
    r1 = r1 - t1;

    /* yb + yd */
    t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];

    /* ya' = (ya + yb + yc + yd) * onebyfftLen */
    pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen;

    /* (ya + yc) - (yb + yd) */
    s1 = s1 - t2;

    /* (yb - yd), (xb - xd): must be read before pSrc[i1] is overwritten */
    t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
    t2 = pSrc[2U * i1] - pSrc[2U * i3];

    /* xc' = (xa - xb + xc - xd) * onebyfftLen */
    pSrc[2U * i1] = r1 * onebyfftLen;

    /* yc' = (ya - yb + yc - yd) * onebyfftLen */
    pSrc[(2U * i1) + 1U] = s1 * onebyfftLen;

    /* (xa - xc) - (yb - yd) */
    r1 = r2 - t1;

    /* (xa - xc) + (yb - yd) */
    r2 = r2 + t1;

    /* (ya - yc) + (xb - xd) */
    s1 = s2 + t2;

    /* (ya - yc) - (xb - xd) */
    s2 = s2 - t2;

    /* xb' = (xa - yb - xc + yd) * onebyfftLen */
    pSrc[2U * i2] = r1 * onebyfftLen;

    /* yb' = (ya + xb - yc - xd) * onebyfftLen */
    pSrc[(2U * i2) + 1U] = s1 * onebyfftLen;

    /* xd' = (xa + yb - xc - yd) * onebyfftLen */
    pSrc[2U * i3] = r2 * onebyfftLen;

    /* yd' = (ya - xb - yc + xd) * onebyfftLen */
    pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
  }

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
}
1202
1203
1204