• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_rfft_fast_f32.c
4  * Description:  RFFT & RIFFT Floating point process function
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/transform_functions.h"
30 
31 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
stage_rfft_f32(const arm_rfft_fast_instance_f32 * S,float32_t * p,float32_t * pOut)32 void stage_rfft_f32(
33   const arm_rfft_fast_instance_f32 * S,
34         float32_t * p,
35         float32_t * pOut)
36 {
37         int32_t  k;                                /* Loop Counter */
38         float32_t twR, twI;                         /* RFFT Twiddle coefficients */
39   const float32_t * pCoeff = S->pTwiddleRFFT;       /* Points to RFFT Twiddle factors */
40         float32_t *pA = p;                          /* increasing pointer */
41         float32_t *pB = p;                          /* decreasing pointer */
42         float32_t xAR, xAI, xBR, xBI;               /* temporary variables */
43         float32_t t1a, t1b;                         /* temporary variables */
44         float32_t p0, p1, p2, p3;                   /* temporary variables */
45 
46         float32x4x2_t tw,xA,xB;
47         float32x4x2_t tmp1, tmp2, res;
48 
49         uint32x4_t     vecStridesFwd, vecStridesBkwd;
50 
51         vecStridesFwd = vidupq_u32((uint32_t)0, 2);
52         vecStridesBkwd = -vecStridesFwd;
53 
54         int blockCnt;
55 
56 
57    k = (S->Sint).fftLen - 1;
58 
59    /* Pack first and last sample of the frequency domain together */
60 
61    xBR = pB[0];
62    xBI = pB[1];
63    xAR = pA[0];
64    xAI = pA[1];
65 
66    twR = *pCoeff++ ;
67    twI = *pCoeff++ ;
68 
69    // U1 = XA(1) + XB(1); % It is real
70    t1a = xBR + xAR  ;
71 
72    // U2 = XB(1) - XA(1); % It is imaginary
73    t1b = xBI + xAI  ;
74 
75    // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
76    // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
77    *pOut++ = 0.5f * ( t1a + t1b );
78    *pOut++ = 0.5f * ( t1a - t1b );
79 
80    // XA(1) = 1/2*( U1 - imag(U2) +  i*( U1 +imag(U2) ));
81    pB  = p + 2*k;
82    pA += 2;
83 
84    blockCnt = k >> 2;
85    while (blockCnt > 0)
86    {
87       /*
88          function X = my_split_rfft(X, ifftFlag)
89          % X is a series of real numbers
90          L  = length(X);
91          XC = X(1:2:end) +i*X(2:2:end);
92          XA = fft(XC);
93          XB = conj(XA([1 end:-1:2]));
94          TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
95          for l = 2:L/2
96             XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
97          end
98          XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
99          X = XA;
100       */
101 
102 
103       xA = vld2q_f32(pA);
104       pA += 8;
105 
106       xB = vld2q_f32(pB);
107 
108       xB.val[0] = vldrwq_gather_shifted_offset_f32(pB, vecStridesBkwd);
109       xB.val[1] = vldrwq_gather_shifted_offset_f32(&pB[1], vecStridesBkwd);
110 
111       xB.val[1] = vnegq_f32(xB.val[1]);
112       pB -= 8;
113 
114 
115       tw = vld2q_f32(pCoeff);
116       pCoeff += 8;
117 
118 
119       tmp1.val[0] = vaddq_f32(xA.val[0],xB.val[0]);
120       tmp1.val[1] = vaddq_f32(xA.val[1],xB.val[1]);
121 
122       tmp2.val[0] = vsubq_f32(xB.val[0],xA.val[0]);
123       tmp2.val[1] = vsubq_f32(xB.val[1],xA.val[1]);
124 
125       res.val[0] = vmulq(tw.val[0], tmp2.val[0]);
126       res.val[0] = vfmsq(res.val[0],tw.val[1], tmp2.val[1]);
127 
128       res.val[1] = vmulq(tw.val[0], tmp2.val[1]);
129       res.val[1] = vfmaq(res.val[1], tw.val[1], tmp2.val[0]);
130 
131       res.val[0] = vaddq_f32(res.val[0],tmp1.val[0] );
132       res.val[1] = vaddq_f32(res.val[1],tmp1.val[1] );
133 
134       res.val[0] = vmulq_n_f32(res.val[0], 0.5f);
135       res.val[1] = vmulq_n_f32(res.val[1], 0.5f);
136 
137 
138       vst2q_f32(pOut, res);
139       pOut += 8;
140 
141 
142       blockCnt--;
143    }
144 
145    blockCnt = k & 3;
146    while (blockCnt > 0)
147    {
148       /*
149          function X = my_split_rfft(X, ifftFlag)
150          % X is a series of real numbers
151          L  = length(X);
152          XC = X(1:2:end) +i*X(2:2:end);
153          XA = fft(XC);
154          XB = conj(XA([1 end:-1:2]));
155          TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
156          for l = 2:L/2
157             XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
158          end
159          XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
160          X = XA;
161       */
162 
163       xBI = pB[1];
164       xBR = pB[0];
165       xAR = pA[0];
166       xAI = pA[1];
167 
168       twR = *pCoeff++;
169       twI = *pCoeff++;
170 
171       t1a = xBR - xAR ;
172       t1b = xBI + xAI ;
173 
174       // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
175       // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
176       p0 = twR * t1a;
177       p1 = twI * t1a;
178       p2 = twR * t1b;
179       p3 = twI * t1b;
180 
181       *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
182       *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
183 
184       pA += 2;
185       pB -= 2;
186       blockCnt--;
187    }
188 }
189 
190 /* Prepares data for inverse cfft */
merge_rfft_f32(const arm_rfft_fast_instance_f32 * S,float32_t * p,float32_t * pOut)191 void merge_rfft_f32(
192   const arm_rfft_fast_instance_f32 * S,
193         float32_t * p,
194         float32_t * pOut)
195 {
196         int32_t  k;                                /* Loop Counter */
197         float32_t twR, twI;                         /* RFFT Twiddle coefficients */
198   const float32_t *pCoeff = S->pTwiddleRFFT;        /* Points to RFFT Twiddle factors */
199         float32_t *pA = p;                          /* increasing pointer */
200         float32_t *pB = p;                          /* decreasing pointer */
201         float32_t xAR, xAI, xBR, xBI;               /* temporary variables */
202         float32_t t1a, t1b, r, s, t, u;             /* temporary variables */
203 
204         float32x4x2_t tw,xA,xB;
205         float32x4x2_t tmp1, tmp2, res;
206         uint32x4_t     vecStridesFwd, vecStridesBkwd;
207 
208         vecStridesFwd = vidupq_u32((uint32_t)0, 2);
209         vecStridesBkwd = -vecStridesFwd;
210 
211         int blockCnt;
212 
213 
214    k = (S->Sint).fftLen - 1;
215 
216    xAR = pA[0];
217    xAI = pA[1];
218 
219    pCoeff += 2 ;
220 
221    *pOut++ = 0.5f * ( xAR + xAI );
222    *pOut++ = 0.5f * ( xAR - xAI );
223 
224    pB  =  p + 2*k ;
225    pA +=  2    ;
226 
227    blockCnt = k >> 2;
228    while (blockCnt > 0)
229    {
230       /* G is half of the frequency complex spectrum */
231       //for k = 2:N
232       //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
233       xA = vld2q_f32(pA);
234       pA += 8;
235 
236       xB = vld2q_f32(pB);
237 
238       xB.val[0] = vldrwq_gather_shifted_offset_f32(pB, vecStridesBkwd);
239       xB.val[1] = vldrwq_gather_shifted_offset_f32(&pB[1], vecStridesBkwd);
240 
241       xB.val[1] = vnegq_f32(xB.val[1]);
242       pB -= 8;
243 
244 
245       tw = vld2q_f32(pCoeff);
246       tw.val[1] = vnegq_f32(tw.val[1]);
247       pCoeff += 8;
248 
249 
250       tmp1.val[0] = vaddq_f32(xA.val[0],xB.val[0]);
251       tmp1.val[1] = vaddq_f32(xA.val[1],xB.val[1]);
252 
253       tmp2.val[0] = vsubq_f32(xB.val[0],xA.val[0]);
254       tmp2.val[1] = vsubq_f32(xB.val[1],xA.val[1]);
255 
256       res.val[0] = vmulq(tw.val[0], tmp2.val[0]);
257       res.val[0] = vfmsq(res.val[0],tw.val[1], tmp2.val[1]);
258 
259       res.val[1] = vmulq(tw.val[0], tmp2.val[1]);
260       res.val[1] = vfmaq(res.val[1], tw.val[1], tmp2.val[0]);
261 
262       res.val[0] = vaddq_f32(res.val[0],tmp1.val[0] );
263       res.val[1] = vaddq_f32(res.val[1],tmp1.val[1] );
264 
265       res.val[0] = vmulq_n_f32(res.val[0], 0.5f);
266       res.val[1] = vmulq_n_f32(res.val[1], 0.5f);
267 
268 
269       vst2q_f32(pOut, res);
270       pOut += 8;
271 
272 
273       blockCnt--;
274    }
275 
276    blockCnt = k & 3;
277    while (blockCnt > 0)
278    {
279       /* G is half of the frequency complex spectrum */
280       //for k = 2:N
281       //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
282       xBI =   pB[1]    ;
283       xBR =   pB[0]    ;
284       xAR =  pA[0];
285       xAI =  pA[1];
286 
287       twR = *pCoeff++;
288       twI = *pCoeff++;
289 
290       t1a = xAR - xBR ;
291       t1b = xAI + xBI ;
292 
293       r = twR * t1a;
294       s = twI * t1b;
295       t = twI * t1a;
296       u = twR * t1b;
297 
298       // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
299       // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
300       *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
301       *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
302 
303       pA += 2;
304       pB -= 2;
305       blockCnt--;
306    }
307 
308 }
309 #else
stage_rfft_f32(const arm_rfft_fast_instance_f32 * S,float32_t * p,float32_t * pOut)310 void stage_rfft_f32(
311   const arm_rfft_fast_instance_f32 * S,
312         float32_t * p,
313         float32_t * pOut)
314 {
315         int32_t  k;                                /* Loop Counter */
316         float32_t twR, twI;                         /* RFFT Twiddle coefficients */
317   const float32_t * pCoeff = S->pTwiddleRFFT;       /* Points to RFFT Twiddle factors */
318         float32_t *pA = p;                          /* increasing pointer */
319         float32_t *pB = p;                          /* decreasing pointer */
320         float32_t xAR, xAI, xBR, xBI;               /* temporary variables */
321         float32_t t1a, t1b;                         /* temporary variables */
322         float32_t p0, p1, p2, p3;                   /* temporary variables */
323 
324 
325    k = (S->Sint).fftLen - 1;
326 
327    /* Pack first and last sample of the frequency domain together */
328 
329    xBR = pB[0];
330    xBI = pB[1];
331    xAR = pA[0];
332    xAI = pA[1];
333 
334    twR = *pCoeff++ ;
335    twI = *pCoeff++ ;
336 
337 
338    // U1 = XA(1) + XB(1); % It is real
339    t1a = xBR + xAR  ;
340 
341    // U2 = XB(1) - XA(1); % It is imaginary
342    t1b = xBI + xAI  ;
343 
344    // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
345    // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
346    *pOut++ = 0.5f * ( t1a + t1b );
347    *pOut++ = 0.5f * ( t1a - t1b );
348 
349    // XA(1) = 1/2*( U1 - imag(U2) +  i*( U1 +imag(U2) ));
350    pB  = p + 2*k;
351    pA += 2;
352 
353    do
354    {
355       /*
356          function X = my_split_rfft(X, ifftFlag)
357          % X is a series of real numbers
358          L  = length(X);
359          XC = X(1:2:end) +i*X(2:2:end);
360          XA = fft(XC);
361          XB = conj(XA([1 end:-1:2]));
362          TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
363          for l = 2:L/2
364             XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
365          end
366          XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
367          X = XA;
368       */
369 
370       xBI = pB[1];
371       xBR = pB[0];
372       xAR = pA[0];
373       xAI = pA[1];
374 
375       twR = *pCoeff++;
376       twI = *pCoeff++;
377 
378       t1a = xBR - xAR ;
379       t1b = xBI + xAI ;
380 
381       // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
382       // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
383       p0 = twR * t1a;
384       p1 = twI * t1a;
385       p2 = twR * t1b;
386       p3 = twI * t1b;
387 
388       *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
389       *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
390 
391 
392       pA += 2;
393       pB -= 2;
394       k--;
395    } while (k > 0);
396 }
397 
398 /* Prepares data for inverse cfft */
merge_rfft_f32(const arm_rfft_fast_instance_f32 * S,float32_t * p,float32_t * pOut)399 void merge_rfft_f32(
400   const arm_rfft_fast_instance_f32 * S,
401         float32_t * p,
402         float32_t * pOut)
403 {
404         int32_t  k;                                /* Loop Counter */
405         float32_t twR, twI;                         /* RFFT Twiddle coefficients */
406   const float32_t *pCoeff = S->pTwiddleRFFT;        /* Points to RFFT Twiddle factors */
407         float32_t *pA = p;                          /* increasing pointer */
408         float32_t *pB = p;                          /* decreasing pointer */
409         float32_t xAR, xAI, xBR, xBI;               /* temporary variables */
410         float32_t t1a, t1b, r, s, t, u;             /* temporary variables */
411 
412    k = (S->Sint).fftLen - 1;
413 
414    xAR = pA[0];
415    xAI = pA[1];
416 
417    pCoeff += 2 ;
418 
419    *pOut++ = 0.5f * ( xAR + xAI );
420    *pOut++ = 0.5f * ( xAR - xAI );
421 
422    pB  =  p + 2*k ;
423    pA +=  2	   ;
424 
425    while (k > 0)
426    {
427       /* G is half of the frequency complex spectrum */
428       //for k = 2:N
429       //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
430       xBI =   pB[1]    ;
431       xBR =   pB[0]    ;
432       xAR =  pA[0];
433       xAI =  pA[1];
434 
435       twR = *pCoeff++;
436       twI = *pCoeff++;
437 
438       t1a = xAR - xBR ;
439       t1b = xAI + xBI ;
440 
441       r = twR * t1a;
442       s = twI * t1b;
443       t = twI * t1a;
444       u = twR * t1b;
445 
446       // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
447       // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
448       *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
449       *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
450 
451       pA += 2;
452       pB -= 2;
453       k--;
454    }
455 
456 }
457 
458 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
459 
460 /**
461   @ingroup groupTransforms
462 */
463 
464 /**
465   @defgroup RealFFT Real FFT Functions
466 
467   @par
468                    The CMSIS DSP library includes specialized algorithms for computing the
469                    FFT of real data sequences.  The FFT is defined over complex data but
470                    in many applications the input is real.  Real FFT algorithms take advantage
471                    of the symmetry properties of the FFT and have a speed advantage over complex
472                    algorithms of the same length.
473   @par
474                    The Fast RFFT algorithm relays on the mixed radix CFFT that save processor usage.
475   @par
476                    The real length N forward FFT of a sequence is computed using the steps shown below.
477   @par
478                    \image html RFFT.gif "Real Fast Fourier Transform"
479   @par
480                    The real sequence is initially treated as if it were complex to perform a CFFT.
481                    Later, a processing stage reshapes the data to obtain half of the frequency spectrum
482                    in complex format. Except the first complex number that contains the two real numbers
483                    X[0] and X[N/2] all the data is complex. In other words, the first complex sample
484                    contains two real values packed.
485   @par
486                    The input for the inverse RFFT should keep the same format as the output of the
487                    forward RFFT. A first processing stage pre-process the data to later perform an
488                    inverse CFFT.
489   @par
490                    \image html RIFFT.gif "Real Inverse Fast Fourier Transform"
491   @par
492                    The algorithms for floating-point, Q15, and Q31 data are slightly different
493                    and we describe each algorithm in turn.
494   @par           Floating-point
495                    The main functions are \ref arm_rfft_fast_f32() and \ref arm_rfft_fast_init_f32().
496                    The older functions \ref arm_rfft_f32() and \ref arm_rfft_init_f32() have been deprecated
497                    but are still documented.
498   @par
499                    The FFT of a real N-point sequence has even symmetry in the frequency domain.
500                    The second half of the data equals the conjugate of the first half flipped in frequency.
501                    Looking at the data, we see that we can uniquely represent the FFT using only N/2 complex numbers.
502                    These are packed into the output array in alternating real and imaginary components:
503   @par
504                    X = { real[0], imag[0], real[1], imag[1], real[2], imag[2] ...
505                    real[(N/2)-1], imag[(N/2)-1 }
506   @par
507                    It happens that the first complex number (real[0], imag[0]) is actually
508                    all real. real[0] represents the DC offset, and imag[0] should be 0.
509                    (real[1], imag[1]) is the fundamental frequency, (real[2], imag[2]) is
510                    the first harmonic and so on.
511   @par
512                    The real FFT functions pack the frequency domain data in this fashion.
513                    The forward transform outputs the data in this form and the inverse
514                    transform expects input data in this form. The function always performs
515                    the needed bitreversal so that the input and output data is always in
516                    normal order. The functions support lengths of [32, 64, 128, ..., 4096]
517                    samples.
518   @par           Q15 and Q31
519                    The real algorithms are defined in a similar manner and utilize N/2 complex
520                    transforms behind the scenes.
521   @par
522                    The complex transforms used internally include scaling to prevent fixed-point
523                    overflows.  The overall scaling equals 1/(fftLen/2).
524                    Due to the use of complex transform internally, the source buffer is
525                    modified by the rfft.
526   @par
527                    A separate instance structure must be defined for each transform used but
528                    twiddle factor and bit reversal tables can be reused.
529   @par
530                    There is also an associated initialization function for each data type.
531                    The initialization function performs the following operations:
532                     - Sets the values of the internal structure fields.
533                     - Initializes twiddle factor table and bit reversal table pointers.
534                     - Initializes the internal complex FFT data structure.
535   @par
536                    Use of the initialization function is optional **except for MVE versions where it is mandatory**.
537                    If you don't use the initialization functions, then the structures should be initialized with code
538                    similar to the one below:
539   <pre>
540       arm_rfft_instance_q31 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
541       arm_rfft_instance_q15 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
542   </pre>
543                    where <code>fftLenReal</code> is the length of the real transform;
544                    <code>fftLenBy2</code> length of  the internal complex transform (fftLenReal/2).
545                    <code>ifftFlagR</code> Selects forward (=0) or inverse (=1) transform.
546                    <code>bitReverseFlagR</code> Selects bit reversed output (=0) or normal order
547                    output (=1).
548                    <code>twidCoefRModifier</code> stride modifier for the twiddle factor table.
549                    The value is based on the FFT length;
550                    <code>pTwiddleAReal</code>points to the A array of twiddle coefficients;
551                    <code>pTwiddleBReal</code>points to the B array of twiddle coefficients;
552                    <code>pCfft</code> points to the CFFT Instance structure. The CFFT structure
553                    must also be initialized.
554 @par
555                    Note that with MVE versions you can't initialize instance structures directly and **must
556                    use the initialization function**.
557  */
558 
559 /**
560   @addtogroup RealFFT
561   @{
562 */
563 
564 /**
565   @brief         Processing function for the floating-point real FFT.
566   @param[in]     S         points to an arm_rfft_fast_instance_f32 structure
567   @param[in]     p         points to input buffer (Source buffer is modified by this function.)
568   @param[in]     pOut      points to output buffer
569   @param[in]     ifftFlag
570                    - value = 0: RFFT
571                    - value = 1: RIFFT
572   @return        none
573 */
574 
arm_rfft_fast_f32(const arm_rfft_fast_instance_f32 * S,float32_t * p,float32_t * pOut,uint8_t ifftFlag)575 void arm_rfft_fast_f32(
576   const arm_rfft_fast_instance_f32 * S,
577   float32_t * p,
578   float32_t * pOut,
579   uint8_t ifftFlag)
580 {
581    const arm_cfft_instance_f32 * Sint = &(S->Sint);
582 
583    /* Calculation of Real FFT */
584    if (ifftFlag)
585    {
586       /*  Real FFT compression */
587       merge_rfft_f32(S, p, pOut);
588       /* Complex radix-4 IFFT process */
589       arm_cfft_f32( Sint, pOut, ifftFlag, 1);
590    }
591    else
592    {
593       /* Calculation of RFFT of input */
594       arm_cfft_f32( Sint, p, ifftFlag, 1);
595 
596       /*  Real FFT extraction */
597       stage_rfft_f32(S, p, pOut);
598    }
599 }
600 
601 /**
602 * @} end of RealFFT group
603 */
604