• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_rfft_fast_f16.c
4  * Description:  RFFT & RIFFT Floating point process function
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/transform_functions_f16.h"
30 #include "arm_common_tables_f16.h"
31 
32 #if defined(ARM_FLOAT16_SUPPORTED)
33 
34 
35 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
36 
stage_rfft_f16(const arm_rfft_fast_instance_f16 * S,float16_t * p,float16_t * pOut)37 void stage_rfft_f16(
38   const arm_rfft_fast_instance_f16 * S,
39         float16_t * p,
40         float16_t * pOut)
41 {
42         int32_t  k;                                /* Loop Counter */
43         float16_t twR, twI;                         /* RFFT Twiddle coefficients */
44   const float16_t * pCoeff = S->pTwiddleRFFT;       /* Points to RFFT Twiddle factors */
45         float16_t *pA = p;                          /* increasing pointer */
46         float16_t *pB = p;                          /* decreasing pointer */
47         float16_t xAR, xAI, xBR, xBI;               /* temporary variables */
48         float16_t t1a, t1b;                         /* temporary variables */
49         float16_t p0, p1, p2, p3;                   /* temporary variables */
50 
51         float16x8x2_t tw,xA,xB;
52         float16x8x2_t tmp1, tmp2, res;
53 
54         uint16x8_t     vecStridesBkwd;
55 
56         vecStridesBkwd = vddupq_u16((uint16_t)14, 2);
57 
58 
59         int blockCnt;
60 
61 
62    k = (S->Sint).fftLen - 1;
63 
64    /* Pack first and last sample of the frequency domain together */
65 
66    xBR = pB[0];
67    xBI = pB[1];
68    xAR = pA[0];
69    xAI = pA[1];
70 
71    twR = *pCoeff++ ;
72    twI = *pCoeff++ ;
73 
74    // U1 = XA(1) + XB(1); % It is real
75    t1a = xBR + xAR  ;
76 
77    // U2 = XB(1) - XA(1); % It is imaginary
78    t1b = xBI + xAI  ;
79 
80    // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
81    // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
82    *pOut++ = 0.5f * ( t1a + t1b );
83    *pOut++ = 0.5f * ( t1a - t1b );
84 
85    // XA(1) = 1/2*( U1 - imag(U2) +  i*( U1 +imag(U2) ));
86    pB  = p + 2*k - 14;
87    pA += 2;
88 
89    blockCnt = k >> 3;
90    while (blockCnt > 0)
91    {
92       /*
93          function X = my_split_rfft(X, ifftFlag)
94          % X is a series of real numbers
95          L  = length(X);
96          XC = X(1:2:end) +i*X(2:2:end);
97          XA = fft(XC);
98          XB = conj(XA([1 end:-1:2]));
99          TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
100          for l = 2:L/2
101             XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
102          end
103          XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
104          X = XA;
105       */
106 
107 
108       xA = vld2q_f16(pA);
109       pA += 16;
110 
111       xB = vld2q_f16(pB);
112 
113       xB.val[0] = vldrhq_gather_shifted_offset_f16(pB, vecStridesBkwd);
114       xB.val[1] = vldrhq_gather_shifted_offset_f16(&pB[1], vecStridesBkwd);
115 
116       xB.val[1] = vnegq_f16(xB.val[1]);
117       pB -= 16;
118 
119 
120       tw = vld2q_f16(pCoeff);
121       pCoeff += 16;
122 
123 
124       tmp1.val[0] = vaddq_f16(xA.val[0],xB.val[0]);
125       tmp1.val[1] = vaddq_f16(xA.val[1],xB.val[1]);
126 
127       tmp2.val[0] = vsubq_f16(xB.val[0],xA.val[0]);
128       tmp2.val[1] = vsubq_f16(xB.val[1],xA.val[1]);
129 
130       res.val[0] = vmulq(tw.val[0], tmp2.val[0]);
131       res.val[0] = vfmsq(res.val[0],tw.val[1], tmp2.val[1]);
132 
133       res.val[1] = vmulq(tw.val[0], tmp2.val[1]);
134       res.val[1] = vfmaq(res.val[1], tw.val[1], tmp2.val[0]);
135 
136       res.val[0] = vaddq_f16(res.val[0],tmp1.val[0] );
137       res.val[1] = vaddq_f16(res.val[1],tmp1.val[1] );
138 
139       res.val[0] = vmulq_n_f16(res.val[0], 0.5f);
140       res.val[1] = vmulq_n_f16(res.val[1], 0.5f);
141 
142 
143       vst2q_f16(pOut, res);
144       pOut += 16;
145 
146 
147       blockCnt--;
148    }
149 
150    pB += 14;
151    blockCnt = k & 7;
152    while (blockCnt > 0)
153    {
154       /*
155          function X = my_split_rfft(X, ifftFlag)
156          % X is a series of real numbers
157          L  = length(X);
158          XC = X(1:2:end) +i*X(2:2:end);
159          XA = fft(XC);
160          XB = conj(XA([1 end:-1:2]));
161          TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
162          for l = 2:L/2
163             XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
164          end
165          XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
166          X = XA;
167       */
168 
169       xBI = pB[1];
170       xBR = pB[0];
171       xAR = pA[0];
172       xAI = pA[1];
173 
174       twR = *pCoeff++;
175       twI = *pCoeff++;
176 
177       t1a = xBR - xAR ;
178       t1b = xBI + xAI ;
179 
180       // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
181       // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
182       p0 = twR * t1a;
183       p1 = twI * t1a;
184       p2 = twR * t1b;
185       p3 = twI * t1b;
186 
187       *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
188       *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
189 
190       pA += 2;
191       pB -= 2;
192       blockCnt--;
193    }
194 }
195 
196 /* Prepares data for inverse cfft */
merge_rfft_f16(const arm_rfft_fast_instance_f16 * S,float16_t * p,float16_t * pOut)197 void merge_rfft_f16(
198   const arm_rfft_fast_instance_f16 * S,
199         float16_t * p,
200         float16_t * pOut)
201 {
202         int32_t  k;                                /* Loop Counter */
203         float16_t twR, twI;                         /* RFFT Twiddle coefficients */
204   const float16_t *pCoeff = S->pTwiddleRFFT;        /* Points to RFFT Twiddle factors */
205         float16_t *pA = p;                          /* increasing pointer */
206         float16_t *pB = p;                          /* decreasing pointer */
207         float16_t xAR, xAI, xBR, xBI;               /* temporary variables */
208         float16_t t1a, t1b, r, s, t, u;             /* temporary variables */
209 
210         float16x8x2_t tw,xA,xB;
211         float16x8x2_t tmp1, tmp2, res;
212         uint16x8_t     vecStridesBkwd;
213 
214         vecStridesBkwd = vddupq_u16((uint16_t)14, 2);
215 
216         int blockCnt;
217 
218 
219    k = (S->Sint).fftLen - 1;
220 
221    xAR = pA[0];
222    xAI = pA[1];
223 
224    pCoeff += 2 ;
225 
226    *pOut++ = 0.5f * ( xAR + xAI );
227    *pOut++ = 0.5f * ( xAR - xAI );
228 
229    pB  =  p + 2*k - 14;
230    pA +=  2    ;
231 
232    blockCnt = k >> 3;
233    while (blockCnt > 0)
234    {
235       /* G is half of the frequency complex spectrum */
236       //for k = 2:N
237       //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
238       xA = vld2q_f16(pA);
239       pA += 16;
240 
241       xB = vld2q_f16(pB);
242 
243       xB.val[0] = vldrhq_gather_shifted_offset_f16(pB, vecStridesBkwd);
244       xB.val[1] = vldrhq_gather_shifted_offset_f16(&pB[1], vecStridesBkwd);
245 
246       xB.val[1] = vnegq_f16(xB.val[1]);
247       pB -= 16;
248 
249 
250       tw = vld2q_f16(pCoeff);
251       tw.val[1] = vnegq_f16(tw.val[1]);
252       pCoeff += 16;
253 
254 
255       tmp1.val[0] = vaddq_f16(xA.val[0],xB.val[0]);
256       tmp1.val[1] = vaddq_f16(xA.val[1],xB.val[1]);
257 
258       tmp2.val[0] = vsubq_f16(xB.val[0],xA.val[0]);
259       tmp2.val[1] = vsubq_f16(xB.val[1],xA.val[1]);
260 
261       res.val[0] = vmulq(tw.val[0], tmp2.val[0]);
262       res.val[0] = vfmsq(res.val[0],tw.val[1], tmp2.val[1]);
263 
264       res.val[1] = vmulq(tw.val[0], tmp2.val[1]);
265       res.val[1] = vfmaq(res.val[1], tw.val[1], tmp2.val[0]);
266 
267       res.val[0] = vaddq_f16(res.val[0],tmp1.val[0] );
268       res.val[1] = vaddq_f16(res.val[1],tmp1.val[1] );
269 
270       res.val[0] = vmulq_n_f16(res.val[0], 0.5f);
271       res.val[1] = vmulq_n_f16(res.val[1], 0.5f);
272 
273 
274       vst2q_f16(pOut, res);
275       pOut += 16;
276 
277 
278       blockCnt--;
279    }
280 
281    pB += 14;
282    blockCnt = k & 7;
283    while (blockCnt > 0)
284    {
285       /* G is half of the frequency complex spectrum */
286       //for k = 2:N
287       //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
288       xBI =   pB[1]    ;
289       xBR =   pB[0]    ;
290       xAR =  pA[0];
291       xAI =  pA[1];
292 
293       twR = *pCoeff++;
294       twI = *pCoeff++;
295 
296       t1a = xAR - xBR ;
297       t1b = xAI + xBI ;
298 
299       r = twR * t1a;
300       s = twI * t1b;
301       t = twI * t1a;
302       u = twR * t1b;
303 
304       // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
305       // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
306       *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
307       *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
308 
309       pA += 2;
310       pB -= 2;
311       blockCnt--;
312    }
313 
314 }
315 #else
stage_rfft_f16(const arm_rfft_fast_instance_f16 * S,float16_t * p,float16_t * pOut)316 void stage_rfft_f16(
317   const arm_rfft_fast_instance_f16 * S,
318         float16_t * p,
319         float16_t * pOut)
320 {
321         int32_t  k;                                /* Loop Counter */
322         float16_t twR, twI;                         /* RFFT Twiddle coefficients */
323   const float16_t * pCoeff = S->pTwiddleRFFT;       /* Points to RFFT Twiddle factors */
324         float16_t *pA = p;                          /* increasing pointer */
325         float16_t *pB = p;                          /* decreasing pointer */
326         float16_t xAR, xAI, xBR, xBI;               /* temporary variables */
327         float16_t t1a, t1b;                         /* temporary variables */
328         float16_t p0, p1, p2, p3;                   /* temporary variables */
329 
330 
331    k = (S->Sint).fftLen - 1;
332 
333    /* Pack first and last sample of the frequency domain together */
334 
335    xBR = pB[0];
336    xBI = pB[1];
337    xAR = pA[0];
338    xAI = pA[1];
339 
340    twR = *pCoeff++ ;
341    twI = *pCoeff++ ;
342 
343 
344    // U1 = XA(1) + XB(1); % It is real
345    t1a = xBR + xAR  ;
346 
347    // U2 = XB(1) - XA(1); % It is imaginary
348    t1b = xBI + xAI  ;
349 
350    // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
351    // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
352    *pOut++ = 0.5f * ( t1a + t1b );
353    *pOut++ = 0.5f * ( t1a - t1b );
354 
355    // XA(1) = 1/2*( U1 - imag(U2) +  i*( U1 +imag(U2) ));
356    pB  = p + 2*k;
357    pA += 2;
358 
359    do
360    {
361       /*
362          function X = my_split_rfft(X, ifftFlag)
363          % X is a series of real numbers
364          L  = length(X);
365          XC = X(1:2:end) +i*X(2:2:end);
366          XA = fft(XC);
367          XB = conj(XA([1 end:-1:2]));
368          TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
369          for l = 2:L/2
370             XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
371          end
372          XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
373          X = XA;
374       */
375 
376       xBI = pB[1];
377       xBR = pB[0];
378       xAR = pA[0];
379       xAI = pA[1];
380 
381       twR = *pCoeff++;
382       twI = *pCoeff++;
383 
384       t1a = xBR - xAR ;
385       t1b = xBI + xAI ;
386 
387       // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
388       // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
389       p0 = twR * t1a;
390       p1 = twI * t1a;
391       p2 = twR * t1b;
392       p3 = twI * t1b;
393 
394       *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
395       *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
396 
397 
398       pA += 2;
399       pB -= 2;
400       k--;
401    } while (k > 0);
402 }
403 
404 /* Prepares data for inverse cfft */
merge_rfft_f16(const arm_rfft_fast_instance_f16 * S,float16_t * p,float16_t * pOut)405 void merge_rfft_f16(
406   const arm_rfft_fast_instance_f16 * S,
407         float16_t * p,
408         float16_t * pOut)
409 {
410         int32_t  k;                                /* Loop Counter */
411         float16_t twR, twI;                         /* RFFT Twiddle coefficients */
412   const float16_t *pCoeff = S->pTwiddleRFFT;        /* Points to RFFT Twiddle factors */
413         float16_t *pA = p;                          /* increasing pointer */
414         float16_t *pB = p;                          /* decreasing pointer */
415         float16_t xAR, xAI, xBR, xBI;               /* temporary variables */
416         float16_t t1a, t1b, r, s, t, u;             /* temporary variables */
417 
418    k = (S->Sint).fftLen - 1;
419 
420    xAR = pA[0];
421    xAI = pA[1];
422 
423    pCoeff += 2 ;
424 
425    *pOut++ = 0.5f * ( xAR + xAI );
426    *pOut++ = 0.5f * ( xAR - xAI );
427 
428    pB  =  p + 2*k ;
429    pA +=  2	   ;
430 
431    while (k > 0)
432    {
433       /* G is half of the frequency complex spectrum */
434       //for k = 2:N
435       //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
436       xBI =   pB[1]    ;
437       xBR =   pB[0]    ;
438       xAR =  pA[0];
439       xAI =  pA[1];
440 
441       twR = *pCoeff++;
442       twI = *pCoeff++;
443 
444       t1a = xAR - xBR ;
445       t1b = xAI + xBI ;
446 
447       r = twR * t1a;
448       s = twI * t1b;
449       t = twI * t1a;
450       u = twR * t1b;
451 
452       // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
453       // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
454       *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
455       *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
456 
457       pA += 2;
458       pB -= 2;
459       k--;
460    }
461 
462 }
463 
464 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
465 
466 /**
467   @ingroup groupTransforms
468 */
469 
470 /**
471   @defgroup RealFFT Real FFT Functions
472 
473   @par
474                    The CMSIS DSP library includes specialized algorithms for computing the
475                    FFT of real data sequences.  The FFT is defined over complex data but
476                    in many applications the input is real.  Real FFT algorithms take advantage
477                    of the symmetry properties of the FFT and have a speed advantage over complex
478                    algorithms of the same length.
479   @par
480                    The Fast RFFT algorithm relays on the mixed radix CFFT that save processor usage.
481   @par
482                    The real length N forward FFT of a sequence is computed using the steps shown below.
483   @par
484                    \image html RFFT.gif "Real Fast Fourier Transform"
485   @par
486                    The real sequence is initially treated as if it were complex to perform a CFFT.
487                    Later, a processing stage reshapes the data to obtain half of the frequency spectrum
488                    in complex format. Except the first complex number that contains the two real numbers
489                    X[0] and X[N/2] all the data is complex. In other words, the first complex sample
490                    contains two real values packed.
491   @par
492                    The input for the inverse RFFT should keep the same format as the output of the
493                    forward RFFT. A first processing stage pre-process the data to later perform an
494                    inverse CFFT.
495   @par
496                    \image html RIFFT.gif "Real Inverse Fast Fourier Transform"
497   @par
498                    The algorithms for floating-point, Q15, and Q31 data are slightly different
499                    and we describe each algorithm in turn.
500   @par           Floating-point
501                    The main functions are \ref arm_rfft_fast_f16() and \ref arm_rfft_fast_init_f16().
502 
503   @par
504                    The FFT of a real N-point sequence has even symmetry in the frequency domain.
505                    The second half of the data equals the conjugate of the first half flipped in frequency.
506                    Looking at the data, we see that we can uniquely represent the FFT using only N/2 complex numbers.
507                    These are packed into the output array in alternating real and imaginary components:
508   @par
509                    X = { real[0], imag[0], real[1], imag[1], real[2], imag[2] ...
510                    real[(N/2)-1], imag[(N/2)-1 }
511   @par
512                    It happens that the first complex number (real[0], imag[0]) is actually
513                    all real. real[0] represents the DC offset, and imag[0] should be 0.
514                    (real[1], imag[1]) is the fundamental frequency, (real[2], imag[2]) is
515                    the first harmonic and so on.
516   @par
517                    The real FFT functions pack the frequency domain data in this fashion.
518                    The forward transform outputs the data in this form and the inverse
519                    transform expects input data in this form. The function always performs
520                    the needed bitreversal so that the input and output data is always in
521                    normal order. The functions support lengths of [32, 64, 128, ..., 4096]
522                    samples.
523   @par           Q15 and Q31
524                    The real algorithms are defined in a similar manner and utilize N/2 complex
525                    transforms behind the scenes.
526   @par
527                    The complex transforms used internally include scaling to prevent fixed-point
528                    overflows.  The overall scaling equals 1/(fftLen/2).
529                    Due to the use of complex transform internally, the source buffer is
530                    modified by the rfft.
531   @par
532                    A separate instance structure must be defined for each transform used but
533                    twiddle factor and bit reversal tables can be reused.
534   @par
535                    There is also an associated initialization function for each data type.
536                    The initialization function performs the following operations:
537                     - Sets the values of the internal structure fields.
538                     - Initializes twiddle factor table and bit reversal table pointers.
539                     - Initializes the internal complex FFT data structure.
540   @par
541                    Use of the initialization function is optional **except for MVE versions where it is mandatory**.
542                    If you don't use the initialization functions, then the structures should be initialized with code
543                    similar to the one below:
544   <pre>
545       arm_rfft_instance_q31 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
546       arm_rfft_instance_q15 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
547   </pre>
548                    where <code>fftLenReal</code> is the length of the real transform;
549                    <code>fftLenBy2</code> length of  the internal complex transform (fftLenReal/2).
550                    <code>ifftFlagR</code> Selects forward (=0) or inverse (=1) transform.
551                    <code>bitReverseFlagR</code> Selects bit reversed output (=0) or normal order
552                    output (=1).
553                    <code>twidCoefRModifier</code> stride modifier for the twiddle factor table.
554                    The value is based on the FFT length;
555                    <code>pTwiddleAReal</code>points to the A array of twiddle coefficients;
556                    <code>pTwiddleBReal</code>points to the B array of twiddle coefficients;
557                    <code>pCfft</code> points to the CFFT Instance structure. The CFFT structure
558                    must also be initialized.
559 @par
560                    Note that with MVE versions you can't initialize instance structures directly and **must
561                    use the initialization function**.
562  */
563 
564 /**
565   @addtogroup RealFFT
566   @{
567 */
568 
569 /**
570   @brief         Processing function for the floating-point real FFT.
571   @param[in]     S         points to an arm_rfft_fast_instance_f16 structure
572   @param[in]     p         points to input buffer (Source buffer is modified by this function.)
573   @param[in]     pOut      points to output buffer
574   @param[in]     ifftFlag
575                    - value = 0: RFFT
576                    - value = 1: RIFFT
577   @return        none
578 */
579 
arm_rfft_fast_f16(const arm_rfft_fast_instance_f16 * S,float16_t * p,float16_t * pOut,uint8_t ifftFlag)580 void arm_rfft_fast_f16(
581   const arm_rfft_fast_instance_f16 * S,
582   float16_t * p,
583   float16_t * pOut,
584   uint8_t ifftFlag)
585 {
586    const arm_cfft_instance_f16 * Sint = &(S->Sint);
587 
588 
589    /* Calculation of Real FFT */
590    if (ifftFlag)
591    {
592       /*  Real FFT compression */
593       merge_rfft_f16(S, p, pOut);
594       /* Complex radix-4 IFFT process */
595       arm_cfft_f16( Sint, pOut, ifftFlag, 1);
596    }
597    else
598    {
599 
600       /* Calculation of RFFT of input */
601       arm_cfft_f16( Sint, p, ifftFlag, 1);
602 
603       /*  Real FFT extraction */
604       stage_rfft_f16(S, p, pOut);
605    }
606 }
607 
608 /**
609 * @} end of RealFFT group
610 */
611 
612 #endif /*  #if defined(ARM_FLOAT16_SUPPORTED) */
613