• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_rfft_q31.c
4  * Description:  FFT & RIFFT Q31 process function
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/transform_functions.h"
30 
31 /* ----------------------------------------------------------------------
32  * Internal functions prototypes
33  * -------------------------------------------------------------------- */
34 
35 void arm_split_rfft_q31(
36         q31_t * pSrc,
37         uint32_t fftLen,
38   const q31_t * pATable,
39   const q31_t * pBTable,
40         q31_t * pDst,
41         uint32_t modifier);
42 
43 void arm_split_rifft_q31(
44         q31_t * pSrc,
45         uint32_t fftLen,
46   const q31_t * pATable,
47   const q31_t * pBTable,
48         q31_t * pDst,
49         uint32_t modifier);
50 
51 /**
52   @addtogroup RealFFT
53   @{
54  */
55 
56 /**
57   @brief         Processing function for the Q31 RFFT/RIFFT.
58   @param[in]     S     points to an instance of the Q31 RFFT/RIFFT structure
59   @param[in]     pSrc  points to input buffer (Source buffer is modified by this function)
60   @param[out]    pDst  points to output buffer
61   @return        none
62 
63   @par           Input an output formats
64                    Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
65                    Hence the output format is different for different RFFT sizes.
66                    The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
67   @par
68                    \image html RFFTQ31.gif "Input and Output Formats for Q31 RFFT"
69   @par
70                    \image html RIFFTQ31.gif "Input and Output Formats for Q31 RIFFT"
71   @par
72                    If the input buffer is of length N, the output buffer must have length 2*N.
73                    The input buffer is modified by this function.
74   @par
75                    For the RIFFT, the source buffer must at least have length
76                    fftLenReal + 2.
77                    The last two elements must be equal to what would be generated
78                    by the RFFT:
79                      (pSrc[0] - pSrc[1]) >> 1 and 0
80 
81  */
82 
arm_rfft_q31(const arm_rfft_instance_q31 * S,q31_t * pSrc,q31_t * pDst)83 void arm_rfft_q31(
84   const arm_rfft_instance_q31 * S,
85         q31_t * pSrc,
86         q31_t * pDst)
87 {
88 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
89   const arm_cfft_instance_q31 *S_CFFT = &(S->cfftInst);
90 #else
91   const arm_cfft_instance_q31 *S_CFFT = S->pCfft;
92 #endif
93         uint32_t L2 = S->fftLenReal >> 1U;
94 
95   /* Calculation of RIFFT of input */
96   if (S->ifftFlagR == 1U)
97   {
98      /*  Real IFFT core process */
99      arm_split_rifft_q31 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
100 
101      /* Complex IFFT process */
102      arm_cfft_q31 (S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR);
103 
104      arm_shift_q31(pDst, 1, pDst, S->fftLenReal);
105   }
106   else
107   {
108      /* Calculation of RFFT of input */
109 
110      /* Complex FFT process */
111      arm_cfft_q31 (S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR);
112 
113      /*  Real FFT core process */
114      arm_split_rfft_q31 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
115   }
116 
117 }
118 
119 /**
120   @} end of RealFFT group
121  */
122 
123 /**
124   @brief         Core Real FFT process
125   @param[in]     pSrc      points to input buffer
126   @param[in]     fftLen    length of FFT
127   @param[in]     pATable   points to twiddle Coef A buffer
128   @param[in]     pBTable   points to twiddle Coef B buffer
129   @param[out]    pDst      points to output buffer
130   @param[in]     modifier  twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
131   @return        none
132  */
133 
134 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
135 
136 #include "arm_helium_utils.h"
137 #include "arm_vec_fft.h"
138 
139 #if defined(__CMSIS_GCC_H)
140 
141 #define MVE_CMPLX_MULT_FX_AxB_S32(A,B)          vqdmladhxq_s32(vqdmlsdhq_s32((__typeof(A))vuninitializedq_s32(), A, B), A, B)
142 #define MVE_CMPLX_MULT_FX_AxConjB_S32(A,B)      vqdmladhq_s32(vqdmlsdhxq_s32((__typeof(A))vuninitializedq_s32(), A, B), A, B)
143 
144 #endif
145 
arm_split_rfft_q31(q31_t * pSrc,uint32_t fftLen,const q31_t * pATable,const q31_t * pBTable,q31_t * pDst,uint32_t modifier)146 void arm_split_rfft_q31(
147     q31_t       *pSrc,
148     uint32_t     fftLen,
149     const q31_t       *pATable,
150     const q31_t       *pBTable,
151     q31_t       *pDst,
152     uint32_t     modifier)
153 {
154     uint32_t        i;          /* Loop Counter */
155     const q31_t    *pCoefA, *pCoefB;    /* Temporary pointers for twiddle factors */
156     q31_t          *pOut1 = &pDst[2];
157     q31_t          *pIn1 = &pSrc[2];
158     uint32x4_t      offset = { 2, 3, 0, 1 };
159     uint32x4_t      offsetCoef = { 0, 1, modifier * 2, modifier * 2 + 1 };
160 
161     offset = offset + (2 * fftLen - 4);
162 
163 
164     /* Init coefficient pointers */
165     pCoefA = &pATable[modifier * 2];
166     pCoefB = &pBTable[modifier * 2];
167 
168     const q31_t    *pCoefAb, *pCoefBb;
169     pCoefAb = pCoefA;
170     pCoefBb = pCoefB;
171 
172     pIn1 = &pSrc[2];
173 
174     i = fftLen - 1U;
175     i = i / 2 + 1;
176     while (i > 0U) {
177         q31x4_t         in1 = vld1q_s32(pIn1);
178         q31x4_t         in2 = vldrwq_gather_shifted_offset_s32(pSrc, offset);
179         q31x4_t         coefA = vldrwq_gather_shifted_offset_s32(pCoefAb, offsetCoef);
180         q31x4_t         coefB = vldrwq_gather_shifted_offset_s32(pCoefBb, offsetCoef);
181 #if defined(__CMSIS_GCC_H)
182         q31x4_t         out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB_S32(in1, coefA),MVE_CMPLX_MULT_FX_AxConjB_S32(coefB, in2));
183 #else
184         q31x4_t         out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB(in1, coefA),MVE_CMPLX_MULT_FX_AxConjB(coefB, in2));
185 #endif
186         vst1q(pOut1, out);
187         pOut1 += 4;
188 
189         offsetCoef += modifier * 4;
190         offset -= 4;
191 
192         pIn1 += 4;
193         i -= 1;
194     }
195 
196     pDst[2 * fftLen] = (pSrc[0] - pSrc[1]) >> 1U;
197     pDst[2 * fftLen + 1] = 0;
198 
199     pDst[0] = (pSrc[0] + pSrc[1]) >> 1U;
200     pDst[1] = 0;
201 }
202 #else
arm_split_rfft_q31(q31_t * pSrc,uint32_t fftLen,const q31_t * pATable,const q31_t * pBTable,q31_t * pDst,uint32_t modifier)203 void arm_split_rfft_q31(
204         q31_t * pSrc,
205         uint32_t fftLen,
206   const q31_t * pATable,
207   const q31_t * pBTable,
208         q31_t * pDst,
209         uint32_t modifier)
210 {
211         uint32_t i;                                    /* Loop Counter */
212         q31_t outR, outI;                              /* Temporary variables for output */
213   const q31_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
214         q31_t CoefA1, CoefA2, CoefB1;                  /* Temporary variables for twiddle coefficients */
215         q31_t *pOut1 = &pDst[2], *pOut2 = &pDst[4 * fftLen - 1];
216         q31_t *pIn1 =  &pSrc[2], *pIn2 =  &pSrc[2 * fftLen - 1];
217 
218   /* Init coefficient pointers */
219   pCoefA = &pATable[modifier * 2];
220   pCoefB = &pBTable[modifier * 2];
221 
222   i = fftLen - 1U;
223 
224   while (i > 0U)
225   {
226      /*
227        outR = (  pSrc[2 * i]             * pATable[2 * i]
228                - pSrc[2 * i + 1]         * pATable[2 * i + 1]
229                + pSrc[2 * n - 2 * i]     * pBTable[2 * i]
230                + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
231 
232        outI = (  pIn[2 * i + 1]         * pATable[2 * i]
233                + pIn[2 * i]             * pATable[2 * i + 1]
234                + pIn[2 * n - 2 * i]     * pBTable[2 * i + 1]
235                - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
236       */
237 
238      CoefA1 = *pCoefA++;
239      CoefA2 = *pCoefA;
240 
241      /* outR = (pSrc[2 * i] * pATable[2 * i] */
242      mult_32x32_keep32_R (outR, *pIn1, CoefA1);
243 
244      /* outI = pIn[2 * i] * pATable[2 * i + 1] */
245      mult_32x32_keep32_R (outI, *pIn1++, CoefA2);
246 
247      /* - pSrc[2 * i + 1] * pATable[2 * i + 1] */
248      multSub_32x32_keep32_R (outR, *pIn1, CoefA2);
249 
250      /* (pIn[2 * i + 1] * pATable[2 * i] */
251      multAcc_32x32_keep32_R (outI, *pIn1++, CoefA1);
252 
253      /* pSrc[2 * n - 2 * i] * pBTable[2 * i]  */
254      multSub_32x32_keep32_R (outR, *pIn2, CoefA2);
255      CoefB1 = *pCoefB;
256 
257      /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
258      multSub_32x32_keep32_R (outI, *pIn2--, CoefB1);
259 
260      /* pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
261      multAcc_32x32_keep32_R (outR, *pIn2, CoefB1);
262 
263      /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
264      multSub_32x32_keep32_R (outI, *pIn2--, CoefA2);
265 
266      /* write output */
267      *pOut1++ = outR;
268      *pOut1++ = outI;
269 
270      /* write complex conjugate output */
271      *pOut2-- = -outI;
272      *pOut2-- = outR;
273 
274      /* update coefficient pointer */
275      pCoefB = pCoefB + (2 * modifier);
276      pCoefA = pCoefA + (2 * modifier - 1);
277 
278      /* Decrement loop count */
279      i--;
280   }
281 
282   pDst[2 * fftLen]     = (pSrc[0] - pSrc[1]) >> 1U;
283   pDst[2 * fftLen + 1] = 0;
284 
285   pDst[0] = (pSrc[0] + pSrc[1]) >> 1U;
286   pDst[1] = 0;
287 }
288 #endif /* defined(ARM_MATH_MVEI) */
289 
290 /**
291   @brief         Core Real IFFT process
292   @param[in]     pSrc      points to input buffer
293   @param[in]     fftLen    length of FFT
294   @param[in]     pATable   points to twiddle Coef A buffer
295   @param[in]     pBTable   points to twiddle Coef B buffer
296   @param[out]    pDst      points to output buffer
297   @param[in]     modifier  twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
298   @return        none
299  */
300 
301 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
302 
arm_split_rifft_q31(q31_t * pSrc,uint32_t fftLen,const q31_t * pATable,const q31_t * pBTable,q31_t * pDst,uint32_t modifier)303 void arm_split_rifft_q31(
304         q31_t * pSrc,
305         uint32_t fftLen,
306   const q31_t * pATable,
307   const q31_t * pBTable,
308         q31_t * pDst,
309         uint32_t modifier)
310 {
311     uint32_t        i;          /* Loop Counter */
312     const q31_t    *pCoefA, *pCoefB;    /* Temporary pointers for twiddle factors */
313     q31_t          *pIn1;
314     uint32x4_t      offset = { 2, 3, 0, 1 };
315     uint32x4_t      offsetCoef = { 0, 1, modifier * 2, modifier * 2 + 1 };
316     int32x4_t       conj = { 1, -1, 1, -1 };
317 
318     offset = offset + (2 * fftLen - 2);
319 
320     /* Init coefficient pointers */
321     pCoefA = &pATable[0];
322     pCoefB = &pBTable[0];
323 
324     const q31_t    *pCoefAb, *pCoefBb;
325     pCoefAb = pCoefA;
326     pCoefBb = pCoefB;
327 
328     pIn1 = &pSrc[0];
329 
330     i = fftLen;
331     i = i >> 1;
332     while (i > 0U) {
333         q31x4_t         in1 = vld1q_s32(pIn1);
334         q31x4_t         in2 = vldrwq_gather_shifted_offset_s32(pSrc, offset);
335         q31x4_t         coefA = vldrwq_gather_shifted_offset_s32(pCoefAb, offsetCoef);
336         q31x4_t         coefB = vldrwq_gather_shifted_offset_s32(pCoefBb, offsetCoef);
337 
338         /* can we avoid the conjugate here ? */
339 #if defined(__CMSIS_GCC_H)
340         q31x4_t         out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB_S32(in1, coefA),
341                                      vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB_S32(in2, coefB)));
342 #else
343         q31x4_t         out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA),
344                                      vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB)));
345 #endif
346         vst1q_s32(pDst, out);
347         pDst += 4;
348 
349         offsetCoef += modifier * 4;
350         offset -= 4;
351 
352         pIn1 += 4;
353         i -= 1;
354     }
355 }
356 #else
arm_split_rifft_q31(q31_t * pSrc,uint32_t fftLen,const q31_t * pATable,const q31_t * pBTable,q31_t * pDst,uint32_t modifier)357 void arm_split_rifft_q31(
358         q31_t * pSrc,
359         uint32_t fftLen,
360   const q31_t * pATable,
361   const q31_t * pBTable,
362         q31_t * pDst,
363         uint32_t modifier)
364 {
365         q31_t outR, outI;                              /* Temporary variables for output */
366   const q31_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
367         q31_t CoefA1, CoefA2, CoefB1;                  /* Temporary variables for twiddle coefficients */
368         q31_t *pIn1 = &pSrc[0], *pIn2 = &pSrc[2 * fftLen + 1];
369 
370   pCoefA = &pATable[0];
371   pCoefB = &pBTable[0];
372 
373   while (fftLen > 0U)
374   {
375      /*
376        outR = (  pIn[2 * i]             * pATable[2 * i]
377                + pIn[2 * i + 1]         * pATable[2 * i + 1]
378                + pIn[2 * n - 2 * i]     * pBTable[2 * i]
379                - pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
380 
381        outI = (  pIn[2 * i + 1]         * pATable[2 * i]
382                - pIn[2 * i]             * pATable[2 * i + 1]
383                - pIn[2 * n - 2 * i]     * pBTable[2 * i + 1]
384                - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
385       */
386 
387      CoefA1 = *pCoefA++;
388      CoefA2 = *pCoefA;
389 
390      /* outR = (pIn[2 * i] * pATable[2 * i] */
391      mult_32x32_keep32_R (outR, *pIn1, CoefA1);
392 
393      /* - pIn[2 * i] * pATable[2 * i + 1] */
394      mult_32x32_keep32_R (outI, *pIn1++, -CoefA2);
395 
396      /* pIn[2 * i + 1] * pATable[2 * i + 1] */
397      multAcc_32x32_keep32_R (outR, *pIn1, CoefA2);
398 
399      /* pIn[2 * i + 1] * pATable[2 * i] */
400      multAcc_32x32_keep32_R (outI, *pIn1++, CoefA1);
401 
402      /* pIn[2 * n - 2 * i] * pBTable[2 * i] */
403      multAcc_32x32_keep32_R (outR, *pIn2, CoefA2);
404      CoefB1 = *pCoefB;
405 
406      /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
407      multSub_32x32_keep32_R (outI, *pIn2--, CoefB1);
408 
409      /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
410      multAcc_32x32_keep32_R (outR, *pIn2, CoefB1);
411 
412      /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
413      multAcc_32x32_keep32_R (outI, *pIn2--, CoefA2);
414 
415      /* write output */
416      *pDst++ = outR;
417      *pDst++ = outI;
418 
419      /* update coefficient pointer */
420      pCoefB = pCoefB + (modifier * 2);
421      pCoefA = pCoefA + (modifier * 2 - 1);
422 
423      /* Decrement loop count */
424      fftLen--;
425   }
426 
427 }
428 
429 #endif /* defined(ARM_MATH_MVEI) */
430