1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_rfft_q31.c
4 * Description: FFT & RIFFT Q31 process function
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/transform_functions.h"
30
31 /* ----------------------------------------------------------------------
32 * Internal functions prototypes
33 * -------------------------------------------------------------------- */
34
35 void arm_split_rfft_q31(
36 q31_t * pSrc,
37 uint32_t fftLen,
38 const q31_t * pATable,
39 const q31_t * pBTable,
40 q31_t * pDst,
41 uint32_t modifier);
42
43 void arm_split_rifft_q31(
44 q31_t * pSrc,
45 uint32_t fftLen,
46 const q31_t * pATable,
47 const q31_t * pBTable,
48 q31_t * pDst,
49 uint32_t modifier);
50
51 /**
52 @addtogroup RealFFT
53 @{
54 */
55
56 /**
57 @brief Processing function for the Q31 RFFT/RIFFT.
58 @param[in] S points to an instance of the Q31 RFFT/RIFFT structure
59 @param[in] pSrc points to input buffer (Source buffer is modified by this function)
60 @param[out] pDst points to output buffer
61 @return none
62
63 @par Input an output formats
64 Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
65 Hence the output format is different for different RFFT sizes.
66 The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
67 @par
68 \image html RFFTQ31.gif "Input and Output Formats for Q31 RFFT"
69 @par
70 \image html RIFFTQ31.gif "Input and Output Formats for Q31 RIFFT"
71 @par
72 If the input buffer is of length N, the output buffer must have length 2*N.
73 The input buffer is modified by this function.
74 @par
75 For the RIFFT, the source buffer must at least have length
76 fftLenReal + 2.
77 The last two elements must be equal to what would be generated
78 by the RFFT:
79 (pSrc[0] - pSrc[1]) >> 1 and 0
80
81 */
82
arm_rfft_q31(const arm_rfft_instance_q31 * S,q31_t * pSrc,q31_t * pDst)83 void arm_rfft_q31(
84 const arm_rfft_instance_q31 * S,
85 q31_t * pSrc,
86 q31_t * pDst)
87 {
88 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
89 const arm_cfft_instance_q31 *S_CFFT = &(S->cfftInst);
90 #else
91 const arm_cfft_instance_q31 *S_CFFT = S->pCfft;
92 #endif
93 uint32_t L2 = S->fftLenReal >> 1U;
94
95 /* Calculation of RIFFT of input */
96 if (S->ifftFlagR == 1U)
97 {
98 /* Real IFFT core process */
99 arm_split_rifft_q31 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
100
101 /* Complex IFFT process */
102 arm_cfft_q31 (S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR);
103
104 arm_shift_q31(pDst, 1, pDst, S->fftLenReal);
105 }
106 else
107 {
108 /* Calculation of RFFT of input */
109
110 /* Complex FFT process */
111 arm_cfft_q31 (S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR);
112
113 /* Real FFT core process */
114 arm_split_rfft_q31 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
115 }
116
117 }
118
119 /**
120 @} end of RealFFT group
121 */
122
123 /**
124 @brief Core Real FFT process
125 @param[in] pSrc points to input buffer
126 @param[in] fftLen length of FFT
127 @param[in] pATable points to twiddle Coef A buffer
128 @param[in] pBTable points to twiddle Coef B buffer
129 @param[out] pDst points to output buffer
130 @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
131 @return none
132 */
133
134 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
135
136 #include "arm_helium_utils.h"
137 #include "arm_vec_fft.h"
138
139 #if defined(__CMSIS_GCC_H)
140
141 #define MVE_CMPLX_MULT_FX_AxB_S32(A,B) vqdmladhxq_s32(vqdmlsdhq_s32((__typeof(A))vuninitializedq_s32(), A, B), A, B)
142 #define MVE_CMPLX_MULT_FX_AxConjB_S32(A,B) vqdmladhq_s32(vqdmlsdhxq_s32((__typeof(A))vuninitializedq_s32(), A, B), A, B)
143
144 #endif
145
arm_split_rfft_q31(q31_t * pSrc,uint32_t fftLen,const q31_t * pATable,const q31_t * pBTable,q31_t * pDst,uint32_t modifier)146 void arm_split_rfft_q31(
147 q31_t *pSrc,
148 uint32_t fftLen,
149 const q31_t *pATable,
150 const q31_t *pBTable,
151 q31_t *pDst,
152 uint32_t modifier)
153 {
154 uint32_t i; /* Loop Counter */
155 const q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
156 q31_t *pOut1 = &pDst[2];
157 q31_t *pIn1 = &pSrc[2];
158 uint32x4_t offset = { 2, 3, 0, 1 };
159 uint32x4_t offsetCoef = { 0, 1, modifier * 2, modifier * 2 + 1 };
160
161 offset = offset + (2 * fftLen - 4);
162
163
164 /* Init coefficient pointers */
165 pCoefA = &pATable[modifier * 2];
166 pCoefB = &pBTable[modifier * 2];
167
168 const q31_t *pCoefAb, *pCoefBb;
169 pCoefAb = pCoefA;
170 pCoefBb = pCoefB;
171
172 pIn1 = &pSrc[2];
173
174 i = fftLen - 1U;
175 i = i / 2 + 1;
176 while (i > 0U) {
177 q31x4_t in1 = vld1q_s32(pIn1);
178 q31x4_t in2 = vldrwq_gather_shifted_offset_s32(pSrc, offset);
179 q31x4_t coefA = vldrwq_gather_shifted_offset_s32(pCoefAb, offsetCoef);
180 q31x4_t coefB = vldrwq_gather_shifted_offset_s32(pCoefBb, offsetCoef);
181 #if defined(__CMSIS_GCC_H)
182 q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB_S32(in1, coefA),MVE_CMPLX_MULT_FX_AxConjB_S32(coefB, in2));
183 #else
184 q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB(in1, coefA),MVE_CMPLX_MULT_FX_AxConjB(coefB, in2));
185 #endif
186 vst1q(pOut1, out);
187 pOut1 += 4;
188
189 offsetCoef += modifier * 4;
190 offset -= 4;
191
192 pIn1 += 4;
193 i -= 1;
194 }
195
196 pDst[2 * fftLen] = (pSrc[0] - pSrc[1]) >> 1U;
197 pDst[2 * fftLen + 1] = 0;
198
199 pDst[0] = (pSrc[0] + pSrc[1]) >> 1U;
200 pDst[1] = 0;
201 }
202 #else
arm_split_rfft_q31(q31_t * pSrc,uint32_t fftLen,const q31_t * pATable,const q31_t * pBTable,q31_t * pDst,uint32_t modifier)203 void arm_split_rfft_q31(
204 q31_t * pSrc,
205 uint32_t fftLen,
206 const q31_t * pATable,
207 const q31_t * pBTable,
208 q31_t * pDst,
209 uint32_t modifier)
210 {
211 uint32_t i; /* Loop Counter */
212 q31_t outR, outI; /* Temporary variables for output */
213 const q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
214 q31_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
215 q31_t *pOut1 = &pDst[2], *pOut2 = &pDst[4 * fftLen - 1];
216 q31_t *pIn1 = &pSrc[2], *pIn2 = &pSrc[2 * fftLen - 1];
217
218 /* Init coefficient pointers */
219 pCoefA = &pATable[modifier * 2];
220 pCoefB = &pBTable[modifier * 2];
221
222 i = fftLen - 1U;
223
224 while (i > 0U)
225 {
226 /*
227 outR = ( pSrc[2 * i] * pATable[2 * i]
228 - pSrc[2 * i + 1] * pATable[2 * i + 1]
229 + pSrc[2 * n - 2 * i] * pBTable[2 * i]
230 + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
231
232 outI = ( pIn[2 * i + 1] * pATable[2 * i]
233 + pIn[2 * i] * pATable[2 * i + 1]
234 + pIn[2 * n - 2 * i] * pBTable[2 * i + 1]
235 - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
236 */
237
238 CoefA1 = *pCoefA++;
239 CoefA2 = *pCoefA;
240
241 /* outR = (pSrc[2 * i] * pATable[2 * i] */
242 mult_32x32_keep32_R (outR, *pIn1, CoefA1);
243
244 /* outI = pIn[2 * i] * pATable[2 * i + 1] */
245 mult_32x32_keep32_R (outI, *pIn1++, CoefA2);
246
247 /* - pSrc[2 * i + 1] * pATable[2 * i + 1] */
248 multSub_32x32_keep32_R (outR, *pIn1, CoefA2);
249
250 /* (pIn[2 * i + 1] * pATable[2 * i] */
251 multAcc_32x32_keep32_R (outI, *pIn1++, CoefA1);
252
253 /* pSrc[2 * n - 2 * i] * pBTable[2 * i] */
254 multSub_32x32_keep32_R (outR, *pIn2, CoefA2);
255 CoefB1 = *pCoefB;
256
257 /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
258 multSub_32x32_keep32_R (outI, *pIn2--, CoefB1);
259
260 /* pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
261 multAcc_32x32_keep32_R (outR, *pIn2, CoefB1);
262
263 /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
264 multSub_32x32_keep32_R (outI, *pIn2--, CoefA2);
265
266 /* write output */
267 *pOut1++ = outR;
268 *pOut1++ = outI;
269
270 /* write complex conjugate output */
271 *pOut2-- = -outI;
272 *pOut2-- = outR;
273
274 /* update coefficient pointer */
275 pCoefB = pCoefB + (2 * modifier);
276 pCoefA = pCoefA + (2 * modifier - 1);
277
278 /* Decrement loop count */
279 i--;
280 }
281
282 pDst[2 * fftLen] = (pSrc[0] - pSrc[1]) >> 1U;
283 pDst[2 * fftLen + 1] = 0;
284
285 pDst[0] = (pSrc[0] + pSrc[1]) >> 1U;
286 pDst[1] = 0;
287 }
288 #endif /* defined(ARM_MATH_MVEI) */
289
290 /**
291 @brief Core Real IFFT process
292 @param[in] pSrc points to input buffer
293 @param[in] fftLen length of FFT
294 @param[in] pATable points to twiddle Coef A buffer
295 @param[in] pBTable points to twiddle Coef B buffer
296 @param[out] pDst points to output buffer
297 @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
298 @return none
299 */
300
301 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
302
arm_split_rifft_q31(q31_t * pSrc,uint32_t fftLen,const q31_t * pATable,const q31_t * pBTable,q31_t * pDst,uint32_t modifier)303 void arm_split_rifft_q31(
304 q31_t * pSrc,
305 uint32_t fftLen,
306 const q31_t * pATable,
307 const q31_t * pBTable,
308 q31_t * pDst,
309 uint32_t modifier)
310 {
311 uint32_t i; /* Loop Counter */
312 const q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
313 q31_t *pIn1;
314 uint32x4_t offset = { 2, 3, 0, 1 };
315 uint32x4_t offsetCoef = { 0, 1, modifier * 2, modifier * 2 + 1 };
316 int32x4_t conj = { 1, -1, 1, -1 };
317
318 offset = offset + (2 * fftLen - 2);
319
320 /* Init coefficient pointers */
321 pCoefA = &pATable[0];
322 pCoefB = &pBTable[0];
323
324 const q31_t *pCoefAb, *pCoefBb;
325 pCoefAb = pCoefA;
326 pCoefBb = pCoefB;
327
328 pIn1 = &pSrc[0];
329
330 i = fftLen;
331 i = i >> 1;
332 while (i > 0U) {
333 q31x4_t in1 = vld1q_s32(pIn1);
334 q31x4_t in2 = vldrwq_gather_shifted_offset_s32(pSrc, offset);
335 q31x4_t coefA = vldrwq_gather_shifted_offset_s32(pCoefAb, offsetCoef);
336 q31x4_t coefB = vldrwq_gather_shifted_offset_s32(pCoefBb, offsetCoef);
337
338 /* can we avoid the conjugate here ? */
339 #if defined(__CMSIS_GCC_H)
340 q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB_S32(in1, coefA),
341 vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB_S32(in2, coefB)));
342 #else
343 q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA),
344 vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB)));
345 #endif
346 vst1q_s32(pDst, out);
347 pDst += 4;
348
349 offsetCoef += modifier * 4;
350 offset -= 4;
351
352 pIn1 += 4;
353 i -= 1;
354 }
355 }
356 #else
arm_split_rifft_q31(q31_t * pSrc,uint32_t fftLen,const q31_t * pATable,const q31_t * pBTable,q31_t * pDst,uint32_t modifier)357 void arm_split_rifft_q31(
358 q31_t * pSrc,
359 uint32_t fftLen,
360 const q31_t * pATable,
361 const q31_t * pBTable,
362 q31_t * pDst,
363 uint32_t modifier)
364 {
365 q31_t outR, outI; /* Temporary variables for output */
366 const q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
367 q31_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
368 q31_t *pIn1 = &pSrc[0], *pIn2 = &pSrc[2 * fftLen + 1];
369
370 pCoefA = &pATable[0];
371 pCoefB = &pBTable[0];
372
373 while (fftLen > 0U)
374 {
375 /*
376 outR = ( pIn[2 * i] * pATable[2 * i]
377 + pIn[2 * i + 1] * pATable[2 * i + 1]
378 + pIn[2 * n - 2 * i] * pBTable[2 * i]
379 - pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
380
381 outI = ( pIn[2 * i + 1] * pATable[2 * i]
382 - pIn[2 * i] * pATable[2 * i + 1]
383 - pIn[2 * n - 2 * i] * pBTable[2 * i + 1]
384 - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
385 */
386
387 CoefA1 = *pCoefA++;
388 CoefA2 = *pCoefA;
389
390 /* outR = (pIn[2 * i] * pATable[2 * i] */
391 mult_32x32_keep32_R (outR, *pIn1, CoefA1);
392
393 /* - pIn[2 * i] * pATable[2 * i + 1] */
394 mult_32x32_keep32_R (outI, *pIn1++, -CoefA2);
395
396 /* pIn[2 * i + 1] * pATable[2 * i + 1] */
397 multAcc_32x32_keep32_R (outR, *pIn1, CoefA2);
398
399 /* pIn[2 * i + 1] * pATable[2 * i] */
400 multAcc_32x32_keep32_R (outI, *pIn1++, CoefA1);
401
402 /* pIn[2 * n - 2 * i] * pBTable[2 * i] */
403 multAcc_32x32_keep32_R (outR, *pIn2, CoefA2);
404 CoefB1 = *pCoefB;
405
406 /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
407 multSub_32x32_keep32_R (outI, *pIn2--, CoefB1);
408
409 /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
410 multAcc_32x32_keep32_R (outR, *pIn2, CoefB1);
411
412 /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
413 multAcc_32x32_keep32_R (outI, *pIn2--, CoefA2);
414
415 /* write output */
416 *pDst++ = outR;
417 *pDst++ = outI;
418
419 /* update coefficient pointer */
420 pCoefB = pCoefB + (modifier * 2);
421 pCoefA = pCoefA + (modifier * 2 - 1);
422
423 /* Decrement loop count */
424 fftLen--;
425 }
426
427 }
428
429 #endif /* defined(ARM_MATH_MVEI) */
430