• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_cmplx_mag_f32.c
4  * Description:  Floating-point complex magnitude
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/complex_math_functions.h"
30 
31 /**
32   @ingroup groupCmplxMath
33  */
34 
35 /**
36   @defgroup cmplx_mag Complex Magnitude
37 
38   Computes the magnitude of the elements of a complex data vector.
39 
40   The <code>pSrc</code> points to the source data and
41   <code>pDst</code> points to the where the result should be written.
42   <code>numSamples</code> specifies the number of complex samples
43   in the input array and the data is stored in an interleaved fashion
44   (real, imag, real, imag, ...).
45   The input array has a total of <code>2*numSamples</code> values;
46   the output array has a total of <code>numSamples</code> values.
47 
48   The underlying algorithm is used:
49 
50   <pre>
51   for (n = 0; n < numSamples; n++) {
52       pDst[n] = sqrt(pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2);
53   }
54   </pre>
55 
56   There are separate functions for floating-point, Q15, and Q31 data types.
57  */
58 
59 /**
60   @addtogroup cmplx_mag
61   @{
62  */
63 
64 /**
65   @brief         Floating-point complex magnitude.
66   @param[in]     pSrc        points to input vector
67   @param[out]    pDst        points to output vector
68   @param[in]     numSamples  number of samples in each vector
69   @return        none
70  */
71 
72 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
73 #include "arm_vec_math.h"
74 #endif
75 
76 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
77 
78 #include "arm_helium_utils.h"
79 
80 
arm_cmplx_mag_f32(const float32_t * pSrc,float32_t * pDst,uint32_t numSamples)81 void arm_cmplx_mag_f32(
82   const float32_t * pSrc,
83         float32_t * pDst,
84         uint32_t numSamples)
85 {
86     int32_t blockSize = numSamples;  /* loop counters */
87     uint32_t  blkCnt;           /* loop counters */
88     f32x4x2_t vecSrc;
89     f32x4_t sum;
90     float32_t real, imag;                      /* Temporary variables to hold input values */
91 
92     /* Compute 4 complex samples at a time */
93     blkCnt = blockSize >> 2;
94     while (blkCnt > 0U)
95     {
96         q31x4_t newtonStartVec;
97         f32x4_t sumHalf, invSqrt;
98 
99         vecSrc = vld2q(pSrc);
100         pSrc += 8;
101         sum = vmulq(vecSrc.val[0], vecSrc.val[0]);
102         sum = vfmaq(sum, vecSrc.val[1], vecSrc.val[1]);
103 
104         /*
105          * inlined Fast SQRT using inverse SQRT newton-raphson method
106          */
107 
108         /* compute initial value */
109         newtonStartVec = vdupq_n_s32(INVSQRT_MAGIC_F32) - vshrq((q31x4_t) sum, 1);
110         sumHalf = sum * 0.5f;
111         /*
112          * compute 3 x iterations
113          *
114          * The more iterations, the more accuracy.
115          * If you need to trade a bit of accuracy for more performance,
116          * you can comment out the 3rd use of the macro.
117          */
118         INVSQRT_NEWTON_MVE_F32(invSqrt, sumHalf, (f32x4_t) newtonStartVec);
119         INVSQRT_NEWTON_MVE_F32(invSqrt, sumHalf, invSqrt);
120         INVSQRT_NEWTON_MVE_F32(invSqrt, sumHalf, invSqrt);
121         /*
122          * set negative values to 0
123          */
124         invSqrt = vdupq_m(invSqrt, 0.0f, vcmpltq(invSqrt, 0.0f));
125         /*
126          * sqrt(x) = x * invSqrt(x)
127          */
128         sum = vmulq(sum, invSqrt);
129         vst1q(pDst, sum);
130         pDst += 4;
131         /*
132          * Decrement the blockSize loop counter
133          */
134         blkCnt--;
135     }
136     /*
137      * tail
138      */
139     blkCnt = blockSize & 3;
140     while (blkCnt > 0U)
141     {
142       /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
143 
144       real = *pSrc++;
145       imag = *pSrc++;
146 
147       /* store result in destination buffer. */
148       arm_sqrt_f32((real * real) + (imag * imag), pDst++);
149 
150       /* Decrement loop counter */
151       blkCnt--;
152     }
153 }
154 
155 #else
arm_cmplx_mag_f32(const float32_t * pSrc,float32_t * pDst,uint32_t numSamples)156 void arm_cmplx_mag_f32(
157   const float32_t * pSrc,
158         float32_t * pDst,
159         uint32_t numSamples)
160 {
161   uint32_t blkCnt;                               /* loop counter */
162   float32_t real, imag;                      /* Temporary variables to hold input values */
163 
164 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
165 
166   float32x4x2_t vecA;
167   float32x4_t vRealA;
168   float32x4_t vImagA;
169   float32x4_t vMagSqA;
170 
171   float32x4x2_t vecB;
172   float32x4_t vRealB;
173   float32x4_t vImagB;
174   float32x4_t vMagSqB;
175 
176   /* Loop unrolling: Compute 8 outputs at a time */
177   blkCnt = numSamples >> 3;
178 
179   while (blkCnt > 0U)
180   {
181     /* out = sqrt((real * real) + (imag * imag)) */
182 
183     vecA = vld2q_f32(pSrc);
184     pSrc += 8;
185 
186     vecB = vld2q_f32(pSrc);
187     pSrc += 8;
188 
189     vRealA = vmulq_f32(vecA.val[0], vecA.val[0]);
190     vImagA = vmulq_f32(vecA.val[1], vecA.val[1]);
191     vMagSqA = vaddq_f32(vRealA, vImagA);
192 
193     vRealB = vmulq_f32(vecB.val[0], vecB.val[0]);
194     vImagB = vmulq_f32(vecB.val[1], vecB.val[1]);
195     vMagSqB = vaddq_f32(vRealB, vImagB);
196 
197     /* Store the result in the destination buffer. */
198     vst1q_f32(pDst, __arm_vec_sqrt_f32_neon(vMagSqA));
199     pDst += 4;
200 
201     vst1q_f32(pDst, __arm_vec_sqrt_f32_neon(vMagSqB));
202     pDst += 4;
203 
204     /* Decrement the loop counter */
205     blkCnt--;
206   }
207 
208   blkCnt = numSamples & 7;
209 
210 #else
211 
212 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
213 
214   /* Loop unrolling: Compute 4 outputs at a time */
215   blkCnt = numSamples >> 2U;
216 
217   while (blkCnt > 0U)
218   {
219     /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
220 
221     real = *pSrc++;
222     imag = *pSrc++;
223 
224     /* store result in destination buffer. */
225     arm_sqrt_f32((real * real) + (imag * imag), pDst++);
226 
227     real = *pSrc++;
228     imag = *pSrc++;
229     arm_sqrt_f32((real * real) + (imag * imag), pDst++);
230 
231     real = *pSrc++;
232     imag = *pSrc++;
233     arm_sqrt_f32((real * real) + (imag * imag), pDst++);
234 
235     real = *pSrc++;
236     imag = *pSrc++;
237     arm_sqrt_f32((real * real) + (imag * imag), pDst++);
238 
239     /* Decrement loop counter */
240     blkCnt--;
241   }
242 
243   /* Loop unrolling: Compute remaining outputs */
244   blkCnt = numSamples % 0x4U;
245 
246 #else
247 
248   /* Initialize blkCnt with number of samples */
249   blkCnt = numSamples;
250 
251 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
252 #endif /* #if defined(ARM_MATH_NEON) */
253 
254   while (blkCnt > 0U)
255   {
256     /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
257 
258     real = *pSrc++;
259     imag = *pSrc++;
260 
261     /* store result in destination buffer. */
262     arm_sqrt_f32((real * real) + (imag * imag), pDst++);
263 
264     /* Decrement loop counter */
265     blkCnt--;
266   }
267 
268 }
269 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
270 
271 /**
272   @} end of cmplx_mag group
273  */
274