• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_cmplx_mag_squared_f32.c
4  * Description:  Floating-point complex magnitude squared
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/complex_math_functions.h"
30 
31 /**
32   @ingroup groupCmplxMath
33  */
34 
35 /**
36   @defgroup cmplx_mag_squared Complex Magnitude Squared
37 
38   Computes the magnitude squared of the elements of a complex data vector.
39 
40   The <code>pSrc</code> points to the source data and
41   <code>pDst</code> points to the where the result should be written.
42   <code>numSamples</code> specifies the number of complex samples
43   in the input array and the data is stored in an interleaved fashion
44   (real, imag, real, imag, ...).
45   The input array has a total of <code>2*numSamples</code> values;
46   the output array has a total of <code>numSamples</code> values.
47 
48   The underlying algorithm is used:
49 
50   <pre>
51   for (n = 0; n < numSamples; n++) {
52       pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;
53   }
54   </pre>
55 
56   There are separate functions for floating-point, Q15, and Q31 data types.
57  */
58 
59 /**
60   @addtogroup cmplx_mag_squared
61   @{
62  */
63 
64 /**
65   @brief         Floating-point complex magnitude squared.
66   @param[in]     pSrc        points to input vector
67   @param[out]    pDst        points to output vector
68   @param[in]     numSamples  number of samples in each vector
69   @return        none
70  */
71 
72 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
73 
arm_cmplx_mag_squared_f32(const float32_t * pSrc,float32_t * pDst,uint32_t numSamples)74 void arm_cmplx_mag_squared_f32(
75   const float32_t * pSrc,
76         float32_t * pDst,
77         uint32_t numSamples)
78 {
79     int32_t blockSize = numSamples;  /* loop counters */
80     uint32_t  blkCnt;           /* loop counters */
81     f32x4x2_t vecSrc;
82     f32x4_t sum;
83     float32_t real, imag;                          /* Temporary input variables */
84 
85     /* Compute 4 complex samples at a time */
86     blkCnt = blockSize >> 2;
87     while (blkCnt > 0U)
88     {
89         vecSrc = vld2q(pSrc);
90         sum = vmulq(vecSrc.val[0], vecSrc.val[0]);
91         sum = vfmaq(sum, vecSrc.val[1], vecSrc.val[1]);
92         vst1q(pDst, sum);
93 
94         pSrc += 8;
95         pDst += 4;
96 
97         /*
98          * Decrement the blockSize loop counter
99          */
100         blkCnt--;
101     }
102 
103     /* Tail */
104     blkCnt = blockSize & 3;
105     while (blkCnt > 0U)
106     {
107       /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
108 
109       real = *pSrc++;
110       imag = *pSrc++;
111 
112       /* store result in destination buffer. */
113       *pDst++ = (real * real) + (imag * imag);
114 
115       /* Decrement loop counter */
116       blkCnt--;
117     }
118 
119 }
120 
121 #else
arm_cmplx_mag_squared_f32(const float32_t * pSrc,float32_t * pDst,uint32_t numSamples)122 void arm_cmplx_mag_squared_f32(
123   const float32_t * pSrc,
124         float32_t * pDst,
125         uint32_t numSamples)
126 {
127         uint32_t blkCnt;                               /* Loop counter */
128         float32_t real, imag;                          /* Temporary input variables */
129 
130 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
131   float32x4x2_t vecA;
132   float32x4_t vRealA;
133   float32x4_t vImagA;
134   float32x4_t vMagSqA;
135 
136   float32x4x2_t vecB;
137   float32x4_t vRealB;
138   float32x4_t vImagB;
139   float32x4_t vMagSqB;
140 
141   /* Loop unrolling: Compute 8 outputs at a time */
142   blkCnt = numSamples >> 3;
143 
144   while (blkCnt > 0U)
145   {
146     /* out = sqrt((real * real) + (imag * imag)) */
147 
148     vecA = vld2q_f32(pSrc);
149     pSrc += 8;
150 
151     vRealA = vmulq_f32(vecA.val[0], vecA.val[0]);
152     vImagA = vmulq_f32(vecA.val[1], vecA.val[1]);
153     vMagSqA = vaddq_f32(vRealA, vImagA);
154 
155     vecB = vld2q_f32(pSrc);
156     pSrc += 8;
157 
158     vRealB = vmulq_f32(vecB.val[0], vecB.val[0]);
159     vImagB = vmulq_f32(vecB.val[1], vecB.val[1]);
160     vMagSqB = vaddq_f32(vRealB, vImagB);
161 
162     /* Store the result in the destination buffer. */
163     vst1q_f32(pDst, vMagSqA);
164     pDst += 4;
165 
166     vst1q_f32(pDst, vMagSqB);
167     pDst += 4;
168 
169     /* Decrement the loop counter */
170     blkCnt--;
171   }
172 
173   blkCnt = numSamples & 7;
174 
175 #else
176 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
177 
178   /* Loop unrolling: Compute 4 outputs at a time */
179   blkCnt = numSamples >> 2U;
180 
181   while (blkCnt > 0U)
182   {
183     /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
184 
185     real = *pSrc++;
186     imag = *pSrc++;
187     *pDst++ = (real * real) + (imag * imag);
188 
189     real = *pSrc++;
190     imag = *pSrc++;
191     *pDst++ = (real * real) + (imag * imag);
192 
193     real = *pSrc++;
194     imag = *pSrc++;
195     *pDst++ = (real * real) + (imag * imag);
196 
197     real = *pSrc++;
198     imag = *pSrc++;
199     *pDst++ = (real * real) + (imag * imag);
200 
201     /* Decrement loop counter */
202     blkCnt--;
203   }
204 
205   /* Loop unrolling: Compute remaining outputs */
206   blkCnt = numSamples % 0x4U;
207 
208 #else
209 
210   /* Initialize blkCnt with number of samples */
211   blkCnt = numSamples;
212 
213 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
214 #endif /* #if defined(ARM_MATH_NEON) */
215 
216   while (blkCnt > 0U)
217   {
218     /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
219 
220     real = *pSrc++;
221     imag = *pSrc++;
222 
223     /* store result in destination buffer. */
224     *pDst++ = (real * real) + (imag * imag);
225 
226     /* Decrement loop counter */
227     blkCnt--;
228   }
229 
230 }
231 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
232 
233 /**
234   @} end of cmplx_mag_squared group
235  */
236