1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cmplx_mag_squared_f32.c
4 * Description: Floating-point complex magnitude squared
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/complex_math_functions.h"
30
31 /**
32 @ingroup groupCmplxMath
33 */
34
35 /**
36 @defgroup cmplx_mag_squared Complex Magnitude Squared
37
38 Computes the magnitude squared of the elements of a complex data vector.
39
40 The <code>pSrc</code> points to the source data and
41 <code>pDst</code> points to the where the result should be written.
42 <code>numSamples</code> specifies the number of complex samples
43 in the input array and the data is stored in an interleaved fashion
44 (real, imag, real, imag, ...).
45 The input array has a total of <code>2*numSamples</code> values;
46 the output array has a total of <code>numSamples</code> values.
47
48 The underlying algorithm is used:
49
50 <pre>
51 for (n = 0; n < numSamples; n++) {
52 pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;
53 }
54 </pre>
55
56 There are separate functions for floating-point, Q15, and Q31 data types.
57 */
58
59 /**
60 @addtogroup cmplx_mag_squared
61 @{
62 */
63
64 /**
65 @brief Floating-point complex magnitude squared.
66 @param[in] pSrc points to input vector
67 @param[out] pDst points to output vector
68 @param[in] numSamples number of samples in each vector
69 @return none
70 */
71
72 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
73
arm_cmplx_mag_squared_f32(const float32_t * pSrc,float32_t * pDst,uint32_t numSamples)74 void arm_cmplx_mag_squared_f32(
75 const float32_t * pSrc,
76 float32_t * pDst,
77 uint32_t numSamples)
78 {
79 int32_t blockSize = numSamples; /* loop counters */
80 uint32_t blkCnt; /* loop counters */
81 f32x4x2_t vecSrc;
82 f32x4_t sum;
83 float32_t real, imag; /* Temporary input variables */
84
85 /* Compute 4 complex samples at a time */
86 blkCnt = blockSize >> 2;
87 while (blkCnt > 0U)
88 {
89 vecSrc = vld2q(pSrc);
90 sum = vmulq(vecSrc.val[0], vecSrc.val[0]);
91 sum = vfmaq(sum, vecSrc.val[1], vecSrc.val[1]);
92 vst1q(pDst, sum);
93
94 pSrc += 8;
95 pDst += 4;
96
97 /*
98 * Decrement the blockSize loop counter
99 */
100 blkCnt--;
101 }
102
103 /* Tail */
104 blkCnt = blockSize & 3;
105 while (blkCnt > 0U)
106 {
107 /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
108
109 real = *pSrc++;
110 imag = *pSrc++;
111
112 /* store result in destination buffer. */
113 *pDst++ = (real * real) + (imag * imag);
114
115 /* Decrement loop counter */
116 blkCnt--;
117 }
118
119 }
120
121 #else
arm_cmplx_mag_squared_f32(const float32_t * pSrc,float32_t * pDst,uint32_t numSamples)122 void arm_cmplx_mag_squared_f32(
123 const float32_t * pSrc,
124 float32_t * pDst,
125 uint32_t numSamples)
126 {
127 uint32_t blkCnt; /* Loop counter */
128 float32_t real, imag; /* Temporary input variables */
129
130 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
131 float32x4x2_t vecA;
132 float32x4_t vRealA;
133 float32x4_t vImagA;
134 float32x4_t vMagSqA;
135
136 float32x4x2_t vecB;
137 float32x4_t vRealB;
138 float32x4_t vImagB;
139 float32x4_t vMagSqB;
140
141 /* Loop unrolling: Compute 8 outputs at a time */
142 blkCnt = numSamples >> 3;
143
144 while (blkCnt > 0U)
145 {
146 /* out = sqrt((real * real) + (imag * imag)) */
147
148 vecA = vld2q_f32(pSrc);
149 pSrc += 8;
150
151 vRealA = vmulq_f32(vecA.val[0], vecA.val[0]);
152 vImagA = vmulq_f32(vecA.val[1], vecA.val[1]);
153 vMagSqA = vaddq_f32(vRealA, vImagA);
154
155 vecB = vld2q_f32(pSrc);
156 pSrc += 8;
157
158 vRealB = vmulq_f32(vecB.val[0], vecB.val[0]);
159 vImagB = vmulq_f32(vecB.val[1], vecB.val[1]);
160 vMagSqB = vaddq_f32(vRealB, vImagB);
161
162 /* Store the result in the destination buffer. */
163 vst1q_f32(pDst, vMagSqA);
164 pDst += 4;
165
166 vst1q_f32(pDst, vMagSqB);
167 pDst += 4;
168
169 /* Decrement the loop counter */
170 blkCnt--;
171 }
172
173 blkCnt = numSamples & 7;
174
175 #else
176 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
177
178 /* Loop unrolling: Compute 4 outputs at a time */
179 blkCnt = numSamples >> 2U;
180
181 while (blkCnt > 0U)
182 {
183 /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
184
185 real = *pSrc++;
186 imag = *pSrc++;
187 *pDst++ = (real * real) + (imag * imag);
188
189 real = *pSrc++;
190 imag = *pSrc++;
191 *pDst++ = (real * real) + (imag * imag);
192
193 real = *pSrc++;
194 imag = *pSrc++;
195 *pDst++ = (real * real) + (imag * imag);
196
197 real = *pSrc++;
198 imag = *pSrc++;
199 *pDst++ = (real * real) + (imag * imag);
200
201 /* Decrement loop counter */
202 blkCnt--;
203 }
204
205 /* Loop unrolling: Compute remaining outputs */
206 blkCnt = numSamples % 0x4U;
207
208 #else
209
210 /* Initialize blkCnt with number of samples */
211 blkCnt = numSamples;
212
213 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
214 #endif /* #if defined(ARM_MATH_NEON) */
215
216 while (blkCnt > 0U)
217 {
218 /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
219
220 real = *pSrc++;
221 imag = *pSrc++;
222
223 /* store result in destination buffer. */
224 *pDst++ = (real * real) + (imag * imag);
225
226 /* Decrement loop counter */
227 blkCnt--;
228 }
229
230 }
231 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
232
233 /**
234 @} end of cmplx_mag_squared group
235 */
236