1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cmplx_mag_squared_q31.c
4 * Description: Q31 complex magnitude squared
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/complex_math_functions.h"
30
31 /**
32 @ingroup groupCmplxMath
33 */
34
35 /**
36 @addtogroup cmplx_mag_squared
37 @{
38 */
39
40 /**
41 @brief Q31 complex magnitude squared.
42 @param[in] pSrc points to input vector
43 @param[out] pDst points to output vector
44 @param[in] numSamples number of samples in each vector
45 @return none
46
47 @par Scaling and Overflow Behavior
  The function implements 1.31 by 1.31 multiplications; the final output is converted to 3.29 format.
  Input downscaling is not required.
50 */
51
52 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
53
/*
 * Helium (MVE) variant.
 *
 * pSrc is read as interleaved pairs (two q31_t values per complex sample),
 * and one q31_t result is written to pDst per sample. Four samples are
 * handled per vector iteration; the remaining 0..3 samples are handled by a
 * scalar tail loop.
 *
 * Both loop counts are derived directly from the unsigned numSamples so that
 * no unsigned-to-signed conversion or signed right shift is involved.
 */
void arm_cmplx_mag_squared_q31(
  const q31_t * pSrc,
        q31_t * pDst,
        uint32_t numSamples)
{
    uint32_t blkCnt;                    /* Loop counter */
    q31x4x2_t vecSrc;                   /* Two vectors loaded from 8 consecutive inputs */
    q31x4_t vReal, vImag;               /* Squared real / imaginary parts */
    q31x4_t vMagSq;                     /* Four results */
    q31_t real, imag;                   /* Temporary input variables (tail loop) */
    q31_t acc0, acc1;                   /* Accumulators (tail loop) */

    /* Compute 4 complex samples at a time */
    blkCnt = numSamples >> 2U;
    while (blkCnt > 0U)
    {
        /* val[0] is used as the real parts and val[1] as the imaginary parts */
        vecSrc = vld2q(pSrc);

        /*
         * Square each part. NOTE(review): per the MVE intrinsic definitions,
         * vmulhq is expected to return the high half of the widened product
         * (i.e. product >> 32) - confirm against the intrinsics reference.
         */
        vReal = vmulhq(vecSrc.val[0], vecSrc.val[0]);
        vImag = vmulhq(vecSrc.val[1], vecSrc.val[1]);

        /* Saturating add of the two squares, then one more right shift */
        vMagSq = vqaddq(vReal, vImag);
        vMagSq = vshrq(vMagSq, 1);

        vst1q(pDst, vMagSq);

        /* 4 complex samples consumed (8 q31_t values), 4 results produced */
        pSrc += 8;
        pDst += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /*
     * Tail: remaining 0..3 samples.
     * NOTE(review): here each square is shifted by 33 before the addition,
     * whereas the vector loop adds first and applies the last shift after;
     * the two paths may therefore differ in the least significant bit -
     * presumably accepted, verify against the test tolerances.
     */
    blkCnt = numSamples & 3U;
    while (blkCnt > 0U)
    {
        /* C[0] = (A[0] * A[0] + A[1] * A[1]) */

        real = *pSrc++;
        imag = *pSrc++;
        acc0 = (q31_t) (((q63_t) real * real) >> 33);
        acc1 = (q31_t) (((q63_t) imag * imag) >> 33);

        /* store result in 3.29 format in destination buffer. */
        *pDst++ = acc0 + acc1;

        /* Decrement loop counter */
        blkCnt--;
    }
}
106
107 #else
/*
 * Scalar variant.
 *
 * pSrc is read as interleaved pairs (two q31_t values per complex sample),
 * and one q31_t result is written to pDst per sample. Each part is squared
 * in 64-bit precision and shifted right by 33 bits before the two terms are
 * added. When ARM_MATH_LOOPUNROLL is defined, four outputs are produced per
 * iteration and the leftover samples are handled by the final loop.
 */
void arm_cmplx_mag_squared_q31(
  const q31_t * pSrc,
        q31_t * pDst,
        uint32_t numSamples)
{
  uint32_t remaining;                   /* Outputs still to be produced */
  q31_t re, im;                         /* Parts of the current input sample */
  q31_t reSq, imSq;                     /* Squared parts after the 33-bit shift */

#if defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled section: four outputs per pass */
  for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]), result stored in 3.29 format */
    re = pSrc[0];
    im = pSrc[1];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    pDst[0] = reSq + imSq;

    re = pSrc[2];
    im = pSrc[3];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    pDst[1] = reSq + imSq;

    re = pSrc[4];
    im = pSrc[5];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    pDst[2] = reSq + imSq;

    re = pSrc[6];
    im = pSrc[7];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    pDst[3] = reSq + imSq;

    /* Step past the 4 samples just processed */
    pSrc += 8;
    pDst += 4;
  }

  /* Samples left over after the unrolled section */
  remaining = numSamples % 0x4U;

#else

  /* No unrolling: every sample goes through the loop below */
  remaining = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; remaining > 0U; remaining--)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
    re = *pSrc++;
    im = *pSrc++;
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);

    /* Result is stored in 3.29 format */
    *pDst++ = reSq + imSq;
  }
}
182
183 #endif /* defined(ARM_MATH_MVEI) */
184
185 /**
186 @} end of cmplx_mag_squared group
187 */
188