1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cmplx_mag_q31.c
4 * Description: Q31 complex magnitude
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/complex_math_functions.h"
30
31 /**
32 @ingroup groupCmplxMath
33 */
34
35 /**
36 @addtogroup cmplx_mag
37 @{
38 */
39
40 /**
41 @brief Q31 complex magnitude.
42 @param[in] pSrc points to input vector
43 @param[out] pDst points to output vector
44 @param[in] numSamples number of samples in each vector
45 @return none
46
47 @par Scaling and Overflow Behavior
48 The function implements 1.31 by 1.31 multiplications and finally output is converted into 2.30 format.
49 Input down scaling is not required.
50 */
51
52 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
53
54 #include "arm_helium_utils.h"
55
/*
 * Q31 complex magnitude, Helium (MVE) variant.
 *
 * pSrc       : input vector, read as consecutive (real, imag) pairs
 * pDst       : output vector, one value written per complex sample
 * numSamples : number of complex samples to process
 *
 * Four complex samples are handled per vector iteration; the remaining
 * 0..3 samples are handled by a scalar tail loop.
 */
void arm_cmplx_mag_q31(
  const q31_t * pSrc,
        q31_t * pDst,
        uint32_t numSamples)
{
    uint32_t blkCnt;                /* Loop counter */

    q31x4x2_t vecSrc;               /* Two vectors filled from 8 consecutive input values */
    q31x4_t sum;                    /* Per-lane sum of squares, then its square root */

    q31_t real, imag;               /* Temporary input variables (tail loop) */
    q31_t acc0, acc1;               /* Accumulators (tail loop) */

    /*
     * Compute 4 complex samples at a time.
     * The loop counts are derived from the unsigned numSamples directly, so
     * no signed conversion or signed right shift is involved.
     */
    blkCnt = numSamples >> 2U;
    while (blkCnt > 0U)
    {
        /* De-interleaving load: val[0] receives the real parts, val[1] the imaginary parts */
        vecSrc = vld2q(pSrc);

        /* real * real + imag * imag per lane (high-half products, saturating addition) */
        sum = vqaddq(vmulhq(vecSrc.val[0], vecSrc.val[0]),
                     vmulhq(vecSrc.val[1], vecSrc.val[1]));

        /* One additional right shift before taking the square root */
        sum = vshrq(sum, 1);

        /*
         * FAST_VSQRT_Q31 is using a table. There are compilation flags to avoid
         * including this table (and in this case, arm_cmplx_mag_q31 must not
         * be built and linked).
         */
        sum = FAST_VSQRT_Q31(sum);

        vst1q(pDst, sum);

        /* Decrement the loop counter and advance past 4 complex inputs / 4 outputs */
        blkCnt--;
        pSrc += 8;
        pDst += 4;
    }

    /*
     * Tail: remaining 0..3 complex samples, processed one at a time
     */
    blkCnt = numSamples & 3U;
    while (blkCnt > 0U)
    {
        /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */

        real = *pSrc++;
        imag = *pSrc++;
        acc0 = (q31_t) (((q63_t) real * real) >> 33);
        acc1 = (q31_t) (((q63_t) imag * imag) >> 33);

        /* store result in 2.30 format in destination buffer. */
        arm_sqrt_q31(acc0 + acc1, pDst++);

        /* Decrement loop counter */
        blkCnt--;
    }
}
120
121 #else
/*
 * Q31 complex magnitude, portable C variant.
 *
 * pSrc       : input vector, read as consecutive (real, imag) pairs
 * pDst       : output vector, one value written per complex sample
 * numSamples : number of complex samples to process
 *
 * For every sample the squared real and imaginary parts are each computed in
 * 64 bits, shifted right by 33 bits, added, and passed to arm_sqrt_q31, which
 * writes the result to the destination.
 */
void arm_cmplx_mag_q31(
  const q31_t * pSrc,
        q31_t * pDst,
        uint32_t numSamples)
{
  uint32_t remaining;               /* Outputs left for the current loop */
  q31_t re, im;                     /* Current real / imaginary input */
  q31_t reSq, imSq;                 /* Scaled squares of re and im */

#if defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled section: four outputs per pass */
  for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
  {
    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]), result stored in 2.30 format */
    re   = pSrc[0];
    im   = pSrc[1];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    arm_sqrt_q31(reSq + imSq, &pDst[0]);

    /* C[1] */
    re   = pSrc[2];
    im   = pSrc[3];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    arm_sqrt_q31(reSq + imSq, &pDst[1]);

    /* C[2] */
    re   = pSrc[4];
    im   = pSrc[5];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    arm_sqrt_q31(reSq + imSq, &pDst[2]);

    /* C[3] */
    re   = pSrc[6];
    im   = pSrc[7];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    arm_sqrt_q31(reSq + imSq, &pDst[3]);

    /* Step over the four complex inputs and four outputs just handled */
    pSrc += 8;
    pDst += 4;
  }

  /* Leftover outputs (0..3) that did not fill a group of four */
  remaining = numSamples & 0x3U;

#else

  /* No unrolling: every output is produced by the loop below */
  remaining = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; remaining > 0U; remaining--)
  {
    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]), result stored in 2.30 format */
    re   = pSrc[0];
    im   = pSrc[1];
    pSrc += 2;

    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);

    arm_sqrt_q31(reSq + imSq, pDst);
    pDst++;
  }

}
197 #endif /* defined(ARM_MATH_MVEI) */
198
199 /**
200 @} end of cmplx_mag group
201 */
202