• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_cmplx_mag_squared_q31.c
4  * Description:  Q31 complex magnitude squared
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/complex_math_functions.h"
30 
31 /**
32   @ingroup groupCmplxMath
33  */
34 
35 /**
36   @addtogroup cmplx_mag_squared
37   @{
38  */
39 
40 /**
41   @brief         Q31 complex magnitude squared.
42   @param[in]     pSrc        points to input vector
43   @param[out]    pDst        points to output vector
44   @param[in]     numSamples  number of samples in each vector
45   @return        none
46 
47   @par           Scaling and Overflow Behavior
48                    The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
49                    Input down scaling is not required.
50  */
51 
52 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
53 
/*
 * Helium (MVE) implementation.
 *
 * pSrc holds numSamples complex values stored as interleaved
 * (real, imaginary) q31 pairs; pDst receives numSamples squared magnitudes
 * in 3.29 format.
 */
void arm_cmplx_mag_squared_q31(
  const q31_t * pSrc,
        q31_t * pDst,
        uint32_t numSamples)
{
    uint32_t  blkCnt;           /* Loop counter */
    q31x4x2_t vecSrc;           /* De-interleaved input: val[0] used as real parts, val[1] as imaginary parts */
    q31x4_t vReal, vImag;       /* High halves of the squared real / imaginary parts */
    q31x4_t vMagSq;             /* Squared magnitudes for four samples */
    q31_t real, imag;           /* Temporary input variables (tail loop) */
    q31_t acc0, acc1;           /* Accumulators (tail loop) */

    /*
     * Compute 4 complex samples (8 q31 values) at a time.
     * The count is taken straight from the unsigned numSamples: routing it
     * through a signed temporary would make counts above INT32_MAX negative
     * and the subsequent right shift would yield a bogus block count.
     */
    blkCnt = numSamples >> 2U;
    while (blkCnt > 0U)
    {
        /* Load 8 values, split into real and imaginary lanes */
        vecSrc = vld2q(pSrc);

        /* Square each part, keeping the high half of the product */
        vReal = vmulhq(vecSrc.val[0], vecSrc.val[0]);
        vImag = vmulhq(vecSrc.val[1], vecSrc.val[1]);

        /*
         * Add the two squares, then shift right by one to reach 3.29.
         * Note: the shift is applied after the addition here, whereas the
         * scalar tail below shifts each square before adding, so the two
         * paths can differ by one LSB.
         */
        vMagSq = vqaddq(vReal, vImag);
        vMagSq = vshrq(vMagSq, 1);

        vst1q(pDst, vMagSq);

        /* Advance vector source and destination pointers */
        pSrc += 8;
        pDst += 4;

        /* Decrement the blkCnt loop counter */
        blkCnt--;
    }

    /* Tail: remaining 0..3 samples, processed one at a time */
    blkCnt = numSamples & 3U;
    while (blkCnt > 0U)
    {
      /* C[0] = (A[0] * A[0] + A[1] * A[1]) */

      real = *pSrc++;
      imag = *pSrc++;
      acc0 = (q31_t) (((q63_t) real * real) >> 33);
      acc1 = (q31_t) (((q63_t) imag * imag) >> 33);

      /* store result in 3.29 format in destination buffer. */
      *pDst++ = acc0 + acc1;

      /* Decrement loop counter */
      blkCnt--;
    }
}
106 
107 #else
/*
 * Portable scalar implementation.
 *
 * pSrc holds numSamples complex values stored as interleaved
 * (real, imaginary) q31 pairs; pDst receives numSamples squared magnitudes
 * in 3.29 format.
 */
void arm_cmplx_mag_squared_q31(
  const q31_t * pSrc,
        q31_t * pDst,
        uint32_t numSamples)
{
  uint32_t remaining;          /* Samples left for the one-at-a-time loop */
  q31_t re, im;                /* Real and imaginary part of the current sample */

#if defined (ARM_MATH_LOOPUNROLL)

  uint32_t quads;              /* Groups of four samples still to process */

  /* Unrolled section: every pass consumes four complex samples (eight q31
     values) and writes four results. */
  for (quads = numSamples >> 2U; quads > 0U; quads--)
  {
    /* out = re * re + im * im, each square reduced by 33 bits so that the
       sum is stored in 3.29 format */
    re = pSrc[0];
    im = pSrc[1];
    pDst[0] = (q31_t) (((q63_t) re * re) >> 33) + (q31_t) (((q63_t) im * im) >> 33);

    re = pSrc[2];
    im = pSrc[3];
    pDst[1] = (q31_t) (((q63_t) re * re) >> 33) + (q31_t) (((q63_t) im * im) >> 33);

    re = pSrc[4];
    im = pSrc[5];
    pDst[2] = (q31_t) (((q63_t) re * re) >> 33) + (q31_t) (((q63_t) im * im) >> 33);

    re = pSrc[6];
    im = pSrc[7];
    pDst[3] = (q31_t) (((q63_t) re * re) >> 33) + (q31_t) (((q63_t) im * im) >> 33);

    /* Step past the four samples just handled */
    pSrc += 8;
    pDst += 4;
  }

  /* Whatever does not fill a group of four is left for the loop below */
  remaining = numSamples % 0x4U;

#else

  /* No unrolling: every sample goes through the loop below */
  remaining = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; remaining > 0U; remaining--)
  {
    /* out = re * re + im * im */
    re = *pSrc++;
    im = *pSrc++;

    /* Result is written to the destination buffer in 3.29 format */
    *pDst++ = (q31_t) (((q63_t) re * re) >> 33) + (q31_t) (((q63_t) im * im) >> 33);
  }

}
182 
183 #endif /* defined(ARM_MATH_MVEI) */
184 
185 /**
186   @} end of cmplx_mag_squared group
187  */
188