• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_cmplx_mag_squared_f16.c
4  * Description:  Floating-point complex magnitude squared
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/complex_math_functions_f16.h"
30 
31 #if defined(ARM_FLOAT16_SUPPORTED)
32 
33 /**
34   @ingroup groupCmplxMath
35  */
36 
37 /**
38   @defgroup cmplx_mag_squared Complex Magnitude Squared
39 
40   Computes the magnitude squared of the elements of a complex data vector.
41 
42   The <code>pSrc</code> points to the source data and
43   <code>pDst</code> points to where the result should be written.
44   <code>numSamples</code> specifies the number of complex samples
45   in the input array and the data is stored in an interleaved fashion
46   (real, imag, real, imag, ...).
47   The input array has a total of <code>2*numSamples</code> values;
48   the output array has a total of <code>numSamples</code> values.
49 
50   The underlying algorithm is used:
51 
52   <pre>
53   for (n = 0; n < numSamples; n++) {
54       pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;
55   }
56   </pre>
57 
58   There are separate functions for floating-point, Q15, and Q31 data types.
59  */
60 
61 /**
62   @addtogroup cmplx_mag_squared
63   @{
64  */
65 
66 /**
67   @brief         Floating-point complex magnitude squared.
68   @param[in]     pSrc        points to input vector
69   @param[out]    pDst        points to output vector
70   @param[in]     numSamples  number of samples in each vector
71   @return        none
72  */
73 
74 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
75 
/*
 * Helium (MVE) half-precision implementation.
 *
 * For every complex sample n in [0, numSamples) writes
 *     pDst[n] = pSrc[2*n]^2 + pSrc[2*n + 1]^2
 *
 * pSrc       interleaved input (real, imag, real, imag, ...), 2*numSamples values
 * pDst       output, numSamples values
 * numSamples number of complex samples to process
 *
 * Full groups of 8 complex samples are processed straight from pSrc.
 * The de-interleaving load (vld2q) has no predicated form and always
 * reads 16 values, so the final partial group is first copied into a
 * zero-padded local buffer; nothing beyond pSrc[2*numSamples - 1] is read.
 */
void arm_cmplx_mag_squared_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples)
{
    uint32_t blkCnt  = numSamples >> 3U;    /* number of full 8-sample groups */
    uint32_t tailCnt = numSamples & 0x7U;   /* leftover complex samples (0..7) */
    f16x8x2_t vecSrc;
    f16x8_t sum;

    /* Compute 8 complex samples at a time */
    while (blkCnt > 0U)
    {
        /* De-interleave: val[0] = even (real) elements, val[1] = odd (imag) elements */
        vecSrc = vld2q(pSrc);
        sum = vmulq(vecSrc.val[0], vecSrc.val[0]);
        sum = vfmaq(sum, vecSrc.val[1], vecSrc.val[1]);
        vst1q(pDst, sum);

        pSrc += 16;
        pDst += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Remaining 1..7 complex samples */
    if (tailCnt > 0U)
    {
        float16_t tailBuf[16];
        const float16_t *pTail = tailBuf;
        mve_pred16_t p = vctp16q(tailCnt);   /* enables the first tailCnt lanes */
        uint32_t i;

        /* Stage the valid input values; zero the rest so the full-width
         * load below only ever touches initialised, owned memory. */
        for (i = 0U; i < (2U * tailCnt); i++)
        {
            tailBuf[i] = pSrc[i];
        }
        for (; i < 16U; i++)
        {
            tailBuf[i] = (float16_t)0.0f;
        }

        vecSrc = vld2q(pTail);
        sum = vmulq_m(vuninitializedq_f16(), vecSrc.val[0], vecSrc.val[0], p);
        sum = vfmaq_m(sum, vecSrc.val[1], vecSrc.val[1], p);

        /* Predicated store: only the first tailCnt results are written */
        vstrhq_p_f16(pDst, sum, p);
    }
}
104 
105 #else
/*
 * Portable (scalar) half-precision implementation.
 *
 * For every complex sample n in [0, numSamples) writes
 *     pDst[n] = pSrc[2*n]^2 + pSrc[2*n + 1]^2
 *
 * pSrc       interleaved input (real, imag, real, imag, ...)
 * pDst       output, one value per complex sample
 * numSamples number of complex samples to process
 */
void arm_cmplx_mag_squared_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples)
{
  uint32_t remaining;   /* samples left for the one-at-a-time loop */
  _Float16 re, im;      /* current real / imaginary input values */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

  /* Unrolled section: four outputs per pass */
  for (uint32_t quads = numSamples >> 2U; quads > 0U; quads--)
  {
    re = pSrc[0];
    im = pSrc[1];
    pDst[0] = (re * re) + (im * im);

    re = pSrc[2];
    im = pSrc[3];
    pDst[1] = (re * re) + (im * im);

    re = pSrc[4];
    im = pSrc[5];
    pDst[2] = (re * re) + (im * im);

    re = pSrc[6];
    im = pSrc[7];
    pDst[3] = (re * re) + (im * im);

    pSrc += 8;
    pDst += 4;
  }

  /* Whatever did not fit into a group of four */
  remaining = numSamples & 0x3U;

#else

  /* No unrolling: every sample goes through the loop below */
  remaining = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) */

  for (; remaining > 0U; remaining--)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
    re = *pSrc++;
    im = *pSrc++;

    *pDst++ = (re * re) + (im * im);
  }
}
168 #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
169 
170 /**
171   @} end of cmplx_mag_squared group
172  */
173 
174 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */