• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_cmplx_mult_real_f16.c
4  * Description:  Floating-point complex by real multiplication
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/complex_math_functions_f16.h"
30 
31 #if defined(ARM_FLOAT16_SUPPORTED)
32 
33 /**
34   @ingroup groupCmplxMath
35  */
36 
37 /**
38   @defgroup CmplxByRealMult Complex-by-Real Multiplication
39 
40   Multiplies a complex vector by a real vector and generates a complex result.
41   The data in the complex arrays is stored in an interleaved fashion
42   (real, imag, real, imag, ...).
43   The parameter <code>numSamples</code> represents the number of complex
44   samples processed.  The complex arrays have a total of <code>2*numSamples</code>
45   real values while the real array has a total of <code>numSamples</code>
46   real values.
47 
48   The underlying algorithm is:
49 
50   <pre>
51   for (n = 0; n < numSamples; n++) {
52       pCmplxDst[(2*n)+0] = pSrcCmplx[(2*n)+0] * pSrcReal[n];
53       pCmplxDst[(2*n)+1] = pSrcCmplx[(2*n)+1] * pSrcReal[n];
54   }
55   </pre>
56 
57   There are separate functions for floating-point, Q15, and Q31 data types.
58  */
59 
60 /**
61   @addtogroup CmplxByRealMult
62   @{
63  */
64 
65 /**
66   @brief         Floating-point complex-by-real multiplication.
67   @param[in]     pSrcCmplx   points to complex input vector
68   @param[in]     pSrcReal    points to real input vector
69   @param[out]    pCmplxDst   points to complex output vector
70   @param[in]     numSamples  number of complex samples to process (pSrcReal holds numSamples values; the complex vectors hold 2*numSamples values)
71   @return        none
72  */
73 
74 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
75 
/*
 * MVE (half-precision vector) implementation.
 *
 * Multiplies each interleaved (real, imag) pair of pSrcCmplx by the matching
 * element of pSrcReal and writes the interleaved result to pCmplxDst.
 *
 *   pSrcCmplx   interleaved complex input
 *   pSrcReal    real input, one value per complex sample
 *   pCmplxDst   interleaved complex output
 *   numSamples  number of complex samples to process
 *
 * Only the elements that belong to the vectors are accessed: the tail block
 * uses predicated loads as well as a predicated store.
 */
void arm_cmplx_mult_real_f16(
  const float16_t * pSrcCmplx,
  const float16_t * pSrcReal,
        float16_t * pCmplxDst,
        uint32_t numSamples)
{
    /* Element offsets into pSrcReal: every real value is fetched twice so it
       lines up with both halves of the complex sample it scales. */
    static const uint16_t stride_cmplx_x_real_16[8] = {
        0, 0, 1, 1, 2, 2, 3, 3
        };
    /* Total number of half-precision values in the complex vectors
       (CMPLX_DIM is defined outside this file; presumably 2 - confirm). */
    uint32_t blockSizeC = numSamples * CMPLX_DIM;
    uint32_t blkCnt;                                /* loop counter */
    f16x8_t rVec;
    f16x8_t cmplxVec;
    f16x8_t dstVec;
    uint16x8_t strideVec;


    /* stride vector for pairs of real generation */
    strideVec = vld1q(stride_cmplx_x_real_16);

    /* Compute 4 complex outputs (8 half-precision values) at a time */
    blkCnt = blockSizeC >> 3;
    while (blkCnt > 0U)
    {
        cmplxVec = vld1q(pSrcCmplx);
        rVec = vldrhq_gather_shifted_offset_f16(pSrcReal, strideVec);
        dstVec = vmulq(cmplxVec, rVec);
        vst1q(pCmplxDst, dstVec);

        pSrcReal += 4;
        pSrcCmplx += 8;
        pCmplxDst += 8;
        blkCnt--;
    }

    /* Tail: fewer than 8 half-precision values remain */
    blkCnt = blockSizeC & 7;
    if (blkCnt > 0U) {
        /* Predicate enabling only the first blkCnt lanes */
        mve_pred16_t p0 = vctp16q(blkCnt);

        /* Both loads are predicated so that lanes past the end of the input
           buffers are never read; disabled lanes are zero-filled and are
           discarded by the predicated store below. */
        cmplxVec = vldrhq_z_f16(pSrcCmplx, p0);
        rVec = vldrhq_gather_shifted_offset_z_f16(pSrcReal, strideVec, p0);
        dstVec = vmulq(cmplxVec, rVec);
        vstrhq_p_f16(pCmplxDst, dstVec, p0);
    }
}
121 
122 #else
/*
 * Scalar implementation, compiled when the MVE float16 variant is not
 * selected.
 *
 * For every complex sample i:
 *   pCmplxDst[2*i]     = pSrcCmplx[2*i]     * pSrcReal[i]
 *   pCmplxDst[2*i + 1] = pSrcCmplx[2*i + 1] * pSrcReal[i]
 *
 *   pSrcCmplx   interleaved complex input
 *   pSrcReal    real input, one value per complex sample
 *   pCmplxDst   interleaved complex output
 *   numSamples  number of complex samples to process
 */
void arm_cmplx_mult_real_f16(
  const float16_t * pSrcCmplx,
  const float16_t * pSrcReal,
        float16_t * pCmplxDst,
        uint32_t numSamples)
{
  uint32_t remaining;   /* complex samples left for the one-at-a-time loop */
  float16_t scale;      /* real factor applied to the current complex sample */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

  uint32_t quads;       /* groups of four complex samples */

  /* Unrolled section: four complex samples per iteration.  Each source
     value is read before the corresponding destination value is written,
     in ascending order. */
  for (quads = numSamples >> 2U; quads > 0U; quads--)
  {
    scale = pSrcReal[0];
    pCmplxDst[0] = pSrcCmplx[0] * scale;
    pCmplxDst[1] = pSrcCmplx[1] * scale;

    scale = pSrcReal[1];
    pCmplxDst[2] = pSrcCmplx[2] * scale;
    pCmplxDst[3] = pSrcCmplx[3] * scale;

    scale = pSrcReal[2];
    pCmplxDst[4] = pSrcCmplx[4] * scale;
    pCmplxDst[5] = pSrcCmplx[5] * scale;

    scale = pSrcReal[3];
    pCmplxDst[6] = pSrcCmplx[6] * scale;
    pCmplxDst[7] = pSrcCmplx[7] * scale;

    /* Step past the four samples just processed */
    pSrcReal  += 4;
    pSrcCmplx += 8;
    pCmplxDst += 8;
  }

  /* Samples that did not fill a complete group of four */
  remaining = numSamples & 0x3U;

#else

  /* No unrolling: every sample goes through the loop below */
  remaining = numSamples;

#endif /* defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) */

  for (; remaining > 0U; remaining--)
  {
    /* Scale the real part, then the imaginary part, by the same factor */
    scale = *pSrcReal++;
    *pCmplxDst++ = *pSrcCmplx++ * scale;
    *pCmplxDst++ = *pSrcCmplx++ * scale;
  }

}
188 #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
189 
190 /**
191   @} end of CmplxByRealMult group
192  */
193 
194 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */