• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_cmplx_mult_real_q15.c
4  * Description:  Q15 complex by real multiplication
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/complex_math_functions.h"
30 
31 /**
32   @ingroup groupCmplxMath
33  */
34 
35 /**
36   @addtogroup CmplxByRealMult
37   @{
38  */
39 
40 /**
41   @brief         Q15 complex-by-real multiplication.
42   @param[in]     pSrcCmplx   points to complex input vector
43   @param[in]     pSrcReal    points to real input vector
44   @param[out]    pCmplxDst   points to complex output vector
45   @param[in]     numSamples  number of samples in each vector
46   @return        none
47 
48   @par           Scaling and Overflow Behavior
49                    The function uses saturating arithmetic.
50                    Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
51  */
52 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
53 
/*
 * Helium (MVE) implementation: multiplies every complex Q15 sample
 * (interleaved real, imaginary) by the matching real Q15 sample and
 * writes the saturated Q15 result to pCmplxDst.
 */
void arm_cmplx_mult_real_q15(
  const q15_t * pSrcCmplx,
  const q15_t * pSrcReal,
        q15_t * pCmplxDst,
        uint32_t numSamples)
{
  /*
   * Gather indices: each real sample index appears twice so that the
   * same real value is paired with both the real and the imaginary lane
   * of one complex sample.
   */
  static const uint16_t stride_cmplx_x_real_16[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };

  uint32_t   totalElems = numSamples * CMPLX_DIM;   /* q15 elements in the complex vectors */
  uint32_t   remaining;                             /* loop counter */
  uint16x8_t gatherIdx;                             /* indices used by the gather load */
  q15x8_t    cmplxIn;                               /* 4 complex samples (8 lanes) */
  q15x8_t    realDup;                               /* 4 real samples, each duplicated */
  q15x8_t    prod;                                  /* lane-wise products */
  q15_t      realVal;                               /* real multiplier used by the tail loop */

  gatherIdx = vld1q(stride_cmplx_x_real_16);

  /* Vector part: 8 q15 elements (4 complex samples) per iteration */
  for (remaining = totalElems >> 3; remaining > 0U; remaining--)
  {
    cmplxIn = vld1q(pSrcCmplx);
    realDup = vldrhq_gather_shifted_offset_s16(pSrcReal, gatherIdx);

    /* vqdmulhq: saturating doubling multiply, high half */
    prod = vqdmulhq(cmplxIn, realDup);
    vst1q(pCmplxDst, prod);

    pSrcCmplx += 8;
    pSrcReal  += 4;
    pCmplxDst += 8;
  }

  /* Tail: leftover complex samples, one per iteration */
  for (remaining = (totalElems & 7) >> 1; remaining > 0U; remaining--)
  {
    /* C[2 * i    ] = A[2 * i    ] * B[i]. */
    /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
    realVal = *pSrcReal++;

    pCmplxDst[0] = (q15_t) __SSAT((((q31_t) pSrcCmplx[0] * realVal) >> 15), 16);
    pCmplxDst[1] = (q15_t) __SSAT((((q31_t) pSrcCmplx[1] * realVal) >> 15), 16);

    pSrcCmplx += 2;
    pCmplxDst += 2;
  }
}
107 #else
/*
 * Scalar / DSP-extension implementation: multiplies every complex Q15
 * sample (interleaved real, imaginary) by the matching real Q15 sample
 * and writes the saturated Q15 result to pCmplxDst.
 */
void arm_cmplx_mult_real_q15(
  const q15_t * pSrcCmplx,
  const q15_t * pSrcReal,
        q15_t * pCmplxDst,
        uint32_t numSamples)
{
        uint32_t remaining;                            /* Loop counter */
        q15_t realVal;                                 /* Current real multiplier */

#if defined (ARM_MATH_LOOPUNROLL)

#if defined (ARM_MATH_DSP)
        q31_t cmplxPair0, cmplxPair1;                  /* Two packed complex samples (2 x q15 each) */
        q31_t realPair;                                /* Two packed real samples */
        q15_t realFirst, realSecond;                   /* Real multipliers for cmplxPair0 / cmplxPair1 */
        q31_t prodLo0, prodHi0, prodLo1, prodHi1;      /* Products of the low / high halfwords */
        q15_t satLo0, satHi0, satLo1, satHi1;          /* Saturated Q15 results */
#endif

  /* Unrolled part: 4 complex samples per iteration */
  for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
  {
    /* C[2 * i    ] = A[2 * i    ] * B[i]. */
    /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */

#if defined (ARM_MATH_DSP)
    /* ---- complex samples 0 and 1 of this group ---- */
    cmplxPair0 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
    cmplxPair1 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
    realPair   = read_q15x2_ia ((q15_t **) &pSrcReal);

    /* Which halfword of realPair belongs to which complex sample depends on endianness */
#ifndef ARM_MATH_BIG_ENDIAN
    realFirst  = (q15_t) (realPair);
    realSecond = (q15_t) (realPair >> 16);
#else
    realFirst  = (q15_t) (realPair >> 16);
    realSecond = (q15_t) (realPair);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    prodLo0 = (q31_t) ((q15_t) (cmplxPair0)       * realFirst);
    prodHi0 = (q31_t) ((q15_t) (cmplxPair0 >> 16) * realFirst);
    prodLo1 = (q31_t) ((q15_t) (cmplxPair1)       * realSecond);
    prodHi1 = (q31_t) ((q15_t) (cmplxPair1 >> 16) * realSecond);

    /* Scale back to Q15 and saturate */
    satLo0 = (q15_t) __SSAT(prodLo0 >> 15U, 16);
    satHi0 = (q15_t) __SSAT(prodHi0 >> 15U, 16);
    satLo1 = (q15_t) __SSAT(prodLo1 >> 15U, 16);
    satHi1 = (q15_t) __SSAT(prodHi1 >> 15U, 16);

    /* Re-pack low / high halfwords and store two q15 values at a time */
    write_q15x2_ia (&pCmplxDst, __PKHBT(satLo0, satHi0, 16));
    write_q15x2_ia (&pCmplxDst, __PKHBT(satLo1, satHi1, 16));

    /* ---- complex samples 2 and 3 of this group ---- */
    cmplxPair0 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
    cmplxPair1 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
    realPair   = read_q15x2_ia ((q15_t **) &pSrcReal);

#ifndef ARM_MATH_BIG_ENDIAN
    realFirst  = (q15_t) (realPair);
    realSecond = (q15_t) (realPair >> 16);
#else
    realFirst  = (q15_t) (realPair >> 16);
    realSecond = (q15_t) (realPair);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    prodLo0 = (q31_t) ((q15_t) (cmplxPair0)       * realFirst);
    prodHi0 = (q31_t) ((q15_t) (cmplxPair0 >> 16) * realFirst);
    prodLo1 = (q31_t) ((q15_t) (cmplxPair1)       * realSecond);
    prodHi1 = (q31_t) ((q15_t) (cmplxPair1 >> 16) * realSecond);

    satLo0 = (q15_t) __SSAT(prodLo0 >> 15U, 16);
    satHi0 = (q15_t) __SSAT(prodHi0 >> 15U, 16);
    satLo1 = (q15_t) __SSAT(prodLo1 >> 15U, 16);
    satHi1 = (q15_t) __SSAT(prodHi1 >> 15U, 16);

    write_q15x2_ia (&pCmplxDst, __PKHBT(satLo0, satHi0, 16));
    write_q15x2_ia (&pCmplxDst, __PKHBT(satLo1, satHi1, 16));
#else
    /* Plain C: four complex samples, each scaled by its own real sample */
    realVal = pSrcReal[0];
    pCmplxDst[0] = (q15_t) __SSAT((((q31_t) pSrcCmplx[0] * realVal) >> 15), 16);
    pCmplxDst[1] = (q15_t) __SSAT((((q31_t) pSrcCmplx[1] * realVal) >> 15), 16);

    realVal = pSrcReal[1];
    pCmplxDst[2] = (q15_t) __SSAT((((q31_t) pSrcCmplx[2] * realVal) >> 15), 16);
    pCmplxDst[3] = (q15_t) __SSAT((((q31_t) pSrcCmplx[3] * realVal) >> 15), 16);

    realVal = pSrcReal[2];
    pCmplxDst[4] = (q15_t) __SSAT((((q31_t) pSrcCmplx[4] * realVal) >> 15), 16);
    pCmplxDst[5] = (q15_t) __SSAT((((q31_t) pSrcCmplx[5] * realVal) >> 15), 16);

    realVal = pSrcReal[3];
    pCmplxDst[6] = (q15_t) __SSAT((((q31_t) pSrcCmplx[6] * realVal) >> 15), 16);
    pCmplxDst[7] = (q15_t) __SSAT((((q31_t) pSrcCmplx[7] * realVal) >> 15), 16);

    pSrcReal  += 4;
    pSrcCmplx += 8;
    pCmplxDst += 8;
#endif
  }

  /* Samples left over after the unrolled part (numSamples modulo 4) */
  remaining = numSamples & 0x3U;

#else

  /* No unrolling: every sample goes through the loop below */
  remaining = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; remaining > 0U; remaining--)
  {
    /* C[2 * i    ] = A[2 * i    ] * B[i]. */
    /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
    realVal = *pSrcReal++;

    pCmplxDst[0] = (q15_t) __SSAT((((q31_t) pSrcCmplx[0] * realVal) >> 15), 16);
    pCmplxDst[1] = (q15_t) __SSAT((((q31_t) pSrcCmplx[1] * realVal) >> 15), 16);

    pSrcCmplx += 2;
    pCmplxDst += 2;
  }

}
234 #endif /* defined(ARM_MATH_MVEI) */
235 
236 /**
237   @} end of CmplxByRealMult group
238  */
239