1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_scale_f16.c
4 * Description: Multiplies a floating-point vector by a scalar
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/basic_math_functions_f16.h"
30
31 /**
32 @ingroup groupMath
33 */
34
35 /**
36 @defgroup BasicScale Vector Scale
37
38 Multiply a vector by a scalar value. For floating-point data, the algorithm used is:
39
40 <pre>
41 pDst[n] = pSrc[n] * scale, 0 <= n < blockSize.
42 </pre>
43
44 In the fixed-point Q7, Q15, and Q31 functions, <code>scale</code> is represented by
45 a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.
46 The shift allows the gain of the scaling operation to exceed 1.0.
47 The algorithm used with fixed-point data is:
48
49 <pre>
50 pDst[n] = (pSrc[n] * scaleFract) << shift, 0 <= n < blockSize.
51 </pre>
52
53 The overall scale factor applied to the fixed-point data is
54 <pre>
55 scale = scaleFract * 2^shift.
56 </pre>
57
58 The functions support in-place computation allowing the source and destination
59 pointers to reference the same memory buffer.
60 */
61
62 /**
63 @addtogroup BasicScale
64 @{
65 */
66
67 /**
68 @brief Multiplies a floating-point vector by a scalar.
69 @param[in] pSrc points to the input vector
70 @param[in] scale scale factor to be applied
71 @param[out] pDst points to the output vector
72 @param[in] blockSize number of samples in each vector
73 @return none
74 */
75
76 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
77
78 #include "arm_helium_utils.h"
79
/*
 * Helium (MVE) implementation.
 *
 * Computes pDst[n] = pSrc[n] * scale for 0 <= n < blockSize, processing
 * eight half-precision samples per vector iteration and finishing the
 * remaining (blockSize % 8) samples with a single predicated iteration.
 *
 *   pSrc       points to the input vector
 *   scale      scale factor to be applied
 *   pDst       points to the output vector
 *   blockSize  number of samples in each vector
 */
void arm_scale_f16(
  const float16_t * pSrc,
        float16_t scale,
        float16_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                               /* Loop counter */

    f16x8_t vec1;
    f16x8_t res;

    /* Compute 8 outputs at a time (one 128-bit vector of f16 lanes) */
    blkCnt = blockSize >> 3U;

    while (blkCnt > 0U)
    {
        /* C = A * scale */

        /* Scale the inputs and store the results in the destination buffer. */
        vec1 = vld1q(pSrc);
        res = vmulq(vec1, scale);
        vst1q(pDst, res);

        /* Increment pointers */
        pSrc += 8;
        pDst += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 1 to 7 samples left over after the full vectors */
    blkCnt = blockSize & 0x7;

    if (blkCnt > 0U)
    {
        /* Predicate enabling only the first blkCnt lanes */
        mve_pred16_t p0 = vctp16q(blkCnt);

        /*
         * Use a predicated (zeroing) load so that only the remaining
         * blkCnt samples are read. An unpredicated load here would fetch
         * a full 8-lane vector and read past the end of the source buffer.
         */
        vec1 = vld1q_z(pSrc, p0);

        /* Predicated store: only the first blkCnt lanes are written */
        vstrhq_p(pDst, vmulq(vec1, scale), p0);
    }
}
123
124 #else
125 #if defined(ARM_FLOAT16_SUPPORTED)
/*
 * Portable C implementation.
 *
 * Computes pDst[n] = pSrc[n] * scale for 0 <= n < blockSize, one sample
 * at a time and in increasing index order. When ARM_MATH_LOOPUNROLL is
 * defined the bulk of the work is done four samples per iteration and
 * the generic loop only handles the leftover samples.
 *
 *   pSrc       points to the input vector
 *   scale      scale factor to be applied
 *   pDst       points to the output vector
 *   blockSize  number of samples in each vector
 */
void arm_scale_f16(
  const float16_t *pSrc,
        float16_t scale,
        float16_t *pDst,
        uint32_t blockSize)
{
    uint32_t remaining = blockSize;    /* Samples left for the generic loop */

#if defined (ARM_MATH_LOOPUNROLL)

    uint32_t quads;                    /* Number of groups of four samples */

    /* Unrolled section: four outputs per iteration */
    for (quads = blockSize >> 2U; quads > 0U; quads--)
    {
        /* C = A * scale, each result stored before the next input is read */
        pDst[0] = pSrc[0] * scale;
        pDst[1] = pSrc[1] * scale;
        pDst[2] = pSrc[2] * scale;
        pDst[3] = pSrc[3] * scale;

        /* Step both pointers past the group just processed */
        pSrc += 4;
        pDst += 4;
    }

    /* Only the samples not covered by a full group are left */
    remaining = blockSize & 0x3U;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    /* Generic section: one output per iteration */
    for (; remaining > 0U; remaining--)
    {
        /* C = A * scale */
        *pDst++ = (*pSrc++) * scale;
    }
}
178 #endif
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
180
181 /**
182 @} end of BasicScale group
183 */
184