1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_var_f16.c
4 * Description: Variance of the elements of a floating-point vector
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions_f16.h"
30
31
32 #if defined(ARM_FLOAT16_SUPPORTED)
33
34
35 /**
36 @ingroup groupStats
37 */
38
39
40 /**
41 @addtogroup variance
42 @{
43 */
44
/**
  @brief         Variance of the elements of a half-precision floating-point vector.
  @param[in]     pSrc       points to the input vector
  @param[in]     blockSize  number of samples in input vector
  @param[out]    pResult    variance value returned here
  @return        none

  @note          The sum of squared deviations from the mean is divided by
                 (blockSize - 1). When blockSize is 0 or 1 the result is 0.
 */
52 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
53
54 #include "arm_helium_utils.h"
55
56
/*
 * Helium (MVE) implementation.
 *
 * The mean is obtained from arm_mean_f16(); the squared deviation of each
 * sample from that mean is then accumulated lane-wise, eight half-precision
 * values per iteration, under a predicate built from the remaining sample
 * count. The lane accumulators are reduced with vecAddAcrossF16Mve() and the
 * total is divided by (blockSize - 1).
 */
void arm_var_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult)
{
    f16x8_t   accVec = vdupq_n_f16((float16_t) 0.0);   /* per-lane sums of squared deviations */
    float16_t mean;                                    /* mean of the input vector */
    int32_t   remaining;                               /* samples not yet consumed */

    /* Fewer than two samples: the result is defined as zero. */
    if (blockSize < 2U)
    {
        *pResult = 0;
        return;
    }

    arm_mean_f16(pSrc, blockSize, &mean);

    /* 6.14 bug: for the affected Arm Compiler releases the accumulator is
       cleared again through inline assembly. */
#if defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100) && (__ARMCC_VERSION < 6150001)
    __asm volatile(
        " vmov.i32 %[acc], #0 \n"
        : [acc] "+t"(accVec)
        :
        : );
#endif

    remaining = blockSize;
    do
    {
        /* Predicate derived from the number of samples still to process */
        mve_pred16_t tail = vctp16q(remaining);

        /* Predicated (zeroing) load of up to eight samples */
        f16x8_t samples = vldrhq_z_f16((float16_t const *) pSrc, tail);

        /* Deviation from the mean, then accumulate deviation * deviation */
        f16x8_t diff = vsubq_m(vuninitializedq_f16(), samples, mean, tail);
        accVec = vfmaq_m(accVec, diff, diff, tail);

        pSrc += 8;
        remaining -= 8;
    }
    while (remaining > 0);

    /* Variance: reduced sum of squared deviations over (blockSize - 1) */
    *pResult = vecAddAcrossF16Mve(accVec) / (float16_t) (blockSize - 1.0f);
}
103 #else
104
/*
 * Scalar implementation.
 *
 * Two passes over the input: the first sums the samples to obtain the mean,
 * the second accumulates the squared deviation of each sample from that
 * mean. The result is that accumulated sum divided by (blockSize - 1).
 * When ARM_MATH_LOOPUNROLL is defined (and ARM_MATH_AUTOVECTORIZE is not),
 * both passes handle four samples per iteration before a tail loop.
 */
void arm_var_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult)
{
    const float16_t *pIn = pSrc;    /* read cursor over the input vector */
    _Float16 total = 0.0f;          /* running sum of the samples */
    _Float16 sqAcc = 0.0f;          /* running sum of squared deviations */
    _Float16 mean, dev;
    uint32_t count;                 /* loop counter */

    /* Fewer than two samples: the result is defined as zero. */
    if (blockSize < 2U)
    {
        *pResult = 0;
        return;
    }

    /* ---- Pass 1: sum of all samples ---- */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

    /* Four samples per iteration, added in input order */
    for (count = blockSize >> 2U; count > 0U; count--)
    {
        total += pIn[0];
        total += pIn[1];
        total += pIn[2];
        total += pIn[3];

        pIn += 4;
    }

    /* Samples left over after the unrolled loop */
    count = blockSize & 0x3U;

#else

    /* No unrolling: every sample goes through the loop below */
    count = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) */

    for (; count > 0U; count--)
    {
        total += *pIn++;
    }

    /* mean = (A[0] + A[1] + ... + A[blockSize-1]) / blockSize */
    mean = total / (float16_t) blockSize;

    /* ---- Pass 2: sum of squared deviations from the mean ---- */

    pIn = pSrc;

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

    /* Four samples per iteration, accumulated in input order */
    for (count = blockSize >> 2U; count > 0U; count--)
    {
        dev = pIn[0] - mean;
        sqAcc += dev * dev;

        dev = pIn[1] - mean;
        sqAcc += dev * dev;

        dev = pIn[2] - mean;
        sqAcc += dev * dev;

        dev = pIn[3] - mean;
        sqAcc += dev * dev;

        pIn += 4;
    }

    /* Samples left over after the unrolled loop */
    count = blockSize & 0x3U;

#else

    /* No unrolling: every sample goes through the loop below */
    count = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) */

    for (; count > 0U; count--)
    {
        dev = *pIn++ - mean;
        sqAcc += dev * dev;
    }

    /* Variance: sum of squared deviations over (blockSize - 1) */
    *pResult = sqAcc / (float16_t)(blockSize - 1.0f);
}
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
212
213 /**
214 @} end of variance group
215 */
216
217 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
218
219