• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_var_f16.c
4  * Description:  Variance of the elements of a floating-point vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/statistics_functions_f16.h"
30 
31 
32 #if defined(ARM_FLOAT16_SUPPORTED)
33 
34 
35 /**
36   @ingroup groupStats
37  */
38 
39 
40 /**
41   @addtogroup variance
42   @{
43  */
44 
45 /**
46   @brief         Variance of the elements of a floating-point vector.
47   @param[in]     pSrc       points to the input vector
48   @param[in]     blockSize  number of samples in input vector
49   @param[out]    pResult    variance value returned here
50   @return        none
51  */
52 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
53 
54 #include "arm_helium_utils.h"
55 
56 
/*
 * Helium (MVE) half-precision implementation.
 *
 * Two passes over the input: the mean is obtained from arm_mean_f16(), then
 * the squared deviations from that mean are accumulated eight lanes at a time
 * and the lane total is divided by (blockSize - 1).
 *
 * pSrc       input vector
 * blockSize  number of samples in the input vector
 * pResult    receives the variance; set to 0 when blockSize <= 1
 */
void arm_var_f16(
           const float16_t * pSrc,
                 uint32_t blockSize,
                 float16_t * pResult)
{
    f16x8_t         vAcc = vdupq_n_f16((float16_t) 0.0);   /* per-lane sum of squared deviations */
    f16x8_t         vDev;                                   /* loaded samples, then deviations */
    float16_t       mean;
    int32_t         remaining;                              /* samples still to be processed */

    /* Fewer than two samples: report zero and leave early. */
    if (blockSize <= 1U) {
        *pResult = 0;
        return;
    }

    /* First pass: mean of the whole vector. */
    arm_mean_f16(pSrc, blockSize, &mean);

/* Compiler workaround (original note: "6.14 bug"): for the toolchain versions
 * selected below, the accumulator register is cleared explicitly. */
#if defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100) && (__ARMCC_VERSION < 6150001)
    __asm volatile(
        "   vmov.i32                     %[acc], #0 \n"
        : [acc] "+t"(vAcc)
        :
        : );
#endif

    /* Second pass: the body runs at least once; the predicate built from the
     * remaining count is applied to the load, the subtraction and the
     * accumulate on every iteration. */
    remaining = blockSize;
    do {
        mve_pred16_t    tailPred = vctp16q(remaining);

        /* Predicated load of up to eight samples. */
        vDev = vldrhq_z_f16((float16_t const *) pSrc, tailPred);

        /* Deviation from the mean, then accumulate deviation * deviation. */
        vDev = vsubq_m(vuninitializedq_f16(), vDev, mean, tailPred);
        vAcc = vfmaq_m(vAcc, vDev, vDev, tailPred);

        /* Step to the next group of eight half-precision values. */
        pSrc += 8;
        remaining -= 8;
    }
    while (remaining > 0);

    /* Variance: lane total divided by (blockSize - 1). */
    *pResult = vecAddAcrossF16Mve(vAcc) / (float16_t) (blockSize - 1.0f);
}
103 #else
104 
/*
 * Scalar half-precision implementation.
 *
 * Pass 1 sums the samples and divides by blockSize to get the mean.
 * Pass 2 sums the squared deviations from that mean and divides by
 * (blockSize - 1).
 *
 * pSrc       input vector
 * blockSize  number of samples in the input vector
 * pResult    receives the variance; set to 0 when blockSize <= 1
 */
void arm_var_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult)
{
  const float16_t * pIn = pSrc;                  /* Read cursor over the input */
        uint32_t remaining;                      /* Iterations left in the current loop */
        _Float16 acc = 0.0f;                     /* Running sum of the samples */
        _Float16 sqAcc = 0.0f;                   /* Running sum of squared deviations */
        _Float16 mean, dev;

  /* Fewer than two samples: report zero and leave early. */
  if (blockSize <= 1U)
  {
    *pResult = 0;
    return;
  }

  /* ---- Pass 1: sum of all samples ---- */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

  /* Unrolled by four: samples are still added one after another, in order. */
  for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
  {
    acc += pIn[0];
    acc += pIn[1];
    acc += pIn[2];
    acc += pIn[3];

    pIn += 4;
  }

  /* Samples left over after the groups of four */
  remaining = blockSize & 0x3U;

#else

  /* No unrolling: every sample goes through the loop below */
  remaining = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; remaining > 0U; remaining--)
  {
    acc += *pIn++;
  }

  /* mean = (A[0] + A[1] + ... + A[blockSize-1]) / blockSize */
  mean = acc / (float16_t) blockSize;

  /* ---- Pass 2: sum of squared deviations from the mean ---- */

  /* Rewind to the start of the input */
  pIn = pSrc;

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

  /* Unrolled by four, same sample order as the plain loop */
  for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
  {
    dev = pIn[0] - mean;
    sqAcc += dev * dev;

    dev = pIn[1] - mean;
    sqAcc += dev * dev;

    dev = pIn[2] - mean;
    sqAcc += dev * dev;

    dev = pIn[3] - mean;
    sqAcc += dev * dev;

    pIn += 4;
  }

  /* Samples left over after the groups of four */
  remaining = blockSize & 0x3U;

#else

  /* No unrolling: every sample goes through the loop below */
  remaining = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; remaining > 0U; remaining--)
  {
    dev = *pIn++ - mean;
    sqAcc += dev * dev;
  }

  /* Variance: sum of squared deviations divided by (blockSize - 1) */
  *pResult = sqAcc / (float16_t)(blockSize - 1.0f);
}
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
212 
213 /**
214   @} end of variance group
215  */
216 
217 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
218 
219