• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_var_f32.c
4  * Description:  Variance of the elements of a floating-point vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/statistics_functions.h"
30 
31 /**
32   @ingroup groupStats
33  */
34 
35 /**
36   @defgroup variance  Variance
37 
38   Calculates the variance of the elements in the input vector.
39   The underlying algorithm used is the direct method sometimes referred to as the two-pass method:
40 
41   <pre>
42       Result = sum(element - meanOfElements)^2) / numElement - 1
43 
44       meanOfElements = ( pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] ) / blockSize
45   </pre>
46 
47   There are separate functions for floating point, Q31, and Q15 data types.
48  */
49 
50 /**
51   @addtogroup variance
52   @{
53  */
54 
55 /**
56   @brief         Variance of the elements of a floating-point vector.
57   @param[in]     pSrc       points to the input vector
58   @param[in]     blockSize  number of samples in input vector
59   @param[out]    pResult    variance value returned here
60   @return        none
61  */
62 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
63 
64 #include "arm_helium_utils.h"
65 
arm_var_f32(const float32_t * pSrc,uint32_t blockSize,float32_t * pResult)66 void arm_var_f32(
67            const float32_t * pSrc,
68                  uint32_t blockSize,
69                  float32_t * pResult)
70 {
71     uint32_t         blkCnt;     /* loop counters */
72     f32x4_t         vecSrc;
73     f32x4_t         sumVec = vdupq_n_f32(0.0f);
74     float32_t       fMean;
75     float32_t sum = 0.0f;                          /* accumulator */
76     float32_t in;                                  /* Temporary variable to store input value */
77 
78     if (blockSize <= 1U) {
79         *pResult = 0;
80         return;
81     }
82 
83     arm_mean_f32(pSrc, blockSize, &fMean);
84 
85     /* Compute 4 outputs at a time */
86     blkCnt = blockSize >> 2U;
87     while (blkCnt > 0U)
88     {
89 
90         vecSrc = vldrwq_f32(pSrc);
91         /*
92          * sum lanes
93          */
94         vecSrc = vsubq(vecSrc, fMean);
95         sumVec = vfmaq(sumVec, vecSrc, vecSrc);
96 
97         blkCnt --;
98         pSrc += 4;
99     }
100 
101     sum = vecAddAcrossF32Mve(sumVec);
102 
103     /*
104      * tail
105      */
106     blkCnt = blockSize & 0x3;
107     while (blkCnt > 0U)
108     {
109        in = *pSrc++ - fMean;
110        sum += in * in;
111 
112        /* Decrement loop counter */
113        blkCnt--;
114     }
115 
116     /* Variance */
117     *pResult = sum / (float32_t) (blockSize - 1);
118 }
119 #else
120 #if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_var_f32(const float32_t * pSrc,uint32_t blockSize,float32_t * pResult)121 void arm_var_f32(
122            const float32_t * pSrc,
123                  uint32_t blockSize,
124                  float32_t * pResult)
125 {
126   float32_t mean;
127 
128   float32_t sum = 0.0f;                          /* accumulator */
129   float32_t in;                                  /* Temporary variable to store input value */
130   uint32_t blkCnt;                               /* loop counter */
131 
132   float32x4_t sumV = vdupq_n_f32(0.0f);                          /* Temporary result storage */
133   float32x2_t sumV2;
134   float32x4_t inV;
135   float32x4_t avg;
136 
137   arm_mean_f32(pSrc,blockSize,&mean);
138   avg = vdupq_n_f32(mean);
139 
140   blkCnt = blockSize >> 2U;
141 
142   /* Compute 4 outputs at a time.
143    ** a second loop below computes the remaining 1 to 3 samples. */
144   while (blkCnt > 0U)
145   {
146     /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
147     /* Compute Power and then store the result in a temporary variable, sum. */
148     inV = vld1q_f32(pSrc);
149     inV = vsubq_f32(inV, avg);
150     sumV = vmlaq_f32(sumV, inV, inV);
151     pSrc += 4;
152 
153     /* Decrement the loop counter */
154     blkCnt--;
155   }
156 
157   sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
158   sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1);
159 
160   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
161    ** No loop unrolling is used. */
162   blkCnt = blockSize % 0x4U;
163 
164   while (blkCnt > 0U)
165   {
166     /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
167     /* compute power and then store the result in a temporary variable, sum. */
168     in = *pSrc++;
169     in = in - mean;
170     sum += in * in;
171 
172     /* Decrement the loop counter */
173     blkCnt--;
174   }
175 
176   /* Variance */
177   *pResult = sum / (float32_t)(blockSize - 1.0f);
178 
179 }
180 
181 #else
arm_var_f32(const float32_t * pSrc,uint32_t blockSize,float32_t * pResult)182 void arm_var_f32(
183   const float32_t * pSrc,
184         uint32_t blockSize,
185         float32_t * pResult)
186 {
187         uint32_t blkCnt;                               /* Loop counter */
188         float32_t sum = 0.0f;                          /* Temporary result storage */
189         float32_t fSum = 0.0f;
190         float32_t fMean, fValue;
191   const float32_t * pInput = pSrc;
192 
193   if (blockSize <= 1U)
194   {
195     *pResult = 0;
196     return;
197   }
198 
199 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
200 
201   /* Loop unrolling: Compute 4 outputs at a time */
202   blkCnt = blockSize >> 2U;
203 
204   while (blkCnt > 0U)
205   {
206     /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
207 
208     sum += *pInput++;
209     sum += *pInput++;
210     sum += *pInput++;
211     sum += *pInput++;
212 
213 
214     /* Decrement loop counter */
215     blkCnt--;
216   }
217 
218   /* Loop unrolling: Compute remaining outputs */
219   blkCnt = blockSize % 0x4U;
220 
221 #else
222 
223   /* Initialize blkCnt with number of samples */
224   blkCnt = blockSize;
225 
226 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
227 
228   while (blkCnt > 0U)
229   {
230     /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
231 
232     sum += *pInput++;
233 
234     /* Decrement loop counter */
235     blkCnt--;
236   }
237 
238   /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
239   fMean = sum / (float32_t) blockSize;
240 
241   pInput = pSrc;
242 
243 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
244 
245   /* Loop unrolling: Compute 4 outputs at a time */
246   blkCnt = blockSize >> 2U;
247 
248   while (blkCnt > 0U)
249   {
250     fValue = *pInput++ - fMean;
251     fSum += fValue * fValue;
252 
253     fValue = *pInput++ - fMean;
254     fSum += fValue * fValue;
255 
256     fValue = *pInput++ - fMean;
257     fSum += fValue * fValue;
258 
259     fValue = *pInput++ - fMean;
260     fSum += fValue * fValue;
261 
262     /* Decrement loop counter */
263     blkCnt--;
264   }
265 
266   /* Loop unrolling: Compute remaining outputs */
267   blkCnt = blockSize % 0x4U;
268 
269 #else
270 
271   /* Initialize blkCnt with number of samples */
272   blkCnt = blockSize;
273 
274 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
275 
276   while (blkCnt > 0U)
277   {
278     fValue = *pInput++ - fMean;
279     fSum += fValue * fValue;
280 
281     /* Decrement loop counter */
282     blkCnt--;
283   }
284 
285   /* Variance */
286   *pResult = fSum / (float32_t)(blockSize - 1.0f);
287 }
288 #endif /* #if defined(ARM_MATH_NEON) */
289 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
290 
291 /**
292   @} end of variance group
293  */
294