• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_mean_f32.c
4  * Description:  Mean value of a floating-point vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/statistics_functions.h"
30 
31 /**
32   @ingroup groupStats
33  */
34 
35 
36 /**
37   @addtogroup mean
38   @{
39  */
40 
41 /**
42   @brief         Mean value of a floating-point vector.
43   @param[in]     pSrc       points to the input vector.
44   @param[in]     blockSize  number of samples in input vector.
45   @param[out]    pResult    mean value returned here.
46   @return        none
47  */
48 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
49 
50 #include "arm_helium_utils.h"
51 
/*
 * Helium (MVE) variant: sums the input four lanes at a time, reduces the
 * vector accumulator to a scalar, adds the leftover samples one by one and
 * finally divides by blockSize.
 *
 * Note: blockSize == 0 makes the final statement divide by zero, so callers
 * are expected to pass at least one sample.
 */
void arm_mean_f32(
  const float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult)
{
    f32x4_t   acc = vdupq_n_f32(0.0f);  /* lane-wise running sums */
    float32_t total;                    /* scalar running sum */
    uint32_t  remaining;                /* iterations left in the current loop */

    /* Vector part: one iteration per full group of four samples */
    for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
    {
        acc = vaddq_f32(acc, vldrwq_f32(pSrc));
        pSrc += 4;
    }

    /* Reduce the vector accumulator to a scalar through the helper */
    total = vecAddAcrossF32Mve(acc);

    /* Scalar part: the last (blockSize mod 4) samples */
    for (remaining = blockSize & 0x3U; remaining > 0U; remaining--)
    {
        /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
        total += *pSrc++;
    }

    /* Mean = accumulated sum / number of samples */
    *pResult = total / (float32_t) blockSize;
}
89 
90 
91 #else
92 #if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * NEON variant: sums the input four lanes at a time, folds the four lanes
 * into one scalar, adds the leftover samples one by one and finally divides
 * by blockSize.
 *
 * Note: blockSize == 0 makes the final statement divide by zero, so callers
 * are expected to pass at least one sample.
 */
void arm_mean_f32(
  const float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult)
{
  float32x4_t acc = vdupq_n_f32(0.0f);  /* lane-wise running sums */
  float32x2_t pairSums;                 /* {lane0 + lane1, lane2 + lane3} of acc */
  float32_t total;                      /* scalar running sum */
  uint32_t remaining;                   /* iterations left in the current loop */

  /* Vector part: one iteration per full group of four samples.
   ** The second loop below handles the remaining 1 to 3 samples. */
  for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
  {
    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
    acc = vaddq_f32(acc, vld1q_f32(pSrc));
    pSrc += 4;
  }

  /* Pairwise-add the low and high halves, then add the two partial sums */
  pairSums = vpadd_f32(vget_low_f32(acc), vget_high_f32(acc));
  total = vget_lane_f32(pairSums, 0) + vget_lane_f32(pairSums, 1);

  /* Scalar part: the last (blockSize mod 4) samples, no unrolling */
  for (remaining = blockSize & 3U; remaining > 0U; remaining--)
  {
    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
    total += *pSrc++;
  }

  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
  /* Write the mean to the destination */
  *pResult = total / (float32_t) blockSize;
}
141 #else
/*
 * Portable scalar variant: accumulates every sample of pSrc in order into a
 * single float32_t sum and stores sum / blockSize in *pResult.
 *
 * When ARM_MATH_LOOPUNROLL is defined (and ARM_MATH_AUTOVECTORIZE is not),
 * the main loop consumes four samples per iteration; the samples are still
 * added one after another in index order, and a trailing loop handles the
 * last (blockSize mod 4) samples.
 *
 * Note: with blockSize == 0 no sample is read and the final statement
 * divides 0.0f by zero, so callers are expected to pass at least one sample.
 */
void arm_mean_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult)
{
  uint32_t blkCnt;                               /* Loop counter */
  float32_t sum = 0.0f;                          /* Temporary result storage */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = blockSize >> 2U;

  while (blkCnt > 0U)
  {
    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
    sum += *pSrc++;

    sum += *pSrc++;

    sum += *pSrc++;

    sum += *pSrc++;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = blockSize % 0x4U;

#else

  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) */

  while (blkCnt > 0U)
  {
    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
    sum += *pSrc++;

    /* Decrement loop counter */
    blkCnt--;
  }

  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
  /* Store result to destination. The sample count is converted to
   * float32_t explicitly, as the vector variants of this function do,
   * instead of relying on an implicit integer-to-float conversion. */
  *pResult = sum / (float32_t) blockSize;
}
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE) */
194 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
195 
196 /**
197   @} end of mean group
198  */
199