1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_absmax_no_idx_f32.c
4 * Description: Maximum value of absolute values of a floating-point vector
5 *
6 * $Date: 16 November 2021
7 * $Revision: V1.10.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions.h"
30 #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
31 #include <limits.h>
32 #endif
33
34 /**
35 @ingroup groupStats
36 */
37
38
39 /**
40 @addtogroup AbsMax
41 @{
42 */
43
44 /**
45 @brief Maximum value of absolute values of a floating-point vector.
46 @param[in] pSrc points to the input vector
47 @param[in] blockSize number of samples in input vector
48 @param[out] pResult maximum value returned here
49 @return none
50 */
51 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
52
53 #include "arm_helium_utils.h"
54
/*
 * Helium (MVE) variant.
 *
 * Keeps a per-lane running maximum in a vector seeded with F32_ABSMIN,
 * folds the input into it four samples at a time, handles the last
 * (blockSize & 3) samples under a tail predicate, and finally reduces the
 * four lanes to a single scalar.
 *
 * pSrc      points to the input vector
 * blockSize number of samples in the input vector
 * pResult   receives the reduced maximum; when blockSize is 0 no sample is
 *           read and the result is the reduction of the F32_ABSMIN seed
 */
void arm_absmax_no_idx_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult)
{
    uint32_t blkCnt;                                      /* loop counter */
    f32x4_t vecSrc;                                       /* samples loaded this step */
    float32_t const *pSrcVec = pSrc;                      /* read cursor */
    f32x4_t curExtremValVec = vdupq_n_f32(F32_ABSMIN);    /* per-lane running max */
    mve_pred16_t p0;                                      /* tail predicate */

    /* Main loop: consume whole vectors of four samples. */
    blkCnt = blockSize >> 2;
    while (blkCnt > 0U)
    {
        vecSrc = vldrwq_f32(pSrcVec);
        pSrcVec += 4;

        /* Update the per-lane maximum. */
        curExtremValVec = vmaxnmaq(vecSrc, curExtremValVec);

        blkCnt--;
    }

    /* Tail: 1 to 3 remaining samples. */
    blkCnt = blockSize & 3U;
    if (blkCnt > 0U)
    {
        /*
         * Build the predicate first and use the zeroing predicated load so
         * that only the remaining samples are loaded. A plain vldrwq_f32
         * here would read a full vector, i.e. up to three floats past the
         * end of the input buffer.
         */
        p0 = vctp32q(blkCnt);
        vecSrc = vldrwq_z_f32(pSrcVec, p0);

        /* Lanes outside the predicate keep their current maximum. */
        curExtremValVec = vmaxnmaq_m(curExtremValVec, vecSrc, p0);
    }

    /* Reduce the four lanes to one value and hand it back to the caller. */
    *pResult = vmaxnmavq(F32_ABSMIN, curExtremValVec);
}
105
106
107 #else
108 #if defined(ARM_MATH_LOOPUNROLL)
/*
 * Scalar variant with the main loop manually unrolled by four
 * (selected by ARM_MATH_LOOPUNROLL).
 *
 * The first sample seeds the running maximum; the remaining
 * (blockSize - 1) samples are processed four per iteration, followed by a
 * clean-up loop for the last 0..3 samples.
 *
 * pSrc      points to the input vector
 * blockSize number of samples in the input vector
 * pResult   receives the largest absolute value found, or 0.0f when
 *           blockSize is 0
 */
void arm_absmax_no_idx_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult)
{
    float32_t cur_absmax, out;   /* current |sample| and running maximum */
    uint32_t blkCnt;             /* loop counter */

    /*
     * Empty input: there is no first sample to load, and (blockSize - 1U)
     * below would wrap around and drive the loops far past the buffer.
     * Report 0.0f, the smallest value an absolute value can take.
     */
    if (blockSize == 0U)
    {
        *pResult = 0.0f;
        return;
    }

    /* The first sample is the reference value for all later comparisons. */
    out = *pSrc++;
    out = (out > 0.0f) ? out : -out;

    /* Unrolled loop: four samples per iteration. */
    blkCnt = (blockSize - 1U) >> 2U;

    while (blkCnt > 0U)
    {
        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
        }

        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
        }

        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
        }

        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
        }

        blkCnt--;
    }

    /* Clean-up loop: the 0..3 samples the unrolled loop did not reach. */
    blkCnt = (blockSize - 1U) % 4U;

    while (blkCnt > 0U)
    {
        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
        }

        blkCnt--;
    }

    /* Store the maximum absolute value in the destination. */
    *pResult = out;
}
182 #else
/*
 * Plain scalar variant (no Helium, no manual loop unrolling).
 *
 * The first sample seeds the running maximum; every following sample is
 * compared against it one at a time.
 *
 * pSrc      points to the input vector
 * blockSize number of samples in the input vector
 * pResult   receives the largest absolute value found, or 0.0f when
 *           blockSize is 0
 */
void arm_absmax_no_idx_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult)
{
    float32_t absVal, out;   /* current |sample| and running maximum */
    uint32_t blkCnt;         /* loop counter */

    /*
     * Empty input: there is no first sample to load, and (blockSize - 1U)
     * below would wrap around and drive the loop far past the buffer.
     * Report 0.0f, the smallest value an absolute value can take.
     */
    if (blockSize == 0U)
    {
        *pResult = 0.0f;
        return;
    }

    /* The first sample is the reference value for all later comparisons. */
    out = fabsf(*pSrc++);

    /* One iteration per remaining sample. */
    blkCnt = (blockSize - 1U);

    while (blkCnt > 0U)
    {
        absVal = fabsf(*pSrc++);

        /* Keep the larger of the two magnitudes. */
        if (out < absVal)
        {
            out = absVal;
        }

        blkCnt--;
    }

    /* Store the maximum absolute value in the destination. */
    *pResult = out;
}
221 #endif /* defined(ARM_MATH_LOOPUNROLL) */
222 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
223 /**
224 @} end of AbsMax group
225 */
226