1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_max_q7.c
4 * Description: Maximum value of a Q7 vector
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions.h"
30
31 /**
32 @ingroup groupStats
33 */
34
35 /**
36 @addtogroup Max
37 @{
38 */
39
40 /**
41 @brief Maximum value of a Q7 vector.
42 @param[in] pSrc points to the input vector
43 @param[in] blockSize number of samples in input vector
44 @param[out] pResult maximum value returned here
45 @param[out] pIndex index of maximum value returned here
46 @return none
47 */
48 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
49
50 #include "arm_helium_utils.h"
51
/*
 * Helium (MVE) helper: maximum value and its position in a short Q7 vector.
 *
 * pSrc      points to the input samples
 * blockSize number of samples; positions are tracked in 8-bit lanes, so the
 *           caller must keep this at or below 256
 * pResult   receives the maximum value (Q7_MIN when blockSize is 0)
 * pIndex    receives the position of the FIRST sample equal to the maximum
 *           (0 when blockSize is 0)
 *
 * Each of the 16 lanes keeps a running maximum for the samples it sees
 * (lane i sees samples i, i + 16, i + 32, ...) together with the position
 * at which that maximum was first reached. The lanes are reduced at the end.
 */
static void arm_small_blk_max_q7(
    const q7_t * pSrc,
    uint16_t blockSize,
    q7_t * pResult,
    uint32_t * pIndex)
{
    int32_t        blkCnt;                              /* samples left to process */
    q7x16_t        extremValVec = vdupq_n_s8(Q7_MIN);   /* running per-lane maximum */
    q7_t           maxValue = Q7_MIN;
    uint8x16_t     indexVec;                            /* positions of the samples in the current chunk */
    uint8x16_t     extremIdxVec;
    mve_pred16_t   p0;
    /*
     * Per-lane position of the running maximum. It must start out defined:
     * a lane that never sees a sample greater than Q7_MIN is never written
     * by the predicated store below, yet the whole array is reloaded after
     * the loop. Such a lane can only take part in the final reduction when
     * the overall maximum is Q7_MIN, i.e. every sample equals Q7_MIN, and
     * position 0 is then the correct answer.
     */
    uint8_t        extremIdxArr[16] = { 0 };

    indexVec = vidupq_u8(0U, 1);

    blkCnt = blockSize;
    do {
        /* Tail predication: only the first blkCnt lanes are active */
        mve_pred16_t    p = vctp8q(blkCnt);
        q7x16_t         extremIdxVal = vld1q_z_s8(pSrc, p);
        /*
         * Select the active lanes whose new sample is strictly greater than
         * the running maximum. The comparison is strict so that a later
         * sample equal to the current maximum does not replace the recorded
         * position: the first occurrence wins, as in the scalar code.
         */
        p0 = vcmpgtq_m(extremIdxVal, extremValVec, p);

        /* Take the new sample in the selected lanes, keep the others */
        extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
        /* Record the position of the new maximum in the selected lanes */
        vst1q_p_u8(extremIdxArr, indexVec, p0);

        indexVec += 16;
        pSrc += 16;
        blkCnt -= 16;
    }
    while (blkCnt > 0);

    /* Maximum across all lanes */
    maxValue = vmaxvq(maxValue, extremValVec);

    /*
     * Lanes that do not hold the overall maximum get the largest possible
     * position so that they cannot win the minimum-position reduction.
     */
    p0 = vcmpgeq(extremValVec, maxValue);
    extremIdxVec = vld1q_u8(extremIdxArr);

    indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0);
    /* Smallest position among the lanes holding the maximum */
    *pIndex = vminvq_u8(blockSize - 1, indexVec);
    *pResult = maxValue;
}
100
/*
 * Helium (MVE) variant of arm_max_q7.
 *
 * pSrc      points to the input vector
 * blockSize number of samples in the input vector
 * pResult   receives the maximum value
 * pIndex    receives the index of the maximum value
 *
 * The vector helper tracks positions in 8-bit lanes, so the input is walked
 * in sub-blocks of 256 samples. The largest sub-block maximum is kept along
 * with the sub-block it came from; a later sub-block only replaces it when
 * its maximum is strictly greater.
 *
 * An empty input yields Q7_MIN and index 0.
 */
void arm_max_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult,
        uint32_t * pIndex)
{
    const uint16_t sub_blk_sz = UINT8_MAX + 1;
    /* Unsigned on purpose: a signed counter would turn negative for very large blockSize */
    uint32_t  remaining = blockSize;
    uint32_t  curIdx = 0;             /* number of the sub-block being processed */
    q7_t      curBlkExtr = Q7_MIN;    /* best maximum found so far */
    uint32_t  curBlkPos = 0;          /* position of that maximum inside its sub-block */
    uint32_t  curBlkIdx = 0;          /* number of the sub-block holding that maximum */
    q7_t      blkExtr;                /* maximum of the current sub-block */
    uint32_t  blkPos;                 /* its position inside the current sub-block */

    if (blockSize == 0U)
    {
        /* Nothing to scan: return a defined result without touching pSrc */
        *pResult = Q7_MIN;
        *pIndex = 0U;
        return;
    }

    if (blockSize <= sub_blk_sz)
    {
        /* The whole vector fits in a single helper call */
        arm_small_blk_max_q7(pSrc, (uint16_t) blockSize, pResult, pIndex);
        return;
    }

    /*
     * process full sub-blocks of 256 elts
     */
    while (remaining >= sub_blk_sz)
    {
        arm_small_blk_max_q7(pSrc, sub_blk_sz, &blkExtr, &blkPos);
        if (blkExtr > curBlkExtr)
        {
            /*
             * update partial extrema
             */
            curBlkExtr = blkExtr;
            curBlkPos = blkPos;
            curBlkIdx = curIdx;
        }
        curIdx++;
        pSrc += sub_blk_sz;
        remaining -= sub_blk_sz;
    }

    /*
     * remainder: absent when blockSize is an exact multiple of the
     * sub-block size
     */
    if (remaining > 0U)
    {
        arm_small_blk_max_q7(pSrc, (uint16_t) remaining, &blkExtr, &blkPos);
        if (blkExtr > curBlkExtr)
        {
            curBlkExtr = blkExtr;
            curBlkPos = blkPos;
            curBlkIdx = curIdx;
        }
    }

    /* Convert (sub-block number, position in sub-block) to a global index */
    *pIndex = curBlkIdx * sub_blk_sz + curBlkPos;
    *pResult = curBlkExtr;
}
155 #else
/*
 * Portable C variant of arm_max_q7.
 *
 * pSrc      points to the input vector
 * blockSize number of samples in the input vector
 * pResult   receives the maximum value
 * pIndex    receives the index of the maximum value
 *
 * The first sample is the initial reference and every following sample
 * replaces it only when strictly greater, so for equal maxima the lowest
 * index is reported.
 *
 * An empty input yields the most negative Q7 value and index 0.
 */
void arm_max_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult,
        uint32_t * pIndex)
{
        q7_t maxVal, out;                              /* Candidate sample and running maximum */
        uint32_t blkCnt, outIndex;                     /* Loop counter and index of the running maximum */

#if defined (ARM_MATH_LOOPUNROLL)
        uint32_t index;                                /* Index of the last sample handled by the unrolled loop */
#endif

  /*
   * Empty input: without this guard the first sample would be read out of
   * bounds and (blockSize - 1U) would wrap around to a huge loop count.
   */
  if (blockSize == 0U)
  {
    *pResult = (q7_t) INT8_MIN;
    *pIndex = 0U;
    return;
  }

  /* Initialise index value to zero. */
  outIndex = 0U;
  /* Load first input value that acts as reference value for comparison */
  out = *pSrc++;

#if defined (ARM_MATH_LOOPUNROLL)
  /* Initialise index of maximum value. */
  index = 0U;

  /* Loop unrolling: Process 4 samples at a time */
  blkCnt = (blockSize - 1U) >> 2U;

  while (blkCnt > 0U)
  {
    /* Initialize maxVal to next consecutive values one by one */
    maxVal = *pSrc++;

    /* compare for the maximum value */
    if (out < maxVal)
    {
      /* Update the maximum value and its index */
      out = maxVal;
      outIndex = index + 1U;
    }

    maxVal = *pSrc++;
    if (out < maxVal)
    {
      out = maxVal;
      outIndex = index + 2U;
    }

    maxVal = *pSrc++;
    if (out < maxVal)
    {
      out = maxVal;
      outIndex = index + 3U;
    }

    maxVal = *pSrc++;
    if (out < maxVal)
    {
      out = maxVal;
      outIndex = index + 4U;
    }

    index += 4U;

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Process remaining samples */
  blkCnt = (blockSize - 1U) % 4U;

#else

  /* Initialize blkCnt with the number of samples after the first one */
  blkCnt = (blockSize - 1U);

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  while (blkCnt > 0U)
  {
    /* Initialize maxVal to the next consecutive values one by one */
    maxVal = *pSrc++;

    /* compare for the maximum value */
    if (out < maxVal)
    {
      /* Update the maximum value and its index */
      out = maxVal;
      /* blkCnt samples are left, so the current one sits at blockSize - blkCnt */
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the maximum value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
252 #endif /* defined(ARM_MATH_MVEI) */
253
254 /**
255 @} end of Max group
256 */
257