1
2 /* ----------------------------------------------------------------------
3 * Project: CMSIS DSP Library
4 * Title: arm_chebyshev_distance_f32.c
5 * Description: Chebyshev distance between two vectors
6 *
7 * $Date: 23 April 2021
8 * $Revision: V1.9.0
9 *
10 * Target Processor: Cortex-M and Cortex-A cores
11 * -------------------------------------------------------------------- */
12 /*
13 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14 *
15 * SPDX-License-Identifier: Apache-2.0
16 *
17 * Licensed under the Apache License, Version 2.0 (the License); you may
18 * not use this file except in compliance with the License.
19 * You may obtain a copy of the License at
20 *
21 * www.apache.org/licenses/LICENSE-2.0
22 *
23 * Unless required by applicable law or agreed to in writing, software
24 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26 * See the License for the specific language governing permissions and
27 * limitations under the License.
28 */
29
30 #include "dsp/distance_functions.h"
31 #include <limits.h>
32 #include <math.h>
33
34
35 /**
36 @addtogroup Chebyshev
37 @{
38 */
39
40
41 /**
42 * @brief Chebyshev distance between two vectors
43 * @param[in] pA First vector
44 * @param[in] pB Second vector
45 * @param[in] blockSize vector length
46 * @return distance
47 *
48 */
49
50 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
51
52 #include "arm_helium_utils.h"
53 #include "arm_vec_math.h"
54
arm_chebyshev_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)55 float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
56 {
57 uint32_t blkCnt; /* loop counters */
58 f32x4_t vecA, vecB;
59 f32x4_t vecDiff = vdupq_n_f32(0.0);
60 float32_t maxValue = 0.0;
61
62
63 blkCnt = blockSize >> 2;
64 while (blkCnt > 0U) {
65 vecA = vld1q(pA);
66 pA += 4;
67 vecB = vld1q(pB);
68 pB += 4;
69 /*
70 * update per-lane max.
71 */
72 vecDiff = vmaxnmaq(vsubq(vecA, vecB), vecDiff);
73 /*
74 * Decrement the blockSize loop counter
75 */
76 blkCnt--;
77 }
78 /*
79 * tail
80 * (will be merged thru tail predication)
81 */
82 blkCnt = blockSize & 3;
83 if (blkCnt > 0U) {
84 mve_pred16_t p0 = vctp32q(blkCnt);
85
86 vecA = vldrwq_z_f32(pA, p0);
87 vecB = vldrwq_z_f32(pB, p0);
88
89 /*
90 * Get current max per lane and current index per lane
91 * when a max is selected
92 */
93 vecDiff = vmaxnmaq_m(vecDiff, vsubq(vecA, vecB), p0);
94 }
95 /*
96 * Get max value across the vector
97 */
98 return vmaxnmavq(maxValue, vecDiff);
99 }
100
101 #else
102 #if defined(ARM_MATH_NEON)
103
104 #include "NEMath.h"
105
arm_chebyshev_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)106 float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
107 {
108 float32_t diff=0.0f, maxVal=0.0f, tmpA, tmpB;
109 uint32_t blkCnt;
110 float32x4_t a,b,diffV, maxValV;
111 float32x2_t maxValV2;
112
113 if (blockSize <= 3)
114 {
115 tmpA = *pA++;
116 tmpB = *pB++;
117 diff = fabsf(tmpA - tmpB);
118 maxVal = diff;
119 blockSize--;
120
121 while(blockSize > 0)
122 {
123 tmpA = *pA++;
124 tmpB = *pB++;
125 diff = fabsf(tmpA - tmpB);
126 if (diff > maxVal)
127 {
128 maxVal = diff;
129 }
130 blockSize --;
131 }
132 }
133 else
134 {
135
136 a = vld1q_f32(pA);
137 b = vld1q_f32(pB);
138 pA += 4;
139 pB += 4;
140
141 diffV = vabdq_f32(a,b);
142
143 blockSize -= 4;
144
145 maxValV = diffV;
146
147
148 blkCnt = blockSize >> 2;
149 while(blkCnt > 0)
150 {
151 a = vld1q_f32(pA);
152 b = vld1q_f32(pB);
153
154 diffV = vabdq_f32(a,b);
155 maxValV = vmaxq_f32(maxValV, diffV);
156
157 pA += 4;
158 pB += 4;
159 blkCnt --;
160 }
161 maxValV2 = vpmax_f32(vget_low_f32(maxValV),vget_high_f32(maxValV));
162 maxValV2 = vpmax_f32(maxValV2,maxValV2);
163 maxVal = vget_lane_f32(maxValV2,0);
164
165
166 blkCnt = blockSize & 3;
167 while(blkCnt > 0)
168 {
169 tmpA = *pA++;
170 tmpB = *pB++;
171 diff = fabsf(tmpA - tmpB);
172 if (diff > maxVal)
173 {
174 maxVal = diff;
175 }
176 blkCnt --;
177 }
178 }
179 return(maxVal);
180 }
181
182 #else
/*
 * Portable scalar implementation.
 *
 * Returns the largest absolute element-wise difference between pA and pB
 * over blockSize elements. An empty input (blockSize == 0) returns 0.0f and
 * neither buffer is read.
 */
float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
{
    float32_t diff, maxVal;

    /*
     * Guard the empty case: without it the first element would be read
     * unconditionally and the unsigned "blockSize--" below would wrap
     * around, driving the loop far past the end of both buffers.
     */
    if (blockSize == 0U)
    {
        return 0.0f;
    }

    /* The first pair seeds the running maximum. */
    maxVal = fabsf(*pA++ - *pB++);
    blockSize--;

    while (blockSize > 0U)
    {
        diff = fabsf(*pA++ - *pB++);
        if (diff > maxVal)
        {
            maxVal = diff;
        }
        blockSize--;
    }

    return (maxVal);
}
207 #endif
208 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
209
210
211 /**
212 * @} end of Chebyshev group
213 */
214