1
2 /* ----------------------------------------------------------------------
3 * Project: CMSIS DSP Library
4 * Title: arm_chebyshev_distance_f32.c
5 * Description: Chebyshev distance between two vectors
6 *
7 *
8 * Target Processor: Cortex-M cores
9 * -------------------------------------------------------------------- */
10 /*
11 * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
12 *
13 * SPDX-License-Identifier: Apache-2.0
14 *
15 * Licensed under the Apache License, Version 2.0 (the License); you may
16 * not use this file except in compliance with the License.
17 * You may obtain a copy of the License at
18 *
19 * www.apache.org/licenses/LICENSE-2.0
20 *
21 * Unless required by applicable law or agreed to in writing, software
22 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
23 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24 * See the License for the specific language governing permissions and
25 * limitations under the License.
26 */
27
28 #include "arm_math.h"
29 #include <limits.h>
30 #include <math.h>
31
32
33 /**
34 @addtogroup FloatDist
35 @{
36 */
37
38
39 /**
40 * @brief Chebyshev distance between two vectors
41 * @param[in] pA First vector
42 * @param[in] pB Second vector
43 * @param[in] blockSize vector length
44 * @return distance
45 *
46 */
47
48 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
49
50 #include "arm_helium_utils.h"
51 #include "arm_vec_math.h"
52
arm_chebyshev_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)53 float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
54 {
55 uint32_t blkCnt; /* loop counters */
56 f32x4_t vecA, vecB;
57 f32x4_t vecDiff = vdupq_n_f32(0.0);
58 float32_t maxValue = 0.0;
59
60
61 blkCnt = blockSize >> 2;
62 while (blkCnt > 0U) {
63 vecA = vld1q(pA);
64 pA += 4;
65 vecB = vld1q(pB);
66 pB += 4;
67 /*
68 * update per-lane max.
69 */
70 vecDiff = vmaxnmaq(vsubq(vecA, vecB), vecDiff);
71 /*
72 * Decrement the blockSize loop counter
73 */
74 blkCnt--;
75 }
76 /*
77 * tail
78 * (will be merged thru tail predication)
79 */
80 blkCnt = blockSize & 3;
81 if (blkCnt > 0U) {
82 mve_pred16_t p0 = vctp32q(blkCnt);
83
84 vecA = vldrwq_z_f32(pA, p0);
85 vecB = vldrwq_z_f32(pB, p0);
86
87 /*
88 * Get current max per lane and current index per lane
89 * when a max is selected
90 */
91 vecDiff = vmaxnmaq_m(vecDiff, vsubq(vecA, vecB), p0);
92 }
93 /*
94 * Get max value across the vector
95 */
96 return vmaxnmavq(maxValue, vecDiff);
97 }
98
99 #else
100 #if defined(ARM_MATH_NEON)
101
102 #include "NEMath.h"
103
arm_chebyshev_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)104 float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
105 {
106 float32_t diff=0.0f, maxVal=0.0f, tmpA, tmpB;
107 uint32_t blkCnt;
108 float32x4_t a,b,diffV, maxValV;
109 float32x2_t maxValV2;
110
111 if (blockSize <= 3)
112 {
113 tmpA = *pA++;
114 tmpB = *pB++;
115 diff = fabsf(tmpA - tmpB);
116 maxVal = diff;
117 blockSize--;
118
119 while(blockSize > 0)
120 {
121 tmpA = *pA++;
122 tmpB = *pB++;
123 diff = fabsf(tmpA - tmpB);
124 if (diff > maxVal)
125 {
126 maxVal = diff;
127 }
128 blockSize --;
129 }
130 }
131 else
132 {
133
134 a = vld1q_f32(pA);
135 b = vld1q_f32(pB);
136 pA += 4;
137 pB += 4;
138
139 diffV = vabdq_f32(a,b);
140
141 blockSize -= 4;
142
143 maxValV = diffV;
144
145
146 blkCnt = blockSize >> 2;
147 while(blkCnt > 0)
148 {
149 a = vld1q_f32(pA);
150 b = vld1q_f32(pB);
151
152 diffV = vabdq_f32(a,b);
153 maxValV = vmaxq_f32(maxValV, diffV);
154
155 pA += 4;
156 pB += 4;
157 blkCnt --;
158 }
159 maxValV2 = vpmax_f32(vget_low_f32(maxValV),vget_high_f32(maxValV));
160 maxValV2 = vpmax_f32(maxValV2,maxValV2);
161 maxVal = vget_lane_f32(maxValV2,0);
162
163
164 blkCnt = blockSize & 3;
165 while(blkCnt > 0)
166 {
167 tmpA = *pA++;
168 tmpB = *pB++;
169 diff = fabsf(tmpA - tmpB);
170 if (diff > maxVal)
171 {
172 maxVal = diff;
173 }
174 blkCnt --;
175 }
176 }
177 return(maxVal);
178 }
179
180 #else
arm_chebyshev_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)181 float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
182 {
183 float32_t diff=0.0f, maxVal,tmpA, tmpB;
184
185 tmpA = *pA++;
186 tmpB = *pB++;
187 diff = fabsf(tmpA - tmpB);
188 maxVal = diff;
189 blockSize--;
190
191 while(blockSize > 0)
192 {
193 tmpA = *pA++;
194 tmpB = *pB++;
195 diff = fabsf(tmpA - tmpB);
196 if (diff > maxVal)
197 {
198 maxVal = diff;
199 }
200 blockSize --;
201 }
202
203 return(maxVal);
204 }
205 #endif
206 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
207
208
209 /**
210 * @} end of FloatDist group
211 */
212