• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /* ----------------------------------------------------------------------
3  * Project:      CMSIS DSP Library
4  * Title:        arm_chebyshev_distance_f32.c
5  * Description:  Chebyshev distance between two vectors
6  *
7  *
8  * Target Processor: Cortex-M cores
9  * -------------------------------------------------------------------- */
10 /*
11  * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
12  *
13  * SPDX-License-Identifier: Apache-2.0
14  *
15  * Licensed under the Apache License, Version 2.0 (the License); you may
16  * not use this file except in compliance with the License.
17  * You may obtain a copy of the License at
18  *
19  * www.apache.org/licenses/LICENSE-2.0
20  *
21  * Unless required by applicable law or agreed to in writing, software
22  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
23  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24  * See the License for the specific language governing permissions and
25  * limitations under the License.
26  */
27 
28 #include "arm_math.h"
29 #include <limits.h>
30 #include <math.h>
31 
32 
33 /**
34   @addtogroup FloatDist
35   @{
36  */
37 
38 
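/**
 * @brief        Chebyshev distance between two vectors
 *
 * The Chebyshev distance is the largest absolute difference between
 * corresponding elements of the two vectors.
 *
 * @param[in]    pA         First vector
 * @param[in]    pB         Second vector
 * @param[in]    blockSize  Number of elements in each vector
 * @return       Chebyshev distance between pA and pB
 *
 */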
47 
48 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
49 
50 #include "arm_helium_utils.h"
51 #include "arm_vec_math.h"
52 
arm_chebyshev_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)53 float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
54 {
55     uint32_t        blkCnt;     /* loop counters */
56     f32x4_t         vecA, vecB;
57     f32x4_t         vecDiff = vdupq_n_f32(0.0);
58     float32_t       maxValue = 0.0;
59 
60 
61     blkCnt = blockSize >> 2;
62     while (blkCnt > 0U) {
63         vecA = vld1q(pA);
64         pA += 4;
65         vecB = vld1q(pB);
66         pB += 4;
67         /*
68          * update per-lane max.
69          */
70         vecDiff = vmaxnmaq(vsubq(vecA, vecB), vecDiff);
71         /*
72          * Decrement the blockSize loop counter
73          */
74         blkCnt--;
75     }
76     /*
77      * tail
78      * (will be merged thru tail predication)
79      */
80     blkCnt = blockSize & 3;
81     if (blkCnt > 0U) {
82         mve_pred16_t    p0 = vctp32q(blkCnt);
83 
84         vecA = vldrwq_z_f32(pA, p0);
85         vecB = vldrwq_z_f32(pB, p0);
86 
87         /*
88          * Get current max per lane and current index per lane
89          * when a max is selected
90          */
91         vecDiff = vmaxnmaq_m(vecDiff, vsubq(vecA, vecB), p0);
92     }
93     /*
94      * Get max value across the vector
95      */
96     return vmaxnmavq(maxValue, vecDiff);
97 }
98 
99 #else
100 #if defined(ARM_MATH_NEON)
101 
102 #include "NEMath.h"
103 
arm_chebyshev_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)104 float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
105 {
106    float32_t diff=0.0f, maxVal=0.0f, tmpA, tmpB;
107    uint32_t blkCnt;
108    float32x4_t a,b,diffV, maxValV;
109    float32x2_t maxValV2;
110 
111    if (blockSize <= 3)
112    {
113       tmpA = *pA++;
114       tmpB = *pB++;
115       diff = fabsf(tmpA - tmpB);
116       maxVal = diff;
117       blockSize--;
118 
119       while(blockSize > 0)
120       {
121          tmpA = *pA++;
122          tmpB = *pB++;
123          diff = fabsf(tmpA - tmpB);
124          if (diff > maxVal)
125          {
126            maxVal = diff;
127          }
128          blockSize --;
129       }
130    }
131    else
132    {
133 
134       a = vld1q_f32(pA);
135       b = vld1q_f32(pB);
136       pA += 4;
137       pB += 4;
138 
139       diffV = vabdq_f32(a,b);
140 
141       blockSize -= 4;
142 
143       maxValV = diffV;
144 
145 
146       blkCnt = blockSize >> 2;
147       while(blkCnt > 0)
148       {
149            a = vld1q_f32(pA);
150            b = vld1q_f32(pB);
151 
152            diffV = vabdq_f32(a,b);
153            maxValV = vmaxq_f32(maxValV, diffV);
154 
155            pA += 4;
156            pB += 4;
157            blkCnt --;
158       }
159       maxValV2 = vpmax_f32(vget_low_f32(maxValV),vget_high_f32(maxValV));
160       maxValV2 = vpmax_f32(maxValV2,maxValV2);
161       maxVal = vget_lane_f32(maxValV2,0);
162 
163 
164       blkCnt = blockSize & 3;
165       while(blkCnt > 0)
166       {
167          tmpA = *pA++;
168          tmpB = *pB++;
169          diff = fabsf(tmpA - tmpB);
170          if (diff > maxVal)
171          {
172             maxVal = diff;
173          }
174          blkCnt --;
175       }
176    }
177    return(maxVal);
178 }
179 
180 #else
arm_chebyshev_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)181 float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
182 {
183    float32_t diff=0.0f,  maxVal,tmpA, tmpB;
184 
185    tmpA = *pA++;
186    tmpB = *pB++;
187    diff = fabsf(tmpA - tmpB);
188    maxVal = diff;
189    blockSize--;
190 
191    while(blockSize > 0)
192    {
193       tmpA = *pA++;
194       tmpB = *pB++;
195       diff = fabsf(tmpA - tmpB);
196       if (diff > maxVal)
197       {
198         maxVal = diff;
199       }
200       blockSize --;
201    }
202 
203    return(maxVal);
204 }
205 #endif
206 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
207 
208 
209 /**
210  * @} end of FloatDist group
211  */
212