• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /* ----------------------------------------------------------------------
3  * Project:      CMSIS DSP Library
4  * Title:        arm_chebyshev_distance_f32.c
5  * Description:  Chebyshev distance between two vectors
6  *
7  * $Date:        23 April 2021
8  * $Revision:    V1.9.0
9  *
10  * Target Processor: Cortex-M and Cortex-A cores
11  * -------------------------------------------------------------------- */
12 /*
13  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14  *
15  * SPDX-License-Identifier: Apache-2.0
16  *
17  * Licensed under the Apache License, Version 2.0 (the License); you may
18  * not use this file except in compliance with the License.
19  * You may obtain a copy of the License at
20  *
21  * www.apache.org/licenses/LICENSE-2.0
22  *
23  * Unless required by applicable law or agreed to in writing, software
24  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26  * See the License for the specific language governing permissions and
27  * limitations under the License.
28  */
29 
30 #include "dsp/distance_functions.h"
31 #include <limits.h>
32 #include <math.h>
33 
34 
35 /**
36   @addtogroup Chebyshev
37   @{
38  */
39 
40 
41 /**
42  * @brief        Chebyshev distance between two vectors
43  * @param[in]    pA         First vector
44  * @param[in]    pB         Second vector
45  * @param[in]    blockSize  vector length
46  * @return distance
47  *
48  */
49 
50 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
51 
52 #include "arm_helium_utils.h"
53 #include "arm_vec_math.h"
54 
arm_chebyshev_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)55 float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
56 {
57     uint32_t        blkCnt;     /* loop counters */
58     f32x4_t         vecA, vecB;
59     f32x4_t         vecDiff = vdupq_n_f32(0.0);
60     float32_t       maxValue = 0.0;
61 
62 
63     blkCnt = blockSize >> 2;
64     while (blkCnt > 0U) {
65         vecA = vld1q(pA);
66         pA += 4;
67         vecB = vld1q(pB);
68         pB += 4;
69         /*
70          * update per-lane max.
71          */
72         vecDiff = vmaxnmaq(vsubq(vecA, vecB), vecDiff);
73         /*
74          * Decrement the blockSize loop counter
75          */
76         blkCnt--;
77     }
78     /*
79      * tail
80      * (will be merged thru tail predication)
81      */
82     blkCnt = blockSize & 3;
83     if (blkCnt > 0U) {
84         mve_pred16_t    p0 = vctp32q(blkCnt);
85 
86         vecA = vldrwq_z_f32(pA, p0);
87         vecB = vldrwq_z_f32(pB, p0);
88 
89         /*
90          * Get current max per lane and current index per lane
91          * when a max is selected
92          */
93         vecDiff = vmaxnmaq_m(vecDiff, vsubq(vecA, vecB), p0);
94     }
95     /*
96      * Get max value across the vector
97      */
98     return vmaxnmavq(maxValue, vecDiff);
99 }
100 
101 #else
102 #if defined(ARM_MATH_NEON)
103 
104 #include "NEMath.h"
105 
arm_chebyshev_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)106 float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
107 {
108    float32_t diff=0.0f, maxVal=0.0f, tmpA, tmpB;
109    uint32_t blkCnt;
110    float32x4_t a,b,diffV, maxValV;
111    float32x2_t maxValV2;
112 
113    if (blockSize <= 3)
114    {
115       tmpA = *pA++;
116       tmpB = *pB++;
117       diff = fabsf(tmpA - tmpB);
118       maxVal = diff;
119       blockSize--;
120 
121       while(blockSize > 0)
122       {
123          tmpA = *pA++;
124          tmpB = *pB++;
125          diff = fabsf(tmpA - tmpB);
126          if (diff > maxVal)
127          {
128            maxVal = diff;
129          }
130          blockSize --;
131       }
132    }
133    else
134    {
135 
136       a = vld1q_f32(pA);
137       b = vld1q_f32(pB);
138       pA += 4;
139       pB += 4;
140 
141       diffV = vabdq_f32(a,b);
142 
143       blockSize -= 4;
144 
145       maxValV = diffV;
146 
147 
148       blkCnt = blockSize >> 2;
149       while(blkCnt > 0)
150       {
151            a = vld1q_f32(pA);
152            b = vld1q_f32(pB);
153 
154            diffV = vabdq_f32(a,b);
155            maxValV = vmaxq_f32(maxValV, diffV);
156 
157            pA += 4;
158            pB += 4;
159            blkCnt --;
160       }
161       maxValV2 = vpmax_f32(vget_low_f32(maxValV),vget_high_f32(maxValV));
162       maxValV2 = vpmax_f32(maxValV2,maxValV2);
163       maxVal = vget_lane_f32(maxValV2,0);
164 
165 
166       blkCnt = blockSize & 3;
167       while(blkCnt > 0)
168       {
169          tmpA = *pA++;
170          tmpB = *pB++;
171          diff = fabsf(tmpA - tmpB);
172          if (diff > maxVal)
173          {
174             maxVal = diff;
175          }
176          blkCnt --;
177       }
178    }
179    return(maxVal);
180 }
181 
182 #else
/*
 * Portable scalar variant.
 *
 * Returns the largest absolute difference |pA[i] - pB[i]| over the first
 * blockSize elements.
 *
 * An empty input (blockSize == 0) returns 0.0f without touching pA or pB.
 * Without this guard the first element would be read out of bounds and the
 * unsigned counter would wrap around on decrement.
 */
float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
{
   float32_t diff, maxVal, tmpA, tmpB;

   /* Nothing to compare: the distance of empty vectors is defined as 0. */
   if (blockSize == 0U)
   {
      return(0.0f);
   }

   /* Seed the running maximum with the first element pair. */
   tmpA = *pA++;
   tmpB = *pB++;
   maxVal = fabsf(tmpA - tmpB);
   blockSize--;

   /* Scan the remaining pairs, keeping the largest absolute difference. */
   while(blockSize > 0U)
   {
      tmpA = *pA++;
      tmpB = *pB++;
      diff = fabsf(tmpA - tmpB);
      if (diff > maxVal)
      {
        maxVal = diff;
      }
      blockSize--;
   }

   return(maxVal);
}
207 #endif
208 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
209 
210 
211 /**
212  * @} end of Chebyshev group
213  */
214