• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /* ----------------------------------------------------------------------
3  * Project:      CMSIS DSP Library
4  * Title:        arm_cityblock_distance_f32.c
5  * Description:  Cityblock (Manhattan) distance between two vectors
6  *
7  * $Date:        23 April 2021
8  * $Revision:    V1.9.0
9  *
10  * Target Processor: Cortex-M and Cortex-A cores
11  * -------------------------------------------------------------------- */
12 /*
13  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14  *
15  * SPDX-License-Identifier: Apache-2.0
16  *
17  * Licensed under the Apache License, Version 2.0 (the License); you may
18  * not use this file except in compliance with the License.
19  * You may obtain a copy of the License at
20  *
21  * www.apache.org/licenses/LICENSE-2.0
22  *
23  * Unless required by applicable law or agreed to in writing, software
24  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26  * See the License for the specific language governing permissions and
27  * limitations under the License.
28  */
29 
30 #include "dsp/distance_functions.h"
31 #include <limits.h>
32 #include <math.h>
33 
34 /**
35   @addtogroup Manhattan
36   @{
37  */
38 
39 
40 /**
41  * @brief        Cityblock (Manhattan) distance between two vectors
42  * @param[in]    pA         First vector
43  * @param[in]    pB         Second vector
44  * @param[in]    blockSize  vector length
45  * @return distance
46  *
47  */
48 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
49 
50 #include "arm_helium_utils.h"
51 #include "arm_vec_math.h"
52 
arm_cityblock_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)53 float32_t arm_cityblock_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
54 {
55     uint32_t        blkCnt;
56     f32x4_t         a, b, accumV, tempV;
57 
58     accumV = vdupq_n_f32(0.0f);
59 
60     blkCnt = blockSize >> 2;
61     while (blkCnt > 0U) {
62         a = vld1q(pA);
63         b = vld1q(pB);
64 
65         tempV = vabdq(a, b);
66         accumV = vaddq(accumV, tempV);
67 
68         pA += 4;
69         pB += 4;
70         blkCnt--;
71     }
72 
73     /*
74      * tail
75      * (will be merged thru tail predication)
76      */
77     blkCnt = blockSize & 3;
78     if (blkCnt > 0U) {
79         mve_pred16_t    p0 = vctp32q(blkCnt);
80 
81         a = vldrwq_z_f32(pA, p0);
82         b = vldrwq_z_f32(pB, p0);
83 
84         tempV = vabdq(a, b);
85         accumV = vaddq_m(accumV, accumV, tempV, p0);
86     }
87 
88     return vecAddAcrossF32Mve(accumV);
89 }
90 
91 #else
92 #if defined(ARM_MATH_NEON)
93 
94 #include "NEMath.h"
95 
arm_cityblock_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)96 float32_t arm_cityblock_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
97 {
98    float32_t accum=0.0f, tmpA, tmpB;
99    uint32_t blkCnt;
100    float32x4_t a,b,accumV, tempV;
101    float32x2_t accumV2;
102 
103    accumV = vdupq_n_f32(0.0f);
104 
105    blkCnt = blockSize >> 2;
106    while(blkCnt > 0)
107    {
108         a = vld1q_f32(pA);
109         b = vld1q_f32(pB);
110 
111         tempV = vabdq_f32(a,b);
112         accumV = vaddq_f32(accumV, tempV);
113 
114         pA += 4;
115         pB += 4;
116         blkCnt --;
117    }
118    accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV));
119    accumV2 = vpadd_f32(accumV2,accumV2);
120    accum = vget_lane_f32(accumV2,0);
121 
122 
123    blkCnt = blockSize & 3;
124    while(blkCnt > 0)
125    {
126       tmpA = *pA++;
127       tmpB = *pB++;
128       accum += fabsf(tmpA - tmpB);
129 
130       blkCnt --;
131    }
132    return(accum);
133 }
134 
135 #else
arm_cityblock_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)136 float32_t arm_cityblock_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
137 {
138    float32_t accum,tmpA, tmpB;
139 
140    accum = 0.0f;
141    while(blockSize > 0)
142    {
143       tmpA = *pA++;
144       tmpB = *pB++;
145       accum  += fabsf(tmpA - tmpB);
146 
147       blockSize --;
148    }
149 
150    return(accum);
151 }
152 #endif
153 #endif
154 
155 /**
156  * @} end of Manhattan group
157  */
158