1
2 /* ----------------------------------------------------------------------
3 * Project: CMSIS DSP Library
4 * Title: arm_cityblock_distance_f32.c
5 * Description: Cityblock (Manhattan) distance between two vectors
6 *
7 * $Date: 23 April 2021
8 * $Revision: V1.9.0
9 *
10 * Target Processor: Cortex-M and Cortex-A cores
11 * -------------------------------------------------------------------- */
12 /*
13 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14 *
15 * SPDX-License-Identifier: Apache-2.0
16 *
17 * Licensed under the Apache License, Version 2.0 (the License); you may
18 * not use this file except in compliance with the License.
19 * You may obtain a copy of the License at
20 *
21 * www.apache.org/licenses/LICENSE-2.0
22 *
23 * Unless required by applicable law or agreed to in writing, software
24 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26 * See the License for the specific language governing permissions and
27 * limitations under the License.
28 */
29
30 #include "dsp/distance_functions.h"
31 #include <limits.h>
32 #include <math.h>
33
34 /**
35 @addtogroup Manhattan
36 @{
37 */
38
39
40 /**
41 * @brief Cityblock (Manhattan) distance between two vectors
42 * @param[in] pA First vector
43 * @param[in] pB Second vector
44 * @param[in] blockSize vector length
45 * @return distance
46 *
47 */
48 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
49
50 #include "arm_helium_utils.h"
51 #include "arm_vec_math.h"
52
arm_cityblock_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)53 float32_t arm_cityblock_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
54 {
55 uint32_t blkCnt;
56 f32x4_t a, b, accumV, tempV;
57
58 accumV = vdupq_n_f32(0.0f);
59
60 blkCnt = blockSize >> 2;
61 while (blkCnt > 0U) {
62 a = vld1q(pA);
63 b = vld1q(pB);
64
65 tempV = vabdq(a, b);
66 accumV = vaddq(accumV, tempV);
67
68 pA += 4;
69 pB += 4;
70 blkCnt--;
71 }
72
73 /*
74 * tail
75 * (will be merged thru tail predication)
76 */
77 blkCnt = blockSize & 3;
78 if (blkCnt > 0U) {
79 mve_pred16_t p0 = vctp32q(blkCnt);
80
81 a = vldrwq_z_f32(pA, p0);
82 b = vldrwq_z_f32(pB, p0);
83
84 tempV = vabdq(a, b);
85 accumV = vaddq_m(accumV, accumV, tempV, p0);
86 }
87
88 return vecAddAcrossF32Mve(accumV);
89 }
90
91 #else
92 #if defined(ARM_MATH_NEON)
93
94 #include "NEMath.h"
95
arm_cityblock_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)96 float32_t arm_cityblock_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
97 {
98 float32_t accum=0.0f, tmpA, tmpB;
99 uint32_t blkCnt;
100 float32x4_t a,b,accumV, tempV;
101 float32x2_t accumV2;
102
103 accumV = vdupq_n_f32(0.0f);
104
105 blkCnt = blockSize >> 2;
106 while(blkCnt > 0)
107 {
108 a = vld1q_f32(pA);
109 b = vld1q_f32(pB);
110
111 tempV = vabdq_f32(a,b);
112 accumV = vaddq_f32(accumV, tempV);
113
114 pA += 4;
115 pB += 4;
116 blkCnt --;
117 }
118 accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV));
119 accumV2 = vpadd_f32(accumV2,accumV2);
120 accum = vget_lane_f32(accumV2,0);
121
122
123 blkCnt = blockSize & 3;
124 while(blkCnt > 0)
125 {
126 tmpA = *pA++;
127 tmpB = *pB++;
128 accum += fabsf(tmpA - tmpB);
129
130 blkCnt --;
131 }
132 return(accum);
133 }
134
135 #else
/*
 * Portable scalar variant of the cityblock (Manhattan) distance.
 *
 * Sums |pA[i] - pB[i]| for i in [0, blockSize), accumulating in single
 * precision in element order.
 *
 * pA, pB     input vectors; blockSize elements are read from each
 *            (neither pointer is dereferenced when blockSize is 0)
 * blockSize  number of elements to process
 * returns    the accumulated distance (0.0f when blockSize is 0)
 */
float32_t arm_cityblock_distance_f32(const float32_t *pA, const float32_t *pB, uint32_t blockSize)
{
    float32_t accum = 0.0f;

    while (blockSize > 0U)
    {
        const float32_t tmpA = *pA++;
        const float32_t tmpB = *pB++;

        accum += fabsf(tmpA - tmpB);
        blockSize--;
    }

    return accum;
}
152 #endif
153 #endif
154
155 /**
156 * @} end of Manhattan group
157 */
158