1
2 /* ----------------------------------------------------------------------
3 * Project: CMSIS DSP Library
4 * Title: arm_jensenshannon_distance_f32.c
5 * Description: Jensen-Shannon distance between two vectors
6 *
7 * $Date: 23 April 2021
8 * $Revision: V1.9.0
9 *
10 * Target Processor: Cortex-M and Cortex-A cores
11 * -------------------------------------------------------------------- */
12 /*
13 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14 *
15 * SPDX-License-Identifier: Apache-2.0
16 *
17 * Licensed under the Apache License, Version 2.0 (the License); you may
18 * not use this file except in compliance with the License.
19 * You may obtain a copy of the License at
20 *
21 * www.apache.org/licenses/LICENSE-2.0
22 *
23 * Unless required by applicable law or agreed to in writing, software
24 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26 * See the License for the specific language governing permissions and
27 * limitations under the License.
28 */
29
30 #include "dsp/distance_functions.h"
31 #include <limits.h>
32 #include <math.h>
33
34
35 /**
36 @addtogroup JensenShannon
37 @{
38 */
39
40 #if !defined(ARM_MATH_MVEF) || defined(ARM_MATH_AUTOVECTORIZE)
41 /// @private
rel_entr(float32_t x,float32_t y)42 __STATIC_INLINE float32_t rel_entr(float32_t x, float32_t y)
43 {
44 return (x * logf(x / y));
45 }
46 #endif
47
48
49 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
50
51 #include "arm_helium_utils.h"
52 #include "arm_vec_math.h"
53
arm_jensenshannon_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)54 float32_t arm_jensenshannon_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
55 {
56 uint32_t blkCnt;
57 float32_t tmp;
58 f32x4_t a, b, t, tmpV, accumV;
59
60 accumV = vdupq_n_f32(0.0f);
61
62 blkCnt = blockSize >> 2;
63 while (blkCnt > 0U) {
64 a = vld1q(pA);
65 b = vld1q(pB);
66
67 t = vaddq(a, b);
68 t = vmulq(t, 0.5f);
69
70 tmpV = vmulq(a, vrecip_medprec_f32(t));
71 tmpV = vlogq_f32(tmpV);
72 accumV = vfmaq(accumV, a, tmpV);
73
74 tmpV = vmulq_f32(b, vrecip_medprec_f32(t));
75 tmpV = vlogq_f32(tmpV);
76 accumV = vfmaq(accumV, b, tmpV);
77
78 pA += 4;
79 pB += 4;
80 blkCnt--;
81 }
82
83 /*
84 * tail
85 * (will be merged thru tail predication)
86 */
87 blkCnt = blockSize & 3;
88 if (blkCnt > 0U) {
89 mve_pred16_t p0 = vctp32q(blkCnt);
90
91 a = vldrwq_z_f32(pA, p0);
92 b = vldrwq_z_f32(pB, p0);
93
94 t = vaddq(a, b);
95 t = vmulq(t, 0.5f);
96
97 tmpV = vmulq_f32(a, vrecip_medprec_f32(t));
98 tmpV = vlogq_f32(tmpV);
99 accumV = vfmaq_m_f32(accumV, a, tmpV, p0);
100
101 tmpV = vmulq_f32(b, vrecip_medprec_f32(t));
102 tmpV = vlogq_f32(tmpV);
103 accumV = vfmaq_m_f32(accumV, b, tmpV, p0);
104
105 }
106
107 arm_sqrt_f32(vecAddAcrossF32Mve(accumV) / 2.0f, &tmp);
108 return (tmp);
109 }
110
111 #else
112
113 #if defined(ARM_MATH_NEON)
114
115 #include "NEMath.h"
116
117
118 /**
119 * @brief Jensen-Shannon distance between two vectors
120 *
121 * This function is assuming that elements of second vector are > 0
122 * and 0 only when the corresponding element of first vector is 0.
123 * Otherwise the result of the computation does not make sense
124 * and for speed reasons, the cases returning NaN or Infinity are not
125 * managed.
126 *
127 * When the function is computing x log (x / y) with x == 0 and y == 0,
128 * it will compute the right result (0) but a division by zero will occur
129 * and should be ignored in client code.
130 *
131 * @param[in] pA First vector
132 * @param[in] pB Second vector
133 * @param[in] blockSize vector length
134 * @return distance
135 *
136 */
137
138
arm_jensenshannon_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)139 float32_t arm_jensenshannon_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
140 {
141 float32_t accum, result, tmp,a,b;
142 uint32_t blkCnt;
143 float32x4_t aV,bV,t, tmpV, accumV;
144 float32x2_t accumV2;
145
146 accum = 0.0f;
147 accumV = vdupq_n_f32(0.0f);
148
149 blkCnt = blockSize >> 2;
150 while(blkCnt > 0)
151 {
152 aV = vld1q_f32(pA);
153 bV = vld1q_f32(pB);
154 t = vaddq_f32(aV,bV);
155 t = vmulq_n_f32(t, 0.5f);
156
157 tmpV = vmulq_f32(aV, vinvq_f32(t));
158 tmpV = vlogq_f32(tmpV);
159 accumV = vmlaq_f32(accumV, aV, tmpV);
160
161
162 tmpV = vmulq_f32(bV, vinvq_f32(t));
163 tmpV = vlogq_f32(tmpV);
164 accumV = vmlaq_f32(accumV, bV, tmpV);
165
166 pA += 4;
167 pB += 4;
168
169
170 blkCnt --;
171 }
172
173 accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV));
174 accum = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1);
175
176 blkCnt = blockSize & 3;
177 while(blkCnt > 0)
178 {
179 a = *pA;
180 b = *pB;
181 tmp = (a + b) / 2.0f;
182 accum += rel_entr(a, tmp);
183 accum += rel_entr(b, tmp);
184
185 pA++;
186 pB++;
187
188 blkCnt --;
189 }
190
191
192 arm_sqrt_f32(accum/2.0f, &result);
193 return(result);
194
195 }
196
197 #else
198
199
200 /**
201 * @brief Jensen-Shannon distance between two vectors
202 *
203 * This function is assuming that elements of second vector are > 0
204 * and 0 only when the corresponding element of first vector is 0.
205 * Otherwise the result of the computation does not make sense
206 * and for speed reasons, the cases returning NaN or Infinity are not
207 * managed.
208 *
209 * When the function is computing x log (x / y) with x == 0 and y == 0,
210 * it will compute the right result (0) but a division by zero will occur
211 * and should be ignored in client code.
212 *
213 * @param[in] pA First vector
214 * @param[in] pB Second vector
215 * @param[in] blockSize vector length
216 * @return distance
217 *
218 */
219
220
arm_jensenshannon_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)221 float32_t arm_jensenshannon_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
222 {
223 float32_t left, right,sum, result, tmp;
224 uint32_t i;
225
226 left = 0.0f;
227 right = 0.0f;
228 for(i=0; i < blockSize; i++)
229 {
230 tmp = (pA[i] + pB[i]) / 2.0f;
231 left += rel_entr(pA[i], tmp);
232 right += rel_entr(pB[i], tmp);
233 }
234
235
236 sum = left + right;
237 arm_sqrt_f32(sum/2.0f, &result);
238 return(result);
239
240 }
241
242 #endif
243 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
244
245 /**
246 * @} end of JensenShannon group
247 */
248