• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /* ----------------------------------------------------------------------
3  * Project:      CMSIS DSP Library
4  * Title:        arm_jensenshannon_distance_f32.c
5  * Description:  Jensen-Shannon distance between two vectors
6  *
7  * $Date:        23 April 2021
8  * $Revision:    V1.9.0
9  *
10  * Target Processor: Cortex-M and Cortex-A cores
11  * -------------------------------------------------------------------- */
12 /*
13  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14  *
15  * SPDX-License-Identifier: Apache-2.0
16  *
17  * Licensed under the Apache License, Version 2.0 (the License); you may
18  * not use this file except in compliance with the License.
19  * You may obtain a copy of the License at
20  *
21  * www.apache.org/licenses/LICENSE-2.0
22  *
23  * Unless required by applicable law or agreed to in writing, software
24  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26  * See the License for the specific language governing permissions and
27  * limitations under the License.
28  */
29 
30 #include "dsp/distance_functions.h"
31 #include <limits.h>
32 #include <math.h>
33 
34 
35 /**
36   @addtogroup JensenShannon
37   @{
38  */
39 
40 #if !defined(ARM_MATH_MVEF) || defined(ARM_MATH_AUTOVECTORIZE)
41 /// @private
rel_entr(float32_t x,float32_t y)42 __STATIC_INLINE float32_t rel_entr(float32_t x, float32_t y)
43 {
44     return (x * logf(x / y));
45 }
46 #endif
47 
48 
49 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
50 
51 #include "arm_helium_utils.h"
52 #include "arm_vec_math.h"
53 
arm_jensenshannon_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)54 float32_t arm_jensenshannon_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
55 {
56     uint32_t        blkCnt;
57     float32_t       tmp;
58     f32x4_t         a, b, t, tmpV, accumV;
59 
60     accumV = vdupq_n_f32(0.0f);
61 
62     blkCnt = blockSize >> 2;
63     while (blkCnt > 0U) {
64         a = vld1q(pA);
65         b = vld1q(pB);
66 
67         t = vaddq(a, b);
68         t = vmulq(t, 0.5f);
69 
70         tmpV = vmulq(a, vrecip_medprec_f32(t));
71         tmpV = vlogq_f32(tmpV);
72         accumV = vfmaq(accumV, a, tmpV);
73 
74         tmpV = vmulq_f32(b, vrecip_medprec_f32(t));
75         tmpV = vlogq_f32(tmpV);
76         accumV = vfmaq(accumV, b, tmpV);
77 
78         pA += 4;
79         pB += 4;
80         blkCnt--;
81     }
82 
83     /*
84      * tail
85      * (will be merged thru tail predication)
86      */
87     blkCnt = blockSize & 3;
88     if (blkCnt > 0U) {
89         mve_pred16_t    p0 = vctp32q(blkCnt);
90 
91         a = vldrwq_z_f32(pA, p0);
92         b = vldrwq_z_f32(pB, p0);
93 
94         t = vaddq(a, b);
95         t = vmulq(t, 0.5f);
96 
97         tmpV = vmulq_f32(a, vrecip_medprec_f32(t));
98         tmpV = vlogq_f32(tmpV);
99         accumV = vfmaq_m_f32(accumV, a, tmpV, p0);
100 
101         tmpV = vmulq_f32(b, vrecip_medprec_f32(t));
102         tmpV = vlogq_f32(tmpV);
103         accumV = vfmaq_m_f32(accumV, b, tmpV, p0);
104 
105     }
106 
107     arm_sqrt_f32(vecAddAcrossF32Mve(accumV) / 2.0f, &tmp);
108     return (tmp);
109 }
110 
111 #else
112 
113 #if defined(ARM_MATH_NEON)
114 
115 #include "NEMath.h"
116 
117 
118 /**
119  * @brief        Jensen-Shannon distance between two vectors
120  *
121  * This function is assuming that elements of second vector are > 0
122  * and 0 only when the corresponding element of first vector is 0.
123  * Otherwise the result of the computation does not make sense
124  * and for speed reasons, the cases returning NaN or Infinity are not
125  * managed.
126  *
127  * When the function is computing x log (x / y) with x == 0 and y == 0,
128  * it will compute the right result (0) but a division by zero will occur
129  * and should be ignored in client code.
130  *
131  * @param[in]    pA         First vector
132  * @param[in]    pB         Second vector
133  * @param[in]    blockSize  vector length
134  * @return distance
135  *
136  */
137 
138 
arm_jensenshannon_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)139 float32_t arm_jensenshannon_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
140 {
141     float32_t accum, result, tmp,a,b;
142     uint32_t blkCnt;
143     float32x4_t aV,bV,t, tmpV, accumV;
144     float32x2_t accumV2;
145 
146     accum = 0.0f;
147     accumV = vdupq_n_f32(0.0f);
148 
149     blkCnt = blockSize >> 2;
150     while(blkCnt > 0)
151     {
152       aV = vld1q_f32(pA);
153       bV = vld1q_f32(pB);
154       t = vaddq_f32(aV,bV);
155       t = vmulq_n_f32(t, 0.5f);
156 
157       tmpV = vmulq_f32(aV, vinvq_f32(t));
158       tmpV = vlogq_f32(tmpV);
159       accumV = vmlaq_f32(accumV, aV, tmpV);
160 
161 
162       tmpV = vmulq_f32(bV, vinvq_f32(t));
163       tmpV = vlogq_f32(tmpV);
164       accumV = vmlaq_f32(accumV, bV, tmpV);
165 
166       pA += 4;
167       pB += 4;
168 
169 
170       blkCnt --;
171     }
172 
173     accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV));
174     accum = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1);
175 
176     blkCnt = blockSize & 3;
177     while(blkCnt > 0)
178     {
179       a = *pA;
180       b = *pB;
181       tmp = (a + b) / 2.0f;
182       accum += rel_entr(a, tmp);
183       accum += rel_entr(b, tmp);
184 
185       pA++;
186       pB++;
187 
188       blkCnt --;
189     }
190 
191 
192     arm_sqrt_f32(accum/2.0f, &result);
193     return(result);
194 
195 }
196 
197 #else
198 
199 
200 /**
201  * @brief        Jensen-Shannon distance between two vectors
202  *
203  * This function is assuming that elements of second vector are > 0
204  * and 0 only when the corresponding element of first vector is 0.
205  * Otherwise the result of the computation does not make sense
206  * and for speed reasons, the cases returning NaN or Infinity are not
207  * managed.
208  *
209  * When the function is computing x log (x / y) with x == 0 and y == 0,
210  * it will compute the right result (0) but a division by zero will occur
211  * and should be ignored in client code.
212  *
213  * @param[in]    pA         First vector
214  * @param[in]    pB         Second vector
215  * @param[in]    blockSize  vector length
216  * @return distance
217  *
218  */
219 
220 
arm_jensenshannon_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)221 float32_t arm_jensenshannon_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
222 {
223     float32_t left, right,sum, result, tmp;
224     uint32_t i;
225 
226     left = 0.0f;
227     right = 0.0f;
228     for(i=0; i < blockSize; i++)
229     {
230       tmp = (pA[i] + pB[i]) / 2.0f;
231       left  += rel_entr(pA[i], tmp);
232       right += rel_entr(pB[i], tmp);
233     }
234 
235 
236     sum = left + right;
237     arm_sqrt_f32(sum/2.0f, &result);
238     return(result);
239 
240 }
241 
242 #endif
243 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
244 
245 /**
246  * @} end of JensenShannon group
247  */
248