• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Yujian-Bo distance
2 use super::levenshtein::Levenshtein;
3 use crate::{Algorithm, Result};
4 
5 /// [Yujian-Bo distance] is a normalization of [`Levenshtein`].
6 ///
7 /// [Yujian-Bo distance]: https://ieeexplore.ieee.org/document/4160958
8 #[derive(Default)]
9 pub struct YujianBo {
10     /// Algorithm instance to use for calculating Levenshtein distance.
11     pub levenshtein: Levenshtein,
12 }
13 
14 impl Algorithm<f64> for YujianBo {
for_iter<C, E>(&self, s1: C, s2: C) -> Result<f64> where C: Iterator<Item = E>, E: Eq + core::hash::Hash,15     fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<f64>
16     where
17         C: Iterator<Item = E>,
18         E: Eq + core::hash::Hash,
19     {
20         let lev = self.levenshtein.for_iter(s1, s2);
21         let dc: usize = self.levenshtein.del_cost;
22         let ic: usize = self.levenshtein.ins_cost;
23         let lval = lev.val();
24         let res = if lval == 0 {
25             0.0
26         } else {
27             (2 * lval) as f64 / (lev.len1 * dc + lev.len2 * ic + lval) as f64
28         };
29         Result {
30             abs: res,
31             is_distance: true,
32             max: 1.0,
33             len1: lev.len1,
34             len2: lev.len2,
35         }
36     }
37 }
38 
#[cfg(test)]
mod tests {
    use crate::str::yujian_bo;
    use assert2::assert;
    use rstest::rstest;

    /// Reports whether `a` and `b` differ by less than `1E-5` in absolute value.
    fn is_close(a: f64, b: f64) -> bool {
        let delta = (a - b).abs();
        delta < 1E-5
    }

    // Each case is `(s1, s2, expected distance)`.
    #[rstest]
    #[case("", "", 0.0)]
    // parity with abydos
    #[case("a", "", 1.0)]
    #[case("", "a", 1.0)]
    #[case("bc", "", 1.0)]
    #[case("", "bc", 1.0)]
    #[case("bc", "bc", 0.0)]
    #[case("bcd", "fgh", 0.6666666666666666)]
    #[case("ATCG", "TAGC", 0.5454545454545454)]
    #[case("cat", "hat", 0.285714285714)]
    #[case("aluminum", "Catalan", 0.6363636363636364)]
    fn function_str(#[case] s1: &str, #[case] s2: &str, #[case] exp: f64) {
        let actual = yujian_bo(s1, s2);
        assert!(is_close(actual, exp));
    }
}
65