1 //! Yujian-Bo distance 2 use super::levenshtein::Levenshtein; 3 use crate::{Algorithm, Result}; 4 5 /// [Yujian-Bo distance] is a normalization of [`Levenshtein`]. 6 /// 7 /// [Yujian-Bo distance]: https://ieeexplore.ieee.org/document/4160958 8 #[derive(Default)] 9 pub struct YujianBo { 10 /// Algorithm instance to use for calculating Levenshtein distance. 11 pub levenshtein: Levenshtein, 12 } 13 14 impl Algorithm<f64> for YujianBo { for_iter<C, E>(&self, s1: C, s2: C) -> Result<f64> where C: Iterator<Item = E>, E: Eq + core::hash::Hash,15 fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<f64> 16 where 17 C: Iterator<Item = E>, 18 E: Eq + core::hash::Hash, 19 { 20 let lev = self.levenshtein.for_iter(s1, s2); 21 let dc: usize = self.levenshtein.del_cost; 22 let ic: usize = self.levenshtein.ins_cost; 23 let lval = lev.val(); 24 let res = if lval == 0 { 25 0.0 26 } else { 27 (2 * lval) as f64 / (lev.len1 * dc + lev.len2 * ic + lval) as f64 28 }; 29 Result { 30 abs: res, 31 is_distance: true, 32 max: 1.0, 33 len1: lev.len1, 34 len2: lev.len2, 35 } 36 } 37 } 38 39 #[cfg(test)] 40 mod tests { 41 use crate::str::yujian_bo; 42 use assert2::assert; 43 use rstest::rstest; 44 is_close(a: f64, b: f64) -> bool45 fn is_close(a: f64, b: f64) -> bool { 46 (a - b).abs() < 1E-5 47 } 48 49 #[rstest] 50 #[case("", "", 0.0)] 51 // parity with abydos 52 #[case("a", "", 1.0)] 53 #[case("", "a", 1.0)] 54 #[case("bc", "", 1.0)] 55 #[case("", "bc", 1.0)] 56 #[case("bc", "bc", 0.0)] 57 #[case("bcd", "fgh", 0.6666666666666666)] 58 #[case("ATCG", "TAGC", 0.5454545454545454)] 59 #[case("cat", "hat", 0.285714285714)] 60 #[case("aluminum", "Catalan", 0.6363636363636364)] function_str(#[case] s1: &str, #[case] s2: &str, #[case] exp: f64)61 fn function_str(#[case] s1: &str, #[case] s2: &str, #[case] exp: f64) { 62 assert!(is_close(yujian_bo(s1, s2), exp)); 63 } 64 } 65