//! Roberts similarity
#![cfg(feature = "std")]
use crate::counter::Counter;
use crate::{Algorithm, Result};

/// [Roberts similarity].
///
/// The metric is always normalized on the interval from 0.0 to 1.0.
///
/// [Roberts similarity]: https://github.com/chrislit/abydos/blob/master/abydos/distance/_roberts.py
#[derive(Default)]
pub struct Roberts {}

impl Algorithm<f64> for Roberts {
    /// Compute the Roberts similarity between two token sequences.
    ///
    /// Both iterators are collected into counters. For every key of the
    /// merged counter the per-key counts `v1` and `v2` are looked up
    /// (a missing key counts as 0), and the result is
    ///
    /// ```text
    /// sum over keys with v1 != 0 and v2 != 0 of (v1 + v2) * min(v1, v2) / max(v1, v2)
    /// ------------------------------------------------------------------------------
    ///                       sum over all keys of (v1 + v2)
    /// ```
    ///
    /// When both counters report a count of zero the similarity is defined
    /// as 1.0, which also keeps the division above from seeing a zero
    /// denominator.
    fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<f64>
    where
        C: Iterator<Item = E>,
        E: Eq + core::hash::Hash,
    {
        let left = Counter::from_iter(s1);
        let right = Counter::from_iter(s2);
        let len1 = left.count();
        let len2 = right.count();

        let abs = if len1 == 0 && len2 == 0 {
            // Two empty inputs are treated as identical.
            1.0
        } else {
            let union = left.merge(&right);
            let mut weighted: f64 = 0.;
            let mut total: usize = 0;
            for key in union.keys() {
                let a = *left.get(key).unwrap_or(&0);
                let b = *right.get(key).unwrap_or(&0);
                // Every key contributes to the denominator, shared or not.
                total += a + b;
                if a == 0 || b == 0 {
                    // Keys seen on one side only add nothing to the numerator.
                    continue;
                }
                let (smaller, larger) = if a <= b { (a, b) } else { (b, a) };
                weighted += ((a + b) * smaller) as f64 / larger as f64;
            }
            weighted / total as f64
        };

        Result {
            abs,
            is_distance: false,
            max: 1.,
            len1,
            len2,
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::str::roberts;
    use assert2::assert;
    use rstest::rstest;

    /// Report whether `a` and `b` differ by less than `1E-5`.
    fn is_close(a: f64, b: f64) -> bool {
        let delta = (a - b).abs();
        delta < 1E-5
    }

    #[rstest]
    #[case("", "", 1.)]
    #[case("a", "a", 1.)]
    #[case("", "a", 0.)]
    #[case("a", "", 0.)]
    // Parity with abydos.
    // By default, abydos tokenizes any passed text for Roberts into bi-grams
    // with word separators, and that is what its own tests cover. However,
    // textdistance uses a bag of chars by default and never adds word
    // separators. So, instead of reusing the results from those tests, the
    // expected values below come from running the inputs through
    // `Roberts(qval=1).sim(a, b)`.
    #[case("cat", "hat", 0.6666666666666666)]
    #[case("Niall", "Neil", 0.6111111111111112)]
    #[case("aluminum", "Catalan", 0.3555555555555555)]
    #[case("ATCG", "TAGC", 1.0)]
    #[case("Nigel", "Niall", 0.55)]
    #[case("Niall", "Nigel", 0.55)]
    #[case("Colin", "Coiln", 1.0)]
    #[case("Coiln", "Colin", 1.0)]
    #[case("ATCAACGAGT", "AACGATTAG", 0.9210526315789473)]
    fn function_str(#[case] s1: &str, #[case] s2: &str, #[case] exp: f64) {
        let act = roberts(s1, s2);
        let matches_expected = is_close(act, exp);
        assert!(
            matches_expected,
            "roberts({}, {}) is {}, not {}",
            s1,
            s2,
            act,
            exp
        );
    }
}