• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Roberts similarity
2 #![cfg(feature = "std")]
3 use crate::counter::Counter;
4 use crate::{Algorithm, Result};
5 
/// [Roberts similarity] between two sequences, compared as bags of tokens.
///
/// For every token `t` seen in either input, with `a` and `b` being how often
/// it occurs in the first and the second input respectively, the similarity is
///
/// ```text
/// sum((a + b) * min(a, b) / max(a, b)) / sum(a + b)
/// ```
///
/// The result is a similarity (not a distance) and always lies in the
/// interval from 0.0 to 1.0. Two empty inputs are treated as identical
/// and score 1.0.
///
/// [Roberts similarity]: https://github.com/chrislit/abydos/blob/master/abydos/distance/_roberts.py
#[derive(Default)]
pub struct Roberts {}
13 
14 impl Algorithm<f64> for Roberts {
for_iter<C, E>(&self, s1: C, s2: C) -> Result<f64> where C: Iterator<Item = E>, E: Eq + core::hash::Hash,15     fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<f64>
16     where
17         C: Iterator<Item = E>,
18         E: Eq + core::hash::Hash,
19     {
20         let c1 = Counter::from_iter(s1);
21         let c2 = Counter::from_iter(s2);
22         let n1 = c1.count();
23         let n2 = c2.count();
24         if n1 == 0 && n2 == 0 {
25             return Result {
26                 abs: 1.0,
27                 is_distance: false,
28                 max: 1.,
29                 len1: n1,
30                 len2: n2,
31             };
32         }
33 
34         let cm = c1.merge(&c2);
35         let alphabet = cm.keys();
36         let mut s1: f64 = 0.;
37         let mut s2: usize = 0;
38         for key in alphabet {
39             let v1 = c1.get(key).unwrap_or(&0);
40             let v2 = c2.get(key).unwrap_or(&0);
41             if v1 != &0 && v2 != &0 {
42                 s1 += ((v1 + v2) * v1.min(v2)) as f64 / *v1.max(v2) as f64;
43             }
44             s2 += v1 + v2;
45         }
46 
47         Result {
48             abs: s1 / s2 as f64,
49             is_distance: false,
50             max: 1.,
51             len1: n1,
52             len2: n2,
53         }
54     }
55 }
56 
#[cfg(test)]
mod tests {
    use crate::str::roberts;
    use assert2::assert;
    use rstest::rstest;

    /// Absolute tolerance used when comparing floating-point similarities.
    const TOLERANCE: f64 = 1E-5;

    /// Reports whether `a` and `b` differ by less than [`TOLERANCE`].
    fn is_close(a: f64, b: f64) -> bool {
        let delta = (a - b).abs();
        delta < TOLERANCE
    }

    /// Checks `roberts` on string inputs against known similarity values.
    #[rstest]
    // Degenerate inputs: two empty strings, identical strings, one side empty.
    #[case("", "", 1.)]
    #[case("a", "a", 1.)]
    #[case("", "a", 0.)]
    #[case("a", "", 0.)]
    // Parity with abydos.
    // abydos tokenizes text into bi-grams with word separators by default, and
    // its own tests cover that configuration. textdistance works on a bag of
    // chars by default and never adds word separators, so the expected values
    // below are not taken from the abydos tests; they come from evaluating
    // `Roberts(qval=1).sim(a, b)` on the same inputs.
    #[case("cat", "hat", 0.6666666666666666)]
    #[case("Niall", "Neil", 0.6111111111111112)]
    #[case("aluminum", "Catalan", 0.3555555555555555)]
    #[case("ATCG", "TAGC", 1.0)]
    #[case("Nigel", "Niall", 0.55)]
    #[case("Niall", "Nigel", 0.55)]
    #[case("Colin", "Coiln", 1.0)]
    #[case("Coiln", "Colin", 1.0)]
    #[case("ATCAACGAGT", "AACGATTAG", 0.9210526315789473)]
    fn function_str(#[case] s1: &str, #[case] s2: &str, #[case] exp: f64) {
        let actual = roberts(s1, s2);
        let ok = is_close(actual, exp);
        assert!(ok, "roberts({}, {}) is {}, not {}", s1, s2, actual, exp);
    }
}
92