• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! MLIPNS similarity
2 use super::hamming::Hamming;
3 use crate::{Algorithm, Result};
4 use core::hash::Hash;
5 
/// [MLIPNS similarity] is a normalization for [`Hamming`] that returns either 0 or 1.
///
/// MLIPNS stands for Modified Language-Independent Product Name Search.
///
/// [MLIPNS similarity]: https://www.sial.iias.spb.su/files/386-386-1-PB.pdf
pub struct MLIPNS {
    /// Hamming algorithm whose result is collapsed into the 0/1 verdict.
    hamming: Hamming,
    /// Largest mismatch ratio at which the two inputs still count as similar.
    threshold: f64,
    /// Number of mismatches that may be discounted while looking for a match.
    max_mismatches: usize,
}
16 
17 impl Default for MLIPNS {
default() -> Self18     fn default() -> Self {
19         Self {
20             hamming: Hamming::default(),
21             threshold: 0.25,
22             max_mismatches: 2,
23         }
24     }
25 }
26 
27 impl MLIPNS {
check(&self, ham: &Result<usize>) -> bool28     fn check(&self, ham: &Result<usize>) -> bool {
29         let mut mismatches = 0;
30         let mut max_length = ham.max;
31         let mut ham_val = ham.val();
32         while mismatches <= self.max_mismatches {
33             if max_length == 0 {
34                 return true;
35             }
36             if (1.0 - (max_length - ham_val) as f64 / max_length as f64) <= self.threshold {
37                 return true;
38             }
39             mismatches += 1;
40             ham_val -= 1;
41             max_length -= 1;
42         }
43         max_length == 0
44     }
45 }
46 
47 impl Algorithm<usize> for MLIPNS {
for_iter<C, E>(&self, s1: C, s2: C) -> Result<usize> where C: Iterator<Item = E>, E: Eq + Hash,48     fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<usize>
49     where
50         C: Iterator<Item = E>,
51         E: Eq + Hash,
52     {
53         let ham = self.hamming.for_iter(s1, s2);
54         Result {
55             abs: self.check(&ham).into(),
56             is_distance: false,
57             max: 1,
58             len1: ham.len1,
59             len2: ham.len2,
60         }
61     }
62 }
63 
#[cfg(test)]
mod tests {
    use crate::str::mlipns;
    use assert2::assert;
    use rstest::rstest;

    /// Checks the string-level `mlipns` helper against known 0/1 outcomes.
    #[rstest]
    #[case("", "", 1)]
    // parity with abydos and talisman
    #[case("cat", "hat", 1)]
    #[case("Niall", "Neil", 0)]
    #[case("aluminum", "Catalan", 0)]
    #[case("ATCG", "TAGC", 0)]
    fn function_str(#[case] lhs: &str, #[case] rhs: &str, #[case] expected: usize) {
        let actual = mlipns(lhs, rhs);
        assert!(actual == expected);
    }
}
81