1 //! MLIPNS similarity 2 use super::hamming::Hamming; 3 use crate::{Algorithm, Result}; 4 use core::hash::Hash; 5 6 /// [MLIPNS similarity] is a normalization for [`Hamming`] that returns either 0 or 1. 7 /// 8 /// MLIPNS stands for Modified Language-Independent Product Name Search. 9 /// 10 /// [MLIPNS similarity]: https://www.sial.iias.spb.su/files/386-386-1-PB.pdf 11 pub struct MLIPNS { 12 hamming: Hamming, 13 threshold: f64, 14 max_mismatches: usize, 15 } 16 17 impl Default for MLIPNS { default() -> Self18 fn default() -> Self { 19 Self { 20 hamming: Hamming::default(), 21 threshold: 0.25, 22 max_mismatches: 2, 23 } 24 } 25 } 26 27 impl MLIPNS { check(&self, ham: &Result<usize>) -> bool28 fn check(&self, ham: &Result<usize>) -> bool { 29 let mut mismatches = 0; 30 let mut max_length = ham.max; 31 let mut ham_val = ham.val(); 32 while mismatches <= self.max_mismatches { 33 if max_length == 0 { 34 return true; 35 } 36 if (1.0 - (max_length - ham_val) as f64 / max_length as f64) <= self.threshold { 37 return true; 38 } 39 mismatches += 1; 40 ham_val -= 1; 41 max_length -= 1; 42 } 43 max_length == 0 44 } 45 } 46 47 impl Algorithm<usize> for MLIPNS { for_iter<C, E>(&self, s1: C, s2: C) -> Result<usize> where C: Iterator<Item = E>, E: Eq + Hash,48 fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<usize> 49 where 50 C: Iterator<Item = E>, 51 E: Eq + Hash, 52 { 53 let ham = self.hamming.for_iter(s1, s2); 54 Result { 55 abs: self.check(&ham).into(), 56 is_distance: false, 57 max: 1, 58 len1: ham.len1, 59 len2: ham.len2, 60 } 61 } 62 } 63 64 #[cfg(test)] 65 mod tests { 66 use crate::str::mlipns; 67 use assert2::assert; 68 use rstest::rstest; 69 70 #[rstest] 71 #[case("", "", 1)] 72 // parity with abydos and talisman 73 #[case("cat", "hat", 1)] 74 #[case("Niall", "Neil", 0)] 75 #[case("aluminum", "Catalan", 0)] 76 #[case("ATCG", "TAGC", 0)] function_str(#[case] s1: &str, #[case] s2: &str, #[case] exp: usize)77 fn function_str(#[case] s1: &str, #[case] s2: &str, #[case] exp: usize) { 78 assert!(mlipns(s1, s2) == exp); 79 } 80 } 81