1 //! Bag distance 2 #![cfg(feature = "std")] 3 use crate::counter::Counter; 4 use crate::{Algorithm, Result}; 5 6 /// [Bag distance] is how many max items there are in one sequence that aren't in the other. 7 /// 8 /// [Bag distance]: http://www-db.disi.unibo.it/research/papers/SPIRE02.pdf 9 #[derive(Default)] 10 pub struct Bag {} 11 12 impl Algorithm<usize> for Bag { for_iter<C, E>(&self, s1: C, s2: C) -> Result<usize> where C: Iterator<Item = E>, E: Eq + core::hash::Hash,13 fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<usize> 14 where 15 C: Iterator<Item = E>, 16 E: Eq + core::hash::Hash, 17 { 18 let c1 = Counter::from_iter(s1); 19 let c2 = Counter::from_iter(s2); 20 let d1 = c1.diff_count(&c2); 21 let d2 = c2.diff_count(&c1); 22 let l1 = c1.count(); 23 let l2 = c2.count(); 24 25 Result { 26 abs: d1.max(d2), 27 is_distance: true, 28 max: l1.max(l2), 29 len1: l1, 30 len2: l2, 31 } 32 } 33 } 34 35 #[cfg(test)] 36 mod tests { 37 use crate::str::bag; 38 use assert2::assert; 39 use rstest::rstest; 40 41 #[rstest] 42 #[case("", "", 0)] 43 // parity with textdistance 44 #[case("qwe", "qwe", 0)] 45 #[case("qwe", "erty", 3)] 46 #[case("qwe", "ewq", 0)] 47 #[case("qwe", "rtys", 4)] 48 // parity with talisman 49 #[case("cat", "hat", 1)] 50 #[case("Niall", "Neil", 2)] 51 #[case("aluminum", "Catalan", 5)] 52 #[case("ATCG", "TAGC", 0)] function_str(#[case] s1: &str, #[case] s2: &str, #[case] exp: usize)53 fn function_str(#[case] s1: &str, #[case] s2: &str, #[case] exp: usize) { 54 let act = bag(s1, s2); 55 assert!(act == exp); 56 } 57 } 58