//! Helper functions providing the default implementation of distance/similarity algorithms for strings. //! //! See also [`textdistance::nstr`](super::nstr) for normalized distance. use super::*; /// Calculate unrestricted [Damerau-Levenshtein distance][1] for two strings. /// /// A wrapper for [`DamerauLevenshtein`]. /// /// use textdistance::str::damerau_levenshtein; /// assert!(damerau_levenshtein("abc", "acbd") == 2); // "bc" swapped and "d" added /// /// [1]: https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance #[cfg(feature = "std")] pub fn damerau_levenshtein(s1: &str, s2: &str) -> usize { DamerauLevenshtein::default().for_str(s1, s2).val() } /// Calculate restricted [Damerau-Levenshtein distance][1] for two strings. /// /// A wrapper for [`DamerauLevenshtein`]. /// /// use textdistance::str::damerau_levenshtein; /// assert!(damerau_levenshtein("abc", "acbd") == 2); // "bc" swapped and "d" added /// /// [1]: https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance #[cfg(feature = "std")] pub fn damerau_levenshtein_restricted(s1: &str, s2: &str) -> usize { let a = DamerauLevenshtein { restricted: true, ..Default::default() }; a.for_str(s1, s2).val() } /// Calculate [Hamming distance][1] for two strings. /// /// A wrapper for [`Hamming`]. /// /// use textdistance::str::hamming; /// assert!(hamming("abc", "acbd") == 3); // only "a" matches /// /// [1]: https://en.wikipedia.org/wiki/Hamming_distance pub fn hamming(s1: &str, s2: &str) -> usize { Hamming::default().for_str(s1, s2).val() } /// Calculate the length of the [Longest Common SubSequence][1] for two strings. /// /// A wrapper for [`LCSSeq`]. /// /// use textdistance::str::lcsseq; /// assert!(lcsseq("abcdef", "xbcegf") == 4); // "bcef" /// /// [1]: https://en.wikipedia.org/wiki/Longest_common_subsequence pub fn lcsseq(s1: &str, s2: &str) -> usize { LCSSeq::default().for_str(s1, s2).val() } /// Calculate the length of the [Longest Common SubString][1] for two strings. /// /// A wrapper for [`LCSStr`]. /// /// use textdistance::str::lcsstr; /// assert!(lcsstr("abcdef", "xbcegf") == 2); // "bc" /// /// [1]: https://en.wikipedia.org/wiki/Longest_common_substring pub fn lcsstr(s1: &str, s2: &str) -> usize { LCSStr::default().for_str(s1, s2).val() } /// Calculate [Levenshtein distance][1] for two strings. /// /// A wrapper for [`Levenshtein`]. /// /// use textdistance::str::levenshtein; /// assert!(levenshtein("abc", "acbd") == 2); // add "c" at 2 and then swap "c" with "d" at 4 /// /// [1]: https://en.wikipedia.org/wiki/Levenshtein_distance pub fn levenshtein(s1: &str, s2: &str) -> usize { Levenshtein::default().for_str(s1, s2).val() } /// Calculate [Ratcliff-Obershelp normalized similarity][1] for two strings. /// /// A wrapper for [`RatcliffObershelp`]. /// /// use textdistance::str::ratcliff_obershelp; /// assert_eq!(ratcliff_obershelp("abc", "acbd"), 0.5714285714285714); /// /// [1]: https://en.wikipedia.org/wiki/Gestalt_pattern_matching pub fn ratcliff_obershelp(s1: &str, s2: &str) -> f64 { RatcliffObershelp::default().for_str(s1, s2).nval() } /// Calculate [Sift4 distance][1] for two strings using the "simplest" algorithm. /// /// A wrapper for [`Sift4Simple`]. /// /// use textdistance::str::sift4_simple; /// assert!(sift4_simple("abc", "acbd") == 2); /// /// [1]: https://siderite.dev/blog/super-fast-and-accurate-string-distance.html pub fn sift4_simple(s1: &str, s2: &str) -> usize { Sift4Simple::default().for_str(s1, s2).val() } /// Calculate [Sift4 distance][1] for two strings using the "common" algorithm. /// /// A wrapper for [`Sift4Common`]. /// /// use textdistance::str::sift4_common; /// assert!(sift4_common("abc", "acbd") == 2); /// /// [1]: https://siderite.dev/blog/super-fast-and-accurate-string-distance.html pub fn sift4_common(s1: &str, s2: &str) -> usize { Sift4Common::default().for_str(s1, s2).val() } /// Calculate [Jaro normalized similarity][1] for two strings. /// /// A wrapper for [`Jaro`]. /// /// use textdistance::str::jaro; /// assert_eq!(jaro("abc", "acbd"), 0.8055555555555555); /// /// [1]: https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance#Jaro_similarity pub fn jaro(s1: &str, s2: &str) -> f64 { Jaro::default().for_str(s1, s2).nval() } /// Calculate [Jaro-Winkler normalized similarity][1] for two strings. /// /// A wrapper for [`JaroWinkler`]. /// /// use textdistance::str::jaro_winkler; /// assert_eq!(jaro_winkler("abc", "acbd"), 0.825); /// /// [1]: https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance pub fn jaro_winkler(s1: &str, s2: &str) -> f64 { JaroWinkler::default().for_str(s1, s2).nval() } /// Calculate [Yujian-Bo normalization][1] of [Levenshtein] for two strings. /// /// A wrapper for [`YujianBo`]. /// /// use textdistance::str::yujian_bo; /// assert_eq!(yujian_bo("abc", "acbd"), 0.4444444444444444); /// /// [1]: https://ieeexplore.ieee.org/document/4160958 pub fn yujian_bo(s1: &str, s2: &str) -> f64 { YujianBo::default().for_str(s1, s2).nval() } /// Calculate [MLIPNS normalization][1] of [Hamming] for two strings. /// /// A wrapper for [`MLIPNS`]. /// /// use textdistance::str::mlipns; /// assert!(mlipns("abc", "acbd") == 0); /// /// [1]: https://www.sial.iias.spb.su/files/386-386-1-PB.pdf pub fn mlipns(s1: &str, s2: &str) -> usize { MLIPNS::default().for_str(s1, s2).val() } /// Calculate [Bag distance][1] for two strings. /// /// A wrapper for [`Bag`]. /// /// use textdistance::str::bag; /// assert!(bag("abc", "acbd") == 1); /// /// [1]: http://www-db.disi.unibo.it/research/papers/SPIRE02.pdf #[cfg(feature = "std")] pub fn bag(s1: &str, s2: &str) -> usize { Bag::default().for_str(s1, s2).val() } /// Calculate [LIG3 normalization][1] of [Hamming] by [Levenshtein] for two strings. /// /// A wrapper for [`LIG3`]. /// /// use textdistance::str::lig3; /// assert_eq!(lig3("abc", "acbd"), 0.5); /// /// [1]: https://github.com/chrislit/abydos/blob/master/abydos/distance/_lig3.py pub fn lig3(s1: &str, s2: &str) -> f64 { LIG3::default().for_str(s1, s2).nval() } /// Calculate [Jaccard normalized similarity][1] for two strings. /// /// A wrapper for [`Jaccard`]. /// /// use textdistance::str::jaccard; /// assert_eq!(jaccard("abc", "acbd"), 0.75); /// /// [1]: https://en.wikipedia.org/wiki/Jaccard_index #[cfg(feature = "std")] pub fn jaccard(s1: &str, s2: &str) -> f64 { Jaccard::default().for_str(s1, s2).nval() } /// Calculate [Sørensen–Dice normalized similarity][1] for two strings. /// /// A wrapper for [`SorensenDice`]. /// /// use textdistance::str::sorensen_dice; /// assert_eq!(sorensen_dice("abc", "acbd"), 0.8571428571428571); /// /// [1]:https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient #[cfg(feature = "std")] pub fn sorensen_dice(s1: &str, s2: &str) -> f64 { SorensenDice::default().for_str(s1, s2).nval() } /// Calculate [Tversky normalized similarity][1] for two strings. /// /// A wrapper for [`Tversky`]. /// /// use textdistance::str::tversky; /// assert_eq!(tversky("abc", "acbd"), 0.75); /// /// [1]: https://en.wikipedia.org/wiki/Tversky_index #[cfg(feature = "std")] pub fn tversky(s1: &str, s2: &str) -> f64 { Tversky::default().for_str(s1, s2).nval() } /// Calculate [Overlap normalized similarity][1] for two strings. /// /// A wrapper for [`Overlap`]. /// /// use textdistance::str::overlap; /// assert_eq!(overlap("abc", "acbd"), 1.0); /// /// [1]: https://en.wikipedia.org/wiki/Overlap_coefficient #[cfg(feature = "std")] pub fn overlap(s1: &str, s2: &str) -> f64 { Overlap::default().for_str(s1, s2).nval() } /// Calculate [Cosine normalized similarity][1] for two strings. /// /// A wrapper for [`Cosine`]. /// /// use textdistance::str::cosine; /// assert_eq!(cosine("abc", "acbd"), 0.8660254037844387); /// /// [1]: https://en.wikipedia.org/wiki/Cosine_similarity #[cfg(feature = "std")] pub fn cosine(s1: &str, s2: &str) -> f64 { Cosine::default().for_str(s1, s2).nval() } /// Calculate common prefix length for two strings. /// /// A wrapper for [`Prefix`]. /// /// use textdistance::str::prefix; /// assert!(prefix("abc", "acbd") == 1); // "a" /// pub fn prefix(s1: &str, s2: &str) -> usize { Prefix::default().for_str(s1, s2).val() } /// Calculate common suffix length for two strings. /// /// A wrapper for [`Suffix`]. /// /// use textdistance::str::suffix; /// assert!(suffix("abcd", "axcd") == 2); // "cd" /// pub fn suffix(s1: &str, s2: &str) -> usize { Suffix::default().for_str(s1, s2).val() } /// Calculate length distance for two strings. /// /// A wrapper for [`Length`]. /// /// use textdistance::str::length; /// assert!(length("abcd", "axc") == 4 - 3); /// pub fn length(s1: &str, s2: &str) -> usize { Length::default().for_str(s1, s2).val() } /// Calculate [Smith-Waterman similarity] for two strings. /// /// A wrapper for [`SmithWaterman`]. /// /// use textdistance::str::smith_waterman; /// assert!(smith_waterman("abc", "acbd") == 1); /// /// [Smith-Waterman similarity]: https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm pub fn smith_waterman(s1: &str, s2: &str) -> usize { SmithWaterman::default().for_str(s1, s2).val() } /// Calculate [Entropy]-based [normalized compression distance][1] for two strings. /// /// A wrapper for [`EntropyNCD`]. /// /// use textdistance::str::entropy_ncd; /// assert_eq!(entropy_ncd("abc", "acbd"), 0.12174985473119697); /// /// [1]: https://en.wikipedia.org/wiki/Normalized_compression_distance /// [Entropy]: https://en.wikipedia.org/wiki/Entropy_(information_theory) #[cfg(feature = "std")] pub fn entropy_ncd(s1: &str, s2: &str) -> f64 { EntropyNCD::default().for_str(s1, s2).nval() } /// Calculate [Roberts similarity] for two strings. /// /// A wrapper for [`Roberts`]. /// /// use textdistance::str::roberts; /// assert_eq!(roberts("abc", "acbd"), 0.8571428571428571); /// /// [Roberts similarity]: https://github.com/chrislit/abydos/blob/master/abydos/distance/_roberts.py #[cfg(feature = "std")] pub fn roberts(s1: &str, s2: &str) -> f64 { Roberts::default().for_str(s1, s2).nval() }