• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Smith-Waterman sequence alignment
2 use crate::{Algorithm, Result};
3 use alloc::vec;
4 use alloc::vec::Vec;
5 
6 /// [Smith-Waterman similarity] is edit-based and designed for nucleic acid (and protein) sequences.
7 ///
8 /// [Smith-Waterman similarity]: https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm
9 pub struct SmithWaterman {
10     /// The cost of an alignment gap. Default: 1.
11     pub gap_cost: isize,
12 
13     /// The cost of symbols matching. Default: -1.
14     pub match_cost: isize,
15 
16     /// The cost of symbols not matching. Default: 0.
17     pub mismatch_cost: isize,
18 }
19 
20 impl Default for SmithWaterman {
default() -> Self21     fn default() -> Self {
22         Self {
23             gap_cost: 1,
24             match_cost: -1,
25             mismatch_cost: 0,
26         }
27     }
28 }
29 
30 impl Algorithm<usize> for SmithWaterman {
for_vec<E: Eq>(&self, s1: &[E], s2: &[E]) -> Result<usize>31     fn for_vec<E: Eq>(&self, s1: &[E], s2: &[E]) -> Result<usize> {
32         let l1 = s1.len();
33         let l2 = s2.len();
34         let mut dist_mat: Vec<Vec<isize>> = vec![vec![0; l2 + 1]; l1 + 1];
35         for (i, sc1) in s1.iter().enumerate() {
36             for (j, sc2) in s2.iter().enumerate() {
37                 let cost = if sc1 == sc2 {
38                     self.match_cost
39                 } else {
40                     self.mismatch_cost
41                 };
42                 let match_ = dist_mat[i][j] - cost;
43                 let delete = dist_mat[i][j + 1] - self.gap_cost;
44                 let insert = dist_mat[i + 1][j] - self.gap_cost;
45                 dist_mat[i + 1][j + 1] = 0.max(match_).max(delete).max(insert);
46             }
47         }
48         let result = dist_mat[l1][l2];
49         Result {
50             #[allow(clippy::cast_sign_loss)]
51             abs: result as usize,
52             is_distance: false,
53             max: l1.max(l2),
54             len1: l1,
55             len2: l2,
56         }
57     }
58 }
59 
60 #[cfg(test)]
61 mod tests {
62     use crate::str::smith_waterman;
63     use assert2::assert;
64     use rstest::rstest;
65 
66     #[rstest]
67     // parity with textdistance
68     #[case("hello", "world", 1)]
69     #[case("abcd", "abce", 3)]
70     #[case("AGACTAGTTAC", "CGAGACGT", 3)]
71     #[case("qwe", "rty", 0)]
72     #[case("qwe", "rty", 0)]
73     #[case("check", "shrek", 2)]
74     // parity with abydos
75     #[case("cat", "hat", 2)]
76     #[case("Niall", "Neil", 1)]
77     #[case("aluminum", "Catalan", 0)]
78     #[case("ATCG", "TAGC", 1)]
function_str(#[case] s1: &str, #[case] s2: &str, #[case] exp: usize)79     fn function_str(#[case] s1: &str, #[case] s2: &str, #[case] exp: usize) {
80         assert!(smith_waterman(s1, s2) == exp);
81     }
82 }
83