• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use core::time::Duration;
2 use criterion::BenchmarkId;
3 use criterion::{black_box, criterion_group, criterion_main, Criterion};
4 use std::fs;
5 use textdistance::{nstr, str};
6 
/// Returns the byte window `1..200` of `text`, clamped so it can never panic.
///
/// The start is moved forward and the end moved backward to the nearest
/// UTF-8 character boundary, and both are limited to the length of `text`.
/// For ASCII texts of at least 200 bytes this is exactly `&text[1..200]`.
fn shorten_license_text(text: &str) -> &str {
    const START: usize = 1;
    const END: usize = 200;

    let mut start = START.min(text.len());
    // `text.len()` is always a boundary, so this loop terminates.
    while !text.is_char_boundary(start) {
        start += 1;
    }

    let mut end = END.min(text.len()).max(start);
    // `start` is a boundary and `end >= start`, so this loop terminates.
    while !text.is_char_boundary(end) {
        end -= 1;
    }

    &text[start..end]
}

/// Loads a small sample of license texts to use as benchmark input.
///
/// Reads at most 10 entries from `choosealicense.com/_licenses` (relative to
/// the current working directory) and returns `(file name, shortened text)`
/// pairs. Both the number of files and the length of each text are limited
/// to keep the benchmark run short.
///
/// The entries are taken in the order reported by the file system, which is
/// platform-dependent.
///
/// # Panics
///
/// Panics if the directory or any of the selected files cannot be read, or
/// if a file is not valid UTF-8.
fn read_licenses() -> Vec<(String, String)> {
    const LICENSES_DIR: &str = "choosealicense.com/_licenses";
    const MAX_LICENSES: usize = 10;

    fs::read_dir(LICENSES_DIR)
        .unwrap_or_else(|err| panic!("cannot read directory {}: {}", LICENSES_DIR, err))
        // take only a subset of licenses to speed up benchmarks run
        .take(MAX_LICENSES)
        .map(|entry| {
            let entry = entry
                .unwrap_or_else(|err| panic!("cannot list {}: {}", LICENSES_DIR, err));
            let path = entry.path();
            let text = fs::read_to_string(&path)
                .unwrap_or_else(|err| panic!("cannot read {}: {}", path.display(), err));
            let name = entry.file_name().to_string_lossy().into_owned();
            // shorten the text to speed up benchmarks run
            (name, shorten_license_text(&text).to_owned())
        })
        .collect()
}
27 
/// Signature shared by every benchmarked algorithm: takes two string slices
/// and returns an `f64`. Unsized, so it is used behind a `Box`.
type AlgFn = dyn Fn(&str, &str) -> f64;
29 
criterion_benchmark(c: &mut Criterion)30 fn criterion_benchmark(c: &mut Criterion) {
31     benchmark_nstr(c);
32 }
33 
benchmark_nstr(c: &mut Criterion)34 fn benchmark_nstr(c: &mut Criterion) {
35     let licenses = read_licenses();
36     let mut group = c.benchmark_group("nstr");
37     group.sample_size(10);
38     group.measurement_time(Duration::new(3, 0));
39     group.warm_up_time(Duration::new(1, 0));
40     // group.sampling_mode(criterion::SamplingMode::Flat);
41 
42     let algs: Vec<(&str, Box<AlgFn>)> = vec![
43         ("bag", Box::new(nstr::bag)),
44         ("cosine", Box::new(nstr::cosine)),
45         ("damerau_levenshtein", Box::new(nstr::damerau_levenshtein)),
46         (
47             "damerau_levenshtein_restricted",
48             Box::new(nstr::damerau_levenshtein_restricted),
49         ),
50         ("entropy_ncd", Box::new(nstr::entropy_ncd)),
51         ("hamming", Box::new(nstr::hamming)),
52         ("jaccard", Box::new(nstr::jaccard)),
53         ("jaro_winkler", Box::new(nstr::jaro_winkler)),
54         ("jaro", Box::new(nstr::jaro)),
55         ("lcsseq", Box::new(nstr::lcsseq)),
56         ("lcsstr", Box::new(nstr::lcsstr)),
57         ("length", Box::new(nstr::length)),
58         ("levenshtein", Box::new(nstr::levenshtein)),
59         ("lig3", Box::new(nstr::lig3)),
60         ("mlipns", Box::new(nstr::mlipns)),
61         ("overlap", Box::new(nstr::overlap)),
62         ("prefix", Box::new(nstr::prefix)),
63         ("ratcliff_obershelp", Box::new(nstr::ratcliff_obershelp)),
64         ("roberts", Box::new(nstr::roberts)),
65         ("sift4_common", Box::new(nstr::sift4_common)),
66         ("sift4_simple", Box::new(nstr::sift4_simple)),
67         ("smith_waterman", Box::new(nstr::smith_waterman)),
68         ("sorensen_dice", Box::new(nstr::sorensen_dice)),
69         ("suffix", Box::new(nstr::suffix)),
70         ("tversky", Box::new(nstr::tversky)),
71         ("yujian_bo", Box::new(nstr::yujian_bo)),
72     ];
73 
74     for (alg_name, alg_fn) in algs {
75         group.bench_with_input(
76             BenchmarkId::from_parameter(alg_name),
77             &licenses,
78             |b, licenses| {
79                 b.iter(|| {
80                     for (_, l1) in licenses {
81                         for (_, l2) in licenses {
82                             let s1 = black_box(l1);
83                             let s2 = black_box(l2);
84                             alg_fn(s1, s2);
85                         }
86                     }
87                 });
88             },
89         );
90     }
91 }
92 
// Declare the `benches` group containing `criterion_benchmark`, and pass it
// to `criterion_main!`.
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
95