1 use core::time::Duration;
2 use criterion::BenchmarkId;
3 use criterion::{black_box, criterion_group, criterion_main, Criterion};
4 use std::fs;
5 use textdistance::{nstr, str};
6
/// Loads a small sample of license texts to use as benchmark input.
///
/// Files are read from `choosealicense.com/_licenses`, resolved against the current
/// working directory. Entries are sorted by file name before sampling so that every
/// run (on any platform) benchmarks the same files.
///
/// Returns at most 10 `(file name, shortened text)` pairs. The shortened text skips
/// the first character and keeps up to the next 199 characters; files shorter than
/// that are returned as-is (minus the first character) instead of panicking.
///
/// # Panics
///
/// Panics if the directory cannot be listed, if a file name is not valid Unicode,
/// or if a file cannot be read as UTF-8 text.
fn read_licenses() -> Vec<(String, String)> {
    // Take only a subset of licenses to speed up the benchmark run.
    const MAX_LICENSES: usize = 10;
    // Shorten each text to speed up the benchmark run. For ASCII text this window is
    // exactly bytes `1..200`, but counting characters can never split a multi-byte
    // character or run past the end of a short file.
    const SKIPPED_CHARS: usize = 1;
    const KEPT_CHARS: usize = 199;

    let mut entries = fs::read_dir("choosealicense.com/_licenses")
        .and_then(|dir| dir.collect::<Result<Vec<_>, _>>())
        .expect("failed to list choosealicense.com/_licenses");
    // `read_dir` yields entries in a platform- and filesystem-dependent order;
    // sorting makes the selected subset reproducible.
    entries.sort_by_key(|entry| entry.file_name());

    entries
        .into_iter()
        .take(MAX_LICENSES)
        .map(|entry| {
            let name = entry
                .file_name()
                .into_string()
                .expect("license file name is not valid Unicode");
            let text = fs::read_to_string(entry.path())
                .expect("failed to read license file as UTF-8 text");
            let shortened = text
                .chars()
                .skip(SKIPPED_CHARS)
                .take(KEPT_CHARS)
                .collect::<String>();
            (name, shortened)
        })
        .collect()
}
27
/// Common call signature of the benchmarked algorithms: takes two string slices and
/// returns an `f64`. Unsized, so it is used behind a pointer such as `Box<AlgFn>`.
type AlgFn = dyn Fn(&str, &str) -> f64;
29
criterion_benchmark(c: &mut Criterion)30 fn criterion_benchmark(c: &mut Criterion) {
31 benchmark_nstr(c);
32 }
33
benchmark_nstr(c: &mut Criterion)34 fn benchmark_nstr(c: &mut Criterion) {
35 let licenses = read_licenses();
36 let mut group = c.benchmark_group("nstr");
37 group.sample_size(10);
38 group.measurement_time(Duration::new(3, 0));
39 group.warm_up_time(Duration::new(1, 0));
40 // group.sampling_mode(criterion::SamplingMode::Flat);
41
42 let algs: Vec<(&str, Box<AlgFn>)> = vec![
43 ("bag", Box::new(nstr::bag)),
44 ("cosine", Box::new(nstr::cosine)),
45 ("damerau_levenshtein", Box::new(nstr::damerau_levenshtein)),
46 (
47 "damerau_levenshtein_restricted",
48 Box::new(nstr::damerau_levenshtein_restricted),
49 ),
50 ("entropy_ncd", Box::new(nstr::entropy_ncd)),
51 ("hamming", Box::new(nstr::hamming)),
52 ("jaccard", Box::new(nstr::jaccard)),
53 ("jaro_winkler", Box::new(nstr::jaro_winkler)),
54 ("jaro", Box::new(nstr::jaro)),
55 ("lcsseq", Box::new(nstr::lcsseq)),
56 ("lcsstr", Box::new(nstr::lcsstr)),
57 ("length", Box::new(nstr::length)),
58 ("levenshtein", Box::new(nstr::levenshtein)),
59 ("lig3", Box::new(nstr::lig3)),
60 ("mlipns", Box::new(nstr::mlipns)),
61 ("overlap", Box::new(nstr::overlap)),
62 ("prefix", Box::new(nstr::prefix)),
63 ("ratcliff_obershelp", Box::new(nstr::ratcliff_obershelp)),
64 ("roberts", Box::new(nstr::roberts)),
65 ("sift4_common", Box::new(nstr::sift4_common)),
66 ("sift4_simple", Box::new(nstr::sift4_simple)),
67 ("smith_waterman", Box::new(nstr::smith_waterman)),
68 ("sorensen_dice", Box::new(nstr::sorensen_dice)),
69 ("suffix", Box::new(nstr::suffix)),
70 ("tversky", Box::new(nstr::tversky)),
71 ("yujian_bo", Box::new(nstr::yujian_bo)),
72 ];
73
74 for (alg_name, alg_fn) in algs {
75 group.bench_with_input(
76 BenchmarkId::from_parameter(alg_name),
77 &licenses,
78 |b, licenses| {
79 b.iter(|| {
80 for (_, l1) in licenses {
81 for (_, l2) in licenses {
82 let s1 = black_box(l1);
83 let s2 = black_box(l2);
84 alg_fn(s1, s2);
85 }
86 }
87 });
88 },
89 );
90 }
91 }
92
93 criterion_group!(benches, criterion_benchmark);
94 criterion_main!(benches);
95