1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5 use criterion::{black_box, criterion_group, criterion_main, Criterion};
6 use rand::SeedableRng;
7 use rand_distr::{Alphanumeric, Distribution, Uniform};
8 use rand_pcg::Lcg64Xsh32;
9 use std::ops::RangeInclusive;
10
11 use zerovec::VarZeroVec;
12
13 /// Generates an array of random alphanumeric strings.
14 ///
15 /// - length = range of lengths for the strings (chosen uniformly at random)
16 /// - count = number of strings to generate
17 /// - seed = seed for the PRNG
18 ///
19 /// Returns a tuple including the vector and a u64 that can be used to seed the next PRNG.
random_alphanums(lengths: RangeInclusive<usize>, count: usize, seed: u64) -> (Vec<String>, u64)20 fn random_alphanums(lengths: RangeInclusive<usize>, count: usize, seed: u64) -> (Vec<String>, u64) {
21 // Lcg64Xsh32 is a small, fast PRNG for reproducible benchmarks.
22 let mut rng1 = Lcg64Xsh32::seed_from_u64(seed);
23 let mut rng2 = Lcg64Xsh32::seed_from_u64(rand::Rng::gen(&mut rng1));
24 let alpha_dist = Alphanumeric;
25 let len_dist = Uniform::from(lengths);
26 let string_vec = len_dist
27 .sample_iter(&mut rng1)
28 .take(count)
29 .map(|len| {
30 (&alpha_dist)
31 .sample_iter(&mut rng2)
32 .take(len)
33 .map(char::from)
34 .collect::<String>()
35 })
36 .collect();
37 (string_vec, rand::Rng::gen(&mut rng1))
38 }
39
overview_bench(c: &mut Criterion)40 fn overview_bench(c: &mut Criterion) {
41 // Same as vzv/char_count/vzv but with different inputs
42 let seed = 42;
43 let (string_vec, _) = random_alphanums(2..=10, 100, seed);
44 let bytes: Vec<u8> = VarZeroVec::<str>::from(&string_vec).into_bytes();
45 let vzv = VarZeroVec::<str>::parse_bytes(black_box(bytes.as_slice())).unwrap();
46
47 c.bench_function("vzv/overview", |b| {
48 b.iter(|| {
49 black_box(&vzv)
50 .iter()
51 .fold(0, |sum, string| sum + string.chars().count())
52 });
53 });
54
55 {
56 char_count_benches(c);
57 binary_search_benches(c);
58 vzv_precompute_bench(c);
59 }
60
61 #[cfg(feature = "serde")]
62 {
63 serde_benches(c);
64 }
65 }
66
char_count_benches(c: &mut Criterion)67 fn char_count_benches(c: &mut Criterion) {
68 let seed = 2021;
69 let (string_vec, _) = random_alphanums(2..=20, 100, seed);
70 let bytes: Vec<u8> = VarZeroVec::<str>::from(&string_vec).into_bytes();
71 let vzv = VarZeroVec::<str>::parse_bytes(black_box(bytes.as_slice())).unwrap();
72
73 // *** Count chars in vec of 100 strings ***
74 c.bench_function("vzv/char_count/slice", |b| {
75 b.iter(|| {
76 black_box(&string_vec)
77 .iter()
78 .fold(0, |sum, string| sum + string.chars().count())
79 });
80 });
81
82 // *** Count chars in vec of 100 strings ***
83 c.bench_function("vzv/char_count/vzv", |b| {
84 b.iter(|| {
85 black_box(&vzv)
86 .iter()
87 .fold(0, |sum, string| sum + string.chars().count())
88 });
89 });
90 }
91
binary_search_benches(c: &mut Criterion)92 fn binary_search_benches(c: &mut Criterion) {
93 let seed = 2021;
94 let (string_vec, seed) = random_alphanums(2..=20, 500, seed);
95 let (needles, _) = random_alphanums(2..=20, 10, seed);
96 let bytes: Vec<u8> = VarZeroVec::<str>::from(&string_vec).into_bytes();
97 let vzv = VarZeroVec::<str>::parse_bytes(black_box(bytes.as_slice())).unwrap();
98 let single_needle = "lmnop".to_owned();
99
100 // *** Binary search vec of 500 strings 10 times ***
101 c.bench_function("vzv/binary_search/slice", |b| {
102 b.iter(|| {
103 black_box(&needles)
104 .iter()
105 .map(|needle| black_box(&string_vec).binary_search(needle))
106 .filter(|r| r.is_ok())
107 .count()
108 });
109 });
110
111 // *** Binary search vec of 500 strings 10 times ***
112 c.bench_function("vzv/binary_search/vzv", |b| {
113 b.iter(|| {
114 black_box(&needles)
115 .iter()
116 .map(|needle| black_box(&vzv).binary_search(needle))
117 .filter(|r| r.is_ok())
118 .count()
119 });
120 });
121
122 c.bench_function("vzv/binary_search/single/slice", |b| {
123 b.iter(|| black_box(&string_vec).binary_search(black_box(&single_needle)));
124 });
125
126 c.bench_function("vzv/binary_search/single/vzv", |b| {
127 b.iter(|| black_box(&vzv).binary_search(black_box(&single_needle)));
128 });
129 }
130
131 #[cfg(feature = "serde")]
serde_benches(c: &mut Criterion)132 fn serde_benches(c: &mut Criterion) {
133 let seed = 2021;
134 let (string_vec, _) = random_alphanums(2..=20, 100, seed);
135 let bincode_vec = bincode::serialize(&string_vec).unwrap();
136 let vzv: VarZeroVec<str> = VarZeroVec::from(&*string_vec);
137 let bincode_vzv = bincode::serialize(&vzv).unwrap();
138
139 // *** Deserialize vec of 100 strings ***
140 c.bench_function("vzv/deserialize/string/vec_owned", |b| {
141 b.iter(|| bincode::deserialize::<Vec<String>>(black_box(&bincode_vec)));
142 });
143
144 // *** Deserialize vec of 100 strings ***
145 c.bench_function("vzv/deserialize/string/vec_borrowed", |b| {
146 b.iter(|| bincode::deserialize::<Vec<&str>>(black_box(&bincode_vec)));
147 });
148
149 // *** Deserialize vec of 100 strings ***
150 c.bench_function("vzv/deserialize/string/vzv", |b| {
151 b.iter(|| bincode::deserialize::<VarZeroVec<str>>(black_box(&bincode_vzv)));
152 });
153 }
154
155 // Testing differences between operating on slices with precomputed/non-precomputed indexing info
vzv_precompute_bench(c: &mut Criterion)156 fn vzv_precompute_bench(c: &mut Criterion) {
157 let seed = 2021;
158 let (string_vec, seed) = random_alphanums(2..=20, 500, seed);
159 let (needles, _) = random_alphanums(2..=20, 10, seed);
160 let bytes: Vec<u8> = VarZeroVec::<str>::from(&string_vec).into_bytes();
161 let vzv = VarZeroVec::<str>::parse_bytes(black_box(bytes.as_slice())).unwrap();
162 let borrowed = vzv.as_components();
163 let slice = vzv.as_slice();
164 let single_needle = "lmnop";
165
166 c.bench_function("vzv_precompute/get/precomputed", |b| {
167 b.iter(|| black_box(&borrowed).get(100));
168 });
169
170 c.bench_function("vzv_precompute/get/slice", |b| {
171 b.iter(|| black_box(&slice).get(100));
172 });
173
174 c.bench_function("vzv_precompute/search/precomputed", |b| {
175 b.iter(|| black_box(&borrowed).binary_search(single_needle));
176 });
177
178 c.bench_function("vzv_precompute/search/slice", |b| {
179 b.iter(|| black_box(&slice).binary_search(single_needle));
180 });
181
182 c.bench_function("vzv_precompute/search_multi/precomputed", |b| {
183 b.iter(|| {
184 black_box(&needles)
185 .iter()
186 .map(|needle| black_box(&borrowed).binary_search(needle))
187 .filter(|r| r.is_ok())
188 .count()
189 });
190 });
191
192 c.bench_function("vzv_precompute/search_multi/slice", |b| {
193 b.iter(|| {
194 black_box(&needles)
195 .iter()
196 .map(|needle| black_box(&slice).binary_search(needle))
197 .filter(|r| r.is_ok())
198 .count()
199 });
200 });
201 }
202
203 criterion_group!(benches, overview_bench,);
204 criterion_main!(benches);
205