1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5 use std::collections::HashMap;
6
7 use criterion::{black_box, criterion_group, criterion_main, Criterion};
8
9 use zerovec::maps::ZeroMapKV;
10 use zerovec::vecs::{Index32, VarZeroSlice, VarZeroVec};
11 use zerovec::{ZeroHashMap, ZeroMap};
12
13 const DATA: [(&str, &str); 16] = [
14 ("ar", "Arabic"),
15 ("bn", "Bangla"),
16 ("ccp", "Chakma"),
17 ("chr", "Cherokee"),
18 ("el", "Greek"),
19 ("en", "English"),
20 ("eo", "Esperanto"),
21 ("es", "Spanish"),
22 ("fr", "French"),
23 ("iu", "Inuktitut"),
24 ("ja", "Japanese"),
25 ("ru", "Russian"),
26 ("sr", "Serbian"),
27 ("th", "Thai"),
28 ("tr", "Turkish"),
29 ("zh", "Chinese"),
30 ];
31
32 const POSTCARD: [u8; 274] = [
33 98, 16, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 7, 0, 0, 0, 10, 0, 0, 0, 12, 0, 0, 0, 14, 0, 0, 0, 16,
34 0, 0, 0, 18, 0, 0, 0, 20, 0, 0, 0, 22, 0, 0, 0, 24, 0, 0, 0, 26, 0, 0, 0, 28, 0, 0, 0, 30, 0,
35 0, 0, 32, 0, 0, 0, 97, 114, 98, 110, 99, 99, 112, 99, 104, 114, 101, 108, 101, 110, 101, 111,
36 101, 115, 102, 114, 105, 117, 106, 97, 114, 117, 115, 114, 116, 104, 116, 114, 122, 104, 173,
37 1, 16, 0, 0, 0, 6, 0, 0, 0, 12, 0, 0, 0, 18, 0, 0, 0, 26, 0, 0, 0, 31, 0, 0, 0, 38, 0, 0, 0,
38 47, 0, 0, 0, 54, 0, 0, 0, 60, 0, 0, 0, 69, 0, 0, 0, 77, 0, 0, 0, 84, 0, 0, 0, 91, 0, 0, 0, 95,
39 0, 0, 0, 102, 0, 0, 0, 65, 114, 97, 98, 105, 99, 66, 97, 110, 103, 108, 97, 67, 104, 97, 107,
40 109, 97, 67, 104, 101, 114, 111, 107, 101, 101, 71, 114, 101, 101, 107, 69, 110, 103, 108, 105,
41 115, 104, 69, 115, 112, 101, 114, 97, 110, 116, 111, 83, 112, 97, 110, 105, 115, 104, 70, 114,
42 101, 110, 99, 104, 73, 110, 117, 107, 116, 105, 116, 117, 116, 74, 97, 112, 97, 110, 101, 115,
43 101, 82, 117, 115, 115, 105, 97, 110, 83, 101, 114, 98, 105, 97, 110, 84, 104, 97, 105, 84,
44 117, 114, 107, 105, 115, 104, 67, 104, 105, 110, 101, 115, 101,
45 ];
46
47 const POSTCARD_HASHMAP: [u8; 176] = [
48 16, 2, 114, 117, 7, 82, 117, 115, 115, 105, 97, 110, 3, 99, 99, 112, 6, 67, 104, 97, 107, 109,
49 97, 3, 99, 104, 114, 8, 67, 104, 101, 114, 111, 107, 101, 101, 2, 116, 114, 7, 84, 117, 114,
50 107, 105, 115, 104, 2, 116, 104, 4, 84, 104, 97, 105, 2, 106, 97, 8, 74, 97, 112, 97, 110, 101,
51 115, 101, 2, 101, 115, 7, 83, 112, 97, 110, 105, 115, 104, 2, 101, 111, 9, 69, 115, 112, 101,
52 114, 97, 110, 116, 111, 2, 122, 104, 7, 67, 104, 105, 110, 101, 115, 101, 2, 115, 114, 7, 83,
53 101, 114, 98, 105, 97, 110, 2, 101, 110, 7, 69, 110, 103, 108, 105, 115, 104, 2, 105, 117, 9,
54 73, 110, 117, 107, 116, 105, 116, 117, 116, 2, 102, 114, 6, 70, 114, 101, 110, 99, 104, 2, 98,
55 110, 6, 66, 97, 110, 103, 108, 97, 2, 101, 108, 5, 71, 114, 101, 101, 107, 2, 97, 114, 6, 65,
56 114, 97, 98, 105, 99,
57 ];
58
59 const POSTCARD_ZEROHASHMAP: [u8; 404] = [
60 128, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
61 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
62 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
63 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
64 0, 0, 0, 98, 16, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 8, 0, 0, 0, 10, 0, 0, 0, 13, 0,
65 0, 0, 15, 0, 0, 0, 17, 0, 0, 0, 19, 0, 0, 0, 21, 0, 0, 0, 24, 0, 0, 0, 26, 0, 0, 0, 28, 0, 0,
66 0, 30, 0, 0, 0, 32, 0, 0, 0, 115, 114, 101, 111, 116, 114, 97, 114, 105, 117, 99, 99, 112, 102,
67 114, 101, 115, 106, 97, 122, 104, 99, 104, 114, 98, 110, 101, 110, 101, 108, 114, 117, 116,
68 104, 173, 1, 16, 0, 0, 0, 7, 0, 0, 0, 16, 0, 0, 0, 23, 0, 0, 0, 29, 0, 0, 0, 38, 0, 0, 0, 44,
69 0, 0, 0, 50, 0, 0, 0, 57, 0, 0, 0, 65, 0, 0, 0, 72, 0, 0, 0, 80, 0, 0, 0, 86, 0, 0, 0, 93, 0,
70 0, 0, 98, 0, 0, 0, 105, 0, 0, 0, 83, 101, 114, 98, 105, 97, 110, 69, 115, 112, 101, 114, 97,
71 110, 116, 111, 84, 117, 114, 107, 105, 115, 104, 65, 114, 97, 98, 105, 99, 73, 110, 117, 107,
72 116, 105, 116, 117, 116, 67, 104, 97, 107, 109, 97, 70, 114, 101, 110, 99, 104, 83, 112, 97,
73 110, 105, 115, 104, 74, 97, 112, 97, 110, 101, 115, 101, 67, 104, 105, 110, 101, 115, 101, 67,
74 104, 101, 114, 111, 107, 101, 101, 66, 97, 110, 103, 108, 97, 69, 110, 103, 108, 105, 115, 104,
75 71, 114, 101, 101, 107, 82, 117, 115, 115, 105, 97, 110, 84, 104, 97, 105,
76 ];
77
78 /// Run this function to print new data to the console.
79 /// Requires the optional `serde` Cargo feature.
80 #[allow(dead_code)]
generate_zeromap()81 fn generate_zeromap() {
82 let map = build_zeromap(false);
83 let buf = postcard::to_stdvec(&map).unwrap();
84 println!("{buf:?}");
85 }
86
87 /// Run this function to print new data to the console.
88 /// Requires the optional `serde` Cargo feature.
89 #[allow(dead_code)]
generate_hashmap()90 fn generate_hashmap() {
91 let map = build_hashmap(false);
92 let buf = postcard::to_stdvec(&map).unwrap();
93 println!("{buf:?}");
94 }
95
96 /// Run this function to print new data to the console.
97 /// Requires the optional `serde` Cargo feature.
98 #[allow(dead_code)]
generate_zerohashmap()99 fn generate_zerohashmap() {
100 let map = build_zerohashmap(false);
101 let buf = postcard::to_stdvec(&map).unwrap();
102 println!("{buf:?}");
103 }
104
overview_bench(c: &mut Criterion)105 fn overview_bench(c: &mut Criterion) {
106 bench_zeromap(c);
107 bench_hashmap(c);
108 bench_zerohashmap(c);
109 }
110
bench_zeromap(c: &mut Criterion)111 fn bench_zeromap(c: &mut Criterion) {
112 // Uncomment the following line to re-generate the const data.
113 // generate_hashmap();
114
115 bench_deserialize(c);
116 bench_deserialize_large(c);
117 bench_lookup(c);
118 bench_lookup_large(c);
119 }
120
build_zeromap(large: bool) -> ZeroMap<'static, Index32Str, Index32Str>121 fn build_zeromap(large: bool) -> ZeroMap<'static, Index32Str, Index32Str> {
122 // TODO(#2826): This should use ZeroMap::from_iter, however that currently takes
123 // *minutes*, whereas this code runs in milliseconds
124 let mut keys = Vec::new();
125 let mut values = Vec::new();
126 let mut data = DATA.to_vec();
127 data.sort();
128 for &(key, value) in data.iter() {
129 if large {
130 for n in 0..8192 {
131 keys.push(format!("{key}{n:04}"));
132 values.push(indexify(value));
133 }
134 } else {
135 keys.push(key.to_owned());
136 values.push(indexify(value));
137 }
138 }
139
140 let keys = keys.iter().map(|s| indexify(s)).collect::<Vec<_>>();
141 // keys are sorted by construction
142 unsafe { ZeroMap::from_parts_unchecked(VarZeroVec::from(&keys), VarZeroVec::from(&values)) }
143 }
144
bench_deserialize(c: &mut Criterion)145 fn bench_deserialize(c: &mut Criterion) {
146 c.bench_function("zeromap/deserialize/small", |b| {
147 b.iter(|| {
148 let map: ZeroMap<Index32Str, Index32Str> =
149 postcard::from_bytes(black_box(&POSTCARD)).unwrap();
150 assert_eq!(map.get(indexify("iu")).map(|x| &x.0), Some("Inuktitut"));
151 })
152 });
153 }
154
bench_deserialize_large(c: &mut Criterion)155 fn bench_deserialize_large(c: &mut Criterion) {
156 let buf = large_zeromap_postcard_bytes();
157 c.bench_function("zeromap/deserialize/large", |b| {
158 b.iter(|| {
159 let map: ZeroMap<Index32Str, Index32Str> =
160 postcard::from_bytes(black_box(&buf)).unwrap();
161 assert_eq!(map.get(indexify("iu3333")).map(|x| &x.0), Some("Inuktitut"));
162 })
163 });
164 }
165
bench_lookup(c: &mut Criterion)166 fn bench_lookup(c: &mut Criterion) {
167 let map: ZeroMap<Index32Str, Index32Str> = postcard::from_bytes(black_box(&POSTCARD)).unwrap();
168 c.bench_function("zeromap/lookup/small", |b| {
169 b.iter(|| {
170 assert_eq!(
171 map.get(black_box(indexify("iu"))).map(|x| &x.0),
172 Some("Inuktitut")
173 );
174 assert_eq!(map.get(black_box(indexify("zz"))).map(|x| &x.0), None);
175 });
176 });
177 }
178
bench_lookup_large(c: &mut Criterion)179 fn bench_lookup_large(c: &mut Criterion) {
180 let buf = large_zeromap_postcard_bytes();
181 let map: ZeroMap<Index32Str, Index32Str> = postcard::from_bytes(&buf).unwrap();
182 c.bench_function("zeromap/lookup/large", |b| {
183 b.iter(|| {
184 assert_eq!(
185 map.get(black_box(indexify("iu3333"))).map(|x| &x.0),
186 Some("Inuktitut")
187 );
188 assert_eq!(map.get(black_box(indexify("zz"))).map(|x| &x.0), None);
189 });
190 });
191 }
192
large_zeromap_postcard_bytes() -> Vec<u8>193 fn large_zeromap_postcard_bytes() -> Vec<u8> {
194 postcard::to_stdvec(&build_zeromap(true)).unwrap()
195 }
196
bench_hashmap(c: &mut Criterion)197 fn bench_hashmap(c: &mut Criterion) {
198 // Uncomment the following line to re-generate the const data.
199 // generate_hashmap();
200
201 bench_deserialize_hashmap(c);
202 bench_deserialize_large_hashmap(c);
203 bench_lookup_hashmap(c);
204 bench_lookup_large_hashmap(c);
205 }
206
build_hashmap(large: bool) -> HashMap<String, String>207 fn build_hashmap(large: bool) -> HashMap<String, String> {
208 let mut map: HashMap<String, String> = HashMap::new();
209 for &(key, value) in DATA.iter() {
210 if large {
211 for n in 0..8192 {
212 map.insert(format!("{key}{n}"), value.to_owned());
213 }
214 } else {
215 map.insert(key.to_owned(), value.to_owned());
216 }
217 }
218 map
219 }
220
bench_deserialize_hashmap(c: &mut Criterion)221 fn bench_deserialize_hashmap(c: &mut Criterion) {
222 c.bench_function("zeromap/deserialize/small/hashmap", |b| {
223 b.iter(|| {
224 let map: HashMap<String, String> =
225 postcard::from_bytes(black_box(&POSTCARD_HASHMAP)).unwrap();
226 assert_eq!(map.get("iu"), Some(&"Inuktitut".to_owned()));
227 })
228 });
229 }
230
bench_deserialize_large_hashmap(c: &mut Criterion)231 fn bench_deserialize_large_hashmap(c: &mut Criterion) {
232 let buf = large_hashmap_postcard_bytes();
233 c.bench_function("zeromap/deserialize/large/hashmap", |b| {
234 b.iter(|| {
235 let map: HashMap<String, String> = postcard::from_bytes(black_box(&buf)).unwrap();
236 assert_eq!(map.get("iu3333"), Some(&"Inuktitut".to_owned()));
237 })
238 });
239 }
240
bench_lookup_hashmap(c: &mut Criterion)241 fn bench_lookup_hashmap(c: &mut Criterion) {
242 let map: HashMap<String, String> = postcard::from_bytes(black_box(&POSTCARD_HASHMAP)).unwrap();
243 c.bench_function("zeromap/lookup/small/hashmap", |b| {
244 b.iter(|| {
245 assert_eq!(map.get(black_box("iu")), Some(&"Inuktitut".to_owned()));
246 assert_eq!(map.get(black_box("zz")), None);
247 });
248 });
249 }
250
bench_lookup_large_hashmap(c: &mut Criterion)251 fn bench_lookup_large_hashmap(c: &mut Criterion) {
252 let buf = large_hashmap_postcard_bytes();
253 let map: HashMap<String, String> = postcard::from_bytes(&buf).unwrap();
254 c.bench_function("zeromap/lookup/large/hashmap", |b| {
255 b.iter(|| {
256 assert_eq!(map.get(black_box("iu3333")), Some(&"Inuktitut".to_owned()));
257 assert_eq!(map.get(black_box("zz")), None);
258 });
259 });
260 }
261
large_hashmap_postcard_bytes() -> Vec<u8>262 fn large_hashmap_postcard_bytes() -> Vec<u8> {
263 postcard::to_stdvec(&build_hashmap(true)).unwrap()
264 }
265
bench_zerohashmap(c: &mut Criterion)266 fn bench_zerohashmap(c: &mut Criterion) {
267 // Uncomment the following line to re-generate the const data.
268 // generate_zerohashmap();
269
270 bench_deserialize_zerohashmap(c);
271 bench_deserialize_large_zerohashmap(c);
272 bench_zerohashmap_lookup(c);
273 bench_zerohashmap_lookup_large(c);
274 }
275
build_zerohashmap(large: bool) -> ZeroHashMap<'static, Index32Str, Index32Str>276 fn build_zerohashmap(large: bool) -> ZeroHashMap<'static, Index32Str, Index32Str> {
277 let mut kv = Vec::new();
278
279 for (key, value) in DATA.iter() {
280 if large {
281 for n in 0..512 {
282 kv.push((format!("{key}{n}"), indexify(value)));
283 }
284 } else {
285 kv.push((key.to_string(), indexify(value)));
286 }
287 }
288
289 ZeroHashMap::from_iter(kv.iter().map(|kv| (indexify(&kv.0), kv.1)))
290 }
291
bench_deserialize_zerohashmap(c: &mut Criterion)292 fn bench_deserialize_zerohashmap(c: &mut Criterion) {
293 c.bench_function("zerohashmap/deserialize/small", |b| {
294 b.iter(|| {
295 let map: ZeroHashMap<Index32Str, Index32Str> =
296 postcard::from_bytes(black_box(&POSTCARD_ZEROHASHMAP)).unwrap();
297 assert_eq!(map.get(indexify("iu")).map(|x| &x.0), Some("Inuktitut"));
298 })
299 });
300 }
301
bench_deserialize_large_zerohashmap(c: &mut Criterion)302 fn bench_deserialize_large_zerohashmap(c: &mut Criterion) {
303 let buf = large_zerohashmap_postcard_bytes();
304 c.bench_function("zerohashmap/deserialize/large", |b| {
305 b.iter(|| {
306 let map: ZeroHashMap<Index32Str, Index32Str> =
307 postcard::from_bytes(black_box(&buf)).unwrap();
308 assert_eq!(map.get(indexify("iu333")).map(|x| &x.0), Some("Inuktitut"));
309 })
310 });
311 }
312
bench_zerohashmap_lookup(c: &mut Criterion)313 fn bench_zerohashmap_lookup(c: &mut Criterion) {
314 let zero_hashmap: ZeroHashMap<Index32Str, Index32Str> =
315 postcard::from_bytes(black_box(&POSTCARD_ZEROHASHMAP)).unwrap();
316
317 c.bench_function("zerohashmap/lookup/small", |b| {
318 b.iter(|| {
319 assert_eq!(
320 zero_hashmap.get(black_box(indexify("iu"))).map(|x| &x.0),
321 Some("Inuktitut")
322 );
323 assert_eq!(
324 zero_hashmap.get(black_box(indexify("zz"))).map(|x| &x.0),
325 None
326 );
327 });
328 });
329 }
330
bench_zerohashmap_lookup_large(c: &mut Criterion)331 fn bench_zerohashmap_lookup_large(c: &mut Criterion) {
332 let buf = large_zerohashmap_postcard_bytes();
333 let zero_hashmap: ZeroHashMap<Index32Str, Index32Str> = postcard::from_bytes(&buf).unwrap();
334
335 c.bench_function("zerohashmap/lookup/large", |b| {
336 b.iter(|| {
337 assert_eq!(
338 zero_hashmap.get(black_box(indexify("iu333"))).map(|x| &x.0),
339 Some("Inuktitut")
340 );
341 assert_eq!(
342 zero_hashmap.get(black_box(indexify("zz"))).map(|x| &x.0),
343 None
344 );
345 });
346 });
347 }
348
large_zerohashmap_postcard_bytes() -> Vec<u8>349 fn large_zerohashmap_postcard_bytes() -> Vec<u8> {
350 postcard::to_stdvec(&build_zerohashmap(true)).unwrap()
351 }
352
353 criterion_group!(benches, overview_bench);
354 criterion_main!(benches);
355
356 /// This type lets us use a u32-index-format VarZeroVec with the ZeroMap.
357 ///
358 /// Eventually we will have a FormatSelector type that lets us do `ZeroMap<FormatSelector<K, Index32>, V>`
359 /// (https://github.com/unicode-org/icu4x/issues/2312)
360 ///
361 /// , isn't actually important; it's just more convenient to use make_varule to get the
362 /// full suite of traits instead of `#[derive(VarULE)]`. (With `#[derive(VarULE)]` we would have to manually
363 /// define a Serialize implementation, and that would be gnarly)
364 /// https://github.com/unicode-org/icu4x/issues/2310 tracks being able to do this with derive(ULE)
365 #[zerovec::make_varule(Index32Str)]
366 #[zerovec::skip_derive(ZeroMapKV)]
367 #[derive(Eq, PartialEq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)]
368 #[zerovec::derive(Serialize, Deserialize, Hash)]
369 pub(crate) struct Index32StrBorrowed<'a>(#[serde(borrow)] pub &'a str);
370
371 impl<'a> ZeroMapKV<'a> for Index32Str {
372 type Container = VarZeroVec<'a, Index32Str, Index32>;
373 type Slice = VarZeroSlice<Index32Str, Index32>;
374 type GetType = Index32Str;
375 type OwnedType = Box<Index32Str>;
376 }
377
378 #[inline]
indexify(s: &str) -> &Index32Str379 fn indexify(s: &str) -> &Index32Str {
380 unsafe { &*(s as *const str as *const Index32Str) }
381 }
382