• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 use std::collections::HashMap;
6 
7 use criterion::{black_box, criterion_group, criterion_main, Criterion};
8 
9 use zerovec::maps::ZeroMapKV;
10 use zerovec::vecs::{Index32, VarZeroSlice, VarZeroVec};
11 use zerovec::{ZeroHashMap, ZeroMap};
12 
13 const DATA: [(&str, &str); 16] = [
14     ("ar", "Arabic"),
15     ("bn", "Bangla"),
16     ("ccp", "Chakma"),
17     ("chr", "Cherokee"),
18     ("el", "Greek"),
19     ("en", "English"),
20     ("eo", "Esperanto"),
21     ("es", "Spanish"),
22     ("fr", "French"),
23     ("iu", "Inuktitut"),
24     ("ja", "Japanese"),
25     ("ru", "Russian"),
26     ("sr", "Serbian"),
27     ("th", "Thai"),
28     ("tr", "Turkish"),
29     ("zh", "Chinese"),
30 ];
31 
32 const POSTCARD: [u8; 274] = [
33     98, 16, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 7, 0, 0, 0, 10, 0, 0, 0, 12, 0, 0, 0, 14, 0, 0, 0, 16,
34     0, 0, 0, 18, 0, 0, 0, 20, 0, 0, 0, 22, 0, 0, 0, 24, 0, 0, 0, 26, 0, 0, 0, 28, 0, 0, 0, 30, 0,
35     0, 0, 32, 0, 0, 0, 97, 114, 98, 110, 99, 99, 112, 99, 104, 114, 101, 108, 101, 110, 101, 111,
36     101, 115, 102, 114, 105, 117, 106, 97, 114, 117, 115, 114, 116, 104, 116, 114, 122, 104, 173,
37     1, 16, 0, 0, 0, 6, 0, 0, 0, 12, 0, 0, 0, 18, 0, 0, 0, 26, 0, 0, 0, 31, 0, 0, 0, 38, 0, 0, 0,
38     47, 0, 0, 0, 54, 0, 0, 0, 60, 0, 0, 0, 69, 0, 0, 0, 77, 0, 0, 0, 84, 0, 0, 0, 91, 0, 0, 0, 95,
39     0, 0, 0, 102, 0, 0, 0, 65, 114, 97, 98, 105, 99, 66, 97, 110, 103, 108, 97, 67, 104, 97, 107,
40     109, 97, 67, 104, 101, 114, 111, 107, 101, 101, 71, 114, 101, 101, 107, 69, 110, 103, 108, 105,
41     115, 104, 69, 115, 112, 101, 114, 97, 110, 116, 111, 83, 112, 97, 110, 105, 115, 104, 70, 114,
42     101, 110, 99, 104, 73, 110, 117, 107, 116, 105, 116, 117, 116, 74, 97, 112, 97, 110, 101, 115,
43     101, 82, 117, 115, 115, 105, 97, 110, 83, 101, 114, 98, 105, 97, 110, 84, 104, 97, 105, 84,
44     117, 114, 107, 105, 115, 104, 67, 104, 105, 110, 101, 115, 101,
45 ];
46 
47 const POSTCARD_HASHMAP: [u8; 176] = [
48     16, 2, 114, 117, 7, 82, 117, 115, 115, 105, 97, 110, 3, 99, 99, 112, 6, 67, 104, 97, 107, 109,
49     97, 3, 99, 104, 114, 8, 67, 104, 101, 114, 111, 107, 101, 101, 2, 116, 114, 7, 84, 117, 114,
50     107, 105, 115, 104, 2, 116, 104, 4, 84, 104, 97, 105, 2, 106, 97, 8, 74, 97, 112, 97, 110, 101,
51     115, 101, 2, 101, 115, 7, 83, 112, 97, 110, 105, 115, 104, 2, 101, 111, 9, 69, 115, 112, 101,
52     114, 97, 110, 116, 111, 2, 122, 104, 7, 67, 104, 105, 110, 101, 115, 101, 2, 115, 114, 7, 83,
53     101, 114, 98, 105, 97, 110, 2, 101, 110, 7, 69, 110, 103, 108, 105, 115, 104, 2, 105, 117, 9,
54     73, 110, 117, 107, 116, 105, 116, 117, 116, 2, 102, 114, 6, 70, 114, 101, 110, 99, 104, 2, 98,
55     110, 6, 66, 97, 110, 103, 108, 97, 2, 101, 108, 5, 71, 114, 101, 101, 107, 2, 97, 114, 6, 65,
56     114, 97, 98, 105, 99,
57 ];
58 
59 const POSTCARD_ZEROHASHMAP: [u8; 404] = [
60     128, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
61     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
62     0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
63     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
64     0, 0, 0, 98, 16, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 8, 0, 0, 0, 10, 0, 0, 0, 13, 0,
65     0, 0, 15, 0, 0, 0, 17, 0, 0, 0, 19, 0, 0, 0, 21, 0, 0, 0, 24, 0, 0, 0, 26, 0, 0, 0, 28, 0, 0,
66     0, 30, 0, 0, 0, 32, 0, 0, 0, 115, 114, 101, 111, 116, 114, 97, 114, 105, 117, 99, 99, 112, 102,
67     114, 101, 115, 106, 97, 122, 104, 99, 104, 114, 98, 110, 101, 110, 101, 108, 114, 117, 116,
68     104, 173, 1, 16, 0, 0, 0, 7, 0, 0, 0, 16, 0, 0, 0, 23, 0, 0, 0, 29, 0, 0, 0, 38, 0, 0, 0, 44,
69     0, 0, 0, 50, 0, 0, 0, 57, 0, 0, 0, 65, 0, 0, 0, 72, 0, 0, 0, 80, 0, 0, 0, 86, 0, 0, 0, 93, 0,
70     0, 0, 98, 0, 0, 0, 105, 0, 0, 0, 83, 101, 114, 98, 105, 97, 110, 69, 115, 112, 101, 114, 97,
71     110, 116, 111, 84, 117, 114, 107, 105, 115, 104, 65, 114, 97, 98, 105, 99, 73, 110, 117, 107,
72     116, 105, 116, 117, 116, 67, 104, 97, 107, 109, 97, 70, 114, 101, 110, 99, 104, 83, 112, 97,
73     110, 105, 115, 104, 74, 97, 112, 97, 110, 101, 115, 101, 67, 104, 105, 110, 101, 115, 101, 67,
74     104, 101, 114, 111, 107, 101, 101, 66, 97, 110, 103, 108, 97, 69, 110, 103, 108, 105, 115, 104,
75     71, 114, 101, 101, 107, 82, 117, 115, 115, 105, 97, 110, 84, 104, 97, 105,
76 ];
77 
78 /// Run this function to print new data to the console.
79 /// Requires the optional `serde` Cargo feature.
80 #[allow(dead_code)]
generate_zeromap()81 fn generate_zeromap() {
82     let map = build_zeromap(false);
83     let buf = postcard::to_stdvec(&map).unwrap();
84     println!("{buf:?}");
85 }
86 
87 /// Run this function to print new data to the console.
88 /// Requires the optional `serde` Cargo feature.
89 #[allow(dead_code)]
generate_hashmap()90 fn generate_hashmap() {
91     let map = build_hashmap(false);
92     let buf = postcard::to_stdvec(&map).unwrap();
93     println!("{buf:?}");
94 }
95 
96 /// Run this function to print new data to the console.
97 /// Requires the optional `serde` Cargo feature.
98 #[allow(dead_code)]
generate_zerohashmap()99 fn generate_zerohashmap() {
100     let map = build_zerohashmap(false);
101     let buf = postcard::to_stdvec(&map).unwrap();
102     println!("{buf:?}");
103 }
104 
overview_bench(c: &mut Criterion)105 fn overview_bench(c: &mut Criterion) {
106     bench_zeromap(c);
107     bench_hashmap(c);
108     bench_zerohashmap(c);
109 }
110 
bench_zeromap(c: &mut Criterion)111 fn bench_zeromap(c: &mut Criterion) {
112     // Uncomment the following line to re-generate the const data.
113     // generate_hashmap();
114 
115     bench_deserialize(c);
116     bench_deserialize_large(c);
117     bench_lookup(c);
118     bench_lookup_large(c);
119 }
120 
build_zeromap(large: bool) -> ZeroMap<'static, Index32Str, Index32Str>121 fn build_zeromap(large: bool) -> ZeroMap<'static, Index32Str, Index32Str> {
122     // TODO(#2826): This should use ZeroMap::from_iter, however that currently takes
123     // *minutes*, whereas this code runs in milliseconds
124     let mut keys = Vec::new();
125     let mut values = Vec::new();
126     let mut data = DATA.to_vec();
127     data.sort();
128     for &(key, value) in data.iter() {
129         if large {
130             for n in 0..8192 {
131                 keys.push(format!("{key}{n:04}"));
132                 values.push(indexify(value));
133             }
134         } else {
135             keys.push(key.to_owned());
136             values.push(indexify(value));
137         }
138     }
139 
140     let keys = keys.iter().map(|s| indexify(s)).collect::<Vec<_>>();
141     // keys are sorted by construction
142     unsafe { ZeroMap::from_parts_unchecked(VarZeroVec::from(&keys), VarZeroVec::from(&values)) }
143 }
144 
bench_deserialize(c: &mut Criterion)145 fn bench_deserialize(c: &mut Criterion) {
146     c.bench_function("zeromap/deserialize/small", |b| {
147         b.iter(|| {
148             let map: ZeroMap<Index32Str, Index32Str> =
149                 postcard::from_bytes(black_box(&POSTCARD)).unwrap();
150             assert_eq!(map.get(indexify("iu")).map(|x| &x.0), Some("Inuktitut"));
151         })
152     });
153 }
154 
bench_deserialize_large(c: &mut Criterion)155 fn bench_deserialize_large(c: &mut Criterion) {
156     let buf = large_zeromap_postcard_bytes();
157     c.bench_function("zeromap/deserialize/large", |b| {
158         b.iter(|| {
159             let map: ZeroMap<Index32Str, Index32Str> =
160                 postcard::from_bytes(black_box(&buf)).unwrap();
161             assert_eq!(map.get(indexify("iu3333")).map(|x| &x.0), Some("Inuktitut"));
162         })
163     });
164 }
165 
bench_lookup(c: &mut Criterion)166 fn bench_lookup(c: &mut Criterion) {
167     let map: ZeroMap<Index32Str, Index32Str> = postcard::from_bytes(black_box(&POSTCARD)).unwrap();
168     c.bench_function("zeromap/lookup/small", |b| {
169         b.iter(|| {
170             assert_eq!(
171                 map.get(black_box(indexify("iu"))).map(|x| &x.0),
172                 Some("Inuktitut")
173             );
174             assert_eq!(map.get(black_box(indexify("zz"))).map(|x| &x.0), None);
175         });
176     });
177 }
178 
bench_lookup_large(c: &mut Criterion)179 fn bench_lookup_large(c: &mut Criterion) {
180     let buf = large_zeromap_postcard_bytes();
181     let map: ZeroMap<Index32Str, Index32Str> = postcard::from_bytes(&buf).unwrap();
182     c.bench_function("zeromap/lookup/large", |b| {
183         b.iter(|| {
184             assert_eq!(
185                 map.get(black_box(indexify("iu3333"))).map(|x| &x.0),
186                 Some("Inuktitut")
187             );
188             assert_eq!(map.get(black_box(indexify("zz"))).map(|x| &x.0), None);
189         });
190     });
191 }
192 
large_zeromap_postcard_bytes() -> Vec<u8>193 fn large_zeromap_postcard_bytes() -> Vec<u8> {
194     postcard::to_stdvec(&build_zeromap(true)).unwrap()
195 }
196 
bench_hashmap(c: &mut Criterion)197 fn bench_hashmap(c: &mut Criterion) {
198     // Uncomment the following line to re-generate the const data.
199     // generate_hashmap();
200 
201     bench_deserialize_hashmap(c);
202     bench_deserialize_large_hashmap(c);
203     bench_lookup_hashmap(c);
204     bench_lookup_large_hashmap(c);
205 }
206 
build_hashmap(large: bool) -> HashMap<String, String>207 fn build_hashmap(large: bool) -> HashMap<String, String> {
208     let mut map: HashMap<String, String> = HashMap::new();
209     for &(key, value) in DATA.iter() {
210         if large {
211             for n in 0..8192 {
212                 map.insert(format!("{key}{n}"), value.to_owned());
213             }
214         } else {
215             map.insert(key.to_owned(), value.to_owned());
216         }
217     }
218     map
219 }
220 
bench_deserialize_hashmap(c: &mut Criterion)221 fn bench_deserialize_hashmap(c: &mut Criterion) {
222     c.bench_function("zeromap/deserialize/small/hashmap", |b| {
223         b.iter(|| {
224             let map: HashMap<String, String> =
225                 postcard::from_bytes(black_box(&POSTCARD_HASHMAP)).unwrap();
226             assert_eq!(map.get("iu"), Some(&"Inuktitut".to_owned()));
227         })
228     });
229 }
230 
bench_deserialize_large_hashmap(c: &mut Criterion)231 fn bench_deserialize_large_hashmap(c: &mut Criterion) {
232     let buf = large_hashmap_postcard_bytes();
233     c.bench_function("zeromap/deserialize/large/hashmap", |b| {
234         b.iter(|| {
235             let map: HashMap<String, String> = postcard::from_bytes(black_box(&buf)).unwrap();
236             assert_eq!(map.get("iu3333"), Some(&"Inuktitut".to_owned()));
237         })
238     });
239 }
240 
bench_lookup_hashmap(c: &mut Criterion)241 fn bench_lookup_hashmap(c: &mut Criterion) {
242     let map: HashMap<String, String> = postcard::from_bytes(black_box(&POSTCARD_HASHMAP)).unwrap();
243     c.bench_function("zeromap/lookup/small/hashmap", |b| {
244         b.iter(|| {
245             assert_eq!(map.get(black_box("iu")), Some(&"Inuktitut".to_owned()));
246             assert_eq!(map.get(black_box("zz")), None);
247         });
248     });
249 }
250 
bench_lookup_large_hashmap(c: &mut Criterion)251 fn bench_lookup_large_hashmap(c: &mut Criterion) {
252     let buf = large_hashmap_postcard_bytes();
253     let map: HashMap<String, String> = postcard::from_bytes(&buf).unwrap();
254     c.bench_function("zeromap/lookup/large/hashmap", |b| {
255         b.iter(|| {
256             assert_eq!(map.get(black_box("iu3333")), Some(&"Inuktitut".to_owned()));
257             assert_eq!(map.get(black_box("zz")), None);
258         });
259     });
260 }
261 
large_hashmap_postcard_bytes() -> Vec<u8>262 fn large_hashmap_postcard_bytes() -> Vec<u8> {
263     postcard::to_stdvec(&build_hashmap(true)).unwrap()
264 }
265 
bench_zerohashmap(c: &mut Criterion)266 fn bench_zerohashmap(c: &mut Criterion) {
267     // Uncomment the following line to re-generate the const data.
268     // generate_zerohashmap();
269 
270     bench_deserialize_zerohashmap(c);
271     bench_deserialize_large_zerohashmap(c);
272     bench_zerohashmap_lookup(c);
273     bench_zerohashmap_lookup_large(c);
274 }
275 
build_zerohashmap(large: bool) -> ZeroHashMap<'static, Index32Str, Index32Str>276 fn build_zerohashmap(large: bool) -> ZeroHashMap<'static, Index32Str, Index32Str> {
277     let mut kv = Vec::new();
278 
279     for (key, value) in DATA.iter() {
280         if large {
281             for n in 0..512 {
282                 kv.push((format!("{key}{n}"), indexify(value)));
283             }
284         } else {
285             kv.push((key.to_string(), indexify(value)));
286         }
287     }
288 
289     ZeroHashMap::from_iter(kv.iter().map(|kv| (indexify(&kv.0), kv.1)))
290 }
291 
bench_deserialize_zerohashmap(c: &mut Criterion)292 fn bench_deserialize_zerohashmap(c: &mut Criterion) {
293     c.bench_function("zerohashmap/deserialize/small", |b| {
294         b.iter(|| {
295             let map: ZeroHashMap<Index32Str, Index32Str> =
296                 postcard::from_bytes(black_box(&POSTCARD_ZEROHASHMAP)).unwrap();
297             assert_eq!(map.get(indexify("iu")).map(|x| &x.0), Some("Inuktitut"));
298         })
299     });
300 }
301 
bench_deserialize_large_zerohashmap(c: &mut Criterion)302 fn bench_deserialize_large_zerohashmap(c: &mut Criterion) {
303     let buf = large_zerohashmap_postcard_bytes();
304     c.bench_function("zerohashmap/deserialize/large", |b| {
305         b.iter(|| {
306             let map: ZeroHashMap<Index32Str, Index32Str> =
307                 postcard::from_bytes(black_box(&buf)).unwrap();
308             assert_eq!(map.get(indexify("iu333")).map(|x| &x.0), Some("Inuktitut"));
309         })
310     });
311 }
312 
bench_zerohashmap_lookup(c: &mut Criterion)313 fn bench_zerohashmap_lookup(c: &mut Criterion) {
314     let zero_hashmap: ZeroHashMap<Index32Str, Index32Str> =
315         postcard::from_bytes(black_box(&POSTCARD_ZEROHASHMAP)).unwrap();
316 
317     c.bench_function("zerohashmap/lookup/small", |b| {
318         b.iter(|| {
319             assert_eq!(
320                 zero_hashmap.get(black_box(indexify("iu"))).map(|x| &x.0),
321                 Some("Inuktitut")
322             );
323             assert_eq!(
324                 zero_hashmap.get(black_box(indexify("zz"))).map(|x| &x.0),
325                 None
326             );
327         });
328     });
329 }
330 
bench_zerohashmap_lookup_large(c: &mut Criterion)331 fn bench_zerohashmap_lookup_large(c: &mut Criterion) {
332     let buf = large_zerohashmap_postcard_bytes();
333     let zero_hashmap: ZeroHashMap<Index32Str, Index32Str> = postcard::from_bytes(&buf).unwrap();
334 
335     c.bench_function("zerohashmap/lookup/large", |b| {
336         b.iter(|| {
337             assert_eq!(
338                 zero_hashmap.get(black_box(indexify("iu333"))).map(|x| &x.0),
339                 Some("Inuktitut")
340             );
341             assert_eq!(
342                 zero_hashmap.get(black_box(indexify("zz"))).map(|x| &x.0),
343                 None
344             );
345         });
346     });
347 }
348 
large_zerohashmap_postcard_bytes() -> Vec<u8>349 fn large_zerohashmap_postcard_bytes() -> Vec<u8> {
350     postcard::to_stdvec(&build_zerohashmap(true)).unwrap()
351 }
352 
353 criterion_group!(benches, overview_bench);
354 criterion_main!(benches);
355 
356 /// This type lets us use a u32-index-format VarZeroVec with the ZeroMap.
357 ///
358 /// Eventually we will have a FormatSelector type that lets us do `ZeroMap<FormatSelector<K, Index32>, V>`
359 /// (https://github.com/unicode-org/icu4x/issues/2312)
360 ///
361 /// ,  isn't actually important; it's just more convenient to use make_varule to get the
362 /// full suite of traits instead of `#[derive(VarULE)]`. (With `#[derive(VarULE)]` we would have to manually
363 /// define a Serialize implementation, and that would be gnarly)
364 /// https://github.com/unicode-org/icu4x/issues/2310 tracks being able to do this with derive(ULE)
365 #[zerovec::make_varule(Index32Str)]
366 #[zerovec::skip_derive(ZeroMapKV)]
367 #[derive(Eq, PartialEq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)]
368 #[zerovec::derive(Serialize, Deserialize, Hash)]
369 pub(crate) struct Index32StrBorrowed<'a>(#[serde(borrow)] pub &'a str);
370 
371 impl<'a> ZeroMapKV<'a> for Index32Str {
372     type Container = VarZeroVec<'a, Index32Str, Index32>;
373     type Slice = VarZeroSlice<Index32Str, Index32>;
374     type GetType = Index32Str;
375     type OwnedType = Box<Index32Str>;
376 }
377 
378 #[inline]
indexify(s: &str) -> &Index32Str379 fn indexify(s: &str) -> &Index32Str {
380     unsafe { &*(s as *const str as *const Index32Str) }
381 }
382