1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5 use criterion::{black_box, criterion_group, criterion_main, Criterion};
6
7 use icu_collections::codepointtrie::CodePointTrie;
8
9 #[path = "tries/mod.rs"]
10 mod tries;
11
/// Sample strings, one per language code, each picked to land in a different
/// region of the code point space.
mod sample_str_lng {
    // "eng" is all ASCII
    pub const ENG: &str = "Universal Declaration of Human Rights";
    // "pcd" has ASCII mixed with low-BMP code points
    pub const PCD: &str = "Dèclaråcion dès dreûts d' l'ome po tos lès payîs dè monde";
    // "ukr" uses low-BMP code points (less than U+1000)
    pub const UKR: &str = "ЗАГАЛЬНА ДЕКЛАРАЦІЯ ПРАВ ЛЮДИНІ";
    // "yue" uses high-BMP code points (greater than U+1000), so it benefits from fast mode
    pub const YUE: &str = "世界人权宣言";
    // "ccp" exercises supplementary code points (above U+FFFF). The letters come from the
    // Chakma block (U+11100..=U+1114F) and are spelled as `\u{…}` escapes so the literal
    // cannot silently degrade to whitespace when the file passes through tooling that
    // drops characters outside the BMP.
    pub const CCP: &str = "\u{1111F}\u{1111A}\u{1112C}\u{1112D}\u{11103}\u{11107}\u{11134}\
                           \u{11107}\u{11125}\u{11127}\u{11101}\u{11122}\u{11134} \
                           \u{1111D}\u{1112C}\u{11107}\u{11134}\u{11105}\u{1111A}\u{1112E}\
                           \u{11122}\u{11134}";
}
24
// Space-separated words covering ASCII/low-BMP Latin, Cyrillic, high-BMP CJK and, last,
// supplementary-plane Chakma (U+11100..=U+1114F). The Chakma word is written with `\u{…}`
// escapes so it cannot be lost to tooling that strips characters outside the BMP, which
// would leave only a dangling trailing space.
const SAMPLE_STRING_MIXED: &str = "Dèclaråcion ЗАГАЛЬНА 世界人权宣言 \
    \u{1111F}\u{1111A}\u{1112C}\u{1112D}\u{11103}\u{11107}\u{11134}\
    \u{11107}\u{11125}\u{11127}\u{11101}\u{11122}\u{11134}";
26
/// Builds a string of 100 code points by repeating the `char`s of `sample_str`
/// from the start as often as needed and stopping after the hundredth one.
///
/// An empty `sample_str` offers nothing to repeat, so the result is empty as well.
fn one_hundred_code_points(sample_str: &str) -> String {
    const TARGET_LEN: usize = 100;

    let mut repeated = String::new();
    repeated.extend(sample_str.chars().cycle().take(TARGET_LEN));
    repeated
}
31
get_trie_small() -> CodePointTrie<'static, u8>32 fn get_trie_small() -> CodePointTrie<'static, u8> {
33 CodePointTrie::try_new(
34 tries::gc_small::HEADER,
35 tries::gc_small::INDEX,
36 tries::gc_small::DATA,
37 )
38 .unwrap()
39 }
40
get_trie_fast() -> CodePointTrie<'static, u8>41 fn get_trie_fast() -> CodePointTrie<'static, u8> {
42 CodePointTrie::try_new(
43 tries::gc_fast::HEADER,
44 tries::gc_fast::INDEX,
45 tries::gc_fast::DATA,
46 )
47 .unwrap()
48 }
49
overview_bench(c: &mut Criterion)50 fn overview_bench(c: &mut Criterion) {
51 let s = one_hundred_code_points(SAMPLE_STRING_MIXED);
52 let cpt_small = get_trie_small();
53
54 c.bench_function("cpt/overview", |b| {
55 b.iter(|| {
56 black_box(&s)
57 .chars()
58 .map(|c| black_box(&cpt_small).get32(c as u32))
59 .reduce(|a, b| a.wrapping_add(b))
60 });
61 });
62
63 c.bench_function("cpt/get_range", |b| {
64 b.iter(|| {
65 black_box(&s)
66 .chars()
67 .map(|c| black_box(&cpt_small).get_range(c as u32).unwrap())
68 .fold(0u32, |acc, ele| {
69 acc.wrapping_add(ele.range.end() - ele.range.start() + ele.value as u32)
70 })
71 });
72 });
73
74 {
75 let cpt_fast = get_trie_fast();
76 lang_bench(c, &cpt_small, "small/eng", sample_str_lng::ENG);
77 lang_bench(c, &cpt_small, "small/pcd", sample_str_lng::PCD);
78 lang_bench(c, &cpt_small, "small/ukr", sample_str_lng::UKR);
79 lang_bench(c, &cpt_small, "small/yue", sample_str_lng::YUE);
80 lang_bench(c, &cpt_small, "small/ccp", sample_str_lng::CCP);
81 lang_bench(c, &cpt_fast, "fast/eng", sample_str_lng::ENG);
82 lang_bench(c, &cpt_fast, "fast/pcd", sample_str_lng::PCD);
83 lang_bench(c, &cpt_fast, "fast/ukr", sample_str_lng::UKR);
84 lang_bench(c, &cpt_fast, "fast/yue", sample_str_lng::YUE);
85 lang_bench(c, &cpt_fast, "fast/ccp", sample_str_lng::CCP);
86 }
87 }
88
lang_bench(c: &mut Criterion, cpt: &CodePointTrie<u8>, lid: &str, sample_str: &str)89 fn lang_bench(c: &mut Criterion, cpt: &CodePointTrie<u8>, lid: &str, sample_str: &str) {
90 let bench_name = format!("cpt/get/{lid}");
91 let s = one_hundred_code_points(sample_str);
92
93 c.bench_function(&bench_name, |b| {
94 b.iter(|| {
95 black_box(&s)
96 .chars()
97 .map(|c| black_box(&cpt).get32(c as u32))
98 .reduce(|a, b| a.wrapping_add(b))
99 });
100 });
101 }
102
103 criterion_group!(benches, overview_bench,);
104 criterion_main!(benches);
105