• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 use criterion::{black_box, criterion_group, criterion_main, Criterion};
6 
7 use icu_collections::codepointtrie::CodePointTrie;
8 
9 #[path = "tries/mod.rs"]
10 mod tries;
11 
/// Sample texts, one per language, chosen so that each one exercises a
/// different range of code points.
mod sample_str_lng {
    // "eng" is all ASCII
    pub const ENG: &str = "Universal Declaration of Human Rights";
    // "pcd" has ASCII mixed with low-BMP code points
    pub const PCD: &str = "Dèclaråcion dès dreûts d' l'ome po tos lès payîs dè monde";
    // "ukr" uses low-BMP code points (less than U+1000)
    pub const UKR: &str = "ЗАГАЛЬНА ДЕКЛАРАЦІЯ ПРАВ ЛЮДИНІ";
    // "yue" uses high-BMP code points (greater than U+1000), so it benefits from fast mode
    pub const YUE: &str = "世界人权宣言";
    // "ccp" exercises supplementary code points: every non-space character is
    // above U+FFFF. The literal must not contain U+FFFD replacement characters,
    // which are BMP code points and would defeat the purpose of this sample.
    // NOTE(review): text restored from a mis-encoded copy — confirm against the upstream sample.
    pub const CCP: &str = "𑄟𑄚𑄬𑄭𑄃𑄇𑄴𑄇𑄥𑄧𑄁𑄢𑄴 𑄝𑄬𑄇𑄴𑄅𑄚𑄮𑄢𑄴 𑄟𑄧𑄚𑄳𑄢𑄧𑄧𑄇𑄉𑄮𑄌𑄴";
}
24 
/// Sample that mixes ASCII, low-BMP, high-BMP and supplementary code points in
/// one string. The last word must consist of code points above U+FFFF, not
/// U+FFFD replacement characters.
// NOTE(review): last word restored from a mis-encoded copy — confirm against the upstream sample.
const SAMPLE_STRING_MIXED: &str = "Dèclaråcion ЗАГАЛЬНА 世界人权宣言 𑄟𑄚𑄬𑄭𑄃𑄇𑄴𑄇𑄥𑄧𑄁𑄢𑄴";
26 
/// Builds a `String` of 100 code points by repeating the characters of
/// `sample_str` in order, wrapping around to its start as often as needed.
///
/// An empty `sample_str` produces an empty `String`, since there is nothing to repeat.
fn one_hundred_code_points(sample_str: &str) -> String {
    let mut repeated = String::new();
    repeated.extend(sample_str.chars().cycle().take(100));
    repeated
}
31 
get_trie_small() -> CodePointTrie<'static, u8>32 fn get_trie_small() -> CodePointTrie<'static, u8> {
33     CodePointTrie::try_new(
34         tries::gc_small::HEADER,
35         tries::gc_small::INDEX,
36         tries::gc_small::DATA,
37     )
38     .unwrap()
39 }
40 
get_trie_fast() -> CodePointTrie<'static, u8>41 fn get_trie_fast() -> CodePointTrie<'static, u8> {
42     CodePointTrie::try_new(
43         tries::gc_fast::HEADER,
44         tries::gc_fast::INDEX,
45         tries::gc_fast::DATA,
46     )
47     .unwrap()
48 }
49 
overview_bench(c: &mut Criterion)50 fn overview_bench(c: &mut Criterion) {
51     let s = one_hundred_code_points(SAMPLE_STRING_MIXED);
52     let cpt_small = get_trie_small();
53 
54     c.bench_function("cpt/overview", |b| {
55         b.iter(|| {
56             black_box(&s)
57                 .chars()
58                 .map(|c| black_box(&cpt_small).get32(c as u32))
59                 .reduce(|a, b| a.wrapping_add(b))
60         });
61     });
62 
63     c.bench_function("cpt/get_range", |b| {
64         b.iter(|| {
65             black_box(&s)
66                 .chars()
67                 .map(|c| black_box(&cpt_small).get_range(c as u32).unwrap())
68                 .fold(0u32, |acc, ele| {
69                     acc.wrapping_add(ele.range.end() - ele.range.start() + ele.value as u32)
70                 })
71         });
72     });
73 
74     {
75         let cpt_fast = get_trie_fast();
76         lang_bench(c, &cpt_small, "small/eng", sample_str_lng::ENG);
77         lang_bench(c, &cpt_small, "small/pcd", sample_str_lng::PCD);
78         lang_bench(c, &cpt_small, "small/ukr", sample_str_lng::UKR);
79         lang_bench(c, &cpt_small, "small/yue", sample_str_lng::YUE);
80         lang_bench(c, &cpt_small, "small/ccp", sample_str_lng::CCP);
81         lang_bench(c, &cpt_fast, "fast/eng", sample_str_lng::ENG);
82         lang_bench(c, &cpt_fast, "fast/pcd", sample_str_lng::PCD);
83         lang_bench(c, &cpt_fast, "fast/ukr", sample_str_lng::UKR);
84         lang_bench(c, &cpt_fast, "fast/yue", sample_str_lng::YUE);
85         lang_bench(c, &cpt_fast, "fast/ccp", sample_str_lng::CCP);
86     }
87 }
88 
lang_bench(c: &mut Criterion, cpt: &CodePointTrie<u8>, lid: &str, sample_str: &str)89 fn lang_bench(c: &mut Criterion, cpt: &CodePointTrie<u8>, lid: &str, sample_str: &str) {
90     let bench_name = format!("cpt/get/{lid}");
91     let s = one_hundred_code_points(sample_str);
92 
93     c.bench_function(&bench_name, |b| {
94         b.iter(|| {
95             black_box(&s)
96                 .chars()
97                 .map(|c| black_box(&cpt).get32(c as u32))
98                 .reduce(|a, b| a.wrapping_add(b))
99         });
100     });
101 }
102 
103 criterion_group!(benches, overview_bench,);
104 criterion_main!(benches);
105