//! Compares the performance of `UnicodeSegmentation::graphemes` with the standard library's
//! UTF-8 scalar-based `std::str::chars`.
//!
//! `std::str::chars` is expected to be faster than `UnicodeSegmentation::graphemes`, since it
//! only decodes scalar values and never has to consider grapheme cluster boundaries. The
//! question this benchmark answers is how much slower full Unicode-aware handling is.
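//!
//! As a quick illustration of the difference (not part of the benchmark itself): a combining
//! mark joins its base character into one grapheme cluster, but remains a separate scalar value.
//!
//! ```
//! use unicode_segmentation::UnicodeSegmentation;
//!
//! // "y" followed by U+0306 COMBINING BREVE renders as one user-perceived character.
//! let s = "y\u{306}";
//! assert_eq!(s.chars().count(), 2);
//! assert_eq!(s.graphemes(true).count(), 1);
//! ```
//!
//! Run with `cargo bench` from the crate root.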

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};

use std::fs;
use unicode_segmentation::UnicodeSegmentation;

/// Sample texts; each name corresponds to a file at `benches/texts/<name>.txt`.
const FILES: &[&str] = &[
    "arabic",
    "english",
    "hindi",
    "japanese",
    "korean",
    "mandarin",
    "russian",
    "source_code",
];
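
/// Iterates over every extended grapheme cluster in `text`, passing each cluster through
/// `black_box` so the traversal is not optimized away.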
#[inline(always)]
fn grapheme(text: &str) {
    for c in UnicodeSegmentation::graphemes(black_box(text), true) {
        black_box(c);
    }
}
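
/// Iterates over every `char` (Unicode scalar value) in `text`, passing each one through
/// `black_box` so the loop is not optimized away.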
#[inline(always)]
fn scalar(text: &str) {
    for c in black_box(text).chars() {
        black_box(c);
    }
}
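
/// Reads each sample file and registers one benchmark per file for each of the two iterators,
/// under the "chars" benchmark group.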
fn bench_all(c: &mut Criterion) {
    let mut group = c.benchmark_group("chars");

    for file in FILES {
        group.bench_with_input(
            BenchmarkId::new("grapheme", file),
            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
            |b, content| b.iter(|| grapheme(content)),
        );
    }
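
    // Benchmark the same inputs with the scalar `char` iterator as the baseline.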
    for file in FILES {
        group.bench_with_input(
            BenchmarkId::new("scalar", file),
            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
            |b, content| b.iter(|| scalar(content)),
        );
    }
}

criterion_group!(benches, bench_all);
criterion_main!(benches);