• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
//! Compares the performance of `UnicodeSegmentation::graphemes` with stdlib's UTF-8 scalar-based
//! `std::str::chars`.
//!
//! It is expected that `std::str::chars` is faster than `UnicodeSegmentation::graphemes` since it
//! does not consider the complexity of grapheme clusters. The question in this benchmark
//! is how much slower full Unicode handling is.
7 
8 use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
9 
10 use std::fs;
11 use unicode_segmentation::UnicodeSegmentation;
12 
/// Base names of the sample texts used as benchmark inputs.
///
/// `bench_all` turns each entry into the path `benches/texts/<name>.txt` and
/// also uses the entry as the benchmark parameter label.
const FILES: &[&str] = &[
    "arabic",
    "english",
    "hindi",
    "japanese",
    "korean",
    "mandarin",
    "russian",
    "source_code",
];
23 
24 #[inline(always)]
grapheme(text: &str)25 fn grapheme(text: &str) {
26     for c in UnicodeSegmentation::graphemes(black_box(text), true) {
27         black_box(c);
28     }
29 }
30 
/// Walks `text` one Unicode scalar value (`char`) at a time.
///
/// This is the baseline the grapheme benchmark is compared against. Both the
/// input and every decoded `char` are routed through `black_box` so the
/// optimizer cannot discard the decoding work being measured.
#[inline(always)]
fn scalar(text: &str) {
    let code_points = black_box(text).chars();
    for code_point in code_points {
        black_box(code_point);
    }
}
37 
bench_all(c: &mut Criterion)38 fn bench_all(c: &mut Criterion) {
39     let mut group = c.benchmark_group("chars");
40 
41     for file in FILES {
42         group.bench_with_input(
43             BenchmarkId::new("grapheme", file),
44             &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
45             |b, content| b.iter(|| grapheme(content)),
46         );
47     }
48 
49     for file in FILES {
50         group.bench_with_input(
51             BenchmarkId::new("scalar", file),
52             &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
53             |b, content| b.iter(|| scalar(content)),
54         );
55     }
56 }
57 
// Collect `bench_all` into a Criterion group named `benches`, then hand that
// group to `criterion_main!`, which supplies the benchmark binary's entry point.
criterion_group!(benches, bench_all);
criterion_main!(benches);
60