• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 use criterion::{black_box, criterion_group, criterion_main, Criterion};
6 use rand::SeedableRng;
7 use rand_distr::{Distribution, LogNormal};
8 use rand_pcg::Lcg64Xsh32;
9 use std::fmt;
10 
11 #[path = "../src/samples.rs"]
12 mod samples;
13 use samples::*;
14 
15 use zerovec::ule::*;
16 use zerovec::ZeroVec;
17 
18 #[repr(align(8))]
19 #[derive(Default)]
20 struct AlignedBuffer(Vec<u8>);
21 
22 /// Generate a large list of u32s for stress testing.
23 #[allow(dead_code)]
get_needles_and_haystack() -> (Vec<u32>, Vec<u32>)24 fn get_needles_and_haystack() -> (Vec<u32>, Vec<u32>) {
25     // Lcg64Xsh32 is a small, fast PRNG for reproducible benchmarks.
26     // LogNormal(10, 1) generates numbers with mean 36315 and mode 8103, a distribution that, in
27     // spirit, correlates with Unicode properties (many low values and a long tail of high values)
28     let mut rng = Lcg64Xsh32::seed_from_u64(2021);
29     let dist = LogNormal::new(10.0, 1.0).unwrap();
30     let haystack = {
31         let mut unsorted: Vec<u32> = (&dist)
32             .sample_iter(&mut rng)
33             .take(1000)
34             .map(|f| f as u32)
35             .collect();
36         unsorted.sort_unstable();
37         unsorted
38     };
39     let needles: Vec<u32> = (&dist)
40         .sample_iter(&mut rng)
41         .take(100)
42         .map(|f| f as u32)
43         .collect();
44     (needles, haystack)
45 }
46 
47 #[allow(dead_code, clippy::ptr_arg)]
vec_to_unaligned_uvec<'a, T>(vec: &Vec<T>, buffer: &'a mut AlignedBuffer) -> ZeroVec<'a, T> where T: EqULE + Copy + PartialEq + fmt::Debug,48 fn vec_to_unaligned_uvec<'a, T>(vec: &Vec<T>, buffer: &'a mut AlignedBuffer) -> ZeroVec<'a, T>
49 where
50     T: EqULE + Copy + PartialEq + fmt::Debug,
51 {
52     // Pad with zero to ensure it is not aligned
53     buffer.0.push(0);
54     buffer
55         .0
56         .extend(ZeroVec::from_slice_or_alloc(vec.as_slice()).as_bytes());
57     ZeroVec::<T>::parse_bytes(&buffer.0[1..]).unwrap()
58 }
59 
overview_bench(c: &mut Criterion)60 fn overview_bench(c: &mut Criterion) {
61     c.bench_function("zerovec/overview", |b| {
62         b.iter(|| {
63             ZeroVec::<u32>::parse_bytes(black_box(TEST_BUFFER_LE))
64                 .unwrap()
65                 .iter()
66                 .sum::<u32>()
67         });
68     });
69 
70     {
71         sum_benches(c);
72         binary_search_benches(c);
73     }
74 }
75 
sum_benches(c: &mut Criterion)76 fn sum_benches(c: &mut Criterion) {
77     let normal_slice = &TEST_SLICE[0..19];
78     let aligned_ule_slice =
79         <u32 as AsULE>::ULE::parse_bytes_to_slice(&TEST_BUFFER_LE[0..76]).unwrap();
80     let unalign_ule_slice =
81         <u32 as AsULE>::ULE::parse_bytes_to_slice(&TEST_BUFFER_LE[1..77]).unwrap();
82 
83     assert_eq!(normal_slice.len(), aligned_ule_slice.len());
84     assert_eq!(normal_slice.len(), unalign_ule_slice.len());
85 
86     c.bench_function("zerovec/sum/sample/slice", |b| {
87         b.iter(|| {
88             black_box(normal_slice)
89                 .iter()
90                 .copied()
91                 .fold(0u32, |sum, val| sum.wrapping_add(val))
92         })
93     });
94 
95     c.bench_function("zerovec/sum/sample/zerovec_aligned", |b| {
96         b.iter(|| {
97             ZeroVec::<u32>::new_borrowed(black_box(aligned_ule_slice))
98                 .iter()
99                 .fold(0u32, |sum, val| sum.wrapping_add(val))
100         });
101     });
102 
103     c.bench_function("zerovec/sum/sample/zerovec_unaligned", |b| {
104         b.iter(|| {
105             ZeroVec::<u32>::new_borrowed(black_box(unalign_ule_slice))
106                 .iter()
107                 .fold(0u32, |sum, val| sum.wrapping_add(val))
108         });
109     });
110 }
111 
binary_search_benches(c: &mut Criterion)112 fn binary_search_benches(c: &mut Criterion) {
113     c.bench_function("zerovec/binary_search/sample/slice", |b| {
114         b.iter(|| black_box(&TEST_SLICE).binary_search(&0x0c0d0c));
115     });
116 
117     c.bench_function("zerovec/binary_search/sample/zerovec", |b| {
118         let zerovec = ZeroVec::<u32>::parse_bytes(black_box(TEST_BUFFER_LE)).unwrap();
119         b.iter(|| zerovec.binary_search(&0x0c0d0c));
120     });
121 
122     let (needles_100, haystack) = get_needles_and_haystack();
123     // Only search for 50 needles to put all figures in nanoseconds
124     let needles_50 = &needles_100[0..50];
125 
126     // *** Binary search vec of 1000 `u32` 50 times ***
127     c.bench_function("zerovec/binary_search/log_normal/slice", |b| {
128         b.iter(|| {
129             black_box(&needles_50)
130                 .iter()
131                 .map(|needle| black_box(&haystack).binary_search(needle))
132                 .filter(|r| r.is_ok())
133                 .count()
134         });
135     });
136 
137     let mut buffer = AlignedBuffer::default();
138     let zerovec = vec_to_unaligned_uvec(black_box(&haystack), &mut buffer);
139     assert_eq!(zerovec, haystack.as_slice());
140 
141     // *** Binary search vec of 1000 `u32` 50 times ***
142     c.bench_function("zerovec/binary_search/log_normal/zerovec", |b| {
143         b.iter(|| {
144             black_box(&needles_50)
145                 .iter()
146                 .map(|needle| black_box(&zerovec).binary_search(needle))
147                 .filter(|r| r.is_ok())
148                 .count()
149         });
150     });
151 
152     let single_needle = 36315;
153 
154     c.bench_function("zerovec/binary_search/log_normal/single/slice", |b| {
155         b.iter(|| black_box(&haystack).binary_search(&single_needle));
156     });
157 
158     c.bench_function("zerovec/binary_search/log_normal/single/zerovec", |b| {
159         b.iter(|| black_box(&zerovec).binary_search(&single_needle));
160     });
161 }
162 
163 criterion_group!(benches, overview_bench,);
164 criterion_main!(benches);
165