//! Benchmark the overhead that the synchronization of `OnceCell::get` causes.
//! We do some other operations that write to memory to get an imprecise but somewhat realistic
//! measurement.

use once_cell::sync::OnceCell;
use std::sync::atomic::{AtomicUsize, Ordering};

const N_THREADS: usize = 16;
const N_ROUNDS: usize = 1_000_000;

static CELL: OnceCell<usize> = OnceCell::new();
static OTHER: AtomicUsize = AtomicUsize::new(0);

fn main() {
    let start = std::time::Instant::now();
    let threads =
        (0..N_THREADS).map(|i| std::thread::spawn(move || thread_main(i))).collect::<Vec<_>>();
    for thread in threads {
        thread.join().unwrap();
    }
    println!("{:?}", start.elapsed());
    println!("{:?}", OTHER.load(Ordering::Relaxed));
}

#[inline(never)]
fn thread_main(i: usize) {
    // The operations we do here don't really matter, as long as we do multiple writes, and
    // everything is messy enough to prevent the compiler from optimizing the loop away.
    let mut data = [i; 128];
    let mut accum = 0usize;
    for _ in 0..N_ROUNDS {
        let _value = CELL.get_or_init(|| i + 1);
        let k = OTHER.fetch_add(data[accum & 0x7F] as usize, Ordering::Relaxed);
        for j in data.iter_mut() {
            *j = (*j).wrapping_add(accum);
            accum = accum.wrapping_add(k);
        }
    }
}
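
// One way to run this benchmark, assuming the file lives at `examples/bench.rs` in a
// crate that depends on `once_cell` (the path and example name are assumptions here):
//
//     cargo run --release --example bench
//
// The first line of output is the wall-clock time for all N_THREADS workers to finish;
// the second is the final value of `OTHER`, printed so the accumulated per-thread work
// stays observable and is not optimized away.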