// These constants may end up unused depending on platform support.
#[allow(unused)]
use crate::{ARBITRARY1, ARBITRARY9};

use super::{folded_multiply, ARBITRARY2, ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7};
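
// `folded_multiply` is defined in the parent module. For reference, on targets
// with 128-bit arithmetic it behaves like the sketch below (reconstructed from
// the foldhash design as an illustration, not copied from this file):
//
//     fn folded_multiply(x: u64, y: u64) -> u64 {
//         // Widen to 128 bits, multiply, then fold the two halves together
//         // so every input bit can influence every output bit.
//         let full = (x as u128) * (y as u128);
//         (full as u64) ^ ((full >> 64) as u64)
//     }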

/// Used by FixedState, and by RandomState when atomics for dynamic
/// initialization are unavailable.
const FIXED_GLOBAL_SEED: SharedSeed = SharedSeed {
    seeds: [ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7],
};

pub(crate) fn gen_per_hasher_seed() -> u64 {
    // We initialize the per-hasher seed with the stack pointer to ensure
    // different threads have different seeds, with the side benefit that
    // stack address randomization gives us further non-determinism.
    let mut per_hasher_seed = 0;
    let stack_ptr = core::ptr::addr_of!(per_hasher_seed) as u64;
    per_hasher_seed = stack_ptr;

    // If we have the standard library available we use a thread-local
    // state to ensure RandomStates are different with high probability,
    // even if the call stack is the same.
    #[cfg(feature = "std")]
    {
        use std::cell::Cell;
        thread_local! {
            static PER_HASHER_NONDETERMINISM: Cell<u64> = const { Cell::new(0) };
        }

        PER_HASHER_NONDETERMINISM.with(|cell| {
            let nondeterminism = cell.get();
            per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism);
            cell.set(per_hasher_seed);
        })
    };

    // If we don't have the standard library we use a global atomic
    // instead of a thread-local state.
    //
    // PER_HASHER_NONDETERMINISM is loaded and updated in a racy manner,
    // but this doesn't matter in practice: it is impossible for two
    // different threads to have the same live stack location, so they'll
    // almost surely generate different seeds and provide different
    // possible updates for PER_HASHER_NONDETERMINISM. If we were to use a
    // proper fetch_add atomic update instead, there would be a larger
    // chance of problematic contention.
    //
    // We use usize instead of 64-bit atomics for the best platform support.
    #[cfg(not(feature = "std"))]
    {
        use core::sync::atomic::{AtomicUsize, Ordering};
        static PER_HASHER_NONDETERMINISM: AtomicUsize = AtomicUsize::new(0);

        let nondeterminism = PER_HASHER_NONDETERMINISM.load(Ordering::Relaxed) as u64;
        per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism);
        PER_HASHER_NONDETERMINISM.store(per_hasher_seed as usize, Ordering::Relaxed);
    }

    // One extra mixing step to ensure good random bits.
    folded_multiply(per_hasher_seed, ARBITRARY2)
}
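
// Illustrative sketch (not part of this module): because each call chains the
// thread-local (or global) nondeterminism into its result, two back-to-back
// calls on the same thread yield different seeds even though the stack slot
// address may well be identical:
//
//     let a = gen_per_hasher_seed();
//     let b = gen_per_hasher_seed();
//     assert_ne!(a, b); // a collision is possible in principle, but vanishingly rare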

/// A random seed intended to be shared by many different foldhash instances.
///
/// This seed is consumed by [`FoldHasher::with_seed`](crate::fast::FoldHasher::with_seed)
/// and [`SeedableRandomState::with_seed`](crate::fast::SeedableRandomState::with_seed).
#[derive(Clone, Debug)]
pub struct SharedSeed {
    pub(crate) seeds: [u64; 4],
}

impl SharedSeed {
    /// Returns the globally shared, randomly initialized [`SharedSeed`] as used
    /// by [`RandomState`](crate::fast::RandomState).
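    ///
    /// # Example
    ///
    /// A minimal sketch; it assumes `SharedSeed` is re-exported at the crate
    /// root, which may differ from how the crate actually exposes it.
    ///
    /// ```ignore
    /// let seed: &'static foldhash::SharedSeed = foldhash::SharedSeed::global_random();
    /// // The same reference is returned on every call within one process run.
    /// assert!(core::ptr::eq(seed, foldhash::SharedSeed::global_random()));
    /// ```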
    #[inline(always)]
    pub fn global_random() -> &'static SharedSeed {
        global::GlobalSeed::new().get()
    }

    /// Returns the globally shared fixed [`SharedSeed`] as used
    /// by [`FixedState`](crate::fast::FixedState).
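    ///
    /// # Example
    ///
    /// A sketch under the same crate-root re-export assumption as above.
    ///
    /// ```ignore
    /// // Being a const fn, this is also usable in const context.
    /// const SEED: &foldhash::SharedSeed = foldhash::SharedSeed::global_fixed();
    /// ```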
    #[inline(always)]
    pub const fn global_fixed() -> &'static SharedSeed {
        &FIXED_GLOBAL_SEED
    }

    /// Generates a new [`SharedSeed`] from a single 64-bit seed.
    ///
    /// Note that this is somewhat expensive, so it is recommended to reuse
    /// the [`SharedSeed`] as much as possible, using the per-hasher seed to
    /// differentiate between hash instances.
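    ///
    /// # Example
    ///
    /// A sketch of the intended reuse pattern. The exact arity of `with_seed`
    /// shown here is assumed from the crate's documented API, not verified
    /// against this file.
    ///
    /// ```ignore
    /// use foldhash::SharedSeed;
    ///
    /// // Derive the shared seed once, at compile time, and reuse it everywhere,
    /// // varying only the per-hasher seed between hash instances.
    /// static MY_SEED: SharedSeed = SharedSeed::from_u64(0x1234_5678_9abc_def0);
    ///
    /// let hasher = foldhash::fast::FoldHasher::with_seed(42, &MY_SEED);
    /// ```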
    pub const fn from_u64(seed: u64) -> Self {
        macro_rules! mix {
            ($x: expr) => {
                folded_multiply($x, ARBITRARY9)
            };
        }

        let seed_a = mix!(mix!(mix!(seed)));
        let seed_b = mix!(mix!(mix!(seed_a)));
        let seed_c = mix!(mix!(mix!(seed_b)));
        let seed_d = mix!(mix!(mix!(seed_c)));

        // Zeroes form a weak point for the multiply-mix, and zeroes tend to be
        // a common input, so we want the global seeds that get XOR'ed with the
        // input to always be non-zero. To also ensure there is always a good
        // spread of bits, we give up 3 bits of entropy and simply force some
        // bits on.
        const FORCED_ONES: u64 = (1 << 63) | (1 << 31) | 1;
        Self {
            seeds: [
                seed_a | FORCED_ONES,
                seed_b | FORCED_ONES,
                seed_c | FORCED_ONES,
                seed_d | FORCED_ONES,
            ],
        }
    }
}

#[cfg(target_has_atomic = "8")]
mod global {
    use super::*;
    use core::cell::UnsafeCell;
    use core::sync::atomic::{AtomicU8, Ordering};

    fn generate_global_seed() -> SharedSeed {
        let mix = |seed: u64, x: u64| folded_multiply(seed ^ x, ARBITRARY9);

        // Use address space layout randomization as our main randomness source.
        // This isn't great, but we don't advertise HashDoS resistance in the
        // first place. It is a whole lot better than nothing, at near-zero cost
        // and with no dependencies.
        let mut seed = 0;
        let stack_ptr = &seed as *const _;
        let func_ptr = generate_global_seed;
        let static_ptr = &GLOBAL_SEED_STORAGE as *const _;
        seed = mix(seed, stack_ptr as usize as u64);
        seed = mix(seed, func_ptr as usize as u64);
        seed = mix(seed, static_ptr as usize as u64);

        // If we have the standard library available, augment entropy with the
        // current time and an address from the allocator.
        #[cfg(feature = "std")]
        {
            #[cfg(not(any(
                miri,
                all(target_family = "wasm", target_os = "unknown"),
                target_os = "zkvm"
            )))]
            if let Ok(duration) = std::time::UNIX_EPOCH.elapsed() {
                seed = mix(seed, duration.subsec_nanos() as u64);
                seed = mix(seed, duration.as_secs());
            }

            let box_ptr = &*Box::new(0u8) as *const _;
            seed = mix(seed, box_ptr as usize as u64);
        }

        SharedSeed::from_u64(seed)
    }

    // All the code below exists purely to cache the above seed as efficiently
    // as possible. Even if we weren't a no_std crate and had access to
    // OnceLock, we wouldn't want to check whether the global is set each time
    // we hash an object, so we hand-roll global storage whose type safety
    // allows us to assume it is initialized after construction.
    struct GlobalSeedStorage {
        state: AtomicU8,
        seed: UnsafeCell<SharedSeed>,
    }

    const UNINIT: u8 = 0;
    const LOCKED: u8 = 1;
    const INIT: u8 = 2;
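
    // The state machine only ever moves forward:
    //
    //     UNINIT --compare_exchange--> LOCKED --store(Release)--> INIT
    //
    // Once INIT has been observed with Acquire ordering, the seed is immutable
    // for the remainder of the program, so reads need no further
    // synchronization.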

    // SAFETY: we only mutate the UnsafeCell when state is in the
    // thread-exclusive LOCKED state, and only read the UnsafeCell when state
    // is in the once-achieved-eternally-preserved INIT state.
    unsafe impl Sync for GlobalSeedStorage {}

    static GLOBAL_SEED_STORAGE: GlobalSeedStorage = GlobalSeedStorage {
        state: AtomicU8::new(UNINIT),
        seed: UnsafeCell::new(SharedSeed { seeds: [0; 4] }),
    };

    /// An object representing an initialized global seed.
    ///
    /// It does not actually store the seed itself: it is a zero-sized type.
    /// This prevents inflating the RandomState size and in turn HashMap's size.
    #[derive(Copy, Clone, Debug)]
    pub struct GlobalSeed {
        // So we can't accidentally write GlobalSeed { } within this crate.
        _no_accidental_unsafe_init: (),
    }

    impl GlobalSeed {
        #[inline(always)]
        pub fn new() -> Self {
            if GLOBAL_SEED_STORAGE.state.load(Ordering::Acquire) != INIT {
                Self::init_slow()
            }
            Self {
                _no_accidental_unsafe_init: (),
            }
        }

        #[cold]
        #[inline(never)]
        fn init_slow() {
            // Generate the seed outside of the critical section.
            let seed = generate_global_seed();

            loop {
                match GLOBAL_SEED_STORAGE.state.compare_exchange_weak(
                    UNINIT,
                    LOCKED,
                    Ordering::Acquire,
                    Ordering::Acquire,
                ) {
                    Ok(_) => unsafe {
                        // SAFETY: we just acquired an exclusive lock.
                        *GLOBAL_SEED_STORAGE.seed.get() = seed;
                        GLOBAL_SEED_STORAGE.state.store(INIT, Ordering::Release);
                        return;
                    },

                    Err(INIT) => return,

                    // Yes, it's a spin loop. We need to support no_std (so we
                    // have no easy access to proper locks), this is a
                    // one-time-per-program initialization, and the critical
                    // section is only a few store instructions, so it'll be
                    // fine.
                    _ => core::hint::spin_loop(),
                }
            }
        }

        #[inline(always)]
        pub fn get(self) -> &'static SharedSeed {
            // SAFETY: our constructor ensured we are in the INIT state and thus
            // this raw read does not race with any write.
            unsafe { &*GLOBAL_SEED_STORAGE.seed.get() }
        }
    }
}

#[cfg(not(target_has_atomic = "8"))]
mod global {
    use super::*;

    #[derive(Copy, Clone, Debug)]
    pub struct GlobalSeed {}

    impl GlobalSeed {
        #[inline(always)]
        pub fn new() -> Self {
            Self {}
        }

        #[inline(always)]
        pub fn get(self) -> &'static SharedSeed {
            &super::FIXED_GLOBAL_SEED
        }
    }
}

pub(crate) use global::GlobalSeed;
268