• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! The foldhash implementation optimized for speed.
2 
3 use core::hash::{BuildHasher, Hasher};
4 
5 use crate::seed::{gen_per_hasher_seed, GlobalSeed, SharedSeed};
6 use crate::{folded_multiply, hash_bytes_long, hash_bytes_medium, rotate_right, ARBITRARY3};
7 
8 /// A [`Hasher`] instance implementing foldhash, optimized for speed.
9 ///
10 /// While you can create one directly with [`FoldHasher::with_seed`], you
11 /// most likely want to use [`RandomState`], [`SeedableRandomState`] or
12 /// [`FixedState`] to create [`FoldHasher`]s.
13 #[derive(Clone)]
14 pub struct FoldHasher {
15     accumulator: u64,
16     sponge: u128,
17     sponge_len: u8,
18     fold_seed: u64,
19     expand_seed: u64,
20     expand_seed2: u64,
21     expand_seed3: u64,
22 }
23 
24 impl FoldHasher {
25     /// Initializes this [`FoldHasher`] with the given per-hasher seed and
26     /// [`SharedSeed`].
27     #[inline]
with_seed(per_hasher_seed: u64, shared_seed: &SharedSeed) -> FoldHasher28     pub fn with_seed(per_hasher_seed: u64, shared_seed: &SharedSeed) -> FoldHasher {
29         FoldHasher {
30             accumulator: per_hasher_seed,
31             sponge: 0,
32             sponge_len: 0,
33             fold_seed: shared_seed.seeds[0],
34             expand_seed: shared_seed.seeds[1],
35             expand_seed2: shared_seed.seeds[2],
36             expand_seed3: shared_seed.seeds[3],
37         }
38     }
39 
40     #[inline(always)]
write_num<T: Into<u128>>(&mut self, x: T)41     fn write_num<T: Into<u128>>(&mut self, x: T) {
42         let bits: usize = 8 * core::mem::size_of::<T>();
43         if self.sponge_len as usize + bits > 128 {
44             let lo = self.sponge as u64;
45             let hi = (self.sponge >> 64) as u64;
46             self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed);
47             self.sponge = x.into();
48             self.sponge_len = bits as u8;
49         } else {
50             self.sponge |= x.into() << self.sponge_len;
51             self.sponge_len += bits as u8;
52         }
53     }
54 }
55 
56 impl Hasher for FoldHasher {
57     #[inline(always)]
write(&mut self, bytes: &[u8])58     fn write(&mut self, bytes: &[u8]) {
59         // We perform overlapping reads in the byte hash which could lead to
60         // trivial length-extension attacks. These should be defeated by
61         // adding a length-dependent rotation on our unpredictable seed
62         // which costs only a single cycle (or none if executed with
63         // instruction-level parallelism).
64         let len = bytes.len();
65         let base_seed = rotate_right(self.accumulator, len as u32);
66         if len <= 16 {
67             let mut s0 = base_seed;
68             let mut s1 = self.expand_seed;
69             // XOR the input into s0, s1, then multiply and fold.
70             if len >= 8 {
71                 s0 ^= u64::from_ne_bytes(bytes[0..8].try_into().unwrap());
72                 s1 ^= u64::from_ne_bytes(bytes[len - 8..].try_into().unwrap());
73             } else if len >= 4 {
74                 s0 ^= u32::from_ne_bytes(bytes[0..4].try_into().unwrap()) as u64;
75                 s1 ^= u32::from_ne_bytes(bytes[len - 4..].try_into().unwrap()) as u64;
76             } else if len > 0 {
77                 let lo = bytes[0];
78                 let mid = bytes[len / 2];
79                 let hi = bytes[len - 1];
80                 s0 ^= lo as u64;
81                 s1 ^= ((hi as u64) << 8) | mid as u64;
82             }
83             self.accumulator = folded_multiply(s0, s1);
84         } else if len < 256 {
85             self.accumulator = hash_bytes_medium(
86                 bytes,
87                 base_seed,
88                 base_seed.wrapping_add(self.expand_seed),
89                 self.fold_seed,
90             );
91         } else {
92             self.accumulator = hash_bytes_long(
93                 bytes,
94                 base_seed,
95                 base_seed.wrapping_add(self.expand_seed),
96                 base_seed.wrapping_add(self.expand_seed2),
97                 base_seed.wrapping_add(self.expand_seed3),
98                 self.fold_seed,
99             );
100         }
101     }
102 
103     #[inline(always)]
write_u8(&mut self, i: u8)104     fn write_u8(&mut self, i: u8) {
105         self.write_num(i);
106     }
107 
108     #[inline(always)]
write_u16(&mut self, i: u16)109     fn write_u16(&mut self, i: u16) {
110         self.write_num(i);
111     }
112 
113     #[inline(always)]
write_u32(&mut self, i: u32)114     fn write_u32(&mut self, i: u32) {
115         self.write_num(i);
116     }
117 
118     #[inline(always)]
write_u64(&mut self, i: u64)119     fn write_u64(&mut self, i: u64) {
120         self.write_num(i);
121     }
122 
123     #[inline(always)]
write_u128(&mut self, i: u128)124     fn write_u128(&mut self, i: u128) {
125         let lo = i as u64;
126         let hi = (i >> 64) as u64;
127         self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed);
128     }
129 
130     #[inline(always)]
write_usize(&mut self, i: usize)131     fn write_usize(&mut self, i: usize) {
132         // u128 doesn't implement From<usize>.
133         #[cfg(target_pointer_width = "32")]
134         self.write_num(i as u32);
135         #[cfg(target_pointer_width = "64")]
136         self.write_num(i as u64);
137     }
138 
139     #[inline(always)]
finish(&self) -> u64140     fn finish(&self) -> u64 {
141         if self.sponge_len > 0 {
142             let lo = self.sponge as u64;
143             let hi = (self.sponge >> 64) as u64;
144             folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed)
145         } else {
146             self.accumulator
147         }
148     }
149 }
150 
151 /// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that is randomly initialized.
152 #[derive(Copy, Clone, Debug)]
153 pub struct RandomState {
154     per_hasher_seed: u64,
155     global_seed: GlobalSeed,
156 }
157 
158 impl Default for RandomState {
159     #[inline(always)]
default() -> Self160     fn default() -> Self {
161         Self {
162             per_hasher_seed: gen_per_hasher_seed(),
163             global_seed: GlobalSeed::new(),
164         }
165     }
166 }
167 
168 impl BuildHasher for RandomState {
169     type Hasher = FoldHasher;
170 
171     #[inline(always)]
build_hasher(&self) -> FoldHasher172     fn build_hasher(&self) -> FoldHasher {
173         FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get())
174     }
175 }
176 
177 /// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that is randomly
178 /// initialized by default, but can also be initialized with a specific seed.
179 ///
180 /// This can be useful for e.g. testing, but the downside is that this type
181 /// has a size of 16 bytes rather than the 8 bytes [`RandomState`] is.
182 #[derive(Copy, Clone, Debug)]
183 pub struct SeedableRandomState {
184     per_hasher_seed: u64,
185     shared_seed: &'static SharedSeed,
186 }
187 
188 impl Default for SeedableRandomState {
189     #[inline(always)]
default() -> Self190     fn default() -> Self {
191         Self::random()
192     }
193 }
194 
195 impl SeedableRandomState {
196     /// Generates a random [`SeedableRandomState`], similar to [`RandomState`].
197     #[inline(always)]
random() -> Self198     pub fn random() -> Self {
199         Self {
200             per_hasher_seed: gen_per_hasher_seed(),
201             shared_seed: SharedSeed::global_random(),
202         }
203     }
204 
205     /// Generates a fixed [`SeedableRandomState`], similar to [`FixedState`].
206     #[inline(always)]
fixed() -> Self207     pub fn fixed() -> Self {
208         Self {
209             per_hasher_seed: ARBITRARY3,
210             shared_seed: SharedSeed::global_fixed(),
211         }
212     }
213 
214     /// Generates a [`SeedableRandomState`] with the given per-hasher seed
215     /// and [`SharedSeed`].
216     #[inline(always)]
with_seed(per_hasher_seed: u64, shared_seed: &'static SharedSeed) -> Self217     pub fn with_seed(per_hasher_seed: u64, shared_seed: &'static SharedSeed) -> Self {
218         // XOR with ARBITRARY3 such that with_seed(0) matches default.
219         Self {
220             per_hasher_seed: per_hasher_seed ^ ARBITRARY3,
221             shared_seed,
222         }
223     }
224 }
225 
226 impl BuildHasher for SeedableRandomState {
227     type Hasher = FoldHasher;
228 
229     #[inline(always)]
build_hasher(&self) -> FoldHasher230     fn build_hasher(&self) -> FoldHasher {
231         FoldHasher::with_seed(self.per_hasher_seed, self.shared_seed)
232     }
233 }
234 
235 /// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that always has the same fixed seed.
236 ///
237 /// Not recommended unless you absolutely need determinism.
238 #[derive(Copy, Clone, Debug)]
239 pub struct FixedState {
240     per_hasher_seed: u64,
241 }
242 
243 impl FixedState {
244     /// Creates a [`FixedState`] with the given per-hasher-seed.
245     #[inline(always)]
with_seed(per_hasher_seed: u64) -> Self246     pub const fn with_seed(per_hasher_seed: u64) -> Self {
247         // XOR with ARBITRARY3 such that with_seed(0) matches default.
248         Self {
249             per_hasher_seed: per_hasher_seed ^ ARBITRARY3,
250         }
251     }
252 }
253 
254 impl Default for FixedState {
255     #[inline(always)]
default() -> Self256     fn default() -> Self {
257         Self {
258             per_hasher_seed: ARBITRARY3,
259         }
260     }
261 }
262 
263 impl BuildHasher for FixedState {
264     type Hasher = FoldHasher;
265 
266     #[inline(always)]
build_hasher(&self) -> FoldHasher267     fn build_hasher(&self) -> FoldHasher {
268         FoldHasher::with_seed(self.per_hasher_seed, SharedSeed::global_fixed())
269     }
270 }
271