1 //! The foldhash implementation optimized for speed. 2 3 use core::hash::{BuildHasher, Hasher}; 4 5 use crate::seed::{gen_per_hasher_seed, GlobalSeed, SharedSeed}; 6 use crate::{folded_multiply, hash_bytes_long, hash_bytes_medium, rotate_right, ARBITRARY3}; 7 8 /// A [`Hasher`] instance implementing foldhash, optimized for speed. 9 /// 10 /// While you can create one directly with [`FoldHasher::with_seed`], you 11 /// most likely want to use [`RandomState`], [`SeedableRandomState`] or 12 /// [`FixedState`] to create [`FoldHasher`]s. 13 #[derive(Clone)] 14 pub struct FoldHasher { 15 accumulator: u64, 16 sponge: u128, 17 sponge_len: u8, 18 fold_seed: u64, 19 expand_seed: u64, 20 expand_seed2: u64, 21 expand_seed3: u64, 22 } 23 24 impl FoldHasher { 25 /// Initializes this [`FoldHasher`] with the given per-hasher seed and 26 /// [`SharedSeed`]. 27 #[inline] with_seed(per_hasher_seed: u64, shared_seed: &SharedSeed) -> FoldHasher28 pub fn with_seed(per_hasher_seed: u64, shared_seed: &SharedSeed) -> FoldHasher { 29 FoldHasher { 30 accumulator: per_hasher_seed, 31 sponge: 0, 32 sponge_len: 0, 33 fold_seed: shared_seed.seeds[0], 34 expand_seed: shared_seed.seeds[1], 35 expand_seed2: shared_seed.seeds[2], 36 expand_seed3: shared_seed.seeds[3], 37 } 38 } 39 40 #[inline(always)] write_num<T: Into<u128>>(&mut self, x: T)41 fn write_num<T: Into<u128>>(&mut self, x: T) { 42 let bits: usize = 8 * core::mem::size_of::<T>(); 43 if self.sponge_len as usize + bits > 128 { 44 let lo = self.sponge as u64; 45 let hi = (self.sponge >> 64) as u64; 46 self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed); 47 self.sponge = x.into(); 48 self.sponge_len = bits as u8; 49 } else { 50 self.sponge |= x.into() << self.sponge_len; 51 self.sponge_len += bits as u8; 52 } 53 } 54 } 55 56 impl Hasher for FoldHasher { 57 #[inline(always)] write(&mut self, bytes: &[u8])58 fn write(&mut self, bytes: &[u8]) { 59 // We perform overlapping reads in the byte hash which could lead to 60 // trivial length-extension attacks. These should be defeated by 61 // adding a length-dependent rotation on our unpredictable seed 62 // which costs only a single cycle (or none if executed with 63 // instruction-level parallelism). 64 let len = bytes.len(); 65 let base_seed = rotate_right(self.accumulator, len as u32); 66 if len <= 16 { 67 let mut s0 = base_seed; 68 let mut s1 = self.expand_seed; 69 // XOR the input into s0, s1, then multiply and fold. 70 if len >= 8 { 71 s0 ^= u64::from_ne_bytes(bytes[0..8].try_into().unwrap()); 72 s1 ^= u64::from_ne_bytes(bytes[len - 8..].try_into().unwrap()); 73 } else if len >= 4 { 74 s0 ^= u32::from_ne_bytes(bytes[0..4].try_into().unwrap()) as u64; 75 s1 ^= u32::from_ne_bytes(bytes[len - 4..].try_into().unwrap()) as u64; 76 } else if len > 0 { 77 let lo = bytes[0]; 78 let mid = bytes[len / 2]; 79 let hi = bytes[len - 1]; 80 s0 ^= lo as u64; 81 s1 ^= ((hi as u64) << 8) | mid as u64; 82 } 83 self.accumulator = folded_multiply(s0, s1); 84 } else if len < 256 { 85 self.accumulator = hash_bytes_medium( 86 bytes, 87 base_seed, 88 base_seed.wrapping_add(self.expand_seed), 89 self.fold_seed, 90 ); 91 } else { 92 self.accumulator = hash_bytes_long( 93 bytes, 94 base_seed, 95 base_seed.wrapping_add(self.expand_seed), 96 base_seed.wrapping_add(self.expand_seed2), 97 base_seed.wrapping_add(self.expand_seed3), 98 self.fold_seed, 99 ); 100 } 101 } 102 103 #[inline(always)] write_u8(&mut self, i: u8)104 fn write_u8(&mut self, i: u8) { 105 self.write_num(i); 106 } 107 108 #[inline(always)] write_u16(&mut self, i: u16)109 fn write_u16(&mut self, i: u16) { 110 self.write_num(i); 111 } 112 113 #[inline(always)] write_u32(&mut self, i: u32)114 fn write_u32(&mut self, i: u32) { 115 self.write_num(i); 116 } 117 118 #[inline(always)] write_u64(&mut self, i: u64)119 fn write_u64(&mut self, i: u64) { 120 self.write_num(i); 121 } 122 123 #[inline(always)] write_u128(&mut self, i: u128)124 fn write_u128(&mut self, i: u128) { 125 let lo = i as u64; 126 let hi = (i >> 64) as u64; 127 self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed); 128 } 129 130 #[inline(always)] write_usize(&mut self, i: usize)131 fn write_usize(&mut self, i: usize) { 132 // u128 doesn't implement From<usize>. 133 #[cfg(target_pointer_width = "32")] 134 self.write_num(i as u32); 135 #[cfg(target_pointer_width = "64")] 136 self.write_num(i as u64); 137 } 138 139 #[inline(always)] finish(&self) -> u64140 fn finish(&self) -> u64 { 141 if self.sponge_len > 0 { 142 let lo = self.sponge as u64; 143 let hi = (self.sponge >> 64) as u64; 144 folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed) 145 } else { 146 self.accumulator 147 } 148 } 149 } 150 151 /// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that is randomly initialized. 152 #[derive(Copy, Clone, Debug)] 153 pub struct RandomState { 154 per_hasher_seed: u64, 155 global_seed: GlobalSeed, 156 } 157 158 impl Default for RandomState { 159 #[inline(always)] default() -> Self160 fn default() -> Self { 161 Self { 162 per_hasher_seed: gen_per_hasher_seed(), 163 global_seed: GlobalSeed::new(), 164 } 165 } 166 } 167 168 impl BuildHasher for RandomState { 169 type Hasher = FoldHasher; 170 171 #[inline(always)] build_hasher(&self) -> FoldHasher172 fn build_hasher(&self) -> FoldHasher { 173 FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get()) 174 } 175 } 176 177 /// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that is randomly 178 /// initialized by default, but can also be initialized with a specific seed. 179 /// 180 /// This can be useful for e.g. testing, but the downside is that this type 181 /// has a size of 16 bytes rather than the 8 bytes [`RandomState`] is. 182 #[derive(Copy, Clone, Debug)] 183 pub struct SeedableRandomState { 184 per_hasher_seed: u64, 185 shared_seed: &'static SharedSeed, 186 } 187 188 impl Default for SeedableRandomState { 189 #[inline(always)] default() -> Self190 fn default() -> Self { 191 Self::random() 192 } 193 } 194 195 impl SeedableRandomState { 196 /// Generates a random [`SeedableRandomState`], similar to [`RandomState`]. 197 #[inline(always)] random() -> Self198 pub fn random() -> Self { 199 Self { 200 per_hasher_seed: gen_per_hasher_seed(), 201 shared_seed: SharedSeed::global_random(), 202 } 203 } 204 205 /// Generates a fixed [`SeedableRandomState`], similar to [`FixedState`]. 206 #[inline(always)] fixed() -> Self207 pub fn fixed() -> Self { 208 Self { 209 per_hasher_seed: ARBITRARY3, 210 shared_seed: SharedSeed::global_fixed(), 211 } 212 } 213 214 /// Generates a [`SeedableRandomState`] with the given per-hasher seed 215 /// and [`SharedSeed`]. 216 #[inline(always)] with_seed(per_hasher_seed: u64, shared_seed: &'static SharedSeed) -> Self217 pub fn with_seed(per_hasher_seed: u64, shared_seed: &'static SharedSeed) -> Self { 218 // XOR with ARBITRARY3 such that with_seed(0) matches default. 219 Self { 220 per_hasher_seed: per_hasher_seed ^ ARBITRARY3, 221 shared_seed, 222 } 223 } 224 } 225 226 impl BuildHasher for SeedableRandomState { 227 type Hasher = FoldHasher; 228 229 #[inline(always)] build_hasher(&self) -> FoldHasher230 fn build_hasher(&self) -> FoldHasher { 231 FoldHasher::with_seed(self.per_hasher_seed, self.shared_seed) 232 } 233 } 234 235 /// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that always has the same fixed seed. 236 /// 237 /// Not recommended unless you absolutely need determinism. 238 #[derive(Copy, Clone, Debug)] 239 pub struct FixedState { 240 per_hasher_seed: u64, 241 } 242 243 impl FixedState { 244 /// Creates a [`FixedState`] with the given per-hasher-seed. 245 #[inline(always)] with_seed(per_hasher_seed: u64) -> Self246 pub const fn with_seed(per_hasher_seed: u64) -> Self { 247 // XOR with ARBITRARY3 such that with_seed(0) matches default. 248 Self { 249 per_hasher_seed: per_hasher_seed ^ ARBITRARY3, 250 } 251 } 252 } 253 254 impl Default for FixedState { 255 #[inline(always)] default() -> Self256 fn default() -> Self { 257 Self { 258 per_hasher_seed: ARBITRARY3, 259 } 260 } 261 } 262 263 impl BuildHasher for FixedState { 264 type Hasher = FoldHasher; 265 266 #[inline(always)] build_hasher(&self) -> FoldHasher267 fn build_hasher(&self) -> FoldHasher { 268 FoldHasher::with_seed(self.per_hasher_seed, SharedSeed::global_fixed()) 269 } 270 } 271