• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! The implementation of XXH3_64.
2 
3 #![deny(
4     clippy::missing_safety_doc,
5     clippy::undocumented_unsafe_blocks,
6     unsafe_op_in_unsafe_fn
7 )]
8 
9 use core::hash;
10 
11 use crate::{
12     xxhash3::{primes::*, *},
13     IntoU128 as _, IntoU64 as _,
14 };
15 
16 pub use crate::xxhash3::{
17     FixedBuffer, FixedMutBuffer, OneshotWithSecretError, SecretBuffer, SecretTooShortError,
18     SecretWithSeedError, DEFAULT_SECRET_LENGTH, SECRET_MINIMUM_LENGTH,
19 };
20 
21 /// Calculates the 64-bit hash.
22 #[derive(Clone)]
23 pub struct Hasher {
24     #[cfg(feature = "alloc")]
25     inner: AllocRawHasher,
26     _private: (),
27 }
28 
29 impl Hasher {
30     /// Hash all data at once. If you can use this function, you may
31     /// see noticable speed gains for certain types of input.
32     #[must_use]
33     #[inline]
oneshot(input: &[u8]) -> u6434     pub fn oneshot(input: &[u8]) -> u64 {
35         impl_oneshot(DEFAULT_SECRET, DEFAULT_SEED, input)
36     }
37 
38     /// Hash all data at once using the provided seed and a secret
39     /// derived from the seed. If you can use this function, you may
40     /// see noticable speed gains for certain types of input.
41     #[must_use]
42     #[inline]
oneshot_with_seed(seed: u64, input: &[u8]) -> u6443     pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u64 {
44         let mut secret = DEFAULT_SECRET_RAW;
45 
46         // We know that the secret will only be used if we have more
47         // than 240 bytes, so don't waste time computing it otherwise.
48         if input.len() > CUTOFF {
49             derive_secret(seed, &mut secret);
50         }
51 
52         let secret = Secret::new(&secret).expect("The default secret length is invalid");
53 
54         impl_oneshot(secret, seed, input)
55     }
56 
57     /// Hash all data at once using the provided secret and the
58     /// default seed. If you can use this function, you may see
59     /// noticable speed gains for certain types of input.
60     #[inline]
oneshot_with_secret(secret: &[u8], input: &[u8]) -> Result<u64, OneshotWithSecretError>61     pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> Result<u64, OneshotWithSecretError> {
62         let secret = Secret::new(secret).map_err(OneshotWithSecretError)?;
63         Ok(impl_oneshot(secret, DEFAULT_SEED, input))
64     }
65 
66     /// Hash all data at once using the provided seed and secret. If
67     /// you can use this function, you may see noticable speed gains
68     /// for certain types of input.
69     #[inline]
oneshot_with_seed_and_secret( seed: u64, secret: &[u8], input: &[u8], ) -> Result<u64, OneshotWithSecretError>70     pub fn oneshot_with_seed_and_secret(
71         seed: u64,
72         secret: &[u8],
73         input: &[u8],
74     ) -> Result<u64, OneshotWithSecretError> {
75         let secret = if input.len() > CUTOFF {
76             Secret::new(secret).map_err(OneshotWithSecretError)?
77         } else {
78             DEFAULT_SECRET
79         };
80 
81         Ok(impl_oneshot(secret, seed, input))
82     }
83 }
84 
85 #[cfg(feature = "alloc")]
86 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
87 mod with_alloc {
88     use ::alloc::boxed::Box;
89 
90     use super::*;
91 
92     impl Hasher {
93         /// Constructs the hasher using the default seed and secret values.
new() -> Self94         pub fn new() -> Self {
95             Self {
96                 inner: RawHasherCore::allocate_default(),
97                 _private: (),
98             }
99         }
100 
101         /// Constructs the hasher using the provided seed and a secret
102         /// derived from the seed.
with_seed(seed: u64) -> Self103         pub fn with_seed(seed: u64) -> Self {
104             Self {
105                 inner: RawHasherCore::allocate_with_seed(seed),
106                 _private: (),
107             }
108         }
109 
110         /// Constructs the hasher using the provided seed and secret.
with_seed_and_secret( seed: u64, secret: impl Into<Box<[u8]>>, ) -> Result<Self, SecretTooShortError<Box<[u8]>>>111         pub fn with_seed_and_secret(
112             seed: u64,
113             secret: impl Into<Box<[u8]>>,
114         ) -> Result<Self, SecretTooShortError<Box<[u8]>>> {
115             Ok(Self {
116                 inner: RawHasherCore::allocate_with_seed_and_secret(seed, secret)?,
117                 _private: (),
118             })
119         }
120 
121         /// Returns the secret.
into_secret(self) -> Box<[u8]>122         pub fn into_secret(self) -> Box<[u8]> {
123             self.inner.into_secret()
124         }
125     }
126 
127     impl Default for Hasher {
default() -> Self128         fn default() -> Self {
129             Self::new()
130         }
131     }
132 
133     impl hash::Hasher for Hasher {
134         #[inline]
write(&mut self, input: &[u8])135         fn write(&mut self, input: &[u8]) {
136             self.inner.write(input)
137         }
138 
139         #[inline]
finish(&self) -> u64140         fn finish(&self) -> u64 {
141             self.inner.finish(Finalize64)
142         }
143     }
144 }
145 
146 #[derive(Clone)]
147 /// A lower-level interface for computing a hash from streaming data.
148 ///
149 /// The algorithm requires a secret which can be a reasonably large
150 /// piece of data. [`Hasher`][] makes one concrete implementation
151 /// decision that uses dynamic memory allocation, but specialized
152 /// usages may desire more flexibility. This type, combined with
153 /// [`SecretBuffer`][], offer that flexibility at the cost of a
154 /// generic type.
155 pub struct RawHasher<S>(RawHasherCore<S>);
156 
157 impl<S> RawHasher<S> {
158     /// Construct the hasher with the provided seed, secret, and
159     /// temporary buffer.
new(secret_buffer: SecretBuffer<S>) -> Self160     pub fn new(secret_buffer: SecretBuffer<S>) -> Self {
161         Self(RawHasherCore::new(secret_buffer))
162     }
163 
164     /// Returns the secret.
into_secret(self) -> S165     pub fn into_secret(self) -> S {
166         self.0.into_secret()
167     }
168 }
169 
170 impl<S> hash::Hasher for RawHasher<S>
171 where
172     S: FixedBuffer,
173 {
174     #[inline]
write(&mut self, input: &[u8])175     fn write(&mut self, input: &[u8]) {
176         self.0.write(input);
177     }
178 
179     #[inline]
finish(&self) -> u64180     fn finish(&self) -> u64 {
181         self.0.finish(Finalize64)
182     }
183 }
184 
185 struct Finalize64;
186 
187 impl Finalize for Finalize64 {
188     type Output = u64;
189 
190     #[inline(always)]
small(&self, secret: &Secret, seed: u64, input: &[u8]) -> Self::Output191     fn small(&self, secret: &Secret, seed: u64, input: &[u8]) -> Self::Output {
192         impl_oneshot(secret, seed, input)
193     }
194 
195     #[inline(always)]
large( &self, vector: impl Vector, acc: [u64; 8], last_block: &[u8], last_stripe: &[u8; 64], secret: &Secret, len: usize, ) -> Self::Output196     fn large(
197         &self,
198         vector: impl Vector,
199         acc: [u64; 8],
200         last_block: &[u8],
201         last_stripe: &[u8; 64],
202         secret: &Secret,
203         len: usize,
204     ) -> Self::Output {
205         Algorithm(vector).finalize_64(acc, last_block, last_stripe, secret, len)
206     }
207 }
208 
209 #[inline(always)]
impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64210 fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
211     match input.len() {
212         241.. => impl_241_plus_bytes(secret, input),
213 
214         129..=240 => impl_129_to_240_bytes(secret, seed, input),
215 
216         17..=128 => impl_17_to_128_bytes(secret, seed, input),
217 
218         9..=16 => impl_9_to_16_bytes(secret, seed, input),
219 
220         4..=8 => impl_4_to_8_bytes(secret, seed, input),
221 
222         1..=3 => impl_1_to_3_bytes(secret, seed, input),
223 
224         0 => impl_0_bytes(secret, seed),
225     }
226 }
227 
228 #[inline(always)]
impl_0_bytes(secret: &Secret, seed: u64) -> u64229 fn impl_0_bytes(secret: &Secret, seed: u64) -> u64 {
230     let secret_words = secret.for_64().words_for_0();
231     avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1])
232 }
233 
234 #[inline(always)]
impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64235 fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
236     assert_input_range!(1..=3, input.len());
237     let combined = impl_1_to_3_bytes_combined(input);
238 
239     let secret_words = secret.for_64().words_for_1_to_3();
240 
241     let value = {
242         let secret = (secret_words[0] ^ secret_words[1]).into_u64();
243         secret.wrapping_add(seed) ^ combined.into_u64()
244     };
245 
246     // FUTURE: TEST: "Note that the XXH3-64 result is the lower half of XXH3-128 result."
247     avalanche_xxh64(value)
248 }
249 
250 #[inline(always)]
impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64251 fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
252     assert_input_range!(4..=8, input.len());
253     let input_first = input.first_u32().unwrap();
254     let input_last = input.last_u32().unwrap();
255 
256     let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32);
257     let secret_words = secret.for_64().words_for_4_to_8();
258 
259     let combined = input_last.into_u64() | (input_first.into_u64() << 32);
260 
261     let mut value = {
262         let a = secret_words[0] ^ secret_words[1];
263         let b = a.wrapping_sub(modified_seed);
264         b ^ combined
265     };
266     value ^= value.rotate_left(49) ^ value.rotate_left(24);
267     value = value.wrapping_mul(PRIME_MX2);
268     value ^= (value >> 35).wrapping_add(input.len().into_u64());
269     value = value.wrapping_mul(PRIME_MX2);
270     value ^= value >> 28;
271     value
272 }
273 
274 #[inline(always)]
impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64275 fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
276     assert_input_range!(9..=16, input.len());
277     let input_first = input.first_u64().unwrap();
278     let input_last = input.last_u64().unwrap();
279 
280     let secret_words = secret.for_64().words_for_9_to_16();
281     let low = ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first;
282     let high = ((secret_words[2] ^ secret_words[3]).wrapping_sub(seed)) ^ input_last;
283     let mul_result = low.into_u128().wrapping_mul(high.into_u128());
284     let value = input
285         .len()
286         .into_u64()
287         .wrapping_add(low.swap_bytes())
288         .wrapping_add(high)
289         .wrapping_add(mul_result.lower_half() ^ mul_result.upper_half());
290 
291     avalanche(value)
292 }
293 
294 #[inline]
impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64295 fn impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
296     assert_input_range!(17..=128, input.len());
297     let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1);
298 
299     impl_17_to_128_bytes_iter(secret, input, |fwd, bwd, secret| {
300         acc = acc.wrapping_add(mix_step(fwd, &secret[0], seed));
301         acc = acc.wrapping_add(mix_step(bwd, &secret[1], seed));
302     });
303 
304     avalanche(acc)
305 }
306 
307 #[inline]
impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64308 fn impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
309     assert_input_range!(129..=240, input.len());
310     let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1);
311 
312     let (head, _) = input.bp_as_chunks();
313     let mut head = head.iter();
314 
315     let ss = secret.for_64().words_for_127_to_240_part1();
316     for (chunk, secret) in head.by_ref().zip(ss).take(8) {
317         acc = acc.wrapping_add(mix_step(chunk, secret, seed));
318     }
319 
320     acc = avalanche(acc);
321 
322     let ss = secret.for_64().words_for_127_to_240_part2();
323     for (chunk, secret) in head.zip(ss) {
324         acc = acc.wrapping_add(mix_step(chunk, secret, seed));
325     }
326 
327     let last_chunk = input.last_chunk().unwrap();
328     let ss = secret.for_64().words_for_127_to_240_part3();
329     acc = acc.wrapping_add(mix_step(last_chunk, ss, seed));
330 
331     avalanche(acc)
332 }
333 
334 #[inline]
impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u64335 fn impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u64 {
336     assert_input_range!(241.., input.len());
337     dispatch! {
338         fn oneshot_impl<>(secret: &Secret, input: &[u8]) -> u64
339         []
340     }
341 }
342 
343 #[inline]
oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u64344 fn oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u64 {
345     Algorithm(vector).oneshot(secret, input, Finalize64)
346 }
347 
348 #[cfg(test)]
349 mod test {
350     use std::hash::Hasher as _;
351 
352     use crate::xxhash3::test::bytes;
353 
354     use super::*;
355 
356     const _: () = {
is_clone<T: Clone>()357         const fn is_clone<T: Clone>() {}
358         is_clone::<Hasher>();
359     };
360 
361     const EMPTY_BYTES: [u8; 0] = [];
362 
hash_byte_by_byte(input: &[u8]) -> u64363     fn hash_byte_by_byte(input: &[u8]) -> u64 {
364         let mut hasher = Hasher::new();
365         for byte in input.chunks(1) {
366             hasher.write(byte)
367         }
368         hasher.finish()
369     }
370 
hash_byte_by_byte_with_seed(seed: u64, input: &[u8]) -> u64371     fn hash_byte_by_byte_with_seed(seed: u64, input: &[u8]) -> u64 {
372         let mut hasher = Hasher::with_seed(seed);
373         for byte in input.chunks(1) {
374             hasher.write(byte)
375         }
376         hasher.finish()
377     }
378 
379     #[test]
oneshot_empty()380     fn oneshot_empty() {
381         let hash = Hasher::oneshot(&EMPTY_BYTES);
382         assert_eq!(hash, 0x2d06_8005_38d3_94c2);
383     }
384 
385     #[test]
streaming_empty()386     fn streaming_empty() {
387         let hash = hash_byte_by_byte(&EMPTY_BYTES);
388         assert_eq!(hash, 0x2d06_8005_38d3_94c2);
389     }
390 
391     #[test]
oneshot_1_to_3_bytes()392     fn oneshot_1_to_3_bytes() {
393         test_1_to_3_bytes(Hasher::oneshot)
394     }
395 
396     #[test]
streaming_1_to_3_bytes()397     fn streaming_1_to_3_bytes() {
398         test_1_to_3_bytes(hash_byte_by_byte)
399     }
400 
401     #[track_caller]
test_1_to_3_bytes(mut f: impl FnMut(&[u8]) -> u64)402     fn test_1_to_3_bytes(mut f: impl FnMut(&[u8]) -> u64) {
403         let inputs = bytes![1, 2, 3];
404 
405         let expected = [
406             0xc44b_dff4_074e_ecdb,
407             0xd664_5fc3_051a_9457,
408             0x5f42_99fc_161c_9cbb,
409         ];
410 
411         for (input, expected) in inputs.iter().zip(expected) {
412             let hash = f(input);
413             assert_eq!(hash, expected, "input was {} bytes", input.len());
414         }
415     }
416 
417     #[test]
oneshot_4_to_8_bytes()418     fn oneshot_4_to_8_bytes() {
419         test_4_to_8_bytes(Hasher::oneshot)
420     }
421 
422     #[test]
streaming_4_to_8_bytes()423     fn streaming_4_to_8_bytes() {
424         test_4_to_8_bytes(hash_byte_by_byte)
425     }
426 
427     #[track_caller]
test_4_to_8_bytes(mut f: impl FnMut(&[u8]) -> u64)428     fn test_4_to_8_bytes(mut f: impl FnMut(&[u8]) -> u64) {
429         let inputs = bytes![4, 5, 6, 7, 8];
430 
431         let expected = [
432             0x60da_b036_a582_11f2,
433             0xb075_753a_84ca_0fbe,
434             0xa658_4d1d_9a6a_e704,
435             0x0cd2_084a_6240_6b69,
436             0x3a1c_2d7c_85af_88f8,
437         ];
438 
439         for (input, expected) in inputs.iter().zip(expected) {
440             let hash = f(input);
441             assert_eq!(hash, expected, "input was {} bytes", input.len());
442         }
443     }
444 
445     #[test]
oneshot_9_to_16_bytes()446     fn oneshot_9_to_16_bytes() {
447         test_9_to_16_bytes(Hasher::oneshot)
448     }
449 
450     #[test]
streaming_9_to_16_bytes()451     fn streaming_9_to_16_bytes() {
452         test_9_to_16_bytes(hash_byte_by_byte)
453     }
454 
455     #[track_caller]
test_9_to_16_bytes(mut f: impl FnMut(&[u8]) -> u64)456     fn test_9_to_16_bytes(mut f: impl FnMut(&[u8]) -> u64) {
457         let inputs = bytes![9, 10, 11, 12, 13, 14, 15, 16];
458 
459         let expected = [
460             0xe961_2598_145b_b9dc,
461             0xab69_a08e_f83d_8f77,
462             0x1cf3_96aa_4de6_198d,
463             0x5ace_6a51_1c10_894b,
464             0xb7a5_d8a8_309a_2cb9,
465             0x4cf4_5c94_4a9a_2237,
466             0x55ec_edc2_b87b_b042,
467             0x8355_e3a6_f617_70db,
468         ];
469 
470         for (input, expected) in inputs.iter().zip(expected) {
471             let hash = f(input);
472             assert_eq!(hash, expected, "input was {} bytes", input.len());
473         }
474     }
475 
476     #[test]
oneshot_17_to_128_bytes()477     fn oneshot_17_to_128_bytes() {
478         test_17_to_128_bytes(Hasher::oneshot)
479     }
480 
481     #[test]
streaming_17_to_128_bytes()482     fn streaming_17_to_128_bytes() {
483         test_17_to_128_bytes(hash_byte_by_byte)
484     }
485 
486     #[track_caller]
test_17_to_128_bytes(mut f: impl FnMut(&[u8]) -> u64)487     fn test_17_to_128_bytes(mut f: impl FnMut(&[u8]) -> u64) {
488         let lower_boundary = bytes![17, 18, 19];
489         let chunk_boundary = bytes![31, 32, 33];
490         let upper_boundary = bytes![126, 127, 128];
491 
492         let inputs = lower_boundary
493             .iter()
494             .chain(chunk_boundary)
495             .chain(upper_boundary);
496 
497         let expected = [
498             // lower_boundary
499             0x9ef3_41a9_9de3_7328,
500             0xf691_2490_d4c0_eed5,
501             0x60e7_2614_3cf5_0312,
502             // chunk_boundary
503             0x4f36_db8e_4df3_78fd,
504             0x3523_581f_e96e_4c05,
505             0xe68c_56ba_8899_1e58,
506             // upper_boundary
507             0x6c2a_9eb7_459c_dc61,
508             0x120b_9787_f842_5f2f,
509             0x85c6_174c_7ff4_c46b,
510         ];
511 
512         for (input, expected) in inputs.zip(expected) {
513             let hash = f(input);
514             assert_eq!(hash, expected, "input was {} bytes", input.len());
515         }
516     }
517 
518     #[test]
oneshot_129_to_240_bytes()519     fn oneshot_129_to_240_bytes() {
520         test_129_to_240_bytes(Hasher::oneshot)
521     }
522 
523     #[test]
streaming_129_to_240_bytes()524     fn streaming_129_to_240_bytes() {
525         test_129_to_240_bytes(hash_byte_by_byte)
526     }
527 
528     #[track_caller]
test_129_to_240_bytes(mut f: impl FnMut(&[u8]) -> u64)529     fn test_129_to_240_bytes(mut f: impl FnMut(&[u8]) -> u64) {
530         let lower_boundary = bytes![129, 130, 131];
531         let upper_boundary = bytes![238, 239, 240];
532 
533         let inputs = lower_boundary.iter().chain(upper_boundary);
534 
535         let expected = [
536             // lower_boundary
537             0xec76_42b4_31ba_3e5a,
538             0x4d32_24b1_0090_8a87,
539             0xe57f_7ea6_741f_e3a0,
540             // upper_boundary
541             0x3044_9a0b_4899_dee9,
542             0x972b_14e3_c46f_214b,
543             0x375a_384d_957f_e865,
544         ];
545 
546         for (input, expected) in inputs.zip(expected) {
547             let hash = f(input);
548             assert_eq!(hash, expected, "input was {} bytes", input.len());
549         }
550     }
551 
552     #[test]
oneshot_241_plus_bytes()553     fn oneshot_241_plus_bytes() {
554         test_241_plus_bytes(Hasher::oneshot)
555     }
556 
557     #[test]
streaming_241_plus_bytes()558     fn streaming_241_plus_bytes() {
559         test_241_plus_bytes(hash_byte_by_byte)
560     }
561 
562     #[track_caller]
test_241_plus_bytes(mut f: impl FnMut(&[u8]) -> u64)563     fn test_241_plus_bytes(mut f: impl FnMut(&[u8]) -> u64) {
564         let inputs = bytes![241, 242, 243, 244, 1024, 10240];
565 
566         let expected = [
567             0x02e8_cd95_421c_6d02,
568             0xddcb_33c4_9405_1832,
569             0x8835_f952_9193_e3dc,
570             0xbc17_c91e_c3cf_8d7f,
571             0xe5d7_8baf_a45b_2aa5,
572             0xbcd6_3266_df6e_2244,
573         ];
574 
575         for (input, expected) in inputs.iter().zip(expected) {
576             let hash = f(input);
577             assert_eq!(hash, expected, "input was {} bytes", input.len());
578         }
579     }
580 
581     #[test]
oneshot_with_seed()582     fn oneshot_with_seed() {
583         test_with_seed(Hasher::oneshot_with_seed)
584     }
585 
586     #[test]
streaming_with_seed()587     fn streaming_with_seed() {
588         test_with_seed(hash_byte_by_byte_with_seed)
589     }
590 
591     #[track_caller]
test_with_seed(mut f: impl FnMut(u64, &[u8]) -> u64)592     fn test_with_seed(mut f: impl FnMut(u64, &[u8]) -> u64) {
593         let inputs = bytes![0, 1, 4, 9, 17, 129, 241, 1024];
594 
595         let expected = [
596             0x4aed_e683_89c0_e311,
597             0x78fc_079a_75aa_f3c0,
598             0x1b73_06b8_9f25_4507,
599             0x7df7_627f_d1f9_39b6,
600             0x49ca_0fff_0950_1622,
601             0x2bfd_caec_30ff_3000,
602             0xf984_56bc_25be_0901,
603             0x2483_9f0f_cdf4_d078,
604         ];
605 
606         for (input, expected) in inputs.iter().zip(expected) {
607             let hash = f(0xdead_cafe, input);
608             assert_eq!(hash, expected, "input was {} bytes", input.len());
609         }
610     }
611 }
612