1 //! The implementation of XXH3_64.
2
3 #![deny(
4 clippy::missing_safety_doc,
5 clippy::undocumented_unsafe_blocks,
6 unsafe_op_in_unsafe_fn
7 )]
8
9 use core::hash;
10
11 use crate::{
12 xxhash3::{primes::*, *},
13 IntoU128 as _, IntoU64 as _,
14 };
15
16 pub use crate::xxhash3::{
17 FixedBuffer, FixedMutBuffer, OneshotWithSecretError, SecretBuffer, SecretTooShortError,
18 SecretWithSeedError, DEFAULT_SECRET_LENGTH, SECRET_MINIMUM_LENGTH,
19 };
20
21 /// Calculates the 64-bit hash.
22 #[derive(Clone)]
23 pub struct Hasher {
24 #[cfg(feature = "alloc")]
25 inner: AllocRawHasher,
26 _private: (),
27 }
28
29 impl Hasher {
30 /// Hash all data at once. If you can use this function, you may
31 /// see noticable speed gains for certain types of input.
32 #[must_use]
33 #[inline]
oneshot(input: &[u8]) -> u6434 pub fn oneshot(input: &[u8]) -> u64 {
35 impl_oneshot(DEFAULT_SECRET, DEFAULT_SEED, input)
36 }
37
38 /// Hash all data at once using the provided seed and a secret
39 /// derived from the seed. If you can use this function, you may
40 /// see noticable speed gains for certain types of input.
41 #[must_use]
42 #[inline]
oneshot_with_seed(seed: u64, input: &[u8]) -> u6443 pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u64 {
44 let mut secret = DEFAULT_SECRET_RAW;
45
46 // We know that the secret will only be used if we have more
47 // than 240 bytes, so don't waste time computing it otherwise.
48 if input.len() > CUTOFF {
49 derive_secret(seed, &mut secret);
50 }
51
52 let secret = Secret::new(&secret).expect("The default secret length is invalid");
53
54 impl_oneshot(secret, seed, input)
55 }
56
57 /// Hash all data at once using the provided secret and the
58 /// default seed. If you can use this function, you may see
59 /// noticable speed gains for certain types of input.
60 #[inline]
oneshot_with_secret(secret: &[u8], input: &[u8]) -> Result<u64, OneshotWithSecretError>61 pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> Result<u64, OneshotWithSecretError> {
62 let secret = Secret::new(secret).map_err(OneshotWithSecretError)?;
63 Ok(impl_oneshot(secret, DEFAULT_SEED, input))
64 }
65
66 /// Hash all data at once using the provided seed and secret. If
67 /// you can use this function, you may see noticable speed gains
68 /// for certain types of input.
69 #[inline]
oneshot_with_seed_and_secret( seed: u64, secret: &[u8], input: &[u8], ) -> Result<u64, OneshotWithSecretError>70 pub fn oneshot_with_seed_and_secret(
71 seed: u64,
72 secret: &[u8],
73 input: &[u8],
74 ) -> Result<u64, OneshotWithSecretError> {
75 let secret = if input.len() > CUTOFF {
76 Secret::new(secret).map_err(OneshotWithSecretError)?
77 } else {
78 DEFAULT_SECRET
79 };
80
81 Ok(impl_oneshot(secret, seed, input))
82 }
83 }
84
85 #[cfg(feature = "alloc")]
86 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
87 mod with_alloc {
88 use ::alloc::boxed::Box;
89
90 use super::*;
91
92 impl Hasher {
93 /// Constructs the hasher using the default seed and secret values.
new() -> Self94 pub fn new() -> Self {
95 Self {
96 inner: RawHasherCore::allocate_default(),
97 _private: (),
98 }
99 }
100
101 /// Constructs the hasher using the provided seed and a secret
102 /// derived from the seed.
with_seed(seed: u64) -> Self103 pub fn with_seed(seed: u64) -> Self {
104 Self {
105 inner: RawHasherCore::allocate_with_seed(seed),
106 _private: (),
107 }
108 }
109
110 /// Constructs the hasher using the provided seed and secret.
with_seed_and_secret( seed: u64, secret: impl Into<Box<[u8]>>, ) -> Result<Self, SecretTooShortError<Box<[u8]>>>111 pub fn with_seed_and_secret(
112 seed: u64,
113 secret: impl Into<Box<[u8]>>,
114 ) -> Result<Self, SecretTooShortError<Box<[u8]>>> {
115 Ok(Self {
116 inner: RawHasherCore::allocate_with_seed_and_secret(seed, secret)?,
117 _private: (),
118 })
119 }
120
121 /// Returns the secret.
into_secret(self) -> Box<[u8]>122 pub fn into_secret(self) -> Box<[u8]> {
123 self.inner.into_secret()
124 }
125 }
126
127 impl Default for Hasher {
default() -> Self128 fn default() -> Self {
129 Self::new()
130 }
131 }
132
133 impl hash::Hasher for Hasher {
134 #[inline]
write(&mut self, input: &[u8])135 fn write(&mut self, input: &[u8]) {
136 self.inner.write(input)
137 }
138
139 #[inline]
finish(&self) -> u64140 fn finish(&self) -> u64 {
141 self.inner.finish(Finalize64)
142 }
143 }
144 }
145
146 #[derive(Clone)]
147 /// A lower-level interface for computing a hash from streaming data.
148 ///
149 /// The algorithm requires a secret which can be a reasonably large
150 /// piece of data. [`Hasher`][] makes one concrete implementation
151 /// decision that uses dynamic memory allocation, but specialized
152 /// usages may desire more flexibility. This type, combined with
153 /// [`SecretBuffer`][], offer that flexibility at the cost of a
154 /// generic type.
155 pub struct RawHasher<S>(RawHasherCore<S>);
156
157 impl<S> RawHasher<S> {
158 /// Construct the hasher with the provided seed, secret, and
159 /// temporary buffer.
new(secret_buffer: SecretBuffer<S>) -> Self160 pub fn new(secret_buffer: SecretBuffer<S>) -> Self {
161 Self(RawHasherCore::new(secret_buffer))
162 }
163
164 /// Returns the secret.
into_secret(self) -> S165 pub fn into_secret(self) -> S {
166 self.0.into_secret()
167 }
168 }
169
170 impl<S> hash::Hasher for RawHasher<S>
171 where
172 S: FixedBuffer,
173 {
174 #[inline]
write(&mut self, input: &[u8])175 fn write(&mut self, input: &[u8]) {
176 self.0.write(input);
177 }
178
179 #[inline]
finish(&self) -> u64180 fn finish(&self) -> u64 {
181 self.0.finish(Finalize64)
182 }
183 }
184
185 struct Finalize64;
186
187 impl Finalize for Finalize64 {
188 type Output = u64;
189
190 #[inline(always)]
small(&self, secret: &Secret, seed: u64, input: &[u8]) -> Self::Output191 fn small(&self, secret: &Secret, seed: u64, input: &[u8]) -> Self::Output {
192 impl_oneshot(secret, seed, input)
193 }
194
195 #[inline(always)]
large( &self, vector: impl Vector, acc: [u64; 8], last_block: &[u8], last_stripe: &[u8; 64], secret: &Secret, len: usize, ) -> Self::Output196 fn large(
197 &self,
198 vector: impl Vector,
199 acc: [u64; 8],
200 last_block: &[u8],
201 last_stripe: &[u8; 64],
202 secret: &Secret,
203 len: usize,
204 ) -> Self::Output {
205 Algorithm(vector).finalize_64(acc, last_block, last_stripe, secret, len)
206 }
207 }
208
209 #[inline(always)]
impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64210 fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
211 match input.len() {
212 241.. => impl_241_plus_bytes(secret, input),
213
214 129..=240 => impl_129_to_240_bytes(secret, seed, input),
215
216 17..=128 => impl_17_to_128_bytes(secret, seed, input),
217
218 9..=16 => impl_9_to_16_bytes(secret, seed, input),
219
220 4..=8 => impl_4_to_8_bytes(secret, seed, input),
221
222 1..=3 => impl_1_to_3_bytes(secret, seed, input),
223
224 0 => impl_0_bytes(secret, seed),
225 }
226 }
227
228 #[inline(always)]
impl_0_bytes(secret: &Secret, seed: u64) -> u64229 fn impl_0_bytes(secret: &Secret, seed: u64) -> u64 {
230 let secret_words = secret.for_64().words_for_0();
231 avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1])
232 }
233
234 #[inline(always)]
impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64235 fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
236 assert_input_range!(1..=3, input.len());
237 let combined = impl_1_to_3_bytes_combined(input);
238
239 let secret_words = secret.for_64().words_for_1_to_3();
240
241 let value = {
242 let secret = (secret_words[0] ^ secret_words[1]).into_u64();
243 secret.wrapping_add(seed) ^ combined.into_u64()
244 };
245
246 // FUTURE: TEST: "Note that the XXH3-64 result is the lower half of XXH3-128 result."
247 avalanche_xxh64(value)
248 }
249
250 #[inline(always)]
impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64251 fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
252 assert_input_range!(4..=8, input.len());
253 let input_first = input.first_u32().unwrap();
254 let input_last = input.last_u32().unwrap();
255
256 let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32);
257 let secret_words = secret.for_64().words_for_4_to_8();
258
259 let combined = input_last.into_u64() | (input_first.into_u64() << 32);
260
261 let mut value = {
262 let a = secret_words[0] ^ secret_words[1];
263 let b = a.wrapping_sub(modified_seed);
264 b ^ combined
265 };
266 value ^= value.rotate_left(49) ^ value.rotate_left(24);
267 value = value.wrapping_mul(PRIME_MX2);
268 value ^= (value >> 35).wrapping_add(input.len().into_u64());
269 value = value.wrapping_mul(PRIME_MX2);
270 value ^= value >> 28;
271 value
272 }
273
274 #[inline(always)]
impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64275 fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
276 assert_input_range!(9..=16, input.len());
277 let input_first = input.first_u64().unwrap();
278 let input_last = input.last_u64().unwrap();
279
280 let secret_words = secret.for_64().words_for_9_to_16();
281 let low = ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first;
282 let high = ((secret_words[2] ^ secret_words[3]).wrapping_sub(seed)) ^ input_last;
283 let mul_result = low.into_u128().wrapping_mul(high.into_u128());
284 let value = input
285 .len()
286 .into_u64()
287 .wrapping_add(low.swap_bytes())
288 .wrapping_add(high)
289 .wrapping_add(mul_result.lower_half() ^ mul_result.upper_half());
290
291 avalanche(value)
292 }
293
294 #[inline]
impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64295 fn impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
296 assert_input_range!(17..=128, input.len());
297 let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1);
298
299 impl_17_to_128_bytes_iter(secret, input, |fwd, bwd, secret| {
300 acc = acc.wrapping_add(mix_step(fwd, &secret[0], seed));
301 acc = acc.wrapping_add(mix_step(bwd, &secret[1], seed));
302 });
303
304 avalanche(acc)
305 }
306
307 #[inline]
impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64308 fn impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
309 assert_input_range!(129..=240, input.len());
310 let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1);
311
312 let (head, _) = input.bp_as_chunks();
313 let mut head = head.iter();
314
315 let ss = secret.for_64().words_for_127_to_240_part1();
316 for (chunk, secret) in head.by_ref().zip(ss).take(8) {
317 acc = acc.wrapping_add(mix_step(chunk, secret, seed));
318 }
319
320 acc = avalanche(acc);
321
322 let ss = secret.for_64().words_for_127_to_240_part2();
323 for (chunk, secret) in head.zip(ss) {
324 acc = acc.wrapping_add(mix_step(chunk, secret, seed));
325 }
326
327 let last_chunk = input.last_chunk().unwrap();
328 let ss = secret.for_64().words_for_127_to_240_part3();
329 acc = acc.wrapping_add(mix_step(last_chunk, ss, seed));
330
331 avalanche(acc)
332 }
333
334 #[inline]
impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u64335 fn impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u64 {
336 assert_input_range!(241.., input.len());
337 dispatch! {
338 fn oneshot_impl<>(secret: &Secret, input: &[u8]) -> u64
339 []
340 }
341 }
342
343 #[inline]
oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u64344 fn oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u64 {
345 Algorithm(vector).oneshot(secret, input, Finalize64)
346 }
347
348 #[cfg(test)]
349 mod test {
350 use std::hash::Hasher as _;
351
352 use crate::xxhash3::test::bytes;
353
354 use super::*;
355
356 const _: () = {
is_clone<T: Clone>()357 const fn is_clone<T: Clone>() {}
358 is_clone::<Hasher>();
359 };
360
361 const EMPTY_BYTES: [u8; 0] = [];
362
hash_byte_by_byte(input: &[u8]) -> u64363 fn hash_byte_by_byte(input: &[u8]) -> u64 {
364 let mut hasher = Hasher::new();
365 for byte in input.chunks(1) {
366 hasher.write(byte)
367 }
368 hasher.finish()
369 }
370
hash_byte_by_byte_with_seed(seed: u64, input: &[u8]) -> u64371 fn hash_byte_by_byte_with_seed(seed: u64, input: &[u8]) -> u64 {
372 let mut hasher = Hasher::with_seed(seed);
373 for byte in input.chunks(1) {
374 hasher.write(byte)
375 }
376 hasher.finish()
377 }
378
379 #[test]
oneshot_empty()380 fn oneshot_empty() {
381 let hash = Hasher::oneshot(&EMPTY_BYTES);
382 assert_eq!(hash, 0x2d06_8005_38d3_94c2);
383 }
384
385 #[test]
streaming_empty()386 fn streaming_empty() {
387 let hash = hash_byte_by_byte(&EMPTY_BYTES);
388 assert_eq!(hash, 0x2d06_8005_38d3_94c2);
389 }
390
391 #[test]
oneshot_1_to_3_bytes()392 fn oneshot_1_to_3_bytes() {
393 test_1_to_3_bytes(Hasher::oneshot)
394 }
395
396 #[test]
streaming_1_to_3_bytes()397 fn streaming_1_to_3_bytes() {
398 test_1_to_3_bytes(hash_byte_by_byte)
399 }
400
401 #[track_caller]
test_1_to_3_bytes(mut f: impl FnMut(&[u8]) -> u64)402 fn test_1_to_3_bytes(mut f: impl FnMut(&[u8]) -> u64) {
403 let inputs = bytes![1, 2, 3];
404
405 let expected = [
406 0xc44b_dff4_074e_ecdb,
407 0xd664_5fc3_051a_9457,
408 0x5f42_99fc_161c_9cbb,
409 ];
410
411 for (input, expected) in inputs.iter().zip(expected) {
412 let hash = f(input);
413 assert_eq!(hash, expected, "input was {} bytes", input.len());
414 }
415 }
416
417 #[test]
oneshot_4_to_8_bytes()418 fn oneshot_4_to_8_bytes() {
419 test_4_to_8_bytes(Hasher::oneshot)
420 }
421
422 #[test]
streaming_4_to_8_bytes()423 fn streaming_4_to_8_bytes() {
424 test_4_to_8_bytes(hash_byte_by_byte)
425 }
426
427 #[track_caller]
test_4_to_8_bytes(mut f: impl FnMut(&[u8]) -> u64)428 fn test_4_to_8_bytes(mut f: impl FnMut(&[u8]) -> u64) {
429 let inputs = bytes![4, 5, 6, 7, 8];
430
431 let expected = [
432 0x60da_b036_a582_11f2,
433 0xb075_753a_84ca_0fbe,
434 0xa658_4d1d_9a6a_e704,
435 0x0cd2_084a_6240_6b69,
436 0x3a1c_2d7c_85af_88f8,
437 ];
438
439 for (input, expected) in inputs.iter().zip(expected) {
440 let hash = f(input);
441 assert_eq!(hash, expected, "input was {} bytes", input.len());
442 }
443 }
444
445 #[test]
oneshot_9_to_16_bytes()446 fn oneshot_9_to_16_bytes() {
447 test_9_to_16_bytes(Hasher::oneshot)
448 }
449
450 #[test]
streaming_9_to_16_bytes()451 fn streaming_9_to_16_bytes() {
452 test_9_to_16_bytes(hash_byte_by_byte)
453 }
454
455 #[track_caller]
test_9_to_16_bytes(mut f: impl FnMut(&[u8]) -> u64)456 fn test_9_to_16_bytes(mut f: impl FnMut(&[u8]) -> u64) {
457 let inputs = bytes![9, 10, 11, 12, 13, 14, 15, 16];
458
459 let expected = [
460 0xe961_2598_145b_b9dc,
461 0xab69_a08e_f83d_8f77,
462 0x1cf3_96aa_4de6_198d,
463 0x5ace_6a51_1c10_894b,
464 0xb7a5_d8a8_309a_2cb9,
465 0x4cf4_5c94_4a9a_2237,
466 0x55ec_edc2_b87b_b042,
467 0x8355_e3a6_f617_70db,
468 ];
469
470 for (input, expected) in inputs.iter().zip(expected) {
471 let hash = f(input);
472 assert_eq!(hash, expected, "input was {} bytes", input.len());
473 }
474 }
475
476 #[test]
oneshot_17_to_128_bytes()477 fn oneshot_17_to_128_bytes() {
478 test_17_to_128_bytes(Hasher::oneshot)
479 }
480
481 #[test]
streaming_17_to_128_bytes()482 fn streaming_17_to_128_bytes() {
483 test_17_to_128_bytes(hash_byte_by_byte)
484 }
485
486 #[track_caller]
test_17_to_128_bytes(mut f: impl FnMut(&[u8]) -> u64)487 fn test_17_to_128_bytes(mut f: impl FnMut(&[u8]) -> u64) {
488 let lower_boundary = bytes![17, 18, 19];
489 let chunk_boundary = bytes![31, 32, 33];
490 let upper_boundary = bytes![126, 127, 128];
491
492 let inputs = lower_boundary
493 .iter()
494 .chain(chunk_boundary)
495 .chain(upper_boundary);
496
497 let expected = [
498 // lower_boundary
499 0x9ef3_41a9_9de3_7328,
500 0xf691_2490_d4c0_eed5,
501 0x60e7_2614_3cf5_0312,
502 // chunk_boundary
503 0x4f36_db8e_4df3_78fd,
504 0x3523_581f_e96e_4c05,
505 0xe68c_56ba_8899_1e58,
506 // upper_boundary
507 0x6c2a_9eb7_459c_dc61,
508 0x120b_9787_f842_5f2f,
509 0x85c6_174c_7ff4_c46b,
510 ];
511
512 for (input, expected) in inputs.zip(expected) {
513 let hash = f(input);
514 assert_eq!(hash, expected, "input was {} bytes", input.len());
515 }
516 }
517
518 #[test]
oneshot_129_to_240_bytes()519 fn oneshot_129_to_240_bytes() {
520 test_129_to_240_bytes(Hasher::oneshot)
521 }
522
523 #[test]
streaming_129_to_240_bytes()524 fn streaming_129_to_240_bytes() {
525 test_129_to_240_bytes(hash_byte_by_byte)
526 }
527
528 #[track_caller]
test_129_to_240_bytes(mut f: impl FnMut(&[u8]) -> u64)529 fn test_129_to_240_bytes(mut f: impl FnMut(&[u8]) -> u64) {
530 let lower_boundary = bytes![129, 130, 131];
531 let upper_boundary = bytes![238, 239, 240];
532
533 let inputs = lower_boundary.iter().chain(upper_boundary);
534
535 let expected = [
536 // lower_boundary
537 0xec76_42b4_31ba_3e5a,
538 0x4d32_24b1_0090_8a87,
539 0xe57f_7ea6_741f_e3a0,
540 // upper_boundary
541 0x3044_9a0b_4899_dee9,
542 0x972b_14e3_c46f_214b,
543 0x375a_384d_957f_e865,
544 ];
545
546 for (input, expected) in inputs.zip(expected) {
547 let hash = f(input);
548 assert_eq!(hash, expected, "input was {} bytes", input.len());
549 }
550 }
551
552 #[test]
oneshot_241_plus_bytes()553 fn oneshot_241_plus_bytes() {
554 test_241_plus_bytes(Hasher::oneshot)
555 }
556
557 #[test]
streaming_241_plus_bytes()558 fn streaming_241_plus_bytes() {
559 test_241_plus_bytes(hash_byte_by_byte)
560 }
561
562 #[track_caller]
test_241_plus_bytes(mut f: impl FnMut(&[u8]) -> u64)563 fn test_241_plus_bytes(mut f: impl FnMut(&[u8]) -> u64) {
564 let inputs = bytes![241, 242, 243, 244, 1024, 10240];
565
566 let expected = [
567 0x02e8_cd95_421c_6d02,
568 0xddcb_33c4_9405_1832,
569 0x8835_f952_9193_e3dc,
570 0xbc17_c91e_c3cf_8d7f,
571 0xe5d7_8baf_a45b_2aa5,
572 0xbcd6_3266_df6e_2244,
573 ];
574
575 for (input, expected) in inputs.iter().zip(expected) {
576 let hash = f(input);
577 assert_eq!(hash, expected, "input was {} bytes", input.len());
578 }
579 }
580
581 #[test]
oneshot_with_seed()582 fn oneshot_with_seed() {
583 test_with_seed(Hasher::oneshot_with_seed)
584 }
585
586 #[test]
streaming_with_seed()587 fn streaming_with_seed() {
588 test_with_seed(hash_byte_by_byte_with_seed)
589 }
590
591 #[track_caller]
test_with_seed(mut f: impl FnMut(u64, &[u8]) -> u64)592 fn test_with_seed(mut f: impl FnMut(u64, &[u8]) -> u64) {
593 let inputs = bytes![0, 1, 4, 9, 17, 129, 241, 1024];
594
595 let expected = [
596 0x4aed_e683_89c0_e311,
597 0x78fc_079a_75aa_f3c0,
598 0x1b73_06b8_9f25_4507,
599 0x7df7_627f_d1f9_39b6,
600 0x49ca_0fff_0950_1622,
601 0x2bfd_caec_30ff_3000,
602 0xf984_56bc_25be_0901,
603 0x2483_9f0f_cdf4_d078,
604 ];
605
606 for (input, expected) in inputs.iter().zip(expected) {
607 let hash = f(0xdead_cafe, input);
608 assert_eq!(hash, expected, "input was {} bytes", input.len());
609 }
610 }
611 }
612