1 /*
2    xxHash - Extremely Fast Hash algorithm
3    Header File
4    Copyright (C) 2012-2016, Yann Collet.
5 
6    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 
8    Redistribution and use in source and binary forms, with or without
9    modification, are permitted provided that the following conditions are
10    met:
11 
12        * Redistributions of source code must retain the above copyright
13    notice, this list of conditions and the following disclaimer.
14        * Redistributions in binary form must reproduce the above
15    copyright notice, this list of conditions and the following disclaimer
16    in the documentation and/or other materials provided with the
17    distribution.
18 
19    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31    You can contact the author at :
32    - xxHash source repository : https://github.com/Cyan4973/xxHash
33 */
34 
35 /* Notice extracted from xxHash homepage :
36 
37 xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
38 It also successfully passes all tests from the SMHasher suite.
39 
40 Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
41 
42 Name            Speed       Q.Score   Author
43 xxHash          5.4 GB/s     10
44 CrapWow         3.2 GB/s      2       Andrew
45 MurmurHash 3a   2.7 GB/s     10       Austin Appleby
46 SpookyHash      2.0 GB/s     10       Bob Jenkins
47 SBox            1.4 GB/s      9       Bret Mulvey
48 Lookup3         1.2 GB/s      9       Bob Jenkins
49 SuperFastHash   1.2 GB/s      1       Paul Hsieh
50 CityHash64      1.05 GB/s    10       Pike & Alakuijala
51 FNV             0.55 GB/s     5       Fowler, Noll, Vo
52 CRC32           0.43 GB/s     9
53 MD5-32          0.33 GB/s    10       Ronald L. Rivest
54 SHA1-32         0.28 GB/s    10
55 
56 Q.Score is a measure of quality of the hash function.
57 It depends on successfully passing the SMHasher test set.
58 10 is a perfect score.
59 
60 Note : SMHasher's CRC32 implementation is not the fastest one.
61 Other speed-oriented implementations can be faster,
62 especially in combination with PCLMUL instruction :
63 http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
64 
65 A 64-bit version, named XXH64, is available since r35.
66 It offers much better speed, but for 64-bit applications only.
67 Name     Speed on 64 bits    Speed on 32 bits
68 XXH64       13.8 GB/s            1.9 GB/s
69 XXH32        6.8 GB/s            6.0 GB/s
70 */
71 
72 /* Mesa leaves strict aliasing on in the compiler, and this code likes to
73  * dereference the passed in data as u32*, which means that the compiler is
74  * free to move the u32 read before the write of the struct members being
75  * hashed, and in practice it did in freedreno.  Forcing these two things
76  * prevents it.
77  */
78 #define XXH_FORCE_ALIGN_CHECK 0
79 #define XXH_FORCE_MEMORY_ACCESS 0
80 
81 #if defined (__cplusplus)
82 extern "C" {
83 #endif
84 
85 
86 #ifndef XXHASH_H_5627135585666179
87 #define XXHASH_H_5627135585666179 1
88 
89 /* ****************************
90  *  API modifier
91  ******************************/
92 /** XXH_INLINE_ALL (and XXH_PRIVATE_API)
93  *  This build macro includes xxhash functions in `static` mode
94  *  in order to inline them, and remove their symbols from the public list.
95  *  Inlining offers great performance improvement on small keys,
96  *  and dramatic ones when length is expressed as a compile-time constant.
97  *  See https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html .
98  *  Methodology :
99  *     #define XXH_INLINE_ALL
100  *     #include "xxhash.h"
101  * `xxhash.c` is automatically included.
102  *  It's not useful to compile and link it as a separate object.
103  */
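/* Illustrative sketch (not from the original header): a consumer translation unit
 * following the methodology above; the file name `my_tool.c` is hypothetical.
 *
 *     // my_tool.c
 *     #define XXH_INLINE_ALL
 *     #include "xxhash.h"        // all XXH* functions become static (inline) here
 *
 *     unsigned hash_blob(const void* p, size_t n)
 *     {
 *         return (unsigned)XXH32(p, n, 0);   // no separate xxhash object file required
 *     }
 */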
104 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
105 #  ifndef XXH_STATIC_LINKING_ONLY
106 #    define XXH_STATIC_LINKING_ONLY
107 #  endif
108 #  if defined(__GNUC__)
109 #    define XXH_PUBLIC_API static __inline __attribute__((unused))
110 #  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
111 #    define XXH_PUBLIC_API static inline
112 #  elif defined(_MSC_VER)
113 #    define XXH_PUBLIC_API static __inline
114 #  else
115      /* this version may generate warnings for unused static functions */
116 #    define XXH_PUBLIC_API static
117 #  endif
118 #else
119 #  if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
120 #    ifdef XXH_EXPORT
121 #      define XXH_PUBLIC_API __declspec(dllexport)
122 #    elif XXH_IMPORT
123 #      define XXH_PUBLIC_API __declspec(dllimport)
124 #    endif
125 #  else
126 #    define XXH_PUBLIC_API   /* do nothing */
127 #  endif
128 #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
129 
130 /*! XXH_NAMESPACE, aka Namespace Emulation :
131  *
132  * If you want to include _and expose_ xxHash functions from within your own library,
133  * but also want to avoid symbol collisions with other libraries which may also include xxHash,
134  *
135  * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
136  * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
137  *
138  * Note that no change is required within the calling program as long as it includes `xxhash.h` :
139  *  regular symbol names will be automatically translated by this header.
140  */
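/* Illustrative sketch (not from the original header): a library built with
 * `-DXXH_NAMESPACE=mylib_` (the `mylib_` prefix is a hypothetical example) exports
 * `mylib_XXH32`, `mylib_XXH64`, etc., while its sources keep using the regular names:
 *
 *     // compiled with: cc -DXXH_NAMESPACE=mylib_ -c hash_util.c
 *     #include "xxhash.h"
 *     XXH64_hash_t checksum(const void* p, size_t n)
 *     {
 *         return XXH64(p, n, 0);   // the macro above rewrites this to mylib_XXH64(p, n, 0)
 *     }
 */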
141 #ifdef XXH_NAMESPACE
142 #  define XXH_CAT(A,B) A##B
143 #  define XXH_NAME2(A,B) XXH_CAT(A,B)
144 #  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
145 #  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
146 #  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
147 #  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
148 #  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
149 #  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
150 #  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
151 #  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
152 #  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
153 #  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
154 #  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
155 #  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
156 #  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
157 #  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
158 #  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
159 #  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
160 #  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
161 #  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
162 #  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
163 #endif
164 
165 
166 /* *************************************
167 *  Version
168 ***************************************/
169 #define XXH_VERSION_MAJOR    0
170 #define XXH_VERSION_MINOR    7
171 #define XXH_VERSION_RELEASE  2
172 #define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
173 XXH_PUBLIC_API unsigned XXH_versionNumber (void);
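/* Illustrative note (not from the original header): with the values above,
 * XXH_VERSION_NUMBER = 0*100*100 + 7*100 + 2 = 702 for version 0.7.2, so requiring
 * at least this release at runtime can be written as:
 *
 *     if (XXH_versionNumber() < 702) { handle_too_old(); }   // handle_too_old() is hypothetical
 */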
174 
175 
176 /* ****************************
177 *  Definitions
178 ******************************/
179 #include <stddef.h>   /* size_t */
180 typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
181 
182 
183 /*-**********************************************************************
184 *  32-bit hash
185 ************************************************************************/
186 #if !defined (__VMS) \
187   && (defined (__cplusplus) \
188   || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
189 #   include <stdint.h>
190     typedef uint32_t XXH32_hash_t;
191 #else
192 #   include <limits.h>
193 #   if UINT_MAX == 0xFFFFFFFFUL
194       typedef unsigned int XXH32_hash_t;
195 #   else
196 #     if ULONG_MAX == 0xFFFFFFFFUL
197         typedef unsigned long XXH32_hash_t;
198 #     else
199 #       error "unsupported platform : need a 32-bit type"
200 #     endif
201 #   endif
202 #endif
203 
204 /*! XXH32() :
205     Calculate the 32-bit hash of the sequence of "length" bytes stored at memory address "input".
206     The memory between input & input+length must be valid (allocated and read-accessible).
207     "seed" can be used to alter the result predictably.
208     Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
209 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
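/* Illustrative sketch (not from the original header): hashing a small buffer in one call;
 * the buffer contents and the seed value 0 are arbitrary examples.
 *
 *     const char msg[] = "hello world";
 *     XXH32_hash_t const h = XXH32(msg, sizeof(msg) - 1, 0);   // excludes the trailing '\0'
 */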
210 
211 /*******   Streaming   *******/
212 
213 /*
214  * Streaming functions generate the xxHash value from an incremental input.
215  * This method is slower than single-call functions, due to state management.
216  * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
217  *
218  * XXH state must first be allocated, using XXH*_createState() .
219  *
220  * Start a new hash by initializing state with a seed, using XXH*_reset().
221  *
222  * Then, feed the hash state by calling XXH*_update() as many times as necessary.
223  * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
224  *
225  * Finally, a hash value can be produced anytime, by using XXH*_digest().
226  * This function returns the nn-bit hash value (an XXH32_hash_t or XXH64_hash_t).
227  *
228  * It's still possible to continue inserting input into the hash state after a digest,
229  * and to generate new hash values later on, by calling XXH*_digest() again.
230  *
231  * When done, release the state, using XXH*_freeState().
232  */
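/* Illustrative sketch (not from the original header): hashing data chunk by chunk,
 * following the sequence described above; `read_chunk()` is a hypothetical data source
 * and error handling is kept minimal.
 *
 *     char buf[4096]; size_t n;
 *     XXH32_state_t* const st = XXH32_createState();
 *     if (st == NULL) abort();
 *     XXH32_reset(st, 0);                          // start a new hash with seed 0
 *     while ((n = read_chunk(buf, sizeof(buf))) > 0)
 *         XXH32_update(st, buf, n);                // feed input incrementally
 *     XXH32_hash_t const h = XXH32_digest(st);     // produce the hash; state remains usable
 *     XXH32_freeState(st);
 */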
233 
234 typedef struct XXH32_state_s XXH32_state_t;   /* incomplete type */
235 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
236 XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
237 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
238 
239 XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, XXH32_hash_t seed);
240 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
241 XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
242 
243 /*******   Canonical representation   *******/
244 
245 /* Default return values from XXH functions are basic unsigned 32 and 64 bits.
246  * This is the simplest and fastest format for further post-processing.
247  * However, this leaves open the question of what is the order of bytes,
248  * since little and big endian conventions will write the same number differently.
249  *
250  * The canonical representation settles this issue,
251  * by mandating big-endian convention,
252  * aka, the same convention as human-readable numbers (large digits first).
253  * When writing hash values to storage, sending them over a network, or printing them,
254  * it's highly recommended to use the canonical representation,
255  * to ensure portability across a wider range of systems, present and future.
256  *
257  * The following functions allow transformation of hash values into and from canonical format.
258  */
259 
260 typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
261 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
262 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
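/* Illustrative sketch (not from the original header): round-tripping a hash value
 * through its canonical (big-endian) form, e.g. before writing it to a file or socket;
 * `h` is assumed to be a previously computed XXH32_hash_t.
 *
 *     XXH32_canonical_t canon;
 *     XXH32_canonicalFromHash(&canon, h);          // canon.digest[] is now in portable byte order
 *     XXH32_hash_t const h2 = XXH32_hashFromCanonical(&canon);   // h2 == h on any platform
 */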
263 
264 
265 #ifndef XXH_NO_LONG_LONG
266 /*-**********************************************************************
267 *  64-bit hash
268 ************************************************************************/
269 #if !defined (__VMS) \
270   && (defined (__cplusplus) \
271   || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
272 #   include <stdint.h>
273     typedef uint64_t XXH64_hash_t;
274 #else
275     /* the following type must have a width of 64-bit */
276     typedef unsigned long long XXH64_hash_t;
277 #endif
278 
279 /*! XXH64() :
280  *  Returns the 64-bit hash of the sequence of @length bytes stored at memory address @input.
281  *  @seed can be used to alter the result predictably.
282  *  This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
283  */
284 XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, XXH64_hash_t seed);
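/* Illustrative sketch (not from the original header): the 64-bit variant is invoked the
 * same way as XXH32(); `data` and `size` below are assumed to be provided by the caller,
 * and the seed is an arbitrary example value.
 *
 *     XXH64_hash_t const h64 = XXH64(data, size, 12345);
 */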
285 
286 /*******   Streaming   *******/
287 typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
288 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
289 XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
290 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
291 
292 XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, XXH64_hash_t seed);
293 XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
294 XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
295 
296 /*******   Canonical representation   *******/
297 typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
298 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
299 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
300 
301 
302 #endif  /* XXH_NO_LONG_LONG */
303 
304 #endif /* XXHASH_H_5627135585666179 */
305 
306 
307 
308 #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
309 #define XXHASH_H_STATIC_13879238742
310 /* ************************************************************************************************
311    This section contains declarations which are not guaranteed to remain stable.
312    They may change in future versions, becoming incompatible with a different version of the library.
313    These declarations should only be used with static linking.
314    Never use them in association with dynamic linking !
315 *************************************************************************************************** */
316 
317 /* These definitions are only present to allow
318  * static allocation of XXH state, on stack or in a struct for example.
319  * Never **ever** use members directly. */
320 
321 struct XXH32_state_s {
322    XXH32_hash_t total_len_32;
323    XXH32_hash_t large_len;
324    XXH32_hash_t v1;
325    XXH32_hash_t v2;
326    XXH32_hash_t v3;
327    XXH32_hash_t v4;
328    XXH32_hash_t mem32[4];
329    XXH32_hash_t memsize;
330    XXH32_hash_t reserved;   /* never read nor write, might be removed in a future version */
331 };   /* typedef'd to XXH32_state_t */
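/* Illustrative sketch (not from the original header): with the structure definition above
 * visible (static linking only), the state can live on the stack instead of being
 * heap-allocated; members are still only manipulated through the XXH32_* API.
 * `buf` and `len` are assumed to be provided by the caller.
 *
 *     XXH32_state_t st;                    // stack allocation, no XXH32_createState()
 *     XXH32_reset(&st, 0);
 *     XXH32_update(&st, buf, len);
 *     XXH32_hash_t const h = XXH32_digest(&st);
 */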
332 
333 
334 #ifndef XXH_NO_LONG_LONG  /* defined when there is no 64-bit support */
335 
336 struct XXH64_state_s {
337    XXH64_hash_t total_len;
338    XXH64_hash_t v1;
339    XXH64_hash_t v2;
340    XXH64_hash_t v3;
341    XXH64_hash_t v4;
342    XXH64_hash_t mem64[4];
343    XXH32_hash_t memsize;
344    XXH32_hash_t reserved32;  /* required for padding anyway */
345    XXH64_hash_t reserved64;  /* never read nor write, might be removed in a future version */
346 };   /* typedef'd to XXH64_state_t */
347 
348 #endif  /* XXH_NO_LONG_LONG */
349 
350 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
351 #  define XXH_IMPLEMENTATION
352 #endif
353 
354 #endif  /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
355 
356 
357 
358 /*-**********************************************************************
359 *  xxHash implementation
360 *  The function implementations used to be hosted within xxhash.c .
361 *  However, code inlining requires placing the implementation in the header file.
362 *  As a consequence, xxhash.c used to be included within xxhash.h .
363 *  But some build systems don't like *.c inclusions.
364 *  So the implementation is now integrated directly within xxhash.h .
365 *  Another small advantage is that xxhash.c is no longer required in /includes .
366 ************************************************************************/
367 
368 #if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
369    || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
370 #  define XXH_IMPLEM_13a8737387
371 
372 /* *************************************
373 *  Tuning parameters
374 ***************************************/
375 /*!XXH_FORCE_MEMORY_ACCESS :
376  * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
377  * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
378  * The switch below allows selecting a different access method for improved performance.
379  * Method 0 (default) : use `memcpy()`. Safe and portable.
380  * Method 1 : `__packed` statement. It relies on a compiler extension (i.e., not portable).
381  *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
382  * Method 2 : direct access. This method doesn't depend on the compiler but violates the C standard.
383  *            It can generate buggy code on targets which do not support unaligned memory accesses.
384  *            But in some circumstances, it's the only known way to get the most performance (i.e., GCC + ARMv6).
385  * See http://stackoverflow.com/a/32095106/646947 for details.
386  * Prefer these methods in priority order (0 > 1 > 2)
387  */
388 #ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
389 #  if !defined(__clang__) && defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM_ARCH) && (__ARM_ARCH == 6)
390 #    define XXH_FORCE_MEMORY_ACCESS 2
391 #  elif !defined(__clang__) && ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
392   (defined(__GNUC__) && (defined(__ARM_ARCH) && __ARM_ARCH >= 7)))
393 #    define XXH_FORCE_MEMORY_ACCESS 1
394 #  endif
395 #endif
396 
397 /*!XXH_ACCEPT_NULL_INPUT_POINTER :
398  * If the input pointer is NULL, xxHash's default behavior is to dereference it, triggering a segfault.
399  * When this macro is enabled, xxHash actively checks the input for a null pointer.
400  * If it is NULL, the result is the same as for a zero-length input.
401  */
402 #ifndef XXH_ACCEPT_NULL_INPUT_POINTER   /* can be defined externally */
403 #  define XXH_ACCEPT_NULL_INPUT_POINTER 0
404 #endif
405 
406 /*!XXH_FORCE_ALIGN_CHECK :
407  * This is a minor performance trick, only useful with lots of very small keys.
408  * It means : check for aligned/unaligned input.
409  * The check costs one initial branch per hash;
410  * set it to 0 when the input is guaranteed to be aligned,
411  * or when alignment doesn't matter for performance.
412  */
413 #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
414 #  if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
415 #    define XXH_FORCE_ALIGN_CHECK 0
416 #  else
417 #    define XXH_FORCE_ALIGN_CHECK 1
418 #  endif
419 #endif
420 
421 /*!XXH_REROLL:
422  * Whether to reroll XXH32_finalize, and XXH64_finalize,
423  * instead of using an unrolled jump table/if statement loop.
424  *
425  * This is automatically defined on -Os/-Oz on GCC and Clang. */
426 #ifndef XXH_REROLL
427 #  if defined(__OPTIMIZE_SIZE__)
428 #    define XXH_REROLL 1
429 #  else
430 #    define XXH_REROLL 0
431 #  endif
432 #endif
433 
434 
435 /* *************************************
436 *  Includes & Memory related functions
437 ***************************************/
438 /*! Modify the local functions below should you wish to use some other memory routines
439 *   for malloc(), free() */
440 #include <stdlib.h>
441 static void* XXH_malloc(size_t s) { return malloc(s); }
442 static void  XXH_free  (void* p)  { free(p); }
443 /*! and for memcpy() */
444 #include <string.h>
445 static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
446 
447 #include <limits.h>   /* ULLONG_MAX */
448 
449 
450 /* *************************************
451 *  Compiler Specific Options
452 ***************************************/
453 #ifdef _MSC_VER    /* Visual Studio */
454 #  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
455 #  define XXH_FORCE_INLINE static __forceinline
456 #  define XXH_NO_INLINE static __declspec(noinline)
457 #else
458 #  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
459 #    ifdef __GNUC__
460 #      define XXH_FORCE_INLINE static inline __attribute__((always_inline))
461 #      define XXH_NO_INLINE static __attribute__((noinline))
462 #    else
463 #      define XXH_FORCE_INLINE static inline
464 #      define XXH_NO_INLINE static
465 #    endif
466 #  else
467 #    define XXH_FORCE_INLINE static
468 #    define XXH_NO_INLINE static
469 #  endif /* __STDC_VERSION__ */
470 #endif
471 
472 
473 
474 /* *************************************
475 *  Debug
476 ***************************************/
477 /* DEBUGLEVEL is expected to be defined externally,
478  * typically through compiler command line.
479  * Value must be a number. */
480 #ifndef DEBUGLEVEL
481 #  define DEBUGLEVEL 0
482 #endif
483 
484 #if (DEBUGLEVEL>=1)
485 #  include <assert.h>   /* note : can still be disabled with NDEBUG */
486 #  define XXH_ASSERT(c)   assert(c)
487 #else
488 #  define XXH_ASSERT(c)   ((void)0)
489 #endif
490 
491 /* note : use after variable declarations */
492 #define XXH_STATIC_ASSERT(c)  { enum { XXH_sa = 1/(int)(!!(c)) }; }
493 
494 
495 /* *************************************
496 *  Basic Types
497 ***************************************/
498 #if !defined (__VMS) \
499  && (defined (__cplusplus) \
500  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
501 # include <stdint.h>
502   typedef uint8_t  xxh_u8;
503 #else
504   typedef unsigned char      xxh_u8;
505 #endif
506 typedef XXH32_hash_t xxh_u32;
507 
508 
509 /* ***   Memory access   *** */
510 
511 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
512 
513 /* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
514 static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
515 
516 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
517 
518 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
519 /* currently only defined for gcc and icc */
520 typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
521 static xxh_u32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
522 
523 #else
524 
525 /* portable and safe solution. Generally efficient.
526  * see : http://stackoverflow.com/a/32095106/646947
527  */
528 static xxh_u32 XXH_read32(const void* memPtr)
529 {
530     xxh_u32 val;
531     memcpy(&val, memPtr, sizeof(val));
532     return val;
533 }
534 
535 #endif   /* XXH_FORCE_MEMORY_ACCESS */
536 
537 
538 /* ***   Endianness   *** */
539 typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
540 
541 /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
542 #ifndef XXH_CPU_LITTLE_ENDIAN
543 #  if defined(_WIN32) /* Windows is always little endian */ \
544      || defined(__LITTLE_ENDIAN__) \
545      || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
546 #    define XXH_CPU_LITTLE_ENDIAN 1
547 #  elif defined(__BIG_ENDIAN__) \
548      || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
549 #    define XXH_CPU_LITTLE_ENDIAN 0
550 #  else
551 static int XXH_isLittleEndian(void)
552 {
553     const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
554     return one.c[0];
555 }
556 #   define XXH_CPU_LITTLE_ENDIAN   XXH_isLittleEndian()
557 #  endif
558 #endif
559 
560 
561 
562 
563 /* ****************************************
564 *  Compiler-specific Functions and Macros
565 ******************************************/
566 #define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
567 
568 #ifndef __has_builtin
569 #  define __has_builtin(x) 0
570 #endif
571 
572 #if !defined(NO_CLANG_BUILTIN) && __has_builtin(__builtin_rotateleft32) && __has_builtin(__builtin_rotateleft64)
573 #  define XXH_rotl32 __builtin_rotateleft32
574 #  define XXH_rotl64 __builtin_rotateleft64
575 /* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
576 #elif defined(_MSC_VER)
577 #  define XXH_rotl32(x,r) _rotl(x,r)
578 #  define XXH_rotl64(x,r) _rotl64(x,r)
579 #else
580 #  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
581 #  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
582 #endif
583 
584 #if defined(_MSC_VER)     /* Visual Studio */
585 #  define XXH_swap32 _byteswap_ulong
586 #elif XXH_GCC_VERSION >= 403
587 #  define XXH_swap32 __builtin_bswap32
588 #else
589 static xxh_u32 XXH_swap32 (xxh_u32 x)
590 {
591     return  ((x << 24) & 0xff000000 ) |
592             ((x <<  8) & 0x00ff0000 ) |
593             ((x >>  8) & 0x0000ff00 ) |
594             ((x >> 24) & 0x000000ff );
595 }
596 #endif
597 
598 
599 /* ***************************
600 *  Memory reads
601 *****************************/
602 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
603 
604 XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
605 {
606     return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
607 }
608 
609 static xxh_u32 XXH_readBE32(const void* ptr)
610 {
611     return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
612 }
613 
614 XXH_FORCE_INLINE xxh_u32
615 XXH_readLE32_align(const void* ptr, XXH_alignment align)
616 {
617     if (align==XXH_unaligned) {
618         return XXH_readLE32(ptr);
619     } else {
620         return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
621     }
622 }
623 
624 
625 /* *************************************
626 *  Misc
627 ***************************************/
628 XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
629 
630 
631 /* *******************************************************************
632 *  32-bit hash functions
633 *********************************************************************/
634 static const xxh_u32 PRIME32_1 = 0x9E3779B1U;   /* 0b10011110001101110111100110110001 */
635 static const xxh_u32 PRIME32_2 = 0x85EBCA77U;   /* 0b10000101111010111100101001110111 */
636 static const xxh_u32 PRIME32_3 = 0xC2B2AE3DU;   /* 0b11000010101100101010111000111101 */
637 static const xxh_u32 PRIME32_4 = 0x27D4EB2FU;   /* 0b00100111110101001110101100101111 */
638 static const xxh_u32 PRIME32_5 = 0x165667B1U;   /* 0b00010110010101100110011110110001 */
639 
640 static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
641 {
642     acc += input * PRIME32_2;
643     acc  = XXH_rotl32(acc, 13);
644     acc *= PRIME32_1;
645 #if defined(__GNUC__) && defined(__SSE4_1__) && !defined(XXH_ENABLE_AUTOVECTORIZE)
646     /* UGLY HACK:
647      * This inline assembly hack forces acc into a normal register. This is the
648      * only thing that prevents GCC and Clang from autovectorizing the XXH32 loop
649      * (pragmas and attributes don't work for some reason) without globally
650      * disabling SSE4.1.
651      *
652      * The reason we want to avoid vectorization is because despite working on
653      * 4 integers at a time, there are multiple factors slowing XXH32 down on
654      * SSE4:
655      * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on newer chips!)
656      *   making it slightly slower to multiply four integers at once compared to four
657      *   integers independently. Even when pmulld was fastest, Sandy/Ivy Bridge, it is
658      *   still not worth it to go into SSE just to multiply unless doing a long operation.
659      *
660      * - Four instructions are required to rotate,
661      *      movqda tmp,  v // not required with VEX encoding
662      *      pslld  tmp, 13 // tmp <<= 13
663      *      psrld  v,   19 // x >>= 19
664      *      por    v,  tmp // x |= tmp
665      *   compared to one for scalar:
666      *      roll   v, 13    // reliably fast across the board
667      *      shldl  v, v, 13 // Sandy Bridge and later prefer this for some reason
668      *
669      * - Instruction level parallelism is actually more beneficial here because the
670      *   SIMD actually serializes this operation: While v1 is rotating, v2 can load data,
671      *   while v3 can multiply. SSE forces them to operate together.
672      *
673      * How this hack works:
674      * __asm__(""       // Declare an assembly block but don't declare any instructions
675      *          :       // However, as an Input/Output Operand,
676      *          "+r"    // constrain a read/write operand (+) as a general purpose register (r).
677      *          (acc)   // and set acc as the operand
678      * );
679      *
680      * Because of the 'r', the compiler has promised that acc will be in a
681      * general purpose register and the '+' says that it will be 'read/write',
682      * so it has to assume it has changed. It is like volatile without all the
683      * loads and stores.
684      *
685      * Since the argument has to be in a normal register (not an SSE register),
686      * each time XXH32_round is called, it is impossible to vectorize. */
687     __asm__("" : "+r" (acc));
688 #endif
689     return acc;
690 }
691 
692 /* mix all bits */
693 static xxh_u32 XXH32_avalanche(xxh_u32 h32)
694 {
695     h32 ^= h32 >> 15;
696     h32 *= PRIME32_2;
697     h32 ^= h32 >> 13;
698     h32 *= PRIME32_3;
699     h32 ^= h32 >> 16;
700     return(h32);
701 }
702 
703 #define XXH_get32bits(p) XXH_readLE32_align(p, align)
704 
705 static xxh_u32
706 XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
707 {
708 #define PROCESS1               \
709     h32 += (*ptr++) * PRIME32_5; \
710     h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
711 
712 #define PROCESS4                         \
713     h32 += XXH_get32bits(ptr) * PRIME32_3; \
714     ptr+=4;                                \
715     h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
716 
717     /* Compact rerolled version */
718     if (XXH_REROLL) {
719         len &= 15;
720         while (len >= 4) {
721             PROCESS4;
722             len -= 4;
723         }
724         while (len > 0) {
725             PROCESS1;
726             --len;
727         }
728         return XXH32_avalanche(h32);
729     } else {
730          switch(len&15) /* or switch(bEnd - p) */ {
731            case 12:      PROCESS4;
732                          /* fallthrough */
733            case 8:       PROCESS4;
734                          /* fallthrough */
735            case 4:       PROCESS4;
736                          return XXH32_avalanche(h32);
737 
738            case 13:      PROCESS4;
739                          /* fallthrough */
740            case 9:       PROCESS4;
741                          /* fallthrough */
742            case 5:       PROCESS4;
743                          PROCESS1;
744                          return XXH32_avalanche(h32);
745 
746            case 14:      PROCESS4;
747                          /* fallthrough */
748            case 10:      PROCESS4;
749                          /* fallthrough */
750            case 6:       PROCESS4;
751                          PROCESS1;
752                          PROCESS1;
753                          return XXH32_avalanche(h32);
754 
755            case 15:      PROCESS4;
756                          /* fallthrough */
757            case 11:      PROCESS4;
758                          /* fallthrough */
759            case 7:       PROCESS4;
760                          /* fallthrough */
761            case 3:       PROCESS1;
762                          /* fallthrough */
763            case 2:       PROCESS1;
764                          /* fallthrough */
765            case 1:       PROCESS1;
766                          /* fallthrough */
767            case 0:       return XXH32_avalanche(h32);
768         }
769         XXH_ASSERT(0);
770         return h32;   /* reaching this point is deemed impossible */
771     }
772 }
773 
774 XXH_FORCE_INLINE xxh_u32
775 XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
776 {
777     const xxh_u8* bEnd = input + len;
778     xxh_u32 h32;
779 
780 #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
781     if (input==NULL) {
782         len=0;
783         bEnd=input=(const xxh_u8*)(size_t)16;
784     }
785 #endif
786 
787     if (len>=16) {
788         const xxh_u8* const limit = bEnd - 15;
789         xxh_u32 v1 = seed + PRIME32_1 + PRIME32_2;
790         xxh_u32 v2 = seed + PRIME32_2;
791         xxh_u32 v3 = seed + 0;
792         xxh_u32 v4 = seed - PRIME32_1;
793 
794         do {
795             v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;
796             v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;
797             v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;
798             v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;
799         } while (input < limit);
800 
801         h32 = XXH_rotl32(v1, 1)  + XXH_rotl32(v2, 7)
802             + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
803     } else {
804         h32  = seed + PRIME32_5;
805     }
806 
807     h32 += (xxh_u32)len;
808 
809     return XXH32_finalize(h32, input, len&15, align);
810 }
811 
812 
813 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
814 {
815 #if 0
816     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
817     XXH32_state_t state;
818     XXH32_reset(&state, seed);
819     XXH32_update(&state, (const xxh_u8*)input, len);
820     return XXH32_digest(&state);
821 
822 #else
823 
824     if (XXH_FORCE_ALIGN_CHECK) {
825         if ((((size_t)input) & 3) == 0) {   /* Input is 4-bytes aligned, leverage the speed benefit */
826             return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
827     }   }
828 
829     return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
830 #endif
831 }
832 
833 
834 
835 /*******   Hash streaming   *******/
836 
837 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
838 {
839     return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
840 }
841 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
842 {
843     XXH_free(statePtr);
844     return XXH_OK;
845 }
846 
847 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
848 {
849     memcpy(dstState, srcState, sizeof(*dstState));
850 }
851 
852 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
853 {
854     XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
855     memset(&state, 0, sizeof(state));
856     state.v1 = seed + PRIME32_1 + PRIME32_2;
857     state.v2 = seed + PRIME32_2;
858     state.v3 = seed + 0;
859     state.v4 = seed - PRIME32_1;
860     /* do not write into reserved, planned to be removed in a future version */
861     memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
862     return XXH_OK;
863 }
864 
865 
866 XXH_PUBLIC_API XXH_errorcode
867 XXH32_update(XXH32_state_t* state, const void* input, size_t len)
868 {
869     if (input==NULL)
870 #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
871         return XXH_OK;
872 #else
873         return XXH_ERROR;
874 #endif
875 
876     {   const xxh_u8* p = (const xxh_u8*)input;
877         const xxh_u8* const bEnd = p + len;
878 
879         state->total_len_32 += (XXH32_hash_t)len;
880         state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
881 
882         if (state->memsize + len < 16)  {   /* fill in tmp buffer */
883             XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
884             state->memsize += (XXH32_hash_t)len;
885             return XXH_OK;
886         }
887 
888         if (state->memsize) {   /* some data left from previous update */
889             XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
890             {   const xxh_u32* p32 = state->mem32;
891                 state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++;
892                 state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++;
893                 state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++;
894                 state->v4 = XXH32_round(state->v4, XXH_readLE32(p32));
895             }
896             p += 16-state->memsize;
897             state->memsize = 0;
898         }
899 
900         if (p <= bEnd-16) {
901             const xxh_u8* const limit = bEnd - 16;
902             xxh_u32 v1 = state->v1;
903             xxh_u32 v2 = state->v2;
904             xxh_u32 v3 = state->v3;
905             xxh_u32 v4 = state->v4;
906 
907             do {
908                 v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4;
909                 v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4;
910                 v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4;
911                 v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4;
912             } while (p<=limit);
913 
914             state->v1 = v1;
915             state->v2 = v2;
916             state->v3 = v3;
917             state->v4 = v4;
918         }
919 
920         if (p < bEnd) {
921             XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
922             state->memsize = (unsigned)(bEnd-p);
923         }
924     }
925 
926     return XXH_OK;
927 }
928 
929 
930 XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* state)
931 {
932     xxh_u32 h32;
933 
934     if (state->large_len) {
935         h32 = XXH_rotl32(state->v1, 1)
936             + XXH_rotl32(state->v2, 7)
937             + XXH_rotl32(state->v3, 12)
938             + XXH_rotl32(state->v4, 18);
939     } else {
940         h32 = state->v3 /* == seed */ + PRIME32_5;
941     }
942 
943     h32 += state->total_len_32;
944 
945     return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
946 }
947 
948 
949 /*******   Canonical representation   *******/
950 
951 /*! Default XXH result types are basic unsigned 32 and 64 bits.
952 *   The canonical representation follows human-readable write convention, aka big-endian (large digits first).
953 *   These functions allow transformation of hash result into and from its canonical format.
954 *   This way, hash values can be written into a file or buffer, remaining comparable across different systems.
955 */
956 
957 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
958 {
959     XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
960     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
961     memcpy(dst, &hash, sizeof(*dst));
962 }
963 
964 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
965 {
966     return XXH_readBE32(src);
967 }
968 
969 
970 #ifndef XXH_NO_LONG_LONG
971 
972 /* *******************************************************************
973 *  64-bit hash functions
974 *********************************************************************/
975 
976 /*******   Memory access   *******/
977 
978 typedef XXH64_hash_t xxh_u64;
979 
980 
981 /*! XXH_REROLL_XXH64:
982  * Whether to reroll the XXH64_finalize() loop.
983  *
984  * Just like XXH32, we can unroll the XXH64_finalize() loop. This can be a performance gain
985  * on 64-bit hosts, as only one jump is required.
986  *
987  * However, on 32-bit hosts, because arithmetic needs to be done with two 32-bit registers,
988  * and 64-bit arithmetic needs to be simulated, it isn't beneficial to unroll. The code becomes
989  * ridiculously large (the largest function in the binary on i386!), and rerolling it saves
990  * anywhere from 3kB to 20kB. It is also slightly faster because it fits into cache better
991  * and is more likely to be inlined by the compiler.
992  *
993  * If XXH_REROLL is defined, this is ignored and the loop is always rerolled. */
994 #ifndef XXH_REROLL_XXH64
995 #  if (defined(__ILP32__) || defined(_ILP32)) /* ILP32 is often defined on 32-bit GCC family */ \
996    || !(defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) /* x86-64 */ \
997      || defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) /* aarch64 */ \
998      || defined(__PPC64__) || defined(__PPC64LE__) || defined(__ppc64__) || defined(__powerpc64__) /* ppc64 */ \
999      || defined(__mips64__) || defined(__mips64)) /* mips64 */ \
1000    || (!defined(SIZE_MAX) || SIZE_MAX < ULLONG_MAX) /* check limits */
1001 #    define XXH_REROLL_XXH64 1
1002 #  else
1003 #    define XXH_REROLL_XXH64 0
1004 #  endif
1005 #endif /* !defined(XXH_REROLL_XXH64) */
1006 
1007 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
1008 
1009 /* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
1010 static xxh_u64 XXH_read64(const void* memPtr) { return *(const xxh_u64*) memPtr; }
1011 
1012 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
1013 
1014 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
1015 /* currently only defined for gcc and icc */
1016 typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
1017 static xxh_u64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; }
1018 
1019 #else
1020 
1021 /* portable and safe solution. Generally efficient.
1022  * see : http://stackoverflow.com/a/32095106/646947
1023  */
1024 
1025 static xxh_u64 XXH_read64(const void* memPtr)
1026 {
1027     xxh_u64 val;
1028     memcpy(&val, memPtr, sizeof(val));
1029     return val;
1030 }
1031 
1032 #endif   /* XXH_FORCE_MEMORY_ACCESS */
1033 
1034 #if defined(_MSC_VER)     /* Visual Studio */
1035 #  define XXH_swap64 _byteswap_uint64
1036 #elif XXH_GCC_VERSION >= 403
1037 #  define XXH_swap64 __builtin_bswap64
1038 #else
1039 static xxh_u64 XXH_swap64 (xxh_u64 x)
1040 {
1041     return  ((x << 56) & 0xff00000000000000ULL) |
1042             ((x << 40) & 0x00ff000000000000ULL) |
1043             ((x << 24) & 0x0000ff0000000000ULL) |
1044             ((x << 8)  & 0x000000ff00000000ULL) |
1045             ((x >> 8)  & 0x00000000ff000000ULL) |
1046             ((x >> 24) & 0x0000000000ff0000ULL) |
1047             ((x >> 40) & 0x000000000000ff00ULL) |
1048             ((x >> 56) & 0x00000000000000ffULL);
1049 }
1050 #endif
1051 
1052 XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
1053 {
1054     return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
1055 }
1056 
1057 static xxh_u64 XXH_readBE64(const void* ptr)
1058 {
1059     return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
1060 }
1061 
1062 XXH_FORCE_INLINE xxh_u64
1063 XXH_readLE64_align(const void* ptr, XXH_alignment align)
1064 {
1065     if (align==XXH_unaligned)
1066         return XXH_readLE64(ptr);
1067     else
1068         return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
1069 }
1070 
1071 
1072 /*******   xxh64   *******/
1073 
1074 static const xxh_u64 PRIME64_1 = 0x9E3779B185EBCA87ULL;   /* 0b1001111000110111011110011011000110000101111010111100101010000111 */
1075 static const xxh_u64 PRIME64_2 = 0xC2B2AE3D27D4EB4FULL;   /* 0b1100001010110010101011100011110100100111110101001110101101001111 */
1076 static const xxh_u64 PRIME64_3 = 0x165667B19E3779F9ULL;   /* 0b0001011001010110011001111011000110011110001101110111100111111001 */
1077 static const xxh_u64 PRIME64_4 = 0x85EBCA77C2B2AE63ULL;   /* 0b1000010111101011110010100111011111000010101100101010111001100011 */
1078 static const xxh_u64 PRIME64_5 = 0x27D4EB2F165667C5ULL;   /* 0b0010011111010100111010110010111100010110010101100110011111000101 */
1079 
1080 static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
1081 {
1082     acc += input * PRIME64_2;
1083     acc  = XXH_rotl64(acc, 31);
1084     acc *= PRIME64_1;
1085     return acc;
1086 }
1087 
1088 static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
1089 {
1090     val  = XXH64_round(0, val);
1091     acc ^= val;
1092     acc  = acc * PRIME64_1 + PRIME64_4;
1093     return acc;
1094 }
1095 
1096 static xxh_u64 XXH64_avalanche(xxh_u64 h64)
1097 {
1098     h64 ^= h64 >> 33;
1099     h64 *= PRIME64_2;
1100     h64 ^= h64 >> 29;
1101     h64 *= PRIME64_3;
1102     h64 ^= h64 >> 32;
1103     return h64;
1104 }
1105 
1106 
1107 #define XXH_get64bits(p) XXH_readLE64_align(p, align)
1108 
1109 static xxh_u64
1110 XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
1111 {
1112 #define PROCESS1_64            \
1113     h64 ^= (*ptr++) * PRIME64_5; \
1114     h64 = XXH_rotl64(h64, 11) * PRIME64_1;
1115 
1116 #define PROCESS4_64          \
1117     h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * PRIME64_1; \
1118     ptr+=4;                    \
1119     h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
1120 
1121 #define PROCESS8_64 {        \
1122     xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); \
1123     ptr+=8;                    \
1124     h64 ^= k1;               \
1125     h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \
1126 }
1127 
1128     /* Rerolled version for 32-bit targets is faster and much smaller. */
1129     if (XXH_REROLL || XXH_REROLL_XXH64) {
1130         len &= 31;
1131         while (len >= 8) {
1132             PROCESS8_64;
1133             len -= 8;
1134         }
1135         if (len >= 4) {
1136             PROCESS4_64;
1137             len -= 4;
1138         }
1139         while (len > 0) {
1140             PROCESS1_64;
1141             --len;
1142         }
1143          return  XXH64_avalanche(h64);
1144     } else {
1145         switch(len & 31) {
1146            case 24: PROCESS8_64;
1147                          /* fallthrough */
1148            case 16: PROCESS8_64;
1149                          /* fallthrough */
1150            case  8: PROCESS8_64;
1151                     return XXH64_avalanche(h64);
1152 
1153            case 28: PROCESS8_64;
1154                          /* fallthrough */
1155            case 20: PROCESS8_64;
1156                          /* fallthrough */
1157            case 12: PROCESS8_64;
1158                          /* fallthrough */
1159            case  4: PROCESS4_64;
1160                     return XXH64_avalanche(h64);
1161 
1162            case 25: PROCESS8_64;
1163                          /* fallthrough */
1164            case 17: PROCESS8_64;
1165                          /* fallthrough */
1166            case  9: PROCESS8_64;
1167                     PROCESS1_64;
1168                     return XXH64_avalanche(h64);
1169 
1170            case 29: PROCESS8_64;
1171                          /* fallthrough */
1172            case 21: PROCESS8_64;
1173                          /* fallthrough */
1174            case 13: PROCESS8_64;
1175                          /* fallthrough */
1176            case  5: PROCESS4_64;
1177                     PROCESS1_64;
1178                     return XXH64_avalanche(h64);
1179 
1180            case 26: PROCESS8_64;
1181                          /* fallthrough */
1182            case 18: PROCESS8_64;
1183                          /* fallthrough */
1184            case 10: PROCESS8_64;
1185                     PROCESS1_64;
1186                     PROCESS1_64;
1187                     return XXH64_avalanche(h64);
1188 
1189            case 30: PROCESS8_64;
1190                          /* fallthrough */
1191            case 22: PROCESS8_64;
1192                          /* fallthrough */
1193            case 14: PROCESS8_64;
1194                          /* fallthrough */
1195            case  6: PROCESS4_64;
1196                     PROCESS1_64;
1197                     PROCESS1_64;
1198                     return XXH64_avalanche(h64);
1199 
1200            case 27: PROCESS8_64;
1201                          /* fallthrough */
1202            case 19: PROCESS8_64;
1203                          /* fallthrough */
1204            case 11: PROCESS8_64;
1205                     PROCESS1_64;
1206                     PROCESS1_64;
1207                     PROCESS1_64;
1208                     return XXH64_avalanche(h64);
1209 
1210            case 31: PROCESS8_64;
1211                          /* fallthrough */
1212            case 23: PROCESS8_64;
1213                          /* fallthrough */
1214            case 15: PROCESS8_64;
1215                          /* fallthrough */
1216            case  7: PROCESS4_64;
1217                          /* fallthrough */
1218            case  3: PROCESS1_64;
1219                          /* fallthrough */
1220            case  2: PROCESS1_64;
1221                          /* fallthrough */
1222            case  1: PROCESS1_64;
1223                          /* fallthrough */
1224            case  0: return XXH64_avalanche(h64);
1225         }
1226     }
1227     /* impossible to reach */
1228     XXH_ASSERT(0);
1229     return 0;  /* unreachable, but some compilers complain without it */
1230 }
1231 
1232 XXH_FORCE_INLINE xxh_u64
1233 XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
1234 {
1235     const xxh_u8* bEnd = input + len;
1236     xxh_u64 h64;
1237 
1238 #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
1239     if (input==NULL) {
1240         len=0;
1241         bEnd=input=(const xxh_u8*)(size_t)32;
1242     }
1243 #endif
1244 
1245     if (len>=32) {
1246         const xxh_u8* const limit = bEnd - 32;
1247         xxh_u64 v1 = seed + PRIME64_1 + PRIME64_2;
1248         xxh_u64 v2 = seed + PRIME64_2;
1249         xxh_u64 v3 = seed + 0;
1250         xxh_u64 v4 = seed - PRIME64_1;
1251 
1252         do {
1253             v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
1254             v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
1255             v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
1256             v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
1257         } while (input<=limit);
1258 
1259         h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
1260         h64 = XXH64_mergeRound(h64, v1);
1261         h64 = XXH64_mergeRound(h64, v2);
1262         h64 = XXH64_mergeRound(h64, v3);
1263         h64 = XXH64_mergeRound(h64, v4);
1264 
1265     } else {
1266         h64  = seed + PRIME64_5;
1267     }
1268 
1269     h64 += (xxh_u64) len;
1270 
1271     return XXH64_finalize(h64, input, len, align);
1272 }
1273 
1274 
1275 XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
1276 {
1277 #if 0
1278     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
1279     XXH64_state_t state;
1280     XXH64_reset(&state, seed);
1281     XXH64_update(&state, (const xxh_u8*)input, len);
1282     return XXH64_digest(&state);
1283 
1284 #else
1285 
1286     if (XXH_FORCE_ALIGN_CHECK) {
1287         if ((((size_t)input) & 7)==0) {  /* Input is aligned, let's leverage the speed advantage */
1288             return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
1289     }   }
1290 
1291     return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
1292 
1293 #endif
1294 }
1295 
1296 /*******   Hash Streaming   *******/
1297 
1298 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
1299 {
1300     return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
1301 }
1302 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
1303 {
1304     XXH_free(statePtr);
1305     return XXH_OK;
1306 }
1307 
1308 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
1309 {
1310     memcpy(dstState, srcState, sizeof(*dstState));
1311 }
1312 
1313 XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
1314 {
1315     XXH64_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
1316     memset(&state, 0, sizeof(state));
1317     state.v1 = seed + PRIME64_1 + PRIME64_2;
1318     state.v2 = seed + PRIME64_2;
1319     state.v3 = seed + 0;
1320     state.v4 = seed - PRIME64_1;
1321      /* do not write into reserved64, might be removed in a future version */
1322     memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64));
1323     return XXH_OK;
1324 }
1325 
1326 XXH_PUBLIC_API XXH_errorcode
1327 XXH64_update (XXH64_state_t* state, const void* input, size_t len)
1328 {
1329     if (input==NULL)
1330 #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
1331         return XXH_OK;
1332 #else
1333         return XXH_ERROR;
1334 #endif
1335 
1336     {   const xxh_u8* p = (const xxh_u8*)input;
1337         const xxh_u8* const bEnd = p + len;
1338 
1339         state->total_len += len;
1340 
1341         if (state->memsize + len < 32) {  /* fill in tmp buffer */
1342             XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
1343             state->memsize += (xxh_u32)len;
1344             return XXH_OK;
1345         }
1346 
1347         if (state->memsize) {   /* tmp buffer is full */
1348             XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
1349             state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0));
1350             state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1));
1351             state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2));
1352             state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3));
1353             p += 32-state->memsize;
1354             state->memsize = 0;
1355         }
1356 
1357         if (p+32 <= bEnd) {
1358             const xxh_u8* const limit = bEnd - 32;
1359             xxh_u64 v1 = state->v1;
1360             xxh_u64 v2 = state->v2;
1361             xxh_u64 v3 = state->v3;
1362             xxh_u64 v4 = state->v4;
1363 
1364             do {
1365                 v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8;
1366                 v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8;
1367                 v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8;
1368                 v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8;
1369             } while (p<=limit);
1370 
1371             state->v1 = v1;
1372             state->v2 = v2;
1373             state->v3 = v3;
1374             state->v4 = v4;
1375         }
1376 
1377         if (p < bEnd) {
1378             XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
1379             state->memsize = (unsigned)(bEnd-p);
1380         }
1381     }
1382 
1383     return XXH_OK;
1384 }
1385 
1386 
1387 XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* state)
1388 {
1389     xxh_u64 h64;
1390 
1391     if (state->total_len >= 32) {
1392         xxh_u64 const v1 = state->v1;
1393         xxh_u64 const v2 = state->v2;
1394         xxh_u64 const v3 = state->v3;
1395         xxh_u64 const v4 = state->v4;
1396 
1397         h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
1398         h64 = XXH64_mergeRound(h64, v1);
1399         h64 = XXH64_mergeRound(h64, v2);
1400         h64 = XXH64_mergeRound(h64, v3);
1401         h64 = XXH64_mergeRound(h64, v4);
1402     } else {
1403         h64  = state->v3 /*seed*/ + PRIME64_5;
1404     }
1405 
1406     h64 += (xxh_u64) state->total_len;
1407 
1408     return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
1409 }
1410 
1411 
1412 /******* Canonical representation   *******/
1413 
1414 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
1415 {
1416     XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
1417     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
1418     memcpy(dst, &hash, sizeof(*dst));
1419 }
1420 
1421 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
1422 {
1423     return XXH_readBE64(src);
1424 }
1425 
1426 
1427 
1428 /* *********************************************************************
1429 *  XXH3
1430 *  New generation hash designed for speed on small keys and vectorization
1431 ************************************************************************ */
1432 
1433 /* #include "xxh3.h" */
1434 
1435 
1436 #endif  /* XXH_NO_LONG_LONG */
1437 
1438 
1439 #endif  /* XXH_IMPLEMENTATION */
1440 
1441 
1442 #if defined (__cplusplus)
1443 }
1444 #endif
1445