1 /*
2 * xxHash - Extremely Fast Hash algorithm
3 * Copyright (C) 2020 Yann Collet
4 *
5 * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * You can contact the author at:
31 * - xxHash homepage: https://www.xxhash.com
32 * - xxHash source repository: https://github.com/Cyan4973/xxHash
33 */
34
35
36 /*!
37 * @file xxh_x86dispatch.c
38 *
39 * Automatic dispatcher code for the @ref xxh3_family on x86-based targets.
40 *
41 * Optional add-on.
42 *
 * **Compile this file with the default flags for your target.** Do not compile
 * with flags like `-mavx*`, `-march=native`, or `/arch:AVX*`; doing so triggers
 * a compile-time error. See @ref XXH_X86DISPATCH_ALLOW_AVX for details.
46 *
47 * @defgroup dispatch x86 Dispatcher
48 * @{
49 */
50
51 #if defined (__cplusplus)
52 extern "C" {
53 #endif
54
/*
 * Everything below relies on x86-only facilities (CPUID, XGETBV, SSE2/AVX
 * target attributes), so refuse to build for any other architecture.
 */
#if !(defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64))
#  error "Dispatching is currently only supported on x86 and x86_64."
#endif
58
59 /*!
60 * @def XXH_X86DISPATCH_ALLOW_AVX
61 * @brief Disables the AVX sanity check.
62 *
63 * Don't compile xxh_x86dispatch.c with options like `-mavx*`, `-march=native`,
64 * or `/arch:AVX*`. It is intended to be compiled for the minimum target, and
65 * it selectively enables SSE2, AVX2, and AVX512 when it is needed.
66 *
67 * Using this option _globally_ allows this feature, and therefore makes it
68 * undefined behavior to execute on any CPU without said feature.
69 *
70 * Even if the source code isn't directly using AVX intrinsics in a function,
71 * the compiler can still generate AVX code from autovectorization and by
72 * "upgrading" SSE2 intrinsics to use the VEX prefixes (a.k.a. AVX128).
73 *
74 * Use the same flags that you use to compile the rest of the program; this
75 * file will safely generate SSE2, AVX2, and AVX512 without these flags.
76 *
77 * Define XXH_X86DISPATCH_ALLOW_AVX to ignore this check, and feel free to open
78 * an issue if there is a target in the future where AVX is a default feature.
79 */
/* Only defined when XXH_DOXYGEN is set (presumably so the documentation
 * generator sees the macro); regular builds leave it to the user. */
#ifdef XXH_DOXYGEN
#  define XXH_X86DISPATCH_ALLOW_AVX
#endif

/* Reject builds where the compiler reports AVX as globally enabled, unless
 * the user opted out of the check by defining XXH_X86DISPATCH_ALLOW_AVX. */
#if defined(__AVX__) && !defined(XXH_X86DISPATCH_ALLOW_AVX)
#  error "Do not compile xxh_x86dispatch.c with AVX enabled! See the comment above."
#endif
87
/*
 * XXH_HAS_INCLUDE(header)
 *
 * Expands to __has_include(header) when the preprocessor provides that
 * operator, and to a constant 0 otherwise. Intended for use inside
 * #if / #elif expressions only.
 */
#if !defined(__has_include)
#  define XXH_HAS_INCLUDE(header) 0
#else
#  define XXH_HAS_INCLUDE(header) __has_include(header)
#endif
93
94 /*!
95 * @def XXH_DISPATCH_SCALAR
 * @brief Enables/disables dispatching the scalar code path.
97 *
98 * If this is defined to 0, SSE2 support is assumed. This reduces code size
99 * when the scalar path is not needed.
100 *
101 * This is automatically defined to 0 when...
102 * - SSE2 support is enabled in the compiler
103 * - Targeting x86_64
104 * - Targeting Android x86
105 * - Targeting macOS
106 */
/*
 * Auto-detect whether the scalar fallback must be compiled in. A value
 * predefined by the user always wins. The scalar path is dropped whenever the
 * target is known to guarantee SSE2: SSE2 enabled in the compiler, any x86_64
 * target, Android x86, or an Apple target (`__APPLE__`).
 */
#ifndef XXH_DISPATCH_SCALAR
#  if defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2) /* SSE2 on by default */ \
      || defined(__x86_64__) || defined(_M_X64)                     /* x86_64 */ \
      || defined(__ANDROID__) || defined(__APPLE__)                 /* Android or macOS */
#    define XXH_DISPATCH_SCALAR 0 /* disable */
#  else
#    define XXH_DISPATCH_SCALAR 1
#  endif
#endif
116 /*!
117 * @def XXH_DISPATCH_AVX2
118 * @brief Enables/disables dispatching for AVX2.
119 *
120 * This is automatically detected if it is not defined.
 * - GCC 4.7 and later are known to support AVX2, but >4.9 is required
 *   to get the AVX2 intrinsics and typedefs without -mavx -mavx2.
123 * - Visual Studio 2013 Update 2 and later are known to support AVX2.
124 * - The GCC/Clang internal header `<avx2intrin.h>` is detected. While this is
125 * not allowed to be included directly, it still appears in the builtin
126 * include path and is detectable with `__has_include`.
127 *
128 * @see XXH_AVX2
129 */
/*
 * Auto-detect AVX2 dispatch support unless the user predefined the macro.
 * Each branch below is one sufficient condition for enabling it.
 */
#if !defined(XXH_DISPATCH_AVX2)
#  if defined(__GNUC__) && (__GNUC__ >= 5)                      /* GCC 5.0+ */
#    define XXH_DISPATCH_AVX2 1   /* enable dispatch towards AVX2 */
#  elif defined(_MSC_VER) && (_MSC_VER >= 1900)                 /* VS 2015+ */
#    define XXH_DISPATCH_AVX2 1
#  elif defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 180030501)  /* VS 2013 Update 2 */
#    define XXH_DISPATCH_AVX2 1
#  elif XXH_HAS_INCLUDE(<avx2intrin.h>)                         /* GCC/Clang internal header */
#    define XXH_DISPATCH_AVX2 1
#  else
#    define XXH_DISPATCH_AVX2 0
#  endif
#endif /* XXH_DISPATCH_AVX2 */
140
141 /*!
142 * @def XXH_DISPATCH_AVX512
143 * @brief Enables/disables dispatching for AVX512.
144 *
145 * Automatically detected if one of the following conditions is met:
146 * - GCC 4.9 and later are known to support AVX512.
 * - Visual Studio 2017 and later are known to support AVX512.
148 * - The GCC/Clang internal header `<avx512fintrin.h>` is detected. While this
149 * is not allowed to be included directly, it still appears in the builtin
150 * include path and is detectable with `__has_include`.
151 *
152 * @see XXH_AVX512
153 */
/*
 * Auto-detect AVX512 dispatch support unless the user predefined the macro.
 * Each branch below is one sufficient condition for enabling it.
 */
#if !defined(XXH_DISPATCH_AVX512)
#  if defined(__GNUC__) \
      && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9))  /* GCC 4.9+ */
#    define XXH_DISPATCH_AVX512 1   /* enable dispatch towards AVX512 */
#  elif defined(_MSC_VER) && (_MSC_VER >= 1910)                    /* VS 2017+ */
#    define XXH_DISPATCH_AVX512 1
#  elif XXH_HAS_INCLUDE(<avx512fintrin.h>)                         /* GCC/Clang internal header */
#    define XXH_DISPATCH_AVX512 1
#  else
#    define XXH_DISPATCH_AVX512 0
#  endif
#endif /* XXH_DISPATCH_AVX512 */
164
165 /*!
166 * @def XXH_TARGET_SSE2
167 * @brief Allows a function to be compiled with SSE2 intrinsics.
168 *
169 * Uses `__attribute__((__target__("sse2")))` on GCC to allow SSE2 to be used
170 * even with `-mno-sse2`.
171 *
172 * @def XXH_TARGET_AVX2
173 * @brief Like @ref XXH_TARGET_SSE2, but for AVX2.
174 *
175 * @def XXH_TARGET_AVX512
176 * @brief Like @ref XXH_TARGET_SSE2, but for AVX512.
177 */
/*
 * Per-compiler setup: pull in the intrinsic headers and define the
 * function-level target attributes used by the dispatch functions.
 */
#if defined(__GNUC__)
#  include <emmintrin.h> /* SSE2 */
   /* The wider header is only included when an AVX path is compiled in. */
#  if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
#    include <immintrin.h> /* AVX2, AVX512F */
#  endif
   /* Enable the instruction set for a single function. */
#  define XXH_TARGET_SSE2 __attribute__((__target__("sse2")))
#  define XXH_TARGET_AVX2 __attribute__((__target__("avx2")))
#  define XXH_TARGET_AVX512 __attribute__((__target__("avx512f")))
#elif defined(_MSC_VER)
#  include <intrin.h>
   /* Expand to nothing on MSVC (presumably no per-function opt-in is
    * needed there; confirm against the MSVC documentation). */
#  define XXH_TARGET_SSE2
#  define XXH_TARGET_AVX2
#  define XXH_TARGET_AVX512
#else
#  error "Dispatching is currently not supported for your compiler."
#endif
194
/*
 * XXH_debugPrint(str): diagnostic logging for the dispatcher.
 *
 * With XXH_DISPATCH_DEBUG defined, prints `str` to stderr and flushes all
 * open output streams. Otherwise it expands to a no-op, and NDEBUG is forced
 * on so that the assert() calls in this file are compiled out.
 *
 * The debug variant is wrapped in do { } while (0) so that it behaves as a
 * single statement (e.g. inside an unbraced if/else), like the no-op variant.
 */
#ifdef XXH_DISPATCH_DEBUG
/* debug logging */
#  include <stdio.h>
#  define XXH_debugPrint(str)                                          \
    do {                                                               \
        fprintf(stderr, "DEBUG: xxHash dispatch: %s \n", (str));       \
        fflush(NULL);                                                  \
    } while (0)
#else
#  define XXH_debugPrint(str) ((void)0)
#  undef NDEBUG /* avoid redefinition */
#  define NDEBUG
#endif
204 #include <assert.h>
205
206 #define XXH_INLINE_ALL
207 #define XXH_X86DISPATCH
208 #include "xxhash.h"
209
210 /*
211 * Support both AT&T and Intel dialects
212 *
213 * GCC doesn't convert AT&T syntax to Intel syntax, and will error out if
214 * compiled with -masm=intel. Instead, it supports dialect switching with
215 * curly braces: { AT&T syntax | Intel syntax }
216 *
217 * Clang's integrated assembler automatically converts AT&T syntax to Intel if
218 * needed, making the dialect switching useless (it isn't even supported).
219 *
220 * Note: Comments are written in the inline assembly itself.
221 */
/*
 * XXH_I_ATT(intel, att): builds one line of inline assembly.
 *
 * Non-Clang compilers get the `{att|intel}` dialect alternative; Clang gets
 * the AT&T text only. Both forms end the line with "\n\t".
 */
#ifndef __clang__
#  define XXH_I_ATT(intel, att) "{" att "|" intel "}\n\t"
#else
#  define XXH_I_ATT(intel, att) att "\n\t"
#endif
227
228 /*!
229 * @internal
230 * @brief Runs CPUID.
231 *
232 * @param eax, ecx The parameters to pass to CPUID, %eax and %ecx respectively.
233 * @param abcd The array to store the result in, `{ eax, ebx, ecx, edx }`
234 */
static void XXH_cpuid(xxh_u32 eax, xxh_u32 ecx, xxh_u32* abcd)
{
#if defined(_MSC_VER)
    /*
     * The MSVC intrinsic is declared with `int` parameters
     * (`int cpuInfo[4], int function_id, int subfunction_id`), so convert
     * explicitly. Passing `xxh_u32*` where `int*` is expected is an
     * incompatible pointer type in C and a hard error in C++.
     */
    __cpuidex((int*)abcd, (int)eax, (int)ecx);
#else
    xxh_u32 ebx, edx;
# if defined(__i386__) && defined(__PIC__)
    /* PIC on i386: EBX must be preserved, so EDI carries the result out. */
    __asm__(
        "# Call CPUID\n\t"
        "#\n\t"
        "# On 32-bit x86 with PIC enabled, we are not allowed to overwrite\n\t"
        "# EBX, so we use EDI instead.\n\t"
        XXH_I_ATT("mov edi, ebx", "movl %%ebx, %%edi")
        XXH_I_ATT("cpuid", "cpuid" )
        XXH_I_ATT("xchg edi, ebx", "xchgl %%ebx, %%edi")
        : "=D" (ebx),
# else
    __asm__(
        "# Call CPUID\n\t"
        XXH_I_ATT("cpuid", "cpuid")
        : "=b" (ebx),
# endif
          /* Operands shared by both variants: EAX/ECX are in-out, EDX is out. */
          "+a" (eax), "+c" (ecx), "=d" (edx));

    /* Store the registers in the documented { eax, ebx, ecx, edx } order. */
    abcd[0] = eax;
    abcd[1] = ebx;
    abcd[2] = ecx;
    abcd[3] = edx;
#endif
}
264
265 /*
266 * Modified version of Intel's guide
267 * https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
268 */
269
270 #if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
271 /*!
272 * @internal
273 * @brief Runs `XGETBV`.
274 *
275 * While the CPU may support AVX2, the operating system might not properly save
276 * the full YMM/ZMM registers.
277 *
278 * xgetbv is used for detecting this: Any compliant operating system will define
279 * a set of flags in the xcr0 register indicating how it saves the AVX registers.
280 *
281 * You can manually disable this flag on Windows by running, as admin:
282 *
283 * bcdedit.exe /set xsavedisable 1
284 *
285 * and rebooting. Run the same command with 0 to re-enable it.
286 */
XXH_xgetbv(void)287 static xxh_u64 XXH_xgetbv(void)
288 {
289 #if defined(_MSC_VER)
290 return _xgetbv(0); /* min VS2010 SP1 compiler is required */
291 #else
292 xxh_u32 xcr0_lo, xcr0_hi;
293 __asm__(
294 "# Call XGETBV\n\t"
295 "#\n\t"
296 "# Older assemblers (e.g. macOS's ancient GAS version) don't support\n\t"
297 "# the XGETBV opcode, so we encode it by hand instead.\n\t"
298 "# See <https://github.com/asmjit/asmjit/issues/78> for details.\n\t"
299 ".byte 0x0f, 0x01, 0xd0\n\t"
300 : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0));
301 return xcr0_lo | ((xxh_u64)xcr0_hi << 32);
302 #endif
303 }
304 #endif
305
/* CPUID leaf 1, EDX: SSE2 (bit 26). */
#define XXH_SSE2_CPUID_MASK     (1 << 26)
/* CPUID leaf 1, ECX: XSAVE (bit 26) and OSXSAVE (bit 27). */
#define XXH_OSXSAVE_CPUID_MASK  ((1 << 27) | (1 << 26))
/* CPUID leaf 7 / subleaf 0, EBX: AVX2 (bit 5). */
#define XXH_AVX2_CPUID_MASK     (1 << 5)
/* XCR0: SSE state (bit 1) and AVX/YMM state (bit 2) enabled by the OS. */
#define XXH_AVX2_XGETBV_MASK    ((1 << 1) | (1 << 2))
/* CPUID leaf 7 / subleaf 0, EBX: AVX512F (bit 16). */
#define XXH_AVX512F_CPUID_MASK  (1 << 16)
/* XCR0: SSE and AVX state plus the three AVX-512 state bits (5 to 7). */
#define XXH_AVX512F_XGETBV_MASK ((1 << 1) | (1 << 2) | (7 << 5))
312
313 /*!
314 * @internal
315 * @brief Returns the best XXH3 implementation.
316 *
317 * Runs various CPUID/XGETBV tests to try and determine the best implementation.
318 *
 * @return The best @ref XXH_VECTOR implementation.
320 * @see XXH_VECTOR_TYPES
321 */
XXH_featureTest(void)322 static int XXH_featureTest(void)
323 {
324 xxh_u32 abcd[4];
325 xxh_u32 max_leaves;
326 int best = XXH_SCALAR;
327 #if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
328 xxh_u64 xgetbv_val;
329 #endif
330 #if defined(__GNUC__) && defined(__i386__)
331 xxh_u32 cpuid_supported;
332 __asm__(
333 "# For the sake of ruthless backwards compatibility, check if CPUID\n\t"
334 "# is supported in the EFLAGS on i386.\n\t"
335 "# This is not necessary on x86_64 - CPUID is mandatory.\n\t"
336 "# The ID flag (bit 21) in the EFLAGS register indicates support\n\t"
337 "# for the CPUID instruction. If a software procedure can set and\n\t"
338 "# clear this flag, the processor executing the procedure supports\n\t"
339 "# the CPUID instruction.\n\t"
340 "# <https://c9x.me/x86/html/file_module_x86_id_45.html>\n\t"
341 "#\n\t"
342 "# Routine is from <https://wiki.osdev.org/CPUID>.\n\t"
343
344 "# Save EFLAGS\n\t"
345 XXH_I_ATT("pushfd", "pushfl" )
346 "# Store EFLAGS\n\t"
347 XXH_I_ATT("pushfd", "pushfl" )
348 "# Invert the ID bit in stored EFLAGS\n\t"
349 XXH_I_ATT("xor dword ptr[esp], 0x200000", "xorl $0x200000, (%%esp)")
350 "# Load stored EFLAGS (with ID bit inverted)\n\t"
351 XXH_I_ATT("popfd", "popfl" )
352 "# Store EFLAGS again (ID bit may or not be inverted)\n\t"
353 XXH_I_ATT("pushfd", "pushfl" )
354 "# eax = modified EFLAGS (ID bit may or may not be inverted)\n\t"
355 XXH_I_ATT("pop eax", "popl %%eax" )
356 "# eax = whichever bits were changed\n\t"
357 XXH_I_ATT("xor eax, dword ptr[esp]", "xorl (%%esp), %%eax" )
358 "# Restore original EFLAGS\n\t"
359 XXH_I_ATT("popfd", "popfl" )
360 "# eax = zero if ID bit can't be changed, else non-zero\n\t"
361 XXH_I_ATT("and eax, 0x200000", "andl $0x200000, %%eax" )
362 : "=a" (cpuid_supported) :: "cc");
363
364 if (XXH_unlikely(!cpuid_supported)) {
365 XXH_debugPrint("CPUID support is not detected!");
366 return best;
367 }
368
369 #endif
370 /* Check how many CPUID pages we have */
371 XXH_cpuid(0, 0, abcd);
372 max_leaves = abcd[0];
373
374 /* Shouldn't happen on hardware, but happens on some QEMU configs. */
375 if (XXH_unlikely(max_leaves == 0)) {
376 XXH_debugPrint("Max CPUID leaves == 0!");
377 return best;
378 }
379
380 /* Check for SSE2, OSXSAVE and xgetbv */
381 XXH_cpuid(1, 0, abcd);
382
383 /*
384 * Test for SSE2. The check is redundant on x86_64, but it doesn't hurt.
385 */
386 if (XXH_unlikely((abcd[3] & XXH_SSE2_CPUID_MASK) != XXH_SSE2_CPUID_MASK))
387 return best;
388
389 XXH_debugPrint("SSE2 support detected.");
390
391 best = XXH_SSE2;
392 #if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
393 /* Make sure we have enough leaves */
394 if (XXH_unlikely(max_leaves < 7))
395 return best;
396
397 /* Test for OSXSAVE and XGETBV */
398 if ((abcd[2] & XXH_OSXSAVE_CPUID_MASK) != XXH_OSXSAVE_CPUID_MASK)
399 return best;
400
401 /* CPUID check for AVX features */
402 XXH_cpuid(7, 0, abcd);
403
404 xgetbv_val = XXH_xgetbv();
405 #if XXH_DISPATCH_AVX2
406 /* Validate that AVX2 is supported by the CPU */
407 if ((abcd[1] & XXH_AVX2_CPUID_MASK) != XXH_AVX2_CPUID_MASK)
408 return best;
409
410 /* Validate that the OS supports YMM registers */
411 if ((xgetbv_val & XXH_AVX2_XGETBV_MASK) != XXH_AVX2_XGETBV_MASK) {
412 XXH_debugPrint("AVX2 supported by the CPU, but not the OS.");
413 return best;
414 }
415
416 /* AVX2 supported */
417 XXH_debugPrint("AVX2 support detected.");
418 best = XXH_AVX2;
419 #endif
420 #if XXH_DISPATCH_AVX512
421 /* Check if AVX512F is supported by the CPU */
422 if ((abcd[1] & XXH_AVX512F_CPUID_MASK) != XXH_AVX512F_CPUID_MASK) {
423 XXH_debugPrint("AVX512F not supported by CPU");
424 return best;
425 }
426
427 /* Validate that the OS supports ZMM registers */
428 if ((xgetbv_val & XXH_AVX512F_XGETBV_MASK) != XXH_AVX512F_XGETBV_MASK) {
429 XXH_debugPrint("AVX512F supported by the CPU, but not the OS.");
430 return best;
431 }
432
433 /* AVX512F supported */
434 XXH_debugPrint("AVX512F support detected.");
435 best = XXH_AVX512;
436 #endif
437 #endif
438 return best;
439 }
440
441
442 /* === Vector implementations === */
443
444 /*!
445 * @internal
446 * @brief Defines the various dispatch functions.
447 *
448 * TODO: Consolidate?
449 *
450 * @param suffix The suffix for the functions, e.g. sse2 or scalar
451 * @param target XXH_TARGET_* or empty.
452 */
453 #define XXH_DEFINE_DISPATCH_FUNCS(suffix, target) \
454 \
455 /* === XXH3, default variants === */ \
456 \
457 XXH_NO_INLINE target XXH64_hash_t \
458 XXHL64_default_##suffix(const void* XXH_RESTRICT input, size_t len) \
459 { \
460 return XXH3_hashLong_64b_internal( \
461 input, len, XXH3_kSecret, sizeof(XXH3_kSecret), \
462 XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix \
463 ); \
464 } \
465 \
466 /* === XXH3, Seeded variants === */ \
467 \
468 XXH_NO_INLINE target XXH64_hash_t \
469 XXHL64_seed_##suffix(const void* XXH_RESTRICT input, size_t len, \
470 XXH64_hash_t seed) \
471 { \
472 return XXH3_hashLong_64b_withSeed_internal( \
473 input, len, seed, XXH3_accumulate_512_##suffix, \
474 XXH3_scrambleAcc_##suffix, XXH3_initCustomSecret_##suffix \
475 ); \
476 } \
477 \
478 /* === XXH3, Secret variants === */ \
479 \
480 XXH_NO_INLINE target XXH64_hash_t \
481 XXHL64_secret_##suffix(const void* XXH_RESTRICT input, size_t len, \
482 const void* secret, size_t secretLen) \
483 { \
484 return XXH3_hashLong_64b_internal( \
485 input, len, secret, secretLen, \
486 XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix \
487 ); \
488 } \
489 \
490 /* === XXH3 update variants === */ \
491 \
492 XXH_NO_INLINE target XXH_errorcode \
493 XXH3_update_##suffix(XXH3_state_t* state, const void* input, size_t len) \
494 { \
495 return XXH3_update(state, (const xxh_u8*)input, len, \
496 XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix); \
497 } \
498 \
499 /* === XXH128 default variants === */ \
500 \
501 XXH_NO_INLINE target XXH128_hash_t \
502 XXHL128_default_##suffix(const void* XXH_RESTRICT input, size_t len) \
503 { \
504 return XXH3_hashLong_128b_internal( \
505 input, len, XXH3_kSecret, sizeof(XXH3_kSecret), \
506 XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix \
507 ); \
508 } \
509 \
510 /* === XXH128 Secret variants === */ \
511 \
512 XXH_NO_INLINE target XXH128_hash_t \
513 XXHL128_secret_##suffix(const void* XXH_RESTRICT input, size_t len, \
514 const void* XXH_RESTRICT secret, size_t secretLen) \
515 { \
516 return XXH3_hashLong_128b_internal( \
517 input, len, (const xxh_u8*)secret, secretLen, \
518 XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix); \
519 } \
520 \
521 /* === XXH128 Seeded variants === */ \
522 \
523 XXH_NO_INLINE target XXH128_hash_t \
524 XXHL128_seed_##suffix(const void* XXH_RESTRICT input, size_t len, \
525 XXH64_hash_t seed) \
526 { \
527 return XXH3_hashLong_128b_withSeed_internal(input, len, seed, \
528 XXH3_accumulate_512_##suffix, XXH3_scrambleAcc_##suffix, \
529 XXH3_initCustomSecret_##suffix); \
530 }
531
532 /* End XXH_DEFINE_DISPATCH_FUNCS */
533
534 #if XXH_DISPATCH_SCALAR
535 XXH_DEFINE_DISPATCH_FUNCS(scalar, /* nothing */)
536 #endif
537 XXH_DEFINE_DISPATCH_FUNCS(sse2, XXH_TARGET_SSE2)
538 #if XXH_DISPATCH_AVX2
539 XXH_DEFINE_DISPATCH_FUNCS(avx2, XXH_TARGET_AVX2)
540 #endif
541 #if XXH_DISPATCH_AVX512
542 XXH_DEFINE_DISPATCH_FUNCS(avx512, XXH_TARGET_AVX512)
543 #endif
544 #undef XXH_DEFINE_DISPATCH_FUNCS
545
546 /* ==== Dispatchers ==== */
547
548 typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_default)(const void* XXH_RESTRICT, size_t);
549
550 typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSeed)(const void* XXH_RESTRICT, size_t, XXH64_hash_t);
551
552 typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSecret)(const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t);
553
554 typedef XXH_errorcode (*XXH3_dispatchx86_update)(XXH3_state_t*, const void*, size_t);
555
556 typedef struct {
557 XXH3_dispatchx86_hashLong64_default hashLong64_default;
558 XXH3_dispatchx86_hashLong64_withSeed hashLong64_seed;
559 XXH3_dispatchx86_hashLong64_withSecret hashLong64_secret;
560 XXH3_dispatchx86_update update;
561 } XXH_dispatchFunctions_s;
562
/* Number of rows in each dispatch table: Scalar, SSE2, AVX2 and AVX512. */
#define XXH_NB_DISPATCHES 4
564
565 /*!
566 * @internal
567 * @brief Table of dispatchers for @ref XXH3_64bits().
568 *
569 * @pre The indices must match @ref XXH_VECTOR_TYPE.
570 */
571 static const XXH_dispatchFunctions_s XXH_kDispatch[XXH_NB_DISPATCHES] = {
572 #if XXH_DISPATCH_SCALAR
573 /* Scalar */ { XXHL64_default_scalar, XXHL64_seed_scalar, XXHL64_secret_scalar, XXH3_update_scalar },
574 #else
575 /* Scalar */ { NULL, NULL, NULL, NULL },
576 #endif
577 /* SSE2 */ { XXHL64_default_sse2, XXHL64_seed_sse2, XXHL64_secret_sse2, XXH3_update_sse2 },
578 #if XXH_DISPATCH_AVX2
579 /* AVX2 */ { XXHL64_default_avx2, XXHL64_seed_avx2, XXHL64_secret_avx2, XXH3_update_avx2 },
580 #else
581 /* AVX2 */ { NULL, NULL, NULL, NULL },
582 #endif
583 #if XXH_DISPATCH_AVX512
584 /* AVX512 */ { XXHL64_default_avx512, XXHL64_seed_avx512, XXHL64_secret_avx512, XXH3_update_avx512 }
585 #else
586 /* AVX512 */ { NULL, NULL, NULL, NULL }
587 #endif
588 };
589 /*!
590 * @internal
591 * @brief The selected dispatch table for @ref XXH3_64bits().
592 */
/* Starts out all-NULL. XXH_setDispatch() overwrites it with one row of
 * XXH_kDispatch, and each dispatcher checks its own entry for NULL first.
 * NOTE(review): no synchronization is visible around this lazy setup;
 * confirm whether concurrent first calls are expected. */
static XXH_dispatchFunctions_s XXH_g_dispatch = { NULL, NULL, NULL, NULL };
594
595
596 typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_default)(const void* XXH_RESTRICT, size_t);
597
598 typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSeed)(const void* XXH_RESTRICT, size_t, XXH64_hash_t);
599
600 typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSecret)(const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t);
601
602 typedef struct {
603 XXH3_dispatchx86_hashLong128_default hashLong128_default;
604 XXH3_dispatchx86_hashLong128_withSeed hashLong128_seed;
605 XXH3_dispatchx86_hashLong128_withSecret hashLong128_secret;
606 XXH3_dispatchx86_update update;
607 } XXH_dispatch128Functions_s;
608
609
610 /*!
611 * @internal
612 * @brief Table of dispatchers for @ref XXH3_128bits().
613 *
614 * @pre The indices must match @ref XXH_VECTOR_TYPE.
615 */
616 static const XXH_dispatch128Functions_s XXH_kDispatch128[XXH_NB_DISPATCHES] = {
617 #if XXH_DISPATCH_SCALAR
618 /* Scalar */ { XXHL128_default_scalar, XXHL128_seed_scalar, XXHL128_secret_scalar, XXH3_update_scalar },
619 #else
620 /* Scalar */ { NULL, NULL, NULL, NULL },
621 #endif
622 /* SSE2 */ { XXHL128_default_sse2, XXHL128_seed_sse2, XXHL128_secret_sse2, XXH3_update_sse2 },
623 #if XXH_DISPATCH_AVX2
624 /* AVX2 */ { XXHL128_default_avx2, XXHL128_seed_avx2, XXHL128_secret_avx2, XXH3_update_avx2 },
625 #else
626 /* AVX2 */ { NULL, NULL, NULL, NULL },
627 #endif
628 #if XXH_DISPATCH_AVX512
629 /* AVX512 */ { XXHL128_default_avx512, XXHL128_seed_avx512, XXHL128_secret_avx512, XXH3_update_avx512 }
630 #else
631 /* AVX512 */ { NULL, NULL, NULL, NULL }
632 #endif
633 };
634
635 /*!
636 * @internal
 * @brief The selected dispatch table for @ref XXH3_128bits().
638 */
/* Starts out all-NULL. XXH_setDispatch() overwrites it with one row of
 * XXH_kDispatch128, and each 128-bit dispatcher checks its own entry for
 * NULL first. */
static XXH_dispatch128Functions_s XXH_g_dispatch128 = { NULL, NULL, NULL, NULL };
640
641 /*!
642 * @internal
643 * @brief Runs a CPUID check and sets the correct dispatch tables.
644 */
XXH_setDispatch(void)645 static void XXH_setDispatch(void)
646 {
647 int vecID = XXH_featureTest();
648 XXH_STATIC_ASSERT(XXH_AVX512 == XXH_NB_DISPATCHES-1);
649 assert(XXH_SCALAR <= vecID && vecID <= XXH_AVX512);
650 #if !XXH_DISPATCH_SCALAR
651 assert(vecID != XXH_SCALAR);
652 #endif
653 #if !XXH_DISPATCH_AVX512
654 assert(vecID != XXH_AVX512);
655 #endif
656 #if !XXH_DISPATCH_AVX2
657 assert(vecID != XXH_AVX2);
658 #endif
659 XXH_g_dispatch = XXH_kDispatch[vecID];
660 XXH_g_dispatch128 = XXH_kDispatch128[vecID];
661 }
662
663
664 /* ==== XXH3 public functions ==== */
665
666 static XXH64_hash_t
XXH3_hashLong_64b_defaultSecret_selection(const void * input,size_t len,XXH64_hash_t seed64,const xxh_u8 * secret,size_t secretLen)667 XXH3_hashLong_64b_defaultSecret_selection(const void* input, size_t len,
668 XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
669 {
670 (void)seed64; (void)secret; (void)secretLen;
671 if (XXH_g_dispatch.hashLong64_default == NULL) XXH_setDispatch();
672 return XXH_g_dispatch.hashLong64_default(input, len);
673 }
674
XXH3_64bits_dispatch(const void * input,size_t len)675 XXH64_hash_t XXH3_64bits_dispatch(const void* input, size_t len)
676 {
677 return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_defaultSecret_selection);
678 }
679
680 static XXH64_hash_t
XXH3_hashLong_64b_withSeed_selection(const void * input,size_t len,XXH64_hash_t seed64,const xxh_u8 * secret,size_t secretLen)681 XXH3_hashLong_64b_withSeed_selection(const void* input, size_t len,
682 XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
683 {
684 (void)secret; (void)secretLen;
685 if (XXH_g_dispatch.hashLong64_seed == NULL) XXH_setDispatch();
686 return XXH_g_dispatch.hashLong64_seed(input, len, seed64);
687 }
688
/*
 * Dispatching 64-bit XXH3 hash of `input` (`len` bytes) with a caller-supplied
 * `seed`; the built-in secret XXH3_kSecret is passed alongside it.
 */
XXH64_hash_t XXH3_64bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed)
{
    XXH64_hash_t const hash = XXH3_64bits_internal(
            input, len,
            seed, XXH3_kSecret, sizeof(XXH3_kSecret),
            XXH3_hashLong_64b_withSeed_selection);
    return hash;
}
693
694 static XXH64_hash_t
XXH3_hashLong_64b_withSecret_selection(const void * input,size_t len,XXH64_hash_t seed64,const xxh_u8 * secret,size_t secretLen)695 XXH3_hashLong_64b_withSecret_selection(const void* input, size_t len,
696 XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
697 {
698 (void)seed64;
699 if (XXH_g_dispatch.hashLong64_secret == NULL) XXH_setDispatch();
700 return XXH_g_dispatch.hashLong64_secret(input, len, secret, secretLen);
701 }
702
XXH3_64bits_withSecret_dispatch(const void * input,size_t len,const void * secret,size_t secretLen)703 XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen)
704 {
705 return XXH3_64bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_64b_withSecret_selection);
706 }
707
708 XXH_errorcode
XXH3_64bits_update_dispatch(XXH3_state_t * state,const void * input,size_t len)709 XXH3_64bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len)
710 {
711 if (XXH_g_dispatch.update == NULL) XXH_setDispatch();
712 return XXH_g_dispatch.update(state, (const xxh_u8*)input, len);
713 }
714
715
716 /* ==== XXH128 public functions ==== */
717
718 static XXH128_hash_t
XXH3_hashLong_128b_defaultSecret_selection(const void * input,size_t len,XXH64_hash_t seed64,const void * secret,size_t secretLen)719 XXH3_hashLong_128b_defaultSecret_selection(const void* input, size_t len,
720 XXH64_hash_t seed64, const void* secret, size_t secretLen)
721 {
722 (void)seed64; (void)secret; (void)secretLen;
723 if (XXH_g_dispatch128.hashLong128_default == NULL) XXH_setDispatch();
724 return XXH_g_dispatch128.hashLong128_default(input, len);
725 }
726
XXH3_128bits_dispatch(const void * input,size_t len)727 XXH128_hash_t XXH3_128bits_dispatch(const void* input, size_t len)
728 {
729 return XXH3_128bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_defaultSecret_selection);
730 }
731
732 static XXH128_hash_t
XXH3_hashLong_128b_withSeed_selection(const void * input,size_t len,XXH64_hash_t seed64,const void * secret,size_t secretLen)733 XXH3_hashLong_128b_withSeed_selection(const void* input, size_t len,
734 XXH64_hash_t seed64, const void* secret, size_t secretLen)
735 {
736 (void)secret; (void)secretLen;
737 if (XXH_g_dispatch128.hashLong128_seed == NULL) XXH_setDispatch();
738 return XXH_g_dispatch128.hashLong128_seed(input, len, seed64);
739 }
740
XXH3_128bits_withSeed_dispatch(const void * input,size_t len,XXH64_hash_t seed)741 XXH128_hash_t XXH3_128bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed)
742 {
743 return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_withSeed_selection);
744 }
745
746 static XXH128_hash_t
XXH3_hashLong_128b_withSecret_selection(const void * input,size_t len,XXH64_hash_t seed64,const void * secret,size_t secretLen)747 XXH3_hashLong_128b_withSecret_selection(const void* input, size_t len,
748 XXH64_hash_t seed64, const void* secret, size_t secretLen)
749 {
750 (void)seed64;
751 if (XXH_g_dispatch128.hashLong128_secret == NULL) XXH_setDispatch();
752 return XXH_g_dispatch128.hashLong128_secret(input, len, secret, secretLen);
753 }
754
XXH3_128bits_withSecret_dispatch(const void * input,size_t len,const void * secret,size_t secretLen)755 XXH128_hash_t XXH3_128bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen)
756 {
757 return XXH3_128bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_128b_withSecret_selection);
758 }
759
760 XXH_errorcode
XXH3_128bits_update_dispatch(XXH3_state_t * state,const void * input,size_t len)761 XXH3_128bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len)
762 {
763 if (XXH_g_dispatch128.update == NULL) XXH_setDispatch();
764 return XXH_g_dispatch128.update(state, (const xxh_u8*)input, len);
765 }
766
767 #if defined (__cplusplus)
768 }
769 #endif
770 /*! @} */
771