1 // Copyright (c) 2011 Google, Inc.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 //
21 // CityHash, by Geoff Pike and Jyrki Alakuijala
22 //
23 // This file provides CityHash64() and related functions.
24 //
25 // It's probably possible to create even faster hash functions by
26 // writing a program that systematically explores some of the space of
27 // possible hash functions, by using SIMD instructions, or by
28 // compromising on hash quality.
29
30 #include "city.h"
31
32 #include <string.h> // for memcpy and memset
33 #include <algorithm>
34
35 using std::make_pair;
36 using std::pair;
37
38 #if defined(__clang__)
39
40 // Use builtins where possible. On Windows for instance, this may prevent a
41 // function call instead of emitting a single instruction.
42 #define bswap_32(x) __builtin_bswap32(x)
43 #define bswap_64(x) __builtin_bswap64(x)
44
45 #elif _MSC_VER
46
47 #include <stdlib.h>
48 #define bswap_32(x) _byteswap_ulong(x)
49 #define bswap_64(x) _byteswap_uint64(x)
50
51 #elif defined(__APPLE__)
52
53 // Mac OS X / Darwin features
54 #include <libkern/OSByteOrder.h>
55 #define bswap_32(x) OSSwapInt32(x)
56 #define bswap_64(x) OSSwapInt64(x)
57
58 #elif defined(__sun) || defined(sun)
59
60 #include <sys/byteorder.h>
61 #define bswap_32(x) BSWAP_32(x)
62 #define bswap_64(x) BSWAP_64(x)
63
64 #elif defined(__FreeBSD__)
65
66 #include <sys/endian.h>
67 #define bswap_32(x) bswap32(x)
68 #define bswap_64(x) bswap64(x)
69
70 #elif defined(__OpenBSD__)
71
72 #include <sys/types.h>
73 #define bswap_32(x) swap32(x)
74 #define bswap_64(x) swap64(x)
75
76 #elif defined(__NetBSD__)
77
78 #include <machine/bswap.h>
79 #include <sys/types.h>
80 #if defined(__BSWAP_RENAME) && !defined(__bswap_32)
81 #define bswap_32(x) bswap32(x)
82 #define bswap_64(x) bswap64(x)
83 #endif
84
85 #else
86
87 // XXX(cavalcanti): building 'native_client' fails with this header.
88 //#include <byteswap.h>
89
90 // Falling back to compiler builtins instead.
91 #define bswap_32(x) __builtin_bswap32(x)
92 #define bswap_64(x) __builtin_bswap64(x)
93
94 #endif
95
96 namespace base {
97 namespace internal {
98 namespace cityhash_v111 {
99
100 #ifdef WORDS_BIGENDIAN
101 #define uint32_in_expected_order(x) (bswap_32(x))
102 #define uint64_in_expected_order(x) (bswap_64(x))
103 #else
104 #define uint32_in_expected_order(x) (x)
105 #define uint64_in_expected_order(x) (x)
106 #endif
107
108 #if !defined(LIKELY)
109 #if HAVE_BUILTIN_EXPECT
110 #define LIKELY(x) (__builtin_expect(!!(x), 1))
111 #else
112 #define LIKELY(x) (x)
113 #endif
114 #endif
115
UNALIGNED_LOAD64(const char * p)116 static uint64 UNALIGNED_LOAD64(const char* p) {
117 uint64 result;
118 memcpy(&result, p, sizeof(result));
119 return result;
120 }
121
UNALIGNED_LOAD32(const char * p)122 static uint32 UNALIGNED_LOAD32(const char* p) {
123 uint32 result;
124 memcpy(&result, p, sizeof(result));
125 return result;
126 }
127
Fetch64(const char * p)128 static uint64 Fetch64(const char* p) {
129 return uint64_in_expected_order(UNALIGNED_LOAD64(p));
130 }
131
Fetch32(const char * p)132 static uint32 Fetch32(const char* p) {
133 return uint32_in_expected_order(UNALIGNED_LOAD32(p));
134 }
135
136 // Some primes between 2^63 and 2^64 for various uses.
137 static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
138 static const uint64 k1 = 0xb492b66fbe98f273ULL;
139 static const uint64 k2 = 0x9ae16a3b2f90404fULL;
140
141 // Magic numbers for 32-bit hashing. Copied from Murmur3.
142 static const uint32 c1 = 0xcc9e2d51;
143 static const uint32 c2 = 0x1b873593;
144
145 // A 32-bit to 32-bit integer hash copied from Murmur3.
fmix(uint32 h)146 static uint32 fmix(uint32 h) {
147 h ^= h >> 16;
148 h *= 0x85ebca6b;
149 h ^= h >> 13;
150 h *= 0xc2b2ae35;
151 h ^= h >> 16;
152 return h;
153 }
154
Rotate32(uint32 val,int shift)155 static uint32 Rotate32(uint32 val, int shift) {
156 // Avoid shifting by 32: doing so yields an undefined result.
157 return shift == 0 ? val : ((val >> shift) | (val << (32 - shift)));
158 }
159
160 #undef PERMUTE3
161 #define PERMUTE3(a, b, c) \
162 do { \
163 std::swap(a, b); \
164 std::swap(a, c); \
165 } while (0)
166
Mur(uint32 a,uint32 h)167 static uint32 Mur(uint32 a, uint32 h) {
168 // Helper from Murmur3 for combining two 32-bit values.
169 a *= c1;
170 a = Rotate32(a, 17);
171 a *= c2;
172 h ^= a;
173 h = Rotate32(h, 19);
174 return h * 5 + 0xe6546b64;
175 }
176
Hash32Len13to24(const char * s,size_t len)177 static uint32 Hash32Len13to24(const char* s, size_t len) {
178 uint32 a = Fetch32(s - 4 + (len >> 1));
179 uint32 b = Fetch32(s + 4);
180 uint32 c = Fetch32(s + len - 8);
181 uint32 d = Fetch32(s + (len >> 1));
182 uint32 e = Fetch32(s);
183 uint32 f = Fetch32(s + len - 4);
184 uint32 h = static_cast<uint32>(len);
185
186 return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h)))))));
187 }
188
Hash32Len0to4(const char * s,size_t len)189 static uint32 Hash32Len0to4(const char* s, size_t len) {
190 uint32 b = 0;
191 uint32 c = 9;
192 for (size_t i = 0; i < len; i++) {
193 signed char v = static_cast<signed char>(s[i]);
194 b = b * c1 + static_cast<uint32>(v);
195 c ^= b;
196 }
197 return fmix(Mur(b, Mur(static_cast<uint32>(len), c)));
198 }
199
Hash32Len5to12(const char * s,size_t len)200 static uint32 Hash32Len5to12(const char* s, size_t len) {
201 uint32 a = static_cast<uint32>(len), b = a * 5, c = 9, d = b;
202 a += Fetch32(s);
203 b += Fetch32(s + len - 4);
204 c += Fetch32(s + ((len >> 1) & 4));
205 return fmix(Mur(c, Mur(b, Mur(a, d))));
206 }
207
CityHash32(const char * s,size_t len)208 uint32 CityHash32(const char* s, size_t len) {
209 if (len <= 24) {
210 return len <= 12
211 ? (len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len))
212 : Hash32Len13to24(s, len);
213 }
214
215 // len > 24
216 uint32 h = static_cast<uint32>(len), g = c1 * h, f = g;
217 uint32 a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2;
218 uint32 a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2;
219 uint32 a2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2;
220 uint32 a3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2;
221 uint32 a4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2;
222 h ^= a0;
223 h = Rotate32(h, 19);
224 h = h * 5 + 0xe6546b64;
225 h ^= a2;
226 h = Rotate32(h, 19);
227 h = h * 5 + 0xe6546b64;
228 g ^= a1;
229 g = Rotate32(g, 19);
230 g = g * 5 + 0xe6546b64;
231 g ^= a3;
232 g = Rotate32(g, 19);
233 g = g * 5 + 0xe6546b64;
234 f += a4;
235 f = Rotate32(f, 19);
236 f = f * 5 + 0xe6546b64;
237 size_t iters = (len - 1) / 20;
238 do {
239 a0 = Rotate32(Fetch32(s) * c1, 17) * c2;
240 a1 = Fetch32(s + 4);
241 a2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2;
242 a3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2;
243 a4 = Fetch32(s + 16);
244 h ^= a0;
245 h = Rotate32(h, 18);
246 h = h * 5 + 0xe6546b64;
247 f += a1;
248 f = Rotate32(f, 19);
249 f = f * c1;
250 g += a2;
251 g = Rotate32(g, 18);
252 g = g * 5 + 0xe6546b64;
253 h ^= a3 + a1;
254 h = Rotate32(h, 19);
255 h = h * 5 + 0xe6546b64;
256 g ^= a4;
257 g = bswap_32(g) * 5;
258 h += a4 * 5;
259 h = bswap_32(h);
260 f += a0;
261 PERMUTE3(f, h, g);
262 s += 20;
263 } while (--iters != 0);
264 g = Rotate32(g, 11) * c1;
265 g = Rotate32(g, 17) * c1;
266 f = Rotate32(f, 11) * c1;
267 f = Rotate32(f, 17) * c1;
268 h = Rotate32(h + g, 19);
269 h = h * 5 + 0xe6546b64;
270 h = Rotate32(h, 17) * c1;
271 h = Rotate32(h + f, 19);
272 h = h * 5 + 0xe6546b64;
273 h = Rotate32(h, 17) * c1;
274 return h;
275 }
276
277 // Bitwise right rotate. Normally this will compile to a single
278 // instruction, especially if the shift is a manifest constant.
Rotate(uint64 val,int shift)279 static uint64 Rotate(uint64 val, int shift) {
280 // Avoid shifting by 64: doing so yields an undefined result.
281 return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
282 }
283
ShiftMix(uint64 val)284 static uint64 ShiftMix(uint64 val) {
285 return val ^ (val >> 47);
286 }
287
HashLen16(uint64 u,uint64 v)288 static uint64 HashLen16(uint64 u, uint64 v) {
289 return Hash128to64(uint128(u, v));
290 }
291
HashLen16(uint64 u,uint64 v,uint64 mul)292 static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) {
293 // Murmur-inspired hashing.
294 uint64 a = (u ^ v) * mul;
295 a ^= (a >> 47);
296 uint64 b = (v ^ a) * mul;
297 b ^= (b >> 47);
298 b *= mul;
299 return b;
300 }
301
HashLen0to16(const char * s,size_t len)302 static uint64 HashLen0to16(const char* s, size_t len) {
303 if (len >= 8) {
304 uint64 mul = k2 + len * 2;
305 uint64 a = Fetch64(s) + k2;
306 uint64 b = Fetch64(s + len - 8);
307 uint64 c = Rotate(b, 37) * mul + a;
308 uint64 d = (Rotate(a, 25) + b) * mul;
309 return HashLen16(c, d, mul);
310 }
311 if (len >= 4) {
312 uint64 mul = k2 + len * 2;
313 uint64 a = Fetch32(s);
314 return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul);
315 }
316 if (len > 0) {
317 uint8 a = static_cast<uint8>(s[0]);
318 uint8 b = static_cast<uint8>(s[len >> 1]);
319 uint8 c = static_cast<uint8>(s[len - 1]);
320 uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
321 uint32 z = static_cast<uint32>(len) + (static_cast<uint32>(c) << 2);
322 return ShiftMix(y * k2 ^ z * k0) * k2;
323 }
324 return k2;
325 }
326
327 // This probably works well for 16-byte strings as well, but it may be overkill
328 // in that case.
HashLen17to32(const char * s,size_t len)329 static uint64 HashLen17to32(const char* s, size_t len) {
330 uint64 mul = k2 + len * 2;
331 uint64 a = Fetch64(s) * k1;
332 uint64 b = Fetch64(s + 8);
333 uint64 c = Fetch64(s + len - 8) * mul;
334 uint64 d = Fetch64(s + len - 16) * k2;
335 return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d,
336 a + Rotate(b + k2, 18) + c, mul);
337 }
338
339 // Return a 16-byte hash for 48 bytes. Quick and dirty.
340 // Callers do best to use "random-looking" values for a and b.
WeakHashLen32WithSeeds(uint64 w,uint64 x,uint64 y,uint64 z,uint64 a,uint64 b)341 static pair<uint64, uint64> WeakHashLen32WithSeeds(uint64 w,
342 uint64 x,
343 uint64 y,
344 uint64 z,
345 uint64 a,
346 uint64 b) {
347 a += w;
348 b = Rotate(b + a + z, 21);
349 uint64 c = a;
350 a += x;
351 a += y;
352 b += Rotate(a, 44);
353 return make_pair(a + z, b + c);
354 }
355
356 // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
WeakHashLen32WithSeeds(const char * s,uint64 a,uint64 b)357 static pair<uint64, uint64> WeakHashLen32WithSeeds(const char* s,
358 uint64 a,
359 uint64 b) {
360 return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16),
361 Fetch64(s + 24), a, b);
362 }
363
364 // Return an 8-byte hash for 33 to 64 bytes.
HashLen33to64(const char * s,size_t len)365 static uint64 HashLen33to64(const char* s, size_t len) {
366 uint64 mul = k2 + len * 2;
367 uint64 a = Fetch64(s) * k2;
368 uint64 b = Fetch64(s + 8);
369 uint64 c = Fetch64(s + len - 24);
370 uint64 d = Fetch64(s + len - 32);
371 uint64 e = Fetch64(s + 16) * k2;
372 uint64 f = Fetch64(s + 24) * 9;
373 uint64 g = Fetch64(s + len - 8);
374 uint64 h = Fetch64(s + len - 16) * mul;
375 uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
376 uint64 v = ((a + g) ^ d) + f + 1;
377 uint64 w = bswap_64((u + v) * mul) + h;
378 uint64 x = Rotate(e + f, 42) + c;
379 uint64 y = (bswap_64((v + w) * mul) + g) * mul;
380 uint64 z = e + f + c;
381 a = bswap_64((x + z) * mul + y) + b;
382 b = ShiftMix((z + a) * mul + d + h) * mul;
383 return b + x;
384 }
385
CityHash64(const char * s,size_t len)386 uint64 CityHash64(const char* s, size_t len) {
387 if (len <= 32) {
388 if (len <= 16) {
389 return HashLen0to16(s, len);
390 } else {
391 return HashLen17to32(s, len);
392 }
393 } else if (len <= 64) {
394 return HashLen33to64(s, len);
395 }
396
397 // For strings over 64 bytes we hash the end first, and then as we
398 // loop we keep 56 bytes of state: v, w, x, y, and z.
399 uint64 x = Fetch64(s + len - 40);
400 uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
401 uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
402 pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
403 pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
404 x = x * k1 + Fetch64(s);
405
406 // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
407 len = (len - 1) & ~static_cast<size_t>(63);
408 do {
409 x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
410 y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
411 x ^= w.second;
412 y += v.first + Fetch64(s + 40);
413 z = Rotate(z + w.first, 33) * k1;
414 v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
415 w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
416 std::swap(z, x);
417 s += 64;
418 len -= 64;
419 } while (len != 0);
420 return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
421 HashLen16(v.second, w.second) + x);
422 }
423
CityHash64WithSeed(const char * s,size_t len,uint64 seed)424 uint64 CityHash64WithSeed(const char* s, size_t len, uint64 seed) {
425 return CityHash64WithSeeds(s, len, k2, seed);
426 }
427
CityHash64WithSeeds(const char * s,size_t len,uint64 seed0,uint64 seed1)428 uint64 CityHash64WithSeeds(const char* s,
429 size_t len,
430 uint64 seed0,
431 uint64 seed1) {
432 return HashLen16(CityHash64(s, len) - seed0, seed1);
433 }
434
435 // A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
436 // of any length representable in signed long. Based on City and Murmur.
CityMurmur(const char * s,size_t len,uint128 seed)437 static uint128 CityMurmur(const char* s, size_t len, uint128 seed) {
438 uint64 a = Uint128Low64(seed);
439 uint64 b = Uint128High64(seed);
440 uint64 c = 0;
441 uint64 d = 0;
442 if (len <= 16) {
443 a = ShiftMix(a * k1) * k1;
444 c = b * k1 + HashLen0to16(s, len);
445 d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
446 } else {
447 c = HashLen16(Fetch64(s + len - 8) + k1, a);
448 d = HashLen16(b + len, c + Fetch64(s + len - 16));
449 a += d;
450 // len > 16 here, so do...while is safe
451 do {
452 a ^= ShiftMix(Fetch64(s) * k1) * k1;
453 a *= k1;
454 b ^= a;
455 c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
456 c *= k1;
457 d ^= c;
458 s += 16;
459 len -= 16;
460 } while (len > 16);
461 }
462 a = HashLen16(a, c);
463 b = HashLen16(d, b);
464 return uint128(a ^ b, HashLen16(b, a));
465 }
466
CityHash128WithSeed(const char * s,size_t len,uint128 seed)467 uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) {
468 if (len < 128) {
469 return CityMurmur(s, len, seed);
470 }
471
472 // We expect len >= 128 to be the common case. Keep 56 bytes of state:
473 // v, w, x, y, and z.
474 pair<uint64, uint64> v, w;
475 uint64 x = Uint128Low64(seed);
476 uint64 y = Uint128High64(seed);
477 uint64 z = len * k1;
478 v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
479 v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
480 w.first = Rotate(y + z, 35) * k1 + x;
481 w.second = Rotate(x + Fetch64(s + 88), 53) * k1;
482
483 // This is the same inner loop as CityHash64(), manually unrolled.
484 do {
485 x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
486 y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
487 x ^= w.second;
488 y += v.first + Fetch64(s + 40);
489 z = Rotate(z + w.first, 33) * k1;
490 v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
491 w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
492 std::swap(z, x);
493 s += 64;
494 x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
495 y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
496 x ^= w.second;
497 y += v.first + Fetch64(s + 40);
498 z = Rotate(z + w.first, 33) * k1;
499 v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
500 w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
501 std::swap(z, x);
502 s += 64;
503 len -= 128;
504 } while (LIKELY(len >= 128));
505 x += Rotate(v.first + z, 49) * k0;
506 y = y * k0 + Rotate(w.second, 37);
507 z = z * k0 + Rotate(w.first, 27);
508 w.first *= 9;
509 v.first *= k0;
510 // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
511 for (size_t tail_done = 0; tail_done < len;) {
512 tail_done += 32;
513 y = Rotate(x + y, 42) * k0 + v.second;
514 w.first += Fetch64(s + len - tail_done + 16);
515 x = x * k0 + w.first;
516 z += w.second + Fetch64(s + len - tail_done);
517 w.second += v.first;
518 v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
519 v.first *= k0;
520 }
521 // At this point our 56 bytes of state should contain more than
522 // enough information for a strong 128-bit hash. We use two
523 // different 56-byte-to-8-byte hashes to get a 16-byte final result.
524 x = HashLen16(x, v.first);
525 y = HashLen16(y + z, w.first);
526 return uint128(HashLen16(x + v.second, w.second) + y,
527 HashLen16(x + w.second, y + v.second));
528 }
529
CityHash128(const char * s,size_t len)530 uint128 CityHash128(const char* s, size_t len) {
531 return len >= 16
532 ? CityHash128WithSeed(s + 16, len - 16,
533 uint128(Fetch64(s), Fetch64(s + 8) + k0))
534 : CityHash128WithSeed(s, len, uint128(k0, k1));
535 }
536
537 } // namespace cityhash_v111
538 } // namespace internal
539 } // namespace base
540