1 // Copyright (c) 2011 Google, Inc.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 //
21 // CityHash, by Geoff Pike and Jyrki Alakuijala
22 //
23 // This file provides CityHash64() and related functions.
24 //
25 // It's probably possible to create even faster hash functions by
26 // writing a program that systematically explores some of the space of
27 // possible hash functions, by using SIMD instructions, or by
28 // compromising on hash quality.
29
30 #include "City.h"
31
32 #include <algorithm>
33 #include <string.h> // for memcpy and memset
34
35 using namespace std;
36
UNALIGNED_LOAD64(const char * p)37 static uint64 UNALIGNED_LOAD64(const char *p) {
38 uint64 result;
39 memcpy(&result, p, sizeof(result));
40 return result;
41 }
42
UNALIGNED_LOAD32(const char * p)43 static uint32 UNALIGNED_LOAD32(const char *p) {
44 uint32 result;
45 memcpy(&result, p, sizeof(result));
46 return result;
47 }
48
// Byte-order adaptation used by Fetch32()/Fetch64().
//
// When __BIG_ENDIAN__ is not defined the *_in_expected_order() macros are the
// identity.  When it is defined they byte-swap the loaded value, so that a
// big-endian build sees the same integer a little-endian load would produce.
//
// NOTE(review): detection relies solely on the __BIG_ENDIAN__ macro; confirm
// that every big-endian toolchain in use actually defines it.
#ifndef __BIG_ENDIAN__

#define uint32_in_expected_order(x) (x)
#define uint64_in_expected_order(x) (x)

#else

// Pick a byte-swap primitive for the current toolchain.
#ifdef _MSC_VER
#include <stdlib.h>
#define bswap_32(x) _byteswap_ulong(x)
#define bswap_64(x) _byteswap_uint64(x)

#elif defined(__APPLE__)
// Mac OS X / Darwin features
#include <libkern/OSByteOrder.h>
#define bswap_32(x) OSSwapInt32(x)
#define bswap_64(x) OSSwapInt64(x)

#else
// Everything else: <byteswap.h> is expected to supply bswap_32/bswap_64
// itself, since no local definition is made in this branch.
#include <byteswap.h>
#endif

#define uint32_in_expected_order(x) (bswap_32(x))
#define uint64_in_expected_order(x) (bswap_64(x))

#endif  // __BIG_ENDIAN__
75
// Branch hint.  LIKELY(x) has the truth value of x.  On GCC-compatible and
// Intel compilers it is routed through __builtin_expect with an expected
// value of 1 (x is normalized to 0/1 via !!); elsewhere it is just (x).
// A LIKELY macro that is already defined is left untouched.
#if !defined(LIKELY)
#if defined(__GNUC__) || defined(__INTEL_COMPILER)
#define LIKELY(x) (__builtin_expect(!!(x), 1))
#else
#define LIKELY(x) (x)
#endif
#endif
83
Fetch64(const char * p)84 static uint64 Fetch64(const char *p) {
85 return uint64_in_expected_order(UNALIGNED_LOAD64(p));
86 }
87
Fetch32(const char * p)88 static uint32 Fetch32(const char *p) {
89 return uint32_in_expected_order(UNALIGNED_LOAD32(p));
90 }
91
// Some primes between 2^63 and 2^64 for various uses.
// The hashing routines in this file use them as multipliers, as additive
// offsets and as XOR masks; k2 is also the hash returned for empty input by
// HashLen0to16().
static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
static const uint64 k1 = 0xb492b66fbe98f273ULL;
static const uint64 k2 = 0x9ae16a3b2f90404fULL;
static const uint64 k3 = 0xc949d7c7509e6557ULL;
97
98 // Bitwise right rotate. Normally this will compile to a single
99 // instruction, especially if the shift is a manifest constant.
Rotate(uint64 val,int shift)100 static uint64 Rotate(uint64 val, int shift) {
101 // Avoid shifting by 64: doing so yields an undefined result.
102 return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
103 }
104
105 // Equivalent to Rotate(), but requires the second arg to be non-zero.
106 // On x86-64, and probably others, it's possible for this to compile
107 // to a single instruction if both args are already in registers.
RotateByAtLeast1(uint64 val,int shift)108 static uint64 RotateByAtLeast1(uint64 val, int shift) {
109 return (val >> shift) | (val << (64 - shift));
110 }
111
ShiftMix(uint64 val)112 static uint64 ShiftMix(uint64 val) {
113 return val ^ (val >> 47);
114 }
115
HashLen16(uint64 u,uint64 v)116 static uint64 HashLen16(uint64 u, uint64 v) {
117 return Hash128to64(uint128(u, v));
118 }
119
HashLen0to16(const char * s,size_t len)120 static uint64 HashLen0to16(const char *s, size_t len) {
121 if (len > 8) {
122 uint64 a = Fetch64(s);
123 uint64 b = Fetch64(s + len - 8);
124 return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;
125 }
126 if (len >= 4) {
127 uint64 a = Fetch32(s);
128 return HashLen16(len + (a << 3), Fetch32(s + len - 4));
129 }
130 if (len > 0) {
131 uint8 a = s[0];
132 uint8 b = s[len >> 1];
133 uint8 c = s[len - 1];
134 uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
135 uint32 z = len + (static_cast<uint32>(c) << 2);
136 return ShiftMix(y * k2 ^ z * k3) * k2;
137 }
138 return k2;
139 }
140
141 // This probably works well for 16-byte strings as well, but it may be overkill
142 // in that case.
HashLen17to32(const char * s,size_t len)143 static uint64 HashLen17to32(const char *s, size_t len) {
144 uint64 a = Fetch64(s) * k1;
145 uint64 b = Fetch64(s + 8);
146 uint64 c = Fetch64(s + len - 8) * k2;
147 uint64 d = Fetch64(s + len - 16) * k0;
148 return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
149 a + Rotate(b ^ k3, 20) - c + len);
150 }
151
152 // Return a 16-byte hash for 48 bytes. Quick and dirty.
153 // Callers do best to use "random-looking" values for a and b.
WeakHashLen32WithSeeds(uint64 w,uint64 x,uint64 y,uint64 z,uint64 a,uint64 b)154 static pair<uint64, uint64> WeakHashLen32WithSeeds(
155 uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) {
156 a += w;
157 b = Rotate(b + a + z, 21);
158 uint64 c = a;
159 a += x;
160 a += y;
161 b += Rotate(a, 44);
162 return make_pair(a + z, b + c);
163 }
164
165 // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
WeakHashLen32WithSeeds(const char * s,uint64 a,uint64 b)166 static pair<uint64, uint64> WeakHashLen32WithSeeds(
167 const char* s, uint64 a, uint64 b) {
168 return WeakHashLen32WithSeeds(Fetch64(s),
169 Fetch64(s + 8),
170 Fetch64(s + 16),
171 Fetch64(s + 24),
172 a,
173 b);
174 }
175
176 // Return an 8-byte hash for 33 to 64 bytes.
HashLen33to64(const char * s,size_t len)177 static uint64 HashLen33to64(const char *s, size_t len) {
178 uint64 z = Fetch64(s + 24);
179 uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0;
180 uint64 b = Rotate(a + z, 52);
181 uint64 c = Rotate(a, 37);
182 a += Fetch64(s + 8);
183 c += Rotate(a, 7);
184 a += Fetch64(s + 16);
185 uint64 vf = a + z;
186 uint64 vs = b + Rotate(a, 31) + c;
187 a = Fetch64(s + 16) + Fetch64(s + len - 32);
188 z = Fetch64(s + len - 8);
189 b = Rotate(a + z, 52);
190 c = Rotate(a, 37);
191 a += Fetch64(s + len - 24);
192 c += Rotate(a, 7);
193 a += Fetch64(s + len - 16);
194 uint64 wf = a + z;
195 uint64 ws = b + Rotate(a, 31) + c;
196 uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);
197 return ShiftMix(r * k0 + vs) * k2;
198 }
199
// Returns the 64-bit CityHash of the len bytes starting at s.
//
// Inputs of at most 64 bytes are handed to the length-specialized helpers.
// Longer inputs first seed the state from words in the final 64 bytes (plus
// the first word of the input) and then walk the input front to back in
// 64-byte chunks.
uint64 CityHash64(const char *s, size_t len) {
  if (len <= 32) {
    if (len <= 16) {
      return HashLen0to16(s, len);
    } else {
      return HashLen17to32(s, len);
    }
  } else if (len <= 64) {
    return HashLen33to64(s, len);
  }

  // For strings over 64 bytes we hash the end first, and then as we
  // loop we keep 56 bytes of state: v, w, x, y, and z.
  uint64 x = Fetch64(s + len - 40);
  uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
  uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
  pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
  pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
  x = x * k1 + Fetch64(s);

  // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
  // Because of the "- 1", a length that is already a multiple of 64 is
  // rounded down by a full chunk; those trailing 64 bytes were consumed above
  // when seeding the state.  len > 64 here, so the result is at least 64 and
  // the do-while body always has a full chunk to read.
  len = (len - 1) & ~static_cast<size_t>(63);
  do {
    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
    x ^= w.second;
    y += v.first + Fetch64(s + 40);
    z = Rotate(z + w.first, 33) * k1;
    // v absorbs bytes 0..31 of the chunk, w absorbs bytes 32..63.
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    // x and z trade places for the next round.
    std::swap(z, x);
    s += 64;
    len -= 64;
  } while (len != 0);
  // Collapse the 56 bytes of state into the final 64-bit value.
  return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
                   HashLen16(v.second, w.second) + x);
}
237
CityHash64WithSeed(const char * s,size_t len,uint64 seed)238 uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {
239 return CityHash64WithSeeds(s, len, k2, seed);
240 }
241
CityHash64WithSeeds(const char * s,size_t len,uint64 seed0,uint64 seed1)242 uint64 CityHash64WithSeeds(const char *s, size_t len,
243 uint64 seed0, uint64 seed1) {
244 return HashLen16(CityHash64(s, len) - seed0, seed1);
245 }
246
// A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
// of any length representable in signed long.  Based on City and Murmur.
//
//   s    - input bytes.
//   len  - number of input bytes.
//   seed - 128-bit seed; its two 64-bit halves initialize a and b.
//
// In this file the only caller is CityHash128WithSeed(), for len < 128.
static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
  uint64 a = Uint128Low64(seed);
  uint64 b = Uint128High64(seed);
  uint64 c = 0;
  uint64 d = 0;
  // Signed on purpose: the "l <= 0" test below must catch len < 16, where
  // the size_t subtraction wraps.
  // NOTE(review): this relies on the wrapped value converting to a negative
  // signed long, and limits len to what signed long can hold -- confirm for
  // platforms where long is narrower than size_t.
  signed long l = len - 16;
  if (l <= 0) {  // len <= 16
    a = ShiftMix(a * k1) * k1;
    c = b * k1 + HashLen0to16(s, len);
    // Fetch64(s) is only evaluated when at least 8 bytes are available.
    d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
  } else {  // len > 16
    // Seed c and d from the last 16 bytes of the input.
    c = HashLen16(Fetch64(s + len - 8) + k1, a);
    d = HashLen16(b + len, c + Fetch64(s + len - 16));
    a += d;
    // Consume the input 16 bytes per iteration until l (initially len - 16)
    // is used up.
    do {
      a ^= ShiftMix(Fetch64(s) * k1) * k1;
      a *= k1;
      b ^= a;
      c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
      c *= k1;
      d ^= c;
      s += 16;
      l -= 16;
    } while (l > 0);
  }
  // Final mix of the four state words into the two halves of the result.
  a = HashLen16(a, c);
  b = HashLen16(d, b);
  return uint128(a ^ b, HashLen16(b, a));
}
278
// Returns a 128-bit hash of the len bytes starting at s, mixed with seed.
//
// Inputs shorter than 128 bytes are delegated to CityMurmur().  Longer inputs
// are consumed 128 bytes per loop iteration; any remaining bytes are then
// hashed in 32-byte chunks taken from the end of the input.
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
  if (len < 128) {
    return CityMurmur(s, len, seed);
  }

  // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
  // v, w, x, y, and z.
  pair<uint64, uint64> v, w;
  uint64 x = Uint128Low64(seed);
  uint64 y = Uint128High64(seed);
  uint64 z = len * k1;
  v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
  v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
  w.first = Rotate(y + z, 35) * k1 + x;
  w.second = Rotate(x + Fetch64(s + 88), 53) * k1;

  // This is the same inner loop as CityHash64(), manually unrolled.
  // Each half below consumes 64 bytes, so one iteration consumes 128.
  do {
    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
    x ^= w.second;
    y += v.first + Fetch64(s + 40);
    z = Rotate(z + w.first, 33) * k1;
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    std::swap(z, x);
    s += 64;
    // Second, identical half of the unrolled iteration.
    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
    x ^= w.second;
    y += v.first + Fetch64(s + 40);
    z = Rotate(z + w.first, 33) * k1;
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    std::swap(z, x);
    s += 64;
    len -= 128;
  } while (LIKELY(len >= 128));
  // From here on len is the number of unconsumed bytes (0..127) and s points
  // at the first of them.
  x += Rotate(v.first + z, 49) * k0;
  z += Rotate(w.first, 37) * k0;
  // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
  // When len is not a multiple of 32, the last chunk starts up to 31 bytes
  // before the current s; those bytes belong to the region the loop above
  // already consumed, so the read stays inside the caller's buffer.
  for (size_t tail_done = 0; tail_done < len; ) {
    tail_done += 32;
    y = Rotate(x + y, 42) * k0 + v.second;
    w.first += Fetch64(s + len - tail_done + 16);
    x = x * k0 + w.first;
    z += w.second + Fetch64(s + len - tail_done);
    w.second += v.first;
    v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
  }
  // At this point our 56 bytes of state should contain more than
  // enough information for a strong 128-bit hash.  We use two
  // different 56-byte-to-8-byte hashes to get a 16-byte final result.
  x = HashLen16(x, v.first);
  y = HashLen16(y + z, w.first);
  return uint128(HashLen16(x + v.second, w.second) + y,
                 HashLen16(x + w.second, y + v.second));
}
337
CityHash128(const char * s,size_t len)338 uint128 CityHash128(const char *s, size_t len) {
339 if (len >= 16) {
340 return CityHash128WithSeed(s + 16,
341 len - 16,
342 uint128(Fetch64(s) ^ k3,
343 Fetch64(s + 8)));
344 } else if (len >= 8) {
345 return CityHash128WithSeed(NULL,
346 0,
347 uint128(Fetch64(s) ^ (len * k0),
348 Fetch64(s + len - 8) ^ k1));
349 } else {
350 return CityHash128WithSeed(s, len, uint128(k0, k1));
351 }
352 }
353
354 #if defined(__SSE4_2__) && defined(__x86_64__)
355 #include <nmmintrin.h>
356
357 // Requires len >= 240.
CityHashCrc256Long(const char * s,size_t len,uint32 seed,uint64 * result)358 static void CityHashCrc256Long(const char *s, size_t len,
359 uint32 seed, uint64 *result) {
360 uint64 a = Fetch64(s + 56) + k0;
361 uint64 b = Fetch64(s + 96) + k0;
362 uint64 c = result[0] = HashLen16(b, len);
363 uint64 d = result[1] = Fetch64(s + 120) * k0 + len;
364 uint64 e = Fetch64(s + 184) + seed;
365 uint64 f = seed;
366 uint64 g = 0;
367 uint64 h = 0;
368 uint64 i = 0;
369 uint64 j = 0;
370 uint64 t = c + d;
371
372 // 240 bytes of input per iter.
373 size_t iters = len / 240;
374 len -= iters * 240;
375 do {
376 #define CHUNK(multiplier, z) \
377 { \
378 uint64 old_a = a; \
379 a = Rotate(b, 41 ^ z) * multiplier + Fetch64(s); \
380 b = Rotate(c, 27 ^ z) * multiplier + Fetch64(s + 8); \
381 c = Rotate(d, 41 ^ z) * multiplier + Fetch64(s + 16); \
382 d = Rotate(e, 33 ^ z) * multiplier + Fetch64(s + 24); \
383 e = Rotate(t, 25 ^ z) * multiplier + Fetch64(s + 32); \
384 t = old_a; \
385 } \
386 f = _mm_crc32_u64(f, a); \
387 g = _mm_crc32_u64(g, b); \
388 h = _mm_crc32_u64(h, c); \
389 i = _mm_crc32_u64(i, d); \
390 j = _mm_crc32_u64(j, e); \
391 s += 40
392
393 CHUNK(1, 1); CHUNK(k0, 0);
394 CHUNK(1, 1); CHUNK(k0, 0);
395 CHUNK(1, 1); CHUNK(k0, 0);
396 } while (--iters > 0);
397
398 while (len >= 40) {
399 CHUNK(k0, 0);
400 len -= 40;
401 }
402 if (len > 0) {
403 s = s + len - 40;
404 CHUNK(k0, 0);
405 }
406 j += i << 32;
407 a = HashLen16(a, j);
408 h += g << 32;
409 b += h;
410 c = HashLen16(c, f) + i;
411 d = HashLen16(d, e + result[0]);
412 j += e;
413 i += HashLen16(h, t);
414 e = HashLen16(a, d) + j;
415 f = HashLen16(b, c) + a;
416 g = HashLen16(j, i) + c;
417 result[0] = e + f + g + h;
418 a = ShiftMix((a + g) * k0) * k0 + b;
419 result[1] += a + result[0];
420 a = ShiftMix(a * k0) * k0 + c;
421 result[2] = a + result[1];
422 a = ShiftMix((a + e) * k0) * k0;
423 result[3] = a + result[2];
424 }
425
426 // Requires len < 240.
CityHashCrc256Short(const char * s,size_t len,uint64 * result)427 static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) {
428 char buf[240];
429 memcpy(buf, s, len);
430 memset(buf + len, 0, 240 - len);
431 CityHashCrc256Long(buf, 240, ~static_cast<uint32>(len), result);
432 }
433
CityHashCrc256(const char * s,size_t len,uint64 * result)434 void CityHashCrc256(const char *s, size_t len, uint64 *result) {
435 if (LIKELY(len >= 240)) {
436 CityHashCrc256Long(s, len, 0, result);
437 } else {
438 CityHashCrc256Short(s, len, result);
439 }
440 }
441
CityHashCrc128WithSeed(const char * s,size_t len,uint128 seed)442 uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) {
443 if (len <= 900) {
444 return CityHash128WithSeed(s, len, seed);
445 } else {
446 uint64 result[4];
447 CityHashCrc256(s, len, result);
448 uint64 u = Uint128High64(seed) + result[0];
449 uint64 v = Uint128Low64(seed) + result[1];
450 return uint128(HashLen16(u, v + result[2]),
451 HashLen16(Rotate(v, 32), u * k0 + result[3]));
452 }
453 }
454
CityHashCrc128(const char * s,size_t len)455 uint128 CityHashCrc128(const char *s, size_t len) {
456 if (len <= 900) {
457 return CityHash128(s, len);
458 } else {
459 uint64 result[4];
460 CityHashCrc256(s, len, result);
461 return uint128(result[2], result[3]);
462 }
463 }
464
465 #endif
466