• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 Google, Inc.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 //
21 // CityHash, by Geoff Pike and Jyrki Alakuijala
22 //
23 // This file provides CityHash64() and related functions.
24 //
25 // It's probably possible to create even faster hash functions by
26 // writing a program that systematically explores some of the space of
27 // possible hash functions, by using SIMD instructions, or by
28 // compromising on hash quality.
29 
30 #include "City.h"
31 
32 #include <algorithm>
33 #include <string.h>  // for memcpy and memset
34 
35 using namespace std;
36 
UNALIGNED_LOAD64(const char * p)37 static uint64 UNALIGNED_LOAD64(const char *p) {
38   uint64 result;
39   memcpy(&result, p, sizeof(result));
40   return result;
41 }
42 
UNALIGNED_LOAD32(const char * p)43 static uint32 UNALIGNED_LOAD32(const char *p) {
44   uint32 result;
45   memcpy(&result, p, sizeof(result));
46   return result;
47 }
48 
// Byte-order normalisation for values loaded from memory.
//
// When __BIG_ENDIAN__ is defined, loaded words are byte-swapped with whichever
// primitive the toolchain provides (MSVC intrinsics, Darwin's OSByteOrder, or
// <byteswap.h>).  Otherwise the loaded words are used unchanged.
#if defined(__BIG_ENDIAN__)

#if defined(_MSC_VER)
#include <stdlib.h>
#define bswap_32(x) _byteswap_ulong(x)
#define bswap_64(x) _byteswap_uint64(x)

#elif defined(__APPLE__)
// Mac OS X / Darwin.
#include <libkern/OSByteOrder.h>
#define bswap_32(x) OSSwapInt32(x)
#define bswap_64(x) OSSwapInt64(x)

#else
// Expected to supply bswap_32 / bswap_64.
#include <byteswap.h>
#endif

#define uint32_in_expected_order(x) (bswap_32(x))
#define uint64_in_expected_order(x) (bswap_64(x))

#else  // !defined(__BIG_ENDIAN__)

#define uint32_in_expected_order(x) (x)
#define uint64_in_expected_order(x) (x)

#endif  // defined(__BIG_ENDIAN__)
75 
// LIKELY(x): evaluates to the truth value of x.  On GCC-compatible and Intel
// compilers it also tells the optimiser that x is expected to be true.  An
// existing definition of LIKELY is left untouched.
#ifndef LIKELY
#  if defined(__GNUC__) || defined(__INTEL_COMPILER)
#    define LIKELY(x) (__builtin_expect(!!(x), 1))
#  else
#    define LIKELY(x) (x)
#  endif
#endif  // LIKELY
83 
Fetch64(const char * p)84 static uint64 Fetch64(const char *p) {
85   return uint64_in_expected_order(UNALIGNED_LOAD64(p));
86 }
87 
Fetch32(const char * p)88 static uint32 Fetch32(const char *p) {
89   return uint32_in_expected_order(UNALIGNED_LOAD32(p));
90 }
91 
92 // Some primes between 2^63 and 2^64 for various uses.
93 static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
94 static const uint64 k1 = 0xb492b66fbe98f273ULL;
95 static const uint64 k2 = 0x9ae16a3b2f90404fULL;
96 static const uint64 k3 = 0xc949d7c7509e6557ULL;
97 
98 // Bitwise right rotate.  Normally this will compile to a single
99 // instruction, especially if the shift is a manifest constant.
Rotate(uint64 val,int shift)100 static uint64 Rotate(uint64 val, int shift) {
101   // Avoid shifting by 64: doing so yields an undefined result.
102   return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
103 }
104 
105 // Equivalent to Rotate(), but requires the second arg to be non-zero.
106 // On x86-64, and probably others, it's possible for this to compile
107 // to a single instruction if both args are already in registers.
RotateByAtLeast1(uint64 val,int shift)108 static uint64 RotateByAtLeast1(uint64 val, int shift) {
109   return (val >> shift) | (val << (64 - shift));
110 }
111 
ShiftMix(uint64 val)112 static uint64 ShiftMix(uint64 val) {
113   return val ^ (val >> 47);
114 }
115 
HashLen16(uint64 u,uint64 v)116 static uint64 HashLen16(uint64 u, uint64 v) {
117   return Hash128to64(uint128(u, v));
118 }
119 
HashLen0to16(const char * s,size_t len)120 static uint64 HashLen0to16(const char *s, size_t len) {
121   if (len > 8) {
122     uint64 a = Fetch64(s);
123     uint64 b = Fetch64(s + len - 8);
124     return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;
125   }
126   if (len >= 4) {
127     uint64 a = Fetch32(s);
128     return HashLen16(len + (a << 3), Fetch32(s + len - 4));
129   }
130   if (len > 0) {
131     uint8 a = s[0];
132     uint8 b = s[len >> 1];
133     uint8 c = s[len - 1];
134     uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
135     uint32 z = len + (static_cast<uint32>(c) << 2);
136     return ShiftMix(y * k2 ^ z * k3) * k2;
137   }
138   return k2;
139 }
140 
141 // This probably works well for 16-byte strings as well, but it may be overkill
142 // in that case.
HashLen17to32(const char * s,size_t len)143 static uint64 HashLen17to32(const char *s, size_t len) {
144   uint64 a = Fetch64(s) * k1;
145   uint64 b = Fetch64(s + 8);
146   uint64 c = Fetch64(s + len - 8) * k2;
147   uint64 d = Fetch64(s + len - 16) * k0;
148   return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
149                    a + Rotate(b ^ k3, 20) - c + len);
150 }
151 
152 // Return a 16-byte hash for 48 bytes.  Quick and dirty.
153 // Callers do best to use "random-looking" values for a and b.
WeakHashLen32WithSeeds(uint64 w,uint64 x,uint64 y,uint64 z,uint64 a,uint64 b)154 static pair<uint64, uint64> WeakHashLen32WithSeeds(
155     uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) {
156   a += w;
157   b = Rotate(b + a + z, 21);
158   uint64 c = a;
159   a += x;
160   a += y;
161   b += Rotate(a, 44);
162   return make_pair(a + z, b + c);
163 }
164 
165 // Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
WeakHashLen32WithSeeds(const char * s,uint64 a,uint64 b)166 static pair<uint64, uint64> WeakHashLen32WithSeeds(
167     const char* s, uint64 a, uint64 b) {
168   return WeakHashLen32WithSeeds(Fetch64(s),
169                                 Fetch64(s + 8),
170                                 Fetch64(s + 16),
171                                 Fetch64(s + 24),
172                                 a,
173                                 b);
174 }
175 
176 // Return an 8-byte hash for 33 to 64 bytes.
HashLen33to64(const char * s,size_t len)177 static uint64 HashLen33to64(const char *s, size_t len) {
178   uint64 z = Fetch64(s + 24);
179   uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0;
180   uint64 b = Rotate(a + z, 52);
181   uint64 c = Rotate(a, 37);
182   a += Fetch64(s + 8);
183   c += Rotate(a, 7);
184   a += Fetch64(s + 16);
185   uint64 vf = a + z;
186   uint64 vs = b + Rotate(a, 31) + c;
187   a = Fetch64(s + 16) + Fetch64(s + len - 32);
188   z = Fetch64(s + len - 8);
189   b = Rotate(a + z, 52);
190   c = Rotate(a, 37);
191   a += Fetch64(s + len - 24);
192   c += Rotate(a, 7);
193   a += Fetch64(s + len - 16);
194   uint64 wf = a + z;
195   uint64 ws = b + Rotate(a, 31) + c;
196   uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);
197   return ShiftMix(r * k0 + vs) * k2;
198 }
199 
CityHash64(const char * s,size_t len)200 uint64 CityHash64(const char *s, size_t len) {
201   if (len <= 32) {
202     if (len <= 16) {
203       return HashLen0to16(s, len);
204     } else {
205       return HashLen17to32(s, len);
206     }
207   } else if (len <= 64) {
208     return HashLen33to64(s, len);
209   }
210 
211   // For strings over 64 bytes we hash the end first, and then as we
212   // loop we keep 56 bytes of state: v, w, x, y, and z.
213   uint64 x = Fetch64(s + len - 40);
214   uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
215   uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
216   pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
217   pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
218   x = x * k1 + Fetch64(s);
219 
220   // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
221   len = (len - 1) & ~static_cast<size_t>(63);
222   do {
223     x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
224     y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
225     x ^= w.second;
226     y += v.first + Fetch64(s + 40);
227     z = Rotate(z + w.first, 33) * k1;
228     v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
229     w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
230     std::swap(z, x);
231     s += 64;
232     len -= 64;
233   } while (len != 0);
234   return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
235                    HashLen16(v.second, w.second) + x);
236 }
237 
CityHash64WithSeed(const char * s,size_t len,uint64 seed)238 uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {
239   return CityHash64WithSeeds(s, len, k2, seed);
240 }
241 
CityHash64WithSeeds(const char * s,size_t len,uint64 seed0,uint64 seed1)242 uint64 CityHash64WithSeeds(const char *s, size_t len,
243                            uint64 seed0, uint64 seed1) {
244   return HashLen16(CityHash64(s, len) - seed0, seed1);
245 }
246 
247 // A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
248 // of any length representable in signed long.  Based on City and Murmur.
CityMurmur(const char * s,size_t len,uint128 seed)249 static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
250   uint64 a = Uint128Low64(seed);
251   uint64 b = Uint128High64(seed);
252   uint64 c = 0;
253   uint64 d = 0;
254   signed long l = len - 16;
255   if (l <= 0) {  // len <= 16
256     a = ShiftMix(a * k1) * k1;
257     c = b * k1 + HashLen0to16(s, len);
258     d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
259   } else {  // len > 16
260     c = HashLen16(Fetch64(s + len - 8) + k1, a);
261     d = HashLen16(b + len, c + Fetch64(s + len - 16));
262     a += d;
263     do {
264       a ^= ShiftMix(Fetch64(s) * k1) * k1;
265       a *= k1;
266       b ^= a;
267       c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
268       c *= k1;
269       d ^= c;
270       s += 16;
271       l -= 16;
272     } while (l > 0);
273   }
274   a = HashLen16(a, c);
275   b = HashLen16(d, b);
276   return uint128(a ^ b, HashLen16(b, a));
277 }
278 
CityHash128WithSeed(const char * s,size_t len,uint128 seed)279 uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
280   if (len < 128) {
281     return CityMurmur(s, len, seed);
282   }
283 
284   // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
285   // v, w, x, y, and z.
286   pair<uint64, uint64> v, w;
287   uint64 x = Uint128Low64(seed);
288   uint64 y = Uint128High64(seed);
289   uint64 z = len * k1;
290   v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
291   v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
292   w.first = Rotate(y + z, 35) * k1 + x;
293   w.second = Rotate(x + Fetch64(s + 88), 53) * k1;
294 
295   // This is the same inner loop as CityHash64(), manually unrolled.
296   do {
297     x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
298     y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
299     x ^= w.second;
300     y += v.first + Fetch64(s + 40);
301     z = Rotate(z + w.first, 33) * k1;
302     v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
303     w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
304     std::swap(z, x);
305     s += 64;
306     x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
307     y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
308     x ^= w.second;
309     y += v.first + Fetch64(s + 40);
310     z = Rotate(z + w.first, 33) * k1;
311     v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
312     w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
313     std::swap(z, x);
314     s += 64;
315     len -= 128;
316   } while (LIKELY(len >= 128));
317   x += Rotate(v.first + z, 49) * k0;
318   z += Rotate(w.first, 37) * k0;
319   // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
320   for (size_t tail_done = 0; tail_done < len; ) {
321     tail_done += 32;
322     y = Rotate(x + y, 42) * k0 + v.second;
323     w.first += Fetch64(s + len - tail_done + 16);
324     x = x * k0 + w.first;
325     z += w.second + Fetch64(s + len - tail_done);
326     w.second += v.first;
327     v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
328   }
329   // At this point our 56 bytes of state should contain more than
330   // enough information for a strong 128-bit hash.  We use two
331   // different 56-byte-to-8-byte hashes to get a 16-byte final result.
332   x = HashLen16(x, v.first);
333   y = HashLen16(y + z, w.first);
334   return uint128(HashLen16(x + v.second, w.second) + y,
335                  HashLen16(x + w.second, y + v.second));
336 }
337 
CityHash128(const char * s,size_t len)338 uint128 CityHash128(const char *s, size_t len) {
339   if (len >= 16) {
340     return CityHash128WithSeed(s + 16,
341                                len - 16,
342                                uint128(Fetch64(s) ^ k3,
343                                        Fetch64(s + 8)));
344   } else if (len >= 8) {
345     return CityHash128WithSeed(NULL,
346                                0,
347                                uint128(Fetch64(s) ^ (len * k0),
348                                        Fetch64(s + len - 8) ^ k1));
349   } else {
350     return CityHash128WithSeed(s, len, uint128(k0, k1));
351   }
352 }
353 
354 #if defined(__SSE4_2__) && defined(__x86_64__)
355 #include <nmmintrin.h>
356 
357 // Requires len >= 240.
CityHashCrc256Long(const char * s,size_t len,uint32 seed,uint64 * result)358 static void CityHashCrc256Long(const char *s, size_t len,
359                                uint32 seed, uint64 *result) {
360   uint64 a = Fetch64(s + 56) + k0;
361   uint64 b = Fetch64(s + 96) + k0;
362   uint64 c = result[0] = HashLen16(b, len);
363   uint64 d = result[1] = Fetch64(s + 120) * k0 + len;
364   uint64 e = Fetch64(s + 184) + seed;
365   uint64 f = seed;
366   uint64 g = 0;
367   uint64 h = 0;
368   uint64 i = 0;
369   uint64 j = 0;
370   uint64 t = c + d;
371 
372   // 240 bytes of input per iter.
373   size_t iters = len / 240;
374   len -= iters * 240;
375   do {
376 #define CHUNK(multiplier, z)                                    \
377     {                                                           \
378       uint64 old_a = a;                                         \
379       a = Rotate(b, 41 ^ z) * multiplier + Fetch64(s);          \
380       b = Rotate(c, 27 ^ z) * multiplier + Fetch64(s + 8);      \
381       c = Rotate(d, 41 ^ z) * multiplier + Fetch64(s + 16);     \
382       d = Rotate(e, 33 ^ z) * multiplier + Fetch64(s + 24);     \
383       e = Rotate(t, 25 ^ z) * multiplier + Fetch64(s + 32);     \
384       t = old_a;                                                \
385     }                                                           \
386     f = _mm_crc32_u64(f, a);                                    \
387     g = _mm_crc32_u64(g, b);                                    \
388     h = _mm_crc32_u64(h, c);                                    \
389     i = _mm_crc32_u64(i, d);                                    \
390     j = _mm_crc32_u64(j, e);                                    \
391     s += 40
392 
393     CHUNK(1, 1); CHUNK(k0, 0);
394     CHUNK(1, 1); CHUNK(k0, 0);
395     CHUNK(1, 1); CHUNK(k0, 0);
396   } while (--iters > 0);
397 
398   while (len >= 40) {
399     CHUNK(k0, 0);
400     len -= 40;
401   }
402   if (len > 0) {
403     s = s + len - 40;
404     CHUNK(k0, 0);
405   }
406   j += i << 32;
407   a = HashLen16(a, j);
408   h += g << 32;
409   b += h;
410   c = HashLen16(c, f) + i;
411   d = HashLen16(d, e + result[0]);
412   j += e;
413   i += HashLen16(h, t);
414   e = HashLen16(a, d) + j;
415   f = HashLen16(b, c) + a;
416   g = HashLen16(j, i) + c;
417   result[0] = e + f + g + h;
418   a = ShiftMix((a + g) * k0) * k0 + b;
419   result[1] += a + result[0];
420   a = ShiftMix(a * k0) * k0 + c;
421   result[2] = a + result[1];
422   a = ShiftMix((a + e) * k0) * k0;
423   result[3] = a + result[2];
424 }
425 
426 // Requires len < 240.
CityHashCrc256Short(const char * s,size_t len,uint64 * result)427 static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) {
428   char buf[240];
429   memcpy(buf, s, len);
430   memset(buf + len, 0, 240 - len);
431   CityHashCrc256Long(buf, 240, ~static_cast<uint32>(len), result);
432 }
433 
CityHashCrc256(const char * s,size_t len,uint64 * result)434 void CityHashCrc256(const char *s, size_t len, uint64 *result) {
435   if (LIKELY(len >= 240)) {
436     CityHashCrc256Long(s, len, 0, result);
437   } else {
438     CityHashCrc256Short(s, len, result);
439   }
440 }
441 
CityHashCrc128WithSeed(const char * s,size_t len,uint128 seed)442 uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) {
443   if (len <= 900) {
444     return CityHash128WithSeed(s, len, seed);
445   } else {
446     uint64 result[4];
447     CityHashCrc256(s, len, result);
448     uint64 u = Uint128High64(seed) + result[0];
449     uint64 v = Uint128Low64(seed) + result[1];
450     return uint128(HashLen16(u, v + result[2]),
451                    HashLen16(Rotate(v, 32), u * k0 + result[3]));
452   }
453 }
454 
CityHashCrc128(const char * s,size_t len)455 uint128 CityHashCrc128(const char *s, size_t len) {
456   if (len <= 900) {
457     return CityHash128(s, len);
458   } else {
459     uint64 result[4];
460     CityHashCrc256(s, len, result);
461     return uint128(result[2], result[3]);
462   }
463 }
464 
465 #endif
466