• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 Google, Inc.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 //
21 // CityHash, by Geoff Pike and Jyrki Alakuijala
22 //
23 // This file provides CityHash64() and related functions.
24 //
25 // It's probably possible to create even faster hash functions by
26 // writing a program that systematically explores some of the space of
27 // possible hash functions, by using SIMD instructions, or by
28 // compromising on hash quality.
29 
30 #include "base/third_party/cityhash_v103/src/city_v103.h"
31 
32 #include <string.h>  // for memcpy and memset
33 #include <algorithm>
34 
35 #include "build/build_config.h"
36 
37 namespace base {
38 namespace internal {
39 namespace cityhash_v103 {
40 
UNALIGNED_LOAD64(const char * p)41 static uint64 UNALIGNED_LOAD64(const char* p) {
42   uint64 result;
43   memcpy(&result, p, sizeof(result));
44   return result;
45 }
46 
UNALIGNED_LOAD32(const char * p)47 static uint32 UNALIGNED_LOAD32(const char* p) {
48   uint32 result;
49   memcpy(&result, p, sizeof(result));
50   return result;
51 }
52 
53 #if defined(ARCH_CPU_LITTLE_ENDIAN)
54 
55 #define uint32_in_expected_order(x) (x)
56 #define uint64_in_expected_order(x) (x)
57 
58 #else
59 
60 #if defined(__clang__)
61 
62 // Use builtins where possible. On Windows for instance, this may prevent a
63 // function call instead of emitting a single instruction.
64 #define bswap_32(x) __builtin_bswap32(x)
65 #define bswap_64(x) __builtin_bswap64(x)
66 
67 #elif _MSC_VER
68 #include <stdlib.h>
69 #define bswap_32(x) _byteswap_ulong(x)
70 #define bswap_64(x) _byteswap_uint64(x)
71 
72 #elif defined(__APPLE__)
73 // Mac OS X / Darwin features
74 #include <libkern/OSByteOrder.h>
75 #define bswap_32(x) OSSwapInt32(x)
76 #define bswap_64(x) OSSwapInt64(x)
77 
78 #else
79 #include <byteswap.h>
80 #endif
81 
82 #define uint32_in_expected_order(x) (bswap_32(x))
83 #define uint64_in_expected_order(x) (bswap_64(x))
84 
85 #endif  // defined(ARCH_CPU_LITTLE_ENDIAN)
86 
87 #if !defined(LIKELY)
88 #if HAVE_BUILTIN_EXPECT
89 #define LIKELY(x) (__builtin_expect(!!(x), 1))
90 #else
91 #define LIKELY(x) (x)
92 #endif
93 #endif
94 
Fetch64(const char * p)95 static uint64 Fetch64(const char* p) {
96   return uint64_in_expected_order(UNALIGNED_LOAD64(p));
97 }
98 
Fetch32(const char * p)99 static uint32 Fetch32(const char* p) {
100   return uint32_in_expected_order(UNALIGNED_LOAD32(p));
101 }
102 
103 // Some primes between 2^63 and 2^64 for various uses.
104 static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
105 static const uint64 k1 = 0xb492b66fbe98f273ULL;
106 static const uint64 k2 = 0x9ae16a3b2f90404fULL;
107 static const uint64 k3 = 0xc949d7c7509e6557ULL;
108 
109 // Bitwise right rotate.  Normally this will compile to a single
110 // instruction, especially if the shift is a manifest constant.
Rotate(uint64 val,int shift)111 static uint64 Rotate(uint64 val, int shift) {
112   // Avoid shifting by 64: doing so yields an undefined result.
113   return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
114 }
115 
116 // Equivalent to Rotate(), but requires the second arg to be non-zero.
117 // On x86-64, and probably others, it's possible for this to compile
118 // to a single instruction if both args are already in registers.
RotateByAtLeast1(uint64 val,size_t shift)119 static uint64 RotateByAtLeast1(uint64 val, size_t shift) {
120   return (val >> shift) | (val << (64 - shift));
121 }
122 
ShiftMix(uint64 val)123 static uint64 ShiftMix(uint64 val) {
124   return val ^ (val >> 47);
125 }
126 
HashLen16(uint64 u,uint64 v)127 static uint64 HashLen16(uint64 u, uint64 v) {
128   return Hash128to64(uint128(u, v));
129 }
130 
HashLen0to16(const char * s,size_t len)131 static uint64 HashLen0to16(const char* s, size_t len) {
132   if (len > 8) {
133     uint64 a = Fetch64(s);
134     uint64 b = Fetch64(s + len - 8);
135     return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;
136   }
137   if (len >= 4) {
138     uint64 a = Fetch32(s);
139     return HashLen16(len + (a << 3), Fetch32(s + len - 4));
140   }
141   if (len > 0) {
142     uint8 a = static_cast<uint8>(s[0]);
143     uint8 b = static_cast<uint8>(s[len >> 1]);
144     uint8 c = static_cast<uint8>(s[len - 1]);
145     uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
146     uint32 z = static_cast<uint32>(len) + (static_cast<uint32>(c) << 2);
147     return ShiftMix(y * k2 ^ z * k3) * k2;
148   }
149   return k2;
150 }
151 
152 // This probably works well for 16-byte strings as well, but it may be overkill
153 // in that case.
HashLen17to32(const char * s,size_t len)154 static uint64 HashLen17to32(const char* s, size_t len) {
155   uint64 a = Fetch64(s) * k1;
156   uint64 b = Fetch64(s + 8);
157   uint64 c = Fetch64(s + len - 8) * k2;
158   uint64 d = Fetch64(s + len - 16) * k0;
159   return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
160                    a + Rotate(b ^ k3, 20) - c + len);
161 }
162 
163 // Return a 16-byte hash for 48 bytes.  Quick and dirty.
164 // Callers do best to use "random-looking" values for a and b.
WeakHashLen32WithSeeds(uint64 w,uint64 x,uint64 y,uint64 z,uint64 a,uint64 b)165 static std::pair<uint64, uint64> WeakHashLen32WithSeeds(uint64 w,
166                                                         uint64 x,
167                                                         uint64 y,
168                                                         uint64 z,
169                                                         uint64 a,
170                                                         uint64 b) {
171   a += w;
172   b = Rotate(b + a + z, 21);
173   uint64 c = a;
174   a += x;
175   a += y;
176   b += Rotate(a, 44);
177   return std::make_pair(a + z, b + c);
178 }
179 
180 // Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
WeakHashLen32WithSeeds(const char * s,uint64 a,uint64 b)181 static std::pair<uint64, uint64> WeakHashLen32WithSeeds(const char* s,
182                                                         uint64 a,
183                                                         uint64 b) {
184   return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16),
185                                 Fetch64(s + 24), a, b);
186 }
187 
188 // Return an 8-byte hash for 33 to 64 bytes.
HashLen33to64(const char * s,size_t len)189 static uint64 HashLen33to64(const char* s, size_t len) {
190   uint64 z = Fetch64(s + 24);
191   uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0;
192   uint64 b = Rotate(a + z, 52);
193   uint64 c = Rotate(a, 37);
194   a += Fetch64(s + 8);
195   c += Rotate(a, 7);
196   a += Fetch64(s + 16);
197   uint64 vf = a + z;
198   uint64 vs = b + Rotate(a, 31) + c;
199   a = Fetch64(s + 16) + Fetch64(s + len - 32);
200   z = Fetch64(s + len - 8);
201   b = Rotate(a + z, 52);
202   c = Rotate(a, 37);
203   a += Fetch64(s + len - 24);
204   c += Rotate(a, 7);
205   a += Fetch64(s + len - 16);
206   uint64 wf = a + z;
207   uint64 ws = b + Rotate(a, 31) + c;
208   uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);
209   return ShiftMix(r * k0 + vs) * k2;
210 }
211 
CityHash64(const char * s,size_t len)212 uint64 CityHash64(const char* s, size_t len) {
213   if (len <= 32) {
214     if (len <= 16) {
215       return HashLen0to16(s, len);
216     } else {
217       return HashLen17to32(s, len);
218     }
219   } else if (len <= 64) {
220     return HashLen33to64(s, len);
221   }
222 
223   // For strings over 64 bytes we hash the end first, and then as we
224   // loop we keep 56 bytes of state: v, w, x, y, and z.
225   uint64 x = Fetch64(s + len - 40);
226   uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
227   uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
228   std::pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
229   std::pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
230   x = x * k1 + Fetch64(s);
231 
232   // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
233   len = (len - 1) & ~static_cast<size_t>(63);
234   do {
235     x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
236     y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
237     x ^= w.second;
238     y += v.first + Fetch64(s + 40);
239     z = Rotate(z + w.first, 33) * k1;
240     v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
241     w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
242     std::swap(z, x);
243     s += 64;
244     len -= 64;
245   } while (len != 0);
246   return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
247                    HashLen16(v.second, w.second) + x);
248 }
249 
CityHash64WithSeed(const char * s,size_t len,uint64 seed)250 uint64 CityHash64WithSeed(const char* s, size_t len, uint64 seed) {
251   return CityHash64WithSeeds(s, len, k2, seed);
252 }
253 
CityHash64WithSeeds(const char * s,size_t len,uint64 seed0,uint64 seed1)254 uint64 CityHash64WithSeeds(const char* s,
255                            size_t len,
256                            uint64 seed0,
257                            uint64 seed1) {
258   return HashLen16(CityHash64(s, len) - seed0, seed1);
259 }
260 
261 // A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
262 // of any length representable in signed long.  Based on City and Murmur.
CityMurmur(const char * s,size_t len,uint128 seed)263 static uint128 CityMurmur(const char* s, size_t len, uint128 seed) {
264   uint64 a = Uint128Low64(seed);
265   uint64 b = Uint128High64(seed);
266   uint64 c = 0;
267   uint64 d = 0;
268   if (len <= 16) {
269     a = ShiftMix(a * k1) * k1;
270     c = b * k1 + HashLen0to16(s, len);
271     d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
272   } else {
273     c = HashLen16(Fetch64(s + len - 8) + k1, a);
274     d = HashLen16(b + len, c + Fetch64(s + len - 16));
275     a += d;
276     // len > 16 here, so do...while is safe
277     do {
278       a ^= ShiftMix(Fetch64(s) * k1) * k1;
279       a *= k1;
280       b ^= a;
281       c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
282       c *= k1;
283       d ^= c;
284       s += 16;
285       len -= 16;
286     } while (len > 16);
287   }
288   a = HashLen16(a, c);
289   b = HashLen16(d, b);
290   return uint128(a ^ b, HashLen16(b, a));
291 }
292 
CityHash128WithSeed(const char * s,size_t len,uint128 seed)293 uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) {
294   if (len < 128) {
295     return CityMurmur(s, len, seed);
296   }
297 
298   // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
299   // v, w, x, y, and z.
300   std::pair<uint64, uint64> v, w;
301   uint64 x = Uint128Low64(seed);
302   uint64 y = Uint128High64(seed);
303   uint64 z = len * k1;
304   v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
305   v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
306   w.first = Rotate(y + z, 35) * k1 + x;
307   w.second = Rotate(x + Fetch64(s + 88), 53) * k1;
308 
309   // This is the same inner loop as CityHash64(), manually unrolled.
310   do {
311     x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
312     y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
313     x ^= w.second;
314     y += v.first + Fetch64(s + 40);
315     z = Rotate(z + w.first, 33) * k1;
316     v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
317     w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
318     std::swap(z, x);
319     s += 64;
320     x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
321     y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
322     x ^= w.second;
323     y += v.first + Fetch64(s + 40);
324     z = Rotate(z + w.first, 33) * k1;
325     v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
326     w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
327     std::swap(z, x);
328     s += 64;
329     len -= 128;
330   } while (LIKELY(len >= 128));
331   x += Rotate(v.first + z, 49) * k0;
332   z += Rotate(w.first, 37) * k0;
333   // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
334   for (size_t tail_done = 0; tail_done < len;) {
335     tail_done += 32;
336     y = Rotate(x + y, 42) * k0 + v.second;
337     w.first += Fetch64(s + len - tail_done + 16);
338     x = x * k0 + w.first;
339     z += w.second + Fetch64(s + len - tail_done);
340     w.second += v.first;
341     v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
342   }
343   // At this point our 56 bytes of state should contain more than
344   // enough information for a strong 128-bit hash.  We use two
345   // different 56-byte-to-8-byte hashes to get a 16-byte final result.
346   x = HashLen16(x, v.first);
347   y = HashLen16(y + z, w.first);
348   return uint128(HashLen16(x + v.second, w.second) + y,
349                  HashLen16(x + w.second, y + v.second));
350 }
351 
CityHash128(const char * s,size_t len)352 uint128 CityHash128(const char* s, size_t len) {
353   if (len >= 16) {
354     return CityHash128WithSeed(s + 16, len - 16,
355                                uint128(Fetch64(s) ^ k3, Fetch64(s + 8)));
356   } else if (len >= 8) {
357     return CityHash128WithSeed(
358         nullptr, 0,
359         uint128(Fetch64(s) ^ (len * k0), Fetch64(s + len - 8) ^ k1));
360   } else {
361     return CityHash128WithSeed(s, len, uint128(k0, k1));
362   }
363 }
364 
365 }  // namespace cityhash_v103
366 }  // namespace internal
367 }  // namespace base
368