// Copyright (c) 2011 Google, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// CityHash, by Geoff Pike and Jyrki Alakuijala
//
// This file provides CityHash64() and related functions.
//
// It's probably possible to create even faster hash functions by
// writing a program that systematically explores some of the space of
// possible hash functions, by using SIMD instructions, or by
// compromising on hash quality.
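//
// Example (illustrative) of hashing a buffer with the entry points defined
// below, which live in namespace base::internal::cityhash_v103:
//
//   const char* buf = ...;  // bytes to hash
//   size_t len = ...;       // number of bytes
//   uint64 h64 = CityHash64(buf, len);
//   uint64 h64s = CityHash64WithSeed(buf, len, /*seed=*/1234);
//   uint128 h128 = CityHash128(buf, len);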

#include "base/third_party/cityhash_v103/src/city_v103.h"

#include <string.h>  // for memcpy and memset
#include <algorithm>

#include "build/build_config.h"

namespace base {
namespace internal {
namespace cityhash_v103 {

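// Loads from addresses that may be unaligned; going through memcpy keeps the
// reads well-defined on every platform.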
static uint64 UNALIGNED_LOAD64(const char* p) {
  uint64 result;
  memcpy(&result, p, sizeof(result));
  return result;
}

static uint32 UNALIGNED_LOAD32(const char* p) {
  uint32 result;
  memcpy(&result, p, sizeof(result));
  return result;
}

#if defined(ARCH_CPU_LITTLE_ENDIAN)

#define uint32_in_expected_order(x) (x)
#define uint64_in_expected_order(x) (x)

#else

#if defined(__clang__)

// Use builtins where possible. On Windows, for instance, this lets the
// compiler emit a single byte-swap instruction instead of a function call.
#define bswap_32(x) __builtin_bswap32(x)
#define bswap_64(x) __builtin_bswap64(x)

#elif _MSC_VER
#include <stdlib.h>
#define bswap_32(x) _byteswap_ulong(x)
#define bswap_64(x) _byteswap_uint64(x)

#elif defined(__APPLE__)
// Mac OS X / Darwin features
#include <libkern/OSByteOrder.h>
#define bswap_32(x) OSSwapInt32(x)
#define bswap_64(x) OSSwapInt64(x)

#else
#include <byteswap.h>
#endif

#define uint32_in_expected_order(x) (bswap_32(x))
#define uint64_in_expected_order(x) (bswap_64(x))

#endif  // defined(ARCH_CPU_LITTLE_ENDIAN)

#if !defined(LIKELY)
#if HAVE_BUILTIN_EXPECT
#define LIKELY(x) (__builtin_expect(!!(x), 1))
#else
#define LIKELY(x) (x)
#endif
#endif

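// Reads a 64-bit or 32-bit value from p, byte-swapping on big-endian hosts so
// the hash always consumes the input in the same byte order.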
static uint64 Fetch64(const char* p) {
  return uint64_in_expected_order(UNALIGNED_LOAD64(p));
}

static uint32 Fetch32(const char* p) {
  return uint32_in_expected_order(UNALIGNED_LOAD32(p));
}

// Some primes between 2^63 and 2^64 for various uses.
static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
static const uint64 k1 = 0xb492b66fbe98f273ULL;
static const uint64 k2 = 0x9ae16a3b2f90404fULL;
static const uint64 k3 = 0xc949d7c7509e6557ULL;

// Bitwise right rotate. Normally this will compile to a single
// instruction, especially if the shift is a manifest constant.
static uint64 Rotate(uint64 val, int shift) {
  // Avoid shifting by 64: doing so yields an undefined result.
  return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
}

// Equivalent to Rotate(), but requires the second arg to be non-zero.
// On x86-64, and probably others, it's possible for this to compile
// to a single instruction if both args are already in registers.
static uint64 RotateByAtLeast1(uint64 val, size_t shift) {
  return (val >> shift) | (val << (64 - shift));
}

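// Mixes the top bits of val down into the lower bits.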
static uint64 ShiftMix(uint64 val) {
  return val ^ (val >> 47);
}

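// Hashes the 128-bit value (u, v) down to 64 bits using Hash128to64() from
// the header.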
static uint64 HashLen16(uint64 u, uint64 v) {
  return Hash128to64(uint128(u, v));
}

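// Returns an 8-byte hash for 0 to 16 bytes.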
static uint64 HashLen0to16(const char* s, size_t len) {
  if (len > 8) {
    uint64 a = Fetch64(s);
    uint64 b = Fetch64(s + len - 8);
    return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;
  }
  if (len >= 4) {
    uint64 a = Fetch32(s);
    return HashLen16(len + (a << 3), Fetch32(s + len - 4));
  }
  if (len > 0) {
    uint8 a = static_cast<uint8>(s[0]);
    uint8 b = static_cast<uint8>(s[len >> 1]);
    uint8 c = static_cast<uint8>(s[len - 1]);
    uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
    uint32 z = static_cast<uint32>(len) + (static_cast<uint32>(c) << 2);
    return ShiftMix(y * k2 ^ z * k3) * k2;
  }
  return k2;
}

// This probably works well for 16-byte strings as well, but it may be overkill
// in that case.
static uint64 HashLen17to32(const char* s, size_t len) {
  uint64 a = Fetch64(s) * k1;
  uint64 b = Fetch64(s + 8);
  uint64 c = Fetch64(s + len - 8) * k2;
  uint64 d = Fetch64(s + len - 16) * k0;
  return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
                   a + Rotate(b ^ k3, 20) - c + len);
}

// Return a 16-byte hash for 48 bytes. Quick and dirty.
// Callers do best to use "random-looking" values for a and b.
static std::pair<uint64, uint64> WeakHashLen32WithSeeds(uint64 w,
                                                        uint64 x,
                                                        uint64 y,
                                                        uint64 z,
                                                        uint64 a,
                                                        uint64 b) {
  a += w;
  b = Rotate(b + a + z, 21);
  uint64 c = a;
  a += x;
  a += y;
  b += Rotate(a, 44);
  return std::make_pair(a + z, b + c);
}

// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
static std::pair<uint64, uint64> WeakHashLen32WithSeeds(const char* s,
                                                        uint64 a,
                                                        uint64 b) {
  return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16),
                                Fetch64(s + 24), a, b);
}

// Return an 8-byte hash for 33 to 64 bytes.
static uint64 HashLen33to64(const char* s, size_t len) {
  uint64 z = Fetch64(s + 24);
  uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0;
  uint64 b = Rotate(a + z, 52);
  uint64 c = Rotate(a, 37);
  a += Fetch64(s + 8);
  c += Rotate(a, 7);
  a += Fetch64(s + 16);
  uint64 vf = a + z;
  uint64 vs = b + Rotate(a, 31) + c;
  a = Fetch64(s + 16) + Fetch64(s + len - 32);
  z = Fetch64(s + len - 8);
  b = Rotate(a + z, 52);
  c = Rotate(a, 37);
  a += Fetch64(s + len - 24);
  c += Rotate(a, 7);
  a += Fetch64(s + len - 16);
  uint64 wf = a + z;
  uint64 ws = b + Rotate(a, 31) + c;
  uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);
  return ShiftMix(r * k0 + vs) * k2;
}

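// Returns a 64-bit hash of the len bytes starting at s.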
uint64 CityHash64(const char* s, size_t len) {
  if (len <= 32) {
    if (len <= 16) {
      return HashLen0to16(s, len);
    } else {
      return HashLen17to32(s, len);
    }
  } else if (len <= 64) {
    return HashLen33to64(s, len);
  }

  // For strings over 64 bytes we hash the end first, and then as we
  // loop we keep 56 bytes of state: v, w, x, y, and z.
  uint64 x = Fetch64(s + len - 40);
  uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
  uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
  std::pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
  std::pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
  x = x * k1 + Fetch64(s);

  // Reduce len to the largest multiple of 64 that is strictly less than len,
  // and operate on 64-byte chunks.
  len = (len - 1) & ~static_cast<size_t>(63);
  do {
    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
    x ^= w.second;
    y += v.first + Fetch64(s + 40);
    z = Rotate(z + w.first, 33) * k1;
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    std::swap(z, x);
    s += 64;
    len -= 64;
  } while (len != 0);
  return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
                   HashLen16(v.second, w.second) + x);
}

uint64 CityHash64WithSeed(const char* s, size_t len, uint64 seed) {
  return CityHash64WithSeeds(s, len, k2, seed);
}

uint64 CityHash64WithSeeds(const char* s,
                           size_t len,
                           uint64 seed0,
                           uint64 seed1) {
  return HashLen16(CityHash64(s, len) - seed0, seed1);
}

// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
// of any length representable in signed long. Based on City and Murmur.
static uint128 CityMurmur(const char* s, size_t len, uint128 seed) {
  uint64 a = Uint128Low64(seed);
  uint64 b = Uint128High64(seed);
  uint64 c = 0;
  uint64 d = 0;
  if (len <= 16) {
    a = ShiftMix(a * k1) * k1;
    c = b * k1 + HashLen0to16(s, len);
    d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
  } else {
    c = HashLen16(Fetch64(s + len - 8) + k1, a);
    d = HashLen16(b + len, c + Fetch64(s + len - 16));
    a += d;
    // len > 16 here, so do...while is safe
    do {
      a ^= ShiftMix(Fetch64(s) * k1) * k1;
      a *= k1;
      b ^= a;
      c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
      c *= k1;
      d ^= c;
      s += 16;
      len -= 16;
    } while (len > 16);
  }
  a = HashLen16(a, c);
  b = HashLen16(d, b);
  return uint128(a ^ b, HashLen16(b, a));
}

uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) {
  if (len < 128) {
    return CityMurmur(s, len, seed);
  }

  // We expect len >= 128 to be the common case. Keep 56 bytes of state:
  // v, w, x, y, and z.
  std::pair<uint64, uint64> v, w;
  uint64 x = Uint128Low64(seed);
  uint64 y = Uint128High64(seed);
  uint64 z = len * k1;
  v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
  v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
  w.first = Rotate(y + z, 35) * k1 + x;
  w.second = Rotate(x + Fetch64(s + 88), 53) * k1;

  // This is the same inner loop as CityHash64(), manually unrolled.
  do {
    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
    x ^= w.second;
    y += v.first + Fetch64(s + 40);
    z = Rotate(z + w.first, 33) * k1;
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    std::swap(z, x);
    s += 64;
    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
    x ^= w.second;
    y += v.first + Fetch64(s + 40);
    z = Rotate(z + w.first, 33) * k1;
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    std::swap(z, x);
    s += 64;
    len -= 128;
  } while (LIKELY(len >= 128));
  x += Rotate(v.first + z, 49) * k0;
  z += Rotate(w.first, 37) * k0;
  // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
  for (size_t tail_done = 0; tail_done < len;) {
    tail_done += 32;
    y = Rotate(x + y, 42) * k0 + v.second;
    w.first += Fetch64(s + len - tail_done + 16);
    x = x * k0 + w.first;
    z += w.second + Fetch64(s + len - tail_done);
    w.second += v.first;
    v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
  }
  // At this point our 56 bytes of state should contain more than
  // enough information for a strong 128-bit hash. We use two
  // different 56-byte-to-8-byte hashes to get a 16-byte final result.
  x = HashLen16(x, v.first);
  y = HashLen16(y + z, w.first);
  return uint128(HashLen16(x + v.second, w.second) + y,
                 HashLen16(x + w.second, y + v.second));
}

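// Returns a 128-bit hash of the len bytes at s, deriving the seed from the
// first 16 bytes of input when enough bytes are available.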
uint128 CityHash128(const char* s, size_t len) {
  if (len >= 16) {
    return CityHash128WithSeed(s + 16, len - 16,
                               uint128(Fetch64(s) ^ k3, Fetch64(s + 8)));
  } else if (len >= 8) {
    return CityHash128WithSeed(
        nullptr, 0,
        uint128(Fetch64(s) ^ (len * k0), Fetch64(s + len - 8) ^ k1));
  } else {
    return CityHash128WithSeed(s, len, uint128(k0, k1));
  }
}

}  // namespace cityhash_v103
}  // namespace internal
}  // namespace base