• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/hash/hash.h"
6 
7 #include <cstddef>
8 #include <cstdint>
9 #include <limits>
10 #include <string>
11 #include <string_view>
12 
13 #include "base/containers/span.h"
14 #include "base/dcheck_is_on.h"
15 #include "base/notreached.h"
16 #include "base/numerics/safe_conversions.h"
17 #include "base/third_party/cityhash/city.h"
18 
19 // Definition in base/third_party/superfasthash/superfasthash.c. (Third-party
20 // code did not come with its own header file, so declaring the function here.)
21 // Note: This algorithm is also in Blink under Source/wtf/StringHasher.h.
22 extern "C" uint32_t SuperFastHash(const char* data, int len);
23 
24 namespace base {
25 
26 namespace {
27 
FastHashImpl(base::span<const uint8_t> data)28 size_t FastHashImpl(base::span<const uint8_t> data) {
29   auto chars = as_chars(data);
30   // We use the updated CityHash within our namespace (not the deprecated
31   // version from third_party/smhasher).
32   if constexpr (sizeof(size_t) > 4) {
33     return base::internal::cityhash_v111::CityHash64(chars.data(),
34                                                      chars.size());
35   } else {
36     return base::internal::cityhash_v111::CityHash32(chars.data(),
37                                                      chars.size());
38   }
39 }
40 
// Implement hashing for pairs of at-most 32 bit integer values.
//
// The two inputs are packed into a single 64-bit code (`value1` in the high
// 32 bits, `value2` in the low 32 bits). When size_t is at least 64 bits wide
// that code is returned as-is. When size_t is narrower, the 64-bit code is
// reduced by multiply-add hashing. This algorithm, as described in
// Theorem 4.3.3 of the thesis "Über die Komplexität der Multiplikation in
// eingeschränkten Branchingprogrammmodellen" by Woelfel, is:
//
//   h32(x32, y32) = (h64(x32, y32) * rand_odd64 + rand16 * 2^16) % 2^64 / 2^32
//
// Contact danakj@chromium.org for any questions.
size_t HashInts32Impl(uint32_t value1, uint32_t value2) {
  const uint64_t packed = (static_cast<uint64_t>(value1) << 32) | value2;

  // The width of size_t is known at compile time, so select the path with
  // `if constexpr` rather than a runtime branch.
  if constexpr (sizeof(size_t) >= sizeof(uint64_t)) {
    return static_cast<size_t>(packed);
  } else {
    // Fixed constants playing the roles of rand_odd64 and rand16 * 2^16 in the
    // formula above. They are built from unsigned operands so no signed shift
    // is involved.
    constexpr uint64_t kOddMultiplier =
        (uint64_t{481046412} << 32) | uint64_t{1025306955};
    constexpr uint32_t kShiftedAddend = 10121U << 16;
    // Number of low bits to drop so that the remaining high bits fit size_t.
    constexpr size_t kDiscardedBits = 8 * (sizeof(uint64_t) - sizeof(size_t));

    // Unsigned arithmetic wraps, which supplies the "% 2^64" of the formula.
    const uint64_t mixed = packed * kOddMultiplier + kShiftedAddend;
    return static_cast<size_t>(mixed >> kDiscardedBits);
  }
}
65 
// Implement hashing for pairs of up-to 64-bit integer values.
// We use the compound integer hash method to produce a 64-bit hash code, by
// breaking the two 64-bit inputs into 4 32-bit values:
// http://opendatastructures.org/versions/edition-0.1d/ods-java/node33.html#SECTION00832000000000000000
// Each 32-bit half is multiplied by its own fixed 32-bit constant and the four
// 64-bit products are summed (with wraparound). When size_t is at least 64 bits
// wide that sum is the result; otherwise it is reduced with one more
// multiply-add step and only the high bits that fit in size_t are kept.
size_t HashInts64Impl(uint64_t value1, uint64_t value2) {
  constexpr uint64_t kLow32Mask = 0xffffffffULL;

  // One fixed 32-bit multiplier per input half, widened so every product is
  // computed in 64 bits.
  constexpr uint64_t kFactorValue1Low = 842304669U;
  constexpr uint64_t kFactorValue1High = 619063811U;
  constexpr uint64_t kFactorValue2Low = 937041849U;
  constexpr uint64_t kFactorValue2High = 3309708029U;

  // `x >> 32` of a 64-bit value already fits in 32 bits, so the high halves
  // need no extra mask.
  const uint64_t combined = (value1 & kLow32Mask) * kFactorValue1Low +
                            (value1 >> 32) * kFactorValue1High +
                            (value2 & kLow32Mask) * kFactorValue2Low +
                            (value2 >> 32) * kFactorValue2High;

  // The width of size_t is known at compile time, so select the path with
  // `if constexpr` rather than a runtime branch.
  if constexpr (sizeof(size_t) >= sizeof(uint64_t)) {
    return static_cast<size_t>(combined);
  } else {
    // Fixed odd multiplier and shifted addend for the reduction step, built
    // from unsigned operands so no signed shift is involved.
    constexpr uint64_t kOddMultiplier =
        (uint64_t{1578233944} << 32) | uint64_t{194370989};
    constexpr uint32_t kShiftedAddend = 20591U << 16;
    // Number of low bits to drop so that the remaining high bits fit size_t.
    constexpr size_t kDiscardedBits = 8 * (sizeof(uint64_t) - sizeof(size_t));

    const uint64_t mixed = combined * kOddMultiplier + kShiftedAddend;
    return static_cast<size_t>(mixed >> kDiscardedBits);
  }
}
100 
101 // The random seed is used to perturb the output of base::FastHash() and
102 // base::HashInts() so that it is only deterministic within the lifetime of a
103 // process. This prevents inadvertent dependencies on the underlying
104 // implementation, e.g. anything that persists the hash value and expects it to
105 // be unchanging will break.
106 //
107 // Note: this is the same trick absl uses to generate a random seed. This is
108 // more robust than using base::RandBytes(), which can fail inside a sandboxed
109 // environment. Note that without ASLR, the seed won't be quite as random...
110 #if DCHECK_IS_ON()
111 constexpr const void* kSeed = &kSeed;
112 #endif
113 
114 template <typename T>
Scramble(T input)115 T Scramble(T input) {
116 #if DCHECK_IS_ON()
117   return HashInts64Impl(input, reinterpret_cast<uintptr_t>(kSeed));
118 #else
119   return input;
120 #endif
121 }
122 
123 }  // namespace
124 
FastHash(base::span<const uint8_t> data)125 size_t FastHash(base::span<const uint8_t> data) {
126   return Scramble(FastHashImpl(data));
127 }
128 
Hash(base::span<const uint8_t> data)129 uint32_t Hash(base::span<const uint8_t> data) {
130   // Currently our in-memory hash is the same as the persistent hash. The
131   // split between in-memory and persistent hash functions is maintained to
132   // allow the in-memory hash function to be updated in the future.
133   return PersistentHash(data);
134 }
135 
Hash(const std::string & str)136 uint32_t Hash(const std::string& str) {
137   return PersistentHash(as_byte_span(str));
138 }
139 
PersistentHash(span<const uint8_t> data)140 uint32_t PersistentHash(span<const uint8_t> data) {
141   // This hash function must not change, since it is designed to be persistable
142   // to disk.
143   if (data.size() > size_t{std::numeric_limits<int>::max()}) {
144     NOTREACHED();
145   }
146   auto chars = as_chars(data);
147   return ::SuperFastHash(chars.data(), checked_cast<int>(chars.size()));
148 }
149 
PersistentHash(std::string_view str)150 uint32_t PersistentHash(std::string_view str) {
151   return PersistentHash(as_byte_span(str));
152 }
153 
HashInts32(uint32_t value1,uint32_t value2)154 size_t HashInts32(uint32_t value1, uint32_t value2) {
155   return Scramble(HashInts32Impl(value1, value2));
156 }
157 
HashInts64(uint64_t value1,uint64_t value2)158 size_t HashInts64(uint64_t value1, uint64_t value2) {
159   return Scramble(HashInts64Impl(value1, value2));
160 }
161 
162 }  // namespace base
163