• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #include "base/hash/hash.h"
6 
7 #include "base/check_op.h"
8 #include "base/notreached.h"
9 #include "base/rand_util.h"
10 #include "base/third_party/cityhash/city.h"
11 #include "build/build_config.h"
12 
// Definition in base/third_party/superfasthash/superfasthash.c. (Third-party
// code did not come with its own header file, so declaring the function here.)
// Note: This algorithm is also in Blink under Source/wtf/StringHasher.h.
extern "C" uint32_t SuperFastHash(const char* data, int len);
17 
18 namespace base {
19 
20 namespace {
21 
FastHashImpl(base::span<const uint8_t> data)22 size_t FastHashImpl(base::span<const uint8_t> data) {
23   // We use the updated CityHash within our namespace (not the deprecated
24   // version from third_party/smhasher).
25   if constexpr (sizeof(size_t) > 4) {
26     return base::internal::cityhash_v111::CityHash64(
27         reinterpret_cast<const char*>(data.data()), data.size());
28   } else {
29     return base::internal::cityhash_v111::CityHash32(
30         reinterpret_cast<const char*>(data.data()), data.size());
31   }
32 }
33 
// Implement hashing for pairs of at-most 32 bit integer values.
// When size_t is 32 bits, we turn the 64-bit hash code into 32 bits by using
// multiply-add hashing. This algorithm, as described in
// Theorem 4.3.3 of the thesis "Über die Komplexität der Multiplikation in
// eingeschränkten Branchingprogrammmodellen" by Woelfel, is:
//
//   h32(x32, y32) = (h64(x32, y32) * rand_odd64 + rand16 * 2^16) % 2^64 / 2^32
//
// Contact danakj@chromium.org for any questions.
size_t HashInts32Impl(uint32_t value1, uint32_t value2) {
  // Pack both inputs into one 64-bit word: value1 occupies the high 32 bits,
  // value2 the low 32 bits.
  uint64_t hash64 = (static_cast<uint64_t>(value1) << 32) | value2;

  // The size test is a compile-time constant, so it is spelled `if constexpr`.
  if constexpr (sizeof(size_t) >= sizeof(uint64_t)) {
    // size_t can hold the whole packed word; use it directly as the hash.
    return static_cast<size_t>(hash64);
  } else {
    // Fixed "random" constants for the multiply-add reduction. The multiplier
    // is odd (its low half, 1025306955, is odd).
    constexpr uint64_t kOddRandom = (481046412ULL << 32) | 1025306955ULL;
    constexpr uint32_t kShiftRandom = 10121U << 16;

    hash64 = hash64 * kOddRandom + kShiftRandom;
    // Keep only the most significant sizeof(size_t) bytes of the product.
    return static_cast<size_t>(hash64 >>
                               (8 * (sizeof(uint64_t) - sizeof(size_t))));
  }
}
58 
// Implement hashing for pairs of up-to 64-bit integer values.
// We use the compound integer hash method to produce a 64-bit hash code, by
// breaking the two 64-bit inputs into 4 32-bit values:
// http://opendatastructures.org/versions/edition-0.1d/ods-java/node33.html#SECTION00832000000000000000
// Then we reduce our result to 32 bits if required, similar to the
// multiply-add reduction used for pairs of 32-bit values.
size_t HashInts64Impl(uint64_t value1, uint64_t value2) {
  // One fixed multiplier per 32-bit input half.
  constexpr uint32_t kShortRandom1 = 842304669U;
  constexpr uint32_t kShortRandom2 = 619063811U;
  constexpr uint32_t kShortRandom3 = 937041849U;
  constexpr uint32_t kShortRandom4 = 3309708029U;

  // Split each input into its low ("a") and high ("b") 32-bit halves.
  const uint32_t value1a = static_cast<uint32_t>(value1 & 0xffffffff);
  const uint32_t value1b = static_cast<uint32_t>((value1 >> 32) & 0xffffffff);
  const uint32_t value2a = static_cast<uint32_t>(value2 & 0xffffffff);
  const uint32_t value2b = static_cast<uint32_t>((value2 >> 32) & 0xffffffff);

  // Widen before multiplying so each product is computed in 64 bits; the sum
  // wraps modulo 2^64 (unsigned arithmetic).
  const uint64_t product1 = static_cast<uint64_t>(value1a) * kShortRandom1;
  const uint64_t product2 = static_cast<uint64_t>(value1b) * kShortRandom2;
  const uint64_t product3 = static_cast<uint64_t>(value2a) * kShortRandom3;
  const uint64_t product4 = static_cast<uint64_t>(value2b) * kShortRandom4;

  uint64_t hash64 = product1 + product2 + product3 + product4;

  // The size test is a compile-time constant, so it is spelled `if constexpr`.
  if constexpr (sizeof(size_t) >= sizeof(uint64_t)) {
    // size_t can hold the whole 64-bit hash code.
    return static_cast<size_t>(hash64);
  } else {
    // Fixed "random" constants for the multiply-add reduction. The multiplier
    // is odd (its low half, 194370989, is odd).
    constexpr uint64_t kOddRandom = (1578233944ULL << 32) | 194370989ULL;
    constexpr uint32_t kShiftRandom = 20591U << 16;

    hash64 = hash64 * kOddRandom + kShiftRandom;
    // Keep only the most significant sizeof(size_t) bytes of the product.
    return static_cast<size_t>(hash64 >>
                               (8 * (sizeof(uint64_t) - sizeof(size_t))));
  }
}
93 
94 // The random seed is used to perturb the output of base::FastHash() and
95 // base::HashInts() so that it is only deterministic within the lifetime of a
96 // process. This prevents inadvertent dependencies on the underlying
97 // implementation, e.g. anything that persists the hash value and expects it to
98 // be unchanging will break.
99 //
100 // Note: this is the same trick absl uses to generate a random seed. This is
101 // more robust than using base::RandBytes(), which can fail inside a sandboxed
102 // environment. Note that without ASLR, the seed won't be quite as random...
103 #if DCHECK_IS_ON()
104 constexpr const void* kSeed = &kSeed;
105 #endif
106 
107 template <typename T>
Scramble(T input)108 T Scramble(T input) {
109 #if DCHECK_IS_ON()
110   return HashInts64Impl(input, reinterpret_cast<uintptr_t>(kSeed));
111 #else
112   return input;
113 #endif
114 }
115 
116 }  // namespace
117 
FastHash(base::span<const uint8_t> data)118 size_t FastHash(base::span<const uint8_t> data) {
119   return Scramble(FastHashImpl(data));
120 }
121 
Hash(const void * data,size_t length)122 uint32_t Hash(const void* data, size_t length) {
123   // Currently our in-memory hash is the same as the persistent hash. The
124   // split between in-memory and persistent hash functions is maintained to
125   // allow the in-memory hash function to be updated in the future.
126   return PersistentHash(data, length);
127 }
128 
Hash(const std::string & str)129 uint32_t Hash(const std::string& str) {
130   return PersistentHash(as_bytes(make_span(str)));
131 }
132 
Hash(const std::u16string & str)133 uint32_t Hash(const std::u16string& str) {
134   return PersistentHash(as_bytes(make_span(str)));
135 }
136 
PersistentHash(span<const uint8_t> data)137 uint32_t PersistentHash(span<const uint8_t> data) {
138   // This hash function must not change, since it is designed to be persistable
139   // to disk.
140   if (data.size() > static_cast<size_t>(std::numeric_limits<int>::max())) {
141     NOTREACHED();
142     return 0;
143   }
144   return ::SuperFastHash(reinterpret_cast<const char*>(data.data()),
145                          static_cast<int>(data.size()));
146 }
147 
PersistentHash(const void * data,size_t length)148 uint32_t PersistentHash(const void* data, size_t length) {
149   return PersistentHash(make_span(static_cast<const uint8_t*>(data), length));
150 }
151 
PersistentHash(const std::string & str)152 uint32_t PersistentHash(const std::string& str) {
153   return PersistentHash(str.data(), str.size());
154 }
155 
HashInts32(uint32_t value1,uint32_t value2)156 size_t HashInts32(uint32_t value1, uint32_t value2) {
157   return Scramble(HashInts32Impl(value1, value2));
158 }
159 
160 // Implement hashing for pairs of up-to 64-bit integer values.
161 // We use the compound integer hash method to produce a 64-bit hash code, by
162 // breaking the two 64-bit inputs into 4 32-bit values:
163 // http://opendatastructures.org/versions/edition-0.1d/ods-java/node33.html#SECTION00832000000000000000
164 // Then we reduce our result to 32 bits if required, similar to above.
HashInts64(uint64_t value1,uint64_t value2)165 size_t HashInts64(uint64_t value1, uint64_t value2) {
166   return Scramble(HashInts64Impl(value1, value2));
167 }
168 
169 }  // namespace base
170