// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include <tuple>
#include <type_traits>
#include <typeindex>
#include <utility>
#include <vector>

#include "absl/base/attributes.h"
#include "absl/container/flat_hash_set.h"
#include "absl/hash/hash.h"
#include "absl/random/random.h"
#include "absl/strings/cord.h"
#include "absl/strings/cord_test_helpers.h"
#include "absl/strings/string_view.h"
#include "benchmark/benchmark.h"

namespace {

using absl::Hash;

template <template <typename> class H, typename T>
void RunBenchmark(benchmark::State& state, T value) {
  H<T> h;
  for (auto _ : state) {
    benchmark::DoNotOptimize(value);
    benchmark::DoNotOptimize(h(value));
  }
}

}  // namespace

template <typename T>
using AbslHash = absl::Hash<T>;

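// Exercises hashing through a type-erased interface: AbslHashValue first
// mixes the dynamic type into the state via std::type_index, then forwards
// the state across a virtual call as a type-erased absl::HashState.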
class TypeErasedInterface {
 public:
  virtual ~TypeErasedInterface() = default;

  template <typename H>
  friend H AbslHashValue(H state, const TypeErasedInterface& wrapper) {
    state = H::combine(std::move(state), std::type_index(typeid(wrapper)));
    wrapper.HashValue(absl::HashState::Create(&state));
    return state;
  }

 private:
  virtual void HashValue(absl::HashState state) const = 0;
};

template <typename T>
struct TypeErasedAbslHash {
  class Wrapper : public TypeErasedInterface {
   public:
    explicit Wrapper(const T& value) : value_(value) {}

   private:
    void HashValue(absl::HashState state) const override {
      absl::HashState::combine(std::move(state), value_);
    }

    const T& value_;
  };

  size_t operator()(const T& value) {
    return absl::Hash<Wrapper>{}(Wrapper(value));
  }
};

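// Forces odr-use of the pointed-to function so that the Codegen* entry
// points defined by MAKE_BENCHMARK below are emitted and remain visible
// when dumping assembly.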
template <typename FuncType>
inline FuncType* ODRUseFunction(FuncType* ptr) {
  volatile FuncType* dummy = ptr;
  return dummy;
}

absl::Cord FlatCord(size_t size) {
  absl::Cord result(std::string(size, 'a'));
  result.Flatten();
  return result;
}

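// Builds a Cord from roughly ten equally sized chunks, so hashing it must
// walk multiple flat segments instead of one contiguous buffer.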
absl::Cord FragmentedCord(size_t size) {
  const size_t orig_size = size;
  std::vector<std::string> chunks;
  size_t chunk_size = std::max<size_t>(1, size / 10);
  while (size > chunk_size) {
    chunks.push_back(std::string(chunk_size, 'a'));
    size -= chunk_size;
  }
  if (size > 0) {
    chunks.push_back(std::string(size, 'a'));
  }
  absl::Cord result = absl::MakeFragmentedCord(chunks);
  (void)orig_size;  // Only used by the assert, which compiles out in NDEBUG.
  assert(result.size() == orig_size);
  return result;
}

template <typename T>
std::vector<T> Vector(size_t count) {
  std::vector<T> result;
  for (size_t v = 0; v < count; ++v) {
    result.push_back(v);
  }
  return result;
}

// Bogus type that replicates an unordered_set's bit mixing, but with
// vector-speed iteration. This is intended to measure the overhead of
// unordered hashing without counting the speed of unordered_set iteration.
template <typename T>
struct FastUnorderedSet {
  explicit FastUnorderedSet(size_t count) {
    for (size_t v = 0; v < count; ++v) {
      values.push_back(v);
    }
  }
  std::vector<T> values;

  template <typename H>
  friend H AbslHashValue(H h, const FastUnorderedSet& fus) {
    return H::combine(H::combine_unordered(std::move(h), fus.values.begin(),
                                           fus.values.end()),
                      fus.values.size());
  }
};
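
// Comparing the FastUnorderedSet* benchmarks below with the corresponding
// FlatHashSet* ones should therefore show how much of the cost of hashing
// an unordered container comes from iteration rather than from the
// order-independent mixing itself.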

template <typename T>
absl::flat_hash_set<T> FlatHashSet(size_t count) {
  absl::flat_hash_set<T> result;
  for (size_t v = 0; v < count; ++v) {
    result.insert(v);
  }
  return result;
}

// Generates a benchmark and a codegen method for the provided types. The
// codegen method provides a well-known entry point for dumping assembly.
#define MAKE_BENCHMARK(hash, name, ...)                          \
  namespace {                                                    \
  void BM_##hash##_##name(benchmark::State& state) {             \
    RunBenchmark<hash>(state, __VA_ARGS__);                      \
  }                                                              \
  BENCHMARK(BM_##hash##_##name);                                 \
  }                                                              \
  size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg);  \
  size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg) { \
    return hash<decltype(__VA_ARGS__)>{}(arg);                   \
  }                                                              \
  bool absl_hash_test_odr_use##hash##name =                      \
      ODRUseFunction(&Codegen##hash##name);
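
// For reference, MAKE_BENCHMARK(AbslHash, Int32, int32_t{}) expands to
// roughly:
//
//   namespace {
//   void BM_AbslHash_Int32(benchmark::State& state) {
//     RunBenchmark<AbslHash>(state, int32_t{});
//   }
//   BENCHMARK(BM_AbslHash_Int32);
//   }
//   size_t CodegenAbslHashInt32(const int32_t& arg);
//   size_t CodegenAbslHashInt32(const int32_t& arg) {
//     return AbslHash<int32_t>{}(arg);
//   }
//   bool absl_hash_test_odr_useAbslHashInt32 =
//       ODRUseFunction(&CodegenAbslHashInt32);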

MAKE_BENCHMARK(AbslHash, Int32, int32_t{});
MAKE_BENCHMARK(AbslHash, Int64, int64_t{});
MAKE_BENCHMARK(AbslHash, Double, 1.2);
MAKE_BENCHMARK(AbslHash, DoubleZero, 0.0);
MAKE_BENCHMARK(AbslHash, PairInt32Int32, std::pair<int32_t, int32_t>{});
MAKE_BENCHMARK(AbslHash, PairInt64Int64, std::pair<int64_t, int64_t>{});
MAKE_BENCHMARK(AbslHash, TupleInt32BoolInt64,
               std::tuple<int32_t, bool, int64_t>{});
MAKE_BENCHMARK(AbslHash, String_0, std::string());
MAKE_BENCHMARK(AbslHash, String_10, std::string(10, 'a'));
MAKE_BENCHMARK(AbslHash, String_30, std::string(30, 'a'));
MAKE_BENCHMARK(AbslHash, String_90, std::string(90, 'a'));
MAKE_BENCHMARK(AbslHash, String_200, std::string(200, 'a'));
MAKE_BENCHMARK(AbslHash, String_5000, std::string(5000, 'a'));
MAKE_BENCHMARK(AbslHash, Cord_Flat_0, absl::Cord());
MAKE_BENCHMARK(AbslHash, Cord_Flat_10, FlatCord(10));
MAKE_BENCHMARK(AbslHash, Cord_Flat_30, FlatCord(30));
MAKE_BENCHMARK(AbslHash, Cord_Flat_90, FlatCord(90));
MAKE_BENCHMARK(AbslHash, Cord_Flat_200, FlatCord(200));
MAKE_BENCHMARK(AbslHash, Cord_Flat_5000, FlatCord(5000));
MAKE_BENCHMARK(AbslHash, Cord_Fragmented_200, FragmentedCord(200));
MAKE_BENCHMARK(AbslHash, Cord_Fragmented_5000, FragmentedCord(5000));
MAKE_BENCHMARK(AbslHash, VectorInt64_10, Vector<int64_t>(10));
MAKE_BENCHMARK(AbslHash, VectorInt64_100, Vector<int64_t>(100));
MAKE_BENCHMARK(AbslHash, VectorInt64_1000, Vector<int64_t>(1000));
MAKE_BENCHMARK(AbslHash, VectorDouble_10, Vector<double>(10));
MAKE_BENCHMARK(AbslHash, VectorDouble_100, Vector<double>(100));
MAKE_BENCHMARK(AbslHash, VectorDouble_1000, Vector<double>(1000));
MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_10, FlatHashSet<int64_t>(10));
MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_100, FlatHashSet<int64_t>(100));
MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_1000, FlatHashSet<int64_t>(1000));
MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_10, FlatHashSet<double>(10));
MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_100, FlatHashSet<double>(100));
MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_1000, FlatHashSet<double>(1000));
MAKE_BENCHMARK(AbslHash, FastUnorderedSetInt64_1000,
               FastUnorderedSet<int64_t>(1000));
MAKE_BENCHMARK(AbslHash, FastUnorderedSetDouble_1000,
               FastUnorderedSet<double>(1000));
MAKE_BENCHMARK(AbslHash, PairStringString_0,
               std::make_pair(std::string(), std::string()));
MAKE_BENCHMARK(AbslHash, PairStringString_10,
               std::make_pair(std::string(10, 'a'), std::string(10, 'b')));
MAKE_BENCHMARK(AbslHash, PairStringString_30,
               std::make_pair(std::string(30, 'a'), std::string(30, 'b')));
MAKE_BENCHMARK(AbslHash, PairStringString_90,
               std::make_pair(std::string(90, 'a'), std::string(90, 'b')));
MAKE_BENCHMARK(AbslHash, PairStringString_200,
               std::make_pair(std::string(200, 'a'), std::string(200, 'b')));
MAKE_BENCHMARK(AbslHash, PairStringString_5000,
               std::make_pair(std::string(5000, 'a'), std::string(5000, 'b')));

MAKE_BENCHMARK(TypeErasedAbslHash, Int32, int32_t{});
MAKE_BENCHMARK(TypeErasedAbslHash, Int64, int64_t{});
MAKE_BENCHMARK(TypeErasedAbslHash, PairInt32Int32,
               std::pair<int32_t, int32_t>{});
MAKE_BENCHMARK(TypeErasedAbslHash, PairInt64Int64,
               std::pair<int64_t, int64_t>{});
MAKE_BENCHMARK(TypeErasedAbslHash, TupleInt32BoolInt64,
               std::tuple<int32_t, bool, int64_t>{});
MAKE_BENCHMARK(TypeErasedAbslHash, String_0, std::string());
MAKE_BENCHMARK(TypeErasedAbslHash, String_10, std::string(10, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, String_30, std::string(30, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, String_90, std::string(90, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, String_200, std::string(200, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, String_5000, std::string(5000, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_10,
               std::vector<double>(10, 1.1));
MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_100,
               std::vector<double>(100, 1.1));
MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_1000,
               std::vector<double>(1000, 1.1));
MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_10,
               FlatHashSet<int64_t>(10));
MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_100,
               FlatHashSet<int64_t>(100));
MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_1000,
               FlatHashSet<int64_t>(1000));
MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_10,
               FlatHashSet<double>(10));
MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_100,
               FlatHashSet<double>(100));
MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_1000,
               FlatHashSet<double>(1000));
MAKE_BENCHMARK(TypeErasedAbslHash, FastUnorderedSetInt64_1000,
               FastUnorderedSet<int64_t>(1000));
MAKE_BENCHMARK(TypeErasedAbslHash, FastUnorderedSetDouble_1000,
               FastUnorderedSet<double>(1000));

// The latency benchmark attempts to model the speed of the hash function in
// production. When a hash function is used for hashtable lookups, it is
// rarely used to hash N items in a tight loop, nor is it applied to
// constant-sized strings. Instead, after hashing there is a potential
// equality test plus a (usually) large amount of user code. To simulate this
// effectively we introduce a data dependency between the elements we hash:
// the hash of the Nth element selects the (N+1)th element to hash. This
// isolates the hash function code much like in production. As a bonus, we
// use the hash to generate strings of size [1,N] (instead of a fixed N) to
// defeat perfect branch prediction in hash function implementations.
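//
// Concretely, each MAKE_LATENCY_BENCHMARK iteration below computes
//   i = h(r.Get(i));
// so the input for iteration N+1 cannot even be fetched until the hash from
// iteration N has completed.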
namespace {
// 16 KiB fits in the L1 cache of most CPUs we care about. Keeping memory
// latency low lets us attribute most of the time to the CPU, which means
// more accurate measurements.
static constexpr size_t kEntropySize = 16 << 10;
static char entropy[kEntropySize + 1024];
ABSL_ATTRIBUTE_UNUSED static const bool kInitialized = [] {
  absl::BitGen gen;
  static_assert(sizeof(entropy) % sizeof(uint64_t) == 0, "");
  for (size_t i = 0; i != sizeof(entropy); i += sizeof(uint64_t)) {
    auto rand = absl::Uniform<uint64_t>(gen);
    memcpy(&entropy[i], &rand, sizeof(uint64_t));
  }
  return true;
}();
}  // namespace

template <class T>
struct PodRand {
  static_assert(std::is_pod<T>::value, "");
  static_assert(kEntropySize + sizeof(T) < sizeof(entropy), "");

  // Reads a T out of the entropy table at an offset derived from i. The
  // 1024 bytes of slack in `entropy` make reads starting anywhere below
  // kEntropySize safe.
  T Get(size_t i) const {
    T v;
    memcpy(&v, &entropy[i % kEntropySize], sizeof(T));
    return v;
  }
};

template <size_t N>
struct StringRand {
  static_assert(kEntropySize + N < sizeof(entropy), "");

  absl::string_view Get(size_t i) const {
    // This has a small bias towards small numbers. Because the max N is
    // ~200, the bias is tiny, and we prefer to be very fast rather than
    // perfectly uniform. We also pass N = 2^K+1 so that the mod reduces to
    // a bitand.
    size_t s = (i % (N - 1)) + 1;
    return {&entropy[i % kEntropySize], s};
  }
};
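
// For example, with N = 257 the length computation (i % (N - 1)) + 1 becomes
// (i % 256) + 1, which the compiler can lower to (i & 255) + 1; all the
// StringRand sizes used below (9, 33, 65, 257) have this 2^K+1 form.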

#define MAKE_LATENCY_BENCHMARK(hash, name, ...)               \
  namespace {                                                 \
  void BM_latency_##hash##_##name(benchmark::State& state) {  \
    __VA_ARGS__ r;                                            \
    hash<decltype(r.Get(0))> h;                               \
    size_t i = 871401241;                                     \
    for (auto _ : state) {                                    \
      benchmark::DoNotOptimize(i = h(r.Get(i)));              \
    }                                                         \
  }                                                           \
  BENCHMARK(BM_latency_##hash##_##name);                      \
  }  // namespace

MAKE_LATENCY_BENCHMARK(AbslHash, Int32, PodRand<int32_t>);
MAKE_LATENCY_BENCHMARK(AbslHash, Int64, PodRand<int64_t>);
MAKE_LATENCY_BENCHMARK(AbslHash, String9, StringRand<9>);
MAKE_LATENCY_BENCHMARK(AbslHash, String33, StringRand<33>);
MAKE_LATENCY_BENCHMARK(AbslHash, String65, StringRand<65>);
MAKE_LATENCY_BENCHMARK(AbslHash, String257, StringRand<257>);