// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include <tuple>
#include <type_traits>
#include <typeindex>
#include <utility>
#include <vector>

#include "absl/base/attributes.h"
#include "absl/container/flat_hash_set.h"
#include "absl/hash/hash.h"
#include "absl/random/random.h"
#include "absl/strings/cord.h"
#include "absl/strings/cord_test_helpers.h"
#include "absl/strings/string_view.h"
#include "benchmark/benchmark.h"

namespace {

using absl::Hash;

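// Runs a throughput benchmark: hashes `value` with hasher template H once per
// iteration. DoNotOptimize forces both the input and the hash result to be
// materialized each iteration so the loop cannot be optimized away.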
template <template <typename> class H, typename T>
void RunBenchmark(benchmark::State& state, T value) {
  H<T> h;
  for (auto _ : state) {
    benchmark::DoNotOptimize(value);
    benchmark::DoNotOptimize(h(value));
  }
}

}  // namespace

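// Alias template so that absl::Hash can be passed as the `hash` argument of
// the MAKE_BENCHMARK macros below; the token pasting in those macros requires
// a plain identifier rather than a qualified name.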
template <typename T>
using AbslHash = absl::Hash<T>;

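// Interface for hashing through type erasure. The std::type_index of the
// dynamic type is combined first, then the derived class mixes its value into
// a type-erased absl::HashState. Benchmarks built on this measure the cost of
// the virtual dispatch and HashState indirection relative to plain absl::Hash.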
class TypeErasedInterface {
 public:
  virtual ~TypeErasedInterface() = default;

  template <typename H>
  friend H AbslHashValue(H state, const TypeErasedInterface& wrapper) {
    state = H::combine(std::move(state), std::type_index(typeid(wrapper)));
    wrapper.HashValue(absl::HashState::Create(&state));
    return state;
  }

 private:
  virtual void HashValue(absl::HashState state) const = 0;
};

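// Hasher that funnels every value through the type-erased interface above, so
// TypeErasedAbslHash<T> benchmarks can be compared directly against their
// plain AbslHash<T> counterparts.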
template <typename T>
struct TypeErasedAbslHash {
  class Wrapper : public TypeErasedInterface {
   public:
    explicit Wrapper(const T& value) : value_(value) {}

   private:
    void HashValue(absl::HashState state) const override {
      absl::HashState::combine(std::move(state), value_);
    }

    const T& value_;
  };

  size_t operator()(const T& value) {
    return absl::Hash<Wrapper>{}(Wrapper(value));
  }
};

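// ODR-uses a function pointer so that the Codegen* entry points defined by
// MAKE_BENCHMARK are emitted into the binary even though nothing calls them.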
template <typename FuncType>
inline FuncType* ODRUseFunction(FuncType* ptr) {
  volatile FuncType* dummy = ptr;
  return dummy;
}

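// Returns a Cord of `size` bytes whose contents live in a single flat chunk.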
absl::Cord FlatCord(size_t size) {
  absl::Cord result(std::string(size, 'a'));
  result.Flatten();
  return result;
}

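// Returns a Cord of `size` bytes split across roughly ten equal-sized chunks,
// to exercise hashing of non-contiguous Cord representations.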
absl::Cord FragmentedCord(size_t size) {
  const size_t orig_size = size;
  std::vector<std::string> chunks;
  size_t chunk_size = std::max<size_t>(1, size / 10);
  while (size > chunk_size) {
    chunks.push_back(std::string(chunk_size, 'a'));
    size -= chunk_size;
  }
  if (size > 0) {
    chunks.push_back(std::string(size, 'a'));
  }
  absl::Cord result = absl::MakeFragmentedCord(chunks);
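  // orig_size is only read by the assert below; the void cast silences
  // unused-variable warnings in NDEBUG builds.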
  (void) orig_size;
  assert(result.size() == orig_size);
  return result;
}

template <typename T>
std::vector<T> Vector(size_t count) {
  std::vector<T> result;
  for (size_t v = 0; v < count; ++v) {
    result.push_back(v);
  }
  return result;
}

// Bogus type that replicates an unordered_set's bit mixing, but with
// vector-speed iteration. This is intended to measure the overhead of
// unordered hashing without counting the speed of unordered_set iteration.
template <typename T>
struct FastUnorderedSet {
  explicit FastUnorderedSet(size_t count) {
    for (size_t v = 0; v < count; ++v) {
      values.push_back(v);
    }
  }
  std::vector<T> values;

  template <typename H>
  friend H AbslHashValue(H h, const FastUnorderedSet& fus) {
    return H::combine(H::combine_unordered(std::move(h), fus.values.begin(),
                                           fus.values.end()),
                      fus.values.size());
  }
};

template <typename T>
absl::flat_hash_set<T> FlatHashSet(size_t count) {
  absl::flat_hash_set<T> result;
  for (size_t v = 0; v < count; ++v) {
    result.insert(v);
  }
  return result;
}

// Generates a benchmark and a codegen method for the provided types.  The
// codegen method provides a well-known entry point for dumping assembly.
#define MAKE_BENCHMARK(hash, name, ...)                          \
  namespace {                                                    \
  void BM_##hash##_##name(benchmark::State& state) {             \
    RunBenchmark<hash>(state, __VA_ARGS__);                      \
  }                                                              \
  BENCHMARK(BM_##hash##_##name);                                 \
  }                                                              \
  size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg);  \
  size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg) { \
    return hash<decltype(__VA_ARGS__)>{}(arg);                   \
  }                                                              \
  bool absl_hash_test_odr_use##hash##name =                      \
      ODRUseFunction(&Codegen##hash##name);

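// For illustration, MAKE_BENCHMARK(AbslHash, Int32, int32_t{}) expands to
// roughly:
//
//   namespace {
//   void BM_AbslHash_Int32(benchmark::State& state) {
//     RunBenchmark<AbslHash>(state, int32_t{});
//   }
//   BENCHMARK(BM_AbslHash_Int32);
//   }  // namespace
//   size_t CodegenAbslHashInt32(const int32_t& arg);
//   size_t CodegenAbslHashInt32(const int32_t& arg) {
//     return AbslHash<int32_t>{}(arg);
//   }
//   bool absl_hash_test_odr_useAbslHashInt32 =
//       ODRUseFunction(&CodegenAbslHashInt32);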
MAKE_BENCHMARK(AbslHash, Int32, int32_t{});
MAKE_BENCHMARK(AbslHash, Int64, int64_t{});
MAKE_BENCHMARK(AbslHash, Double, 1.2);
MAKE_BENCHMARK(AbslHash, DoubleZero, 0.0);
MAKE_BENCHMARK(AbslHash, PairInt32Int32, std::pair<int32_t, int32_t>{});
MAKE_BENCHMARK(AbslHash, PairInt64Int64, std::pair<int64_t, int64_t>{});
MAKE_BENCHMARK(AbslHash, TupleInt32BoolInt64,
               std::tuple<int32_t, bool, int64_t>{});
MAKE_BENCHMARK(AbslHash, String_0, std::string());
MAKE_BENCHMARK(AbslHash, String_10, std::string(10, 'a'));
MAKE_BENCHMARK(AbslHash, String_30, std::string(30, 'a'));
MAKE_BENCHMARK(AbslHash, String_90, std::string(90, 'a'));
MAKE_BENCHMARK(AbslHash, String_200, std::string(200, 'a'));
MAKE_BENCHMARK(AbslHash, String_5000, std::string(5000, 'a'));
MAKE_BENCHMARK(AbslHash, Cord_Flat_0, absl::Cord());
MAKE_BENCHMARK(AbslHash, Cord_Flat_10, FlatCord(10));
MAKE_BENCHMARK(AbslHash, Cord_Flat_30, FlatCord(30));
MAKE_BENCHMARK(AbslHash, Cord_Flat_90, FlatCord(90));
MAKE_BENCHMARK(AbslHash, Cord_Flat_200, FlatCord(200));
MAKE_BENCHMARK(AbslHash, Cord_Flat_5000, FlatCord(5000));
MAKE_BENCHMARK(AbslHash, Cord_Fragmented_200, FragmentedCord(200));
MAKE_BENCHMARK(AbslHash, Cord_Fragmented_5000, FragmentedCord(5000));
MAKE_BENCHMARK(AbslHash, VectorInt64_10, Vector<int64_t>(10));
MAKE_BENCHMARK(AbslHash, VectorInt64_100, Vector<int64_t>(100));
MAKE_BENCHMARK(AbslHash, VectorInt64_1000, Vector<int64_t>(1000));
MAKE_BENCHMARK(AbslHash, VectorDouble_10, Vector<double>(10));
MAKE_BENCHMARK(AbslHash, VectorDouble_100, Vector<double>(100));
MAKE_BENCHMARK(AbslHash, VectorDouble_1000, Vector<double>(1000));
MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_10, FlatHashSet<int64_t>(10));
MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_100, FlatHashSet<int64_t>(100));
MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_1000, FlatHashSet<int64_t>(1000));
MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_10, FlatHashSet<double>(10));
MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_100, FlatHashSet<double>(100));
MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_1000, FlatHashSet<double>(1000));
MAKE_BENCHMARK(AbslHash, FastUnorderedSetInt64_1000,
               FastUnorderedSet<int64_t>(1000));
MAKE_BENCHMARK(AbslHash, FastUnorderedSetDouble_1000,
               FastUnorderedSet<double>(1000));
MAKE_BENCHMARK(AbslHash, PairStringString_0,
               std::make_pair(std::string(), std::string()));
MAKE_BENCHMARK(AbslHash, PairStringString_10,
               std::make_pair(std::string(10, 'a'), std::string(10, 'b')));
MAKE_BENCHMARK(AbslHash, PairStringString_30,
               std::make_pair(std::string(30, 'a'), std::string(30, 'b')));
MAKE_BENCHMARK(AbslHash, PairStringString_90,
               std::make_pair(std::string(90, 'a'), std::string(90, 'b')));
MAKE_BENCHMARK(AbslHash, PairStringString_200,
               std::make_pair(std::string(200, 'a'), std::string(200, 'b')));
MAKE_BENCHMARK(AbslHash, PairStringString_5000,
               std::make_pair(std::string(5000, 'a'), std::string(5000, 'b')));

MAKE_BENCHMARK(TypeErasedAbslHash, Int32, int32_t{});
MAKE_BENCHMARK(TypeErasedAbslHash, Int64, int64_t{});
MAKE_BENCHMARK(TypeErasedAbslHash, PairInt32Int32,
               std::pair<int32_t, int32_t>{});
MAKE_BENCHMARK(TypeErasedAbslHash, PairInt64Int64,
               std::pair<int64_t, int64_t>{});
MAKE_BENCHMARK(TypeErasedAbslHash, TupleInt32BoolInt64,
               std::tuple<int32_t, bool, int64_t>{});
MAKE_BENCHMARK(TypeErasedAbslHash, String_0, std::string());
MAKE_BENCHMARK(TypeErasedAbslHash, String_10, std::string(10, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, String_30, std::string(30, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, String_90, std::string(90, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, String_200, std::string(200, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, String_5000, std::string(5000, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_10,
               std::vector<double>(10, 1.1));
MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_100,
               std::vector<double>(100, 1.1));
MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_1000,
               std::vector<double>(1000, 1.1));
MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_10,
               FlatHashSet<int64_t>(10));
MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_100,
               FlatHashSet<int64_t>(100));
MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_1000,
               FlatHashSet<int64_t>(1000));
MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_10,
               FlatHashSet<double>(10));
MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_100,
               FlatHashSet<double>(100));
MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_1000,
               FlatHashSet<double>(1000));
MAKE_BENCHMARK(TypeErasedAbslHash, FastUnorderedSetInt64_1000,
               FastUnorderedSet<int64_t>(1000));
MAKE_BENCHMARK(TypeErasedAbslHash, FastUnorderedSetDouble_1000,
               FastUnorderedSet<double>(1000));

// The latency benchmark attempts to model the speed of the hash function in
// production. When a hash function is used for hashtable lookups it is rarely
// used to hash N items in a tight loop or on constant-sized strings. Instead,
// after hashing there is a potential equality test plus a (usually) large
// amount of user code. To simulate this effectively we introduce a data
// dependency between elements we hash by using the hash of the Nth element as
// the selector of the (N+1)th element to hash. This isolates the hash function
// code much like in production. As a bonus we use the hash to generate strings
// of size [1,N] (instead of fixed N) to defeat perfect branch prediction in
// hash function implementations.
namespace {
// 16 KiB fits in the L1 cache of most CPUs we care about. Keeping memory
// latency low allows us to attribute most of the time to the CPU, which means
// more accurate measurements.
static constexpr size_t kEntropySize = 16 << 10;
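// The extra 1024 bytes of slack let the Get() methods below read sizeof(T) or
// N bytes starting at any offset below kEntropySize without running off the
// end of the array (see the static_asserts in PodRand and StringRand).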
static char entropy[kEntropySize + 1024];
ABSL_ATTRIBUTE_UNUSED static const bool kInitialized = [] {
  absl::BitGen gen;
  static_assert(sizeof(entropy) % sizeof(uint64_t) == 0, "");
  for (int i = 0; i != sizeof(entropy); i += sizeof(uint64_t)) {
    auto rand = absl::Uniform<uint64_t>(gen);
    memcpy(&entropy[i], &rand, sizeof(uint64_t));
  }
  return true;
}();
}  // namespace

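// PodRand and StringRand turn an index (in practice, the previous hash value)
// into the next input to hash by reading from the entropy table, creating the
// data dependency described above.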
template <class T>
struct PodRand {
  static_assert(std::is_pod<T>::value, "");
  static_assert(kEntropySize + sizeof(T) < sizeof(entropy), "");

  T Get(size_t i) const {
    T v;
    memcpy(&v, &entropy[i % kEntropySize], sizeof(T));
    return v;
  }
};

template <size_t N>
struct StringRand {
  static_assert(kEntropySize + N < sizeof(entropy), "");

  absl::string_view Get(size_t i) const {
    // This has a small bias towards small numbers. Because max N is ~200 the
    // bias is very small, and we prefer to be very fast rather than
    // absolutely accurate. Also we pass N = 2^K+1 so that the mod reduces to
    // a bitand.
    size_t s = (i % (N - 1)) + 1;
    return {&entropy[i % kEntropySize], s};
  }
};

#define MAKE_LATENCY_BENCHMARK(hash, name, ...)              \
  namespace {                                                \
  void BM_latency_##hash##_##name(benchmark::State& state) { \
    __VA_ARGS__ r;                                           \
    hash<decltype(r.Get(0))> h;                              \
    size_t i = 871401241;                                    \
    for (auto _ : state) {                                   \
      benchmark::DoNotOptimize(i = h(r.Get(i)));             \
    }                                                        \
  }                                                          \
  BENCHMARK(BM_latency_##hash##_##name);                     \
  }  // namespace

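// For illustration, MAKE_LATENCY_BENCHMARK(AbslHash, Int32, PodRand<int32_t>)
// defines roughly:
//
//   void BM_latency_AbslHash_Int32(benchmark::State& state) {
//     PodRand<int32_t> r;
//     AbslHash<int32_t> h;
//     size_t i = 871401241;
//     for (auto _ : state) {
//       // Each hash result selects the next value to hash, so iterations
//       // are serially dependent and the loop measures latency.
//       benchmark::DoNotOptimize(i = h(r.Get(i)));
//     }
//   }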
MAKE_LATENCY_BENCHMARK(AbslHash, Int32, PodRand<int32_t>);
MAKE_LATENCY_BENCHMARK(AbslHash, Int64, PodRand<int64_t>);
MAKE_LATENCY_BENCHMARK(AbslHash, String9, StringRand<9>);
MAKE_LATENCY_BENCHMARK(AbslHash, String33, StringRand<33>);
MAKE_LATENCY_BENCHMARK(AbslHash, String65, StringRand<65>);
MAKE_LATENCY_BENCHMARK(AbslHash, String257, StringRand<257>);
330