// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <functional>
#include <memory>
#include <numeric>
#include <random>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "absl/base/internal/raw_logging.h"
#include "absl/container/internal/hash_function_defaults.h"
#include "absl/container/internal/raw_hash_set.h"
#include "absl/strings/str_format.h"
#include "benchmark/benchmark.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace container_internal {

struct RawHashSetTestOnlyAccess {
  template <typename C>
  static auto GetSlots(const C& c) -> decltype(c.slots_) {
    return c.slots_;
  }
};

namespace {

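// Minimal raw_hash_set policies used by the benchmarks below. A policy
// describes slot layout and lifetime for the table: construct/destroy/transfer
// manage slots, element() exposes the stored value, and apply() unpacks
// lookup/insert arguments for the hash and equality functors.
//
// IntPolicy models a set of int64_t: the slot is the value itself, so transfer
// is a plain copy and destroy is a no-op.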
struct IntPolicy {
  using slot_type = int64_t;
  using key_type = int64_t;
  using init_type = int64_t;

  static void construct(void*, int64_t* slot, int64_t v) { *slot = v; }
  static void destroy(void*, int64_t*) {}
  static void transfer(void*, int64_t* new_slot, int64_t* old_slot) {
    *new_slot = *old_slot;
  }

  static int64_t& element(slot_type* slot) { return *slot; }

  template <class F>
  static auto apply(F&& f, int64_t x) -> decltype(std::forward<F>(f)(x, x)) {
    return std::forward<F>(f)(x, x);
  }
};

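// StringPolicy models a map from std::string to std::string. The slot wraps a
// std::pair, and apply() forwards an absl::string_view of the key to the
// (transparent) hash and equality functors, so lookups do not have to
// materialize a std::string.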
class StringPolicy {
  template <class F, class K, class V,
            class = typename std::enable_if<
                std::is_convertible<const K&, absl::string_view>::value>::type>
  decltype(std::declval<F>()(
      std::declval<const absl::string_view&>(), std::piecewise_construct,
      std::declval<std::tuple<K>>(),
      std::declval<V>())) static apply_impl(F&& f,
                                            std::pair<std::tuple<K>, V> p) {
    const absl::string_view& key = std::get<0>(p.first);
    return std::forward<F>(f)(key, std::piecewise_construct, std::move(p.first),
                              std::move(p.second));
  }

 public:
  struct slot_type {
    struct ctor {};

    template <class... Ts>
    slot_type(ctor, Ts&&... ts) : pair(std::forward<Ts>(ts)...) {}

    std::pair<std::string, std::string> pair;
  };

  using key_type = std::string;
  using init_type = std::pair<std::string, std::string>;

  template <class allocator_type, class... Args>
  static void construct(allocator_type* alloc, slot_type* slot, Args... args) {
    std::allocator_traits<allocator_type>::construct(
        *alloc, slot, typename slot_type::ctor(), std::forward<Args>(args)...);
  }

  template <class allocator_type>
  static void destroy(allocator_type* alloc, slot_type* slot) {
    std::allocator_traits<allocator_type>::destroy(*alloc, slot);
  }

  template <class allocator_type>
  static void transfer(allocator_type* alloc, slot_type* new_slot,
                       slot_type* old_slot) {
    construct(alloc, new_slot, std::move(old_slot->pair));
    destroy(alloc, old_slot);
  }

  static std::pair<std::string, std::string>& element(slot_type* slot) {
    return slot->pair;
  }

  template <class F, class... Args>
  static auto apply(F&& f, Args&&... args)
      -> decltype(apply_impl(std::forward<F>(f),
                             PairArgs(std::forward<Args>(args)...))) {
    return apply_impl(std::forward<F>(f),
                      PairArgs(std::forward<Args>(args)...));
  }
};

struct StringHash : container_internal::hash_default_hash<absl::string_view> {
  using is_transparent = void;
};
struct StringEq : std::equal_to<absl::string_view> {
  using is_transparent = void;
};

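// Thin benchmark-only table types over raw_hash_set (the SwissTable engine
// behind absl::flat_hash_set/flat_hash_map), so the container can be measured
// without going through the public wrappers.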
struct StringTable
    : raw_hash_set<StringPolicy, StringHash, StringEq, std::allocator<int>> {
  using Base = typename StringTable::raw_hash_set;
  StringTable() {}
  using Base::Base;
};

struct IntTable
    : raw_hash_set<IntPolicy, container_internal::hash_default_hash<int64_t>,
                   std::equal_to<int64_t>, std::allocator<int64_t>> {
  using Base = typename IntTable::raw_hash_set;
  IntTable() {}
  using Base::Base;
};

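// Produces uniformly random printable-ASCII (0x20..0x7E) strings of a fixed
// length.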
struct string_generator {
  template <class RNG>
  std::string operator()(RNG& rng) const {
    std::string res;
    res.resize(size);
    std::uniform_int_distribution<uint32_t> printable_ascii(0x20, 0x7E);
    std::generate(res.begin(), res.end(),
                  [&] { return printable_ascii(rng); });
    return res;
  }

  size_t size;
};

// Model a cache in steady state.
//
// On a table of size N, keep deleting the LRU entry and adding a random one.
void BM_CacheInSteadyState(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  string_generator gen{12};
  StringTable t;
  std::deque<std::string> keys;
  while (t.size() < state.range(0)) {
    auto x = t.emplace(gen(rng), gen(rng));
    if (x.second) keys.push_back(x.first->first);
  }
  ABSL_RAW_CHECK(state.range(0) >= 10, "");
  while (state.KeepRunning()) {
    // Some cache hits.
    std::deque<std::string>::const_iterator it;
    for (int i = 0; i != 90; ++i) {
      if (i % 10 == 0) it = keys.end();
      ::benchmark::DoNotOptimize(t.find(*--it));
    }
    // Some cache misses.
    for (int i = 0; i != 10; ++i) ::benchmark::DoNotOptimize(t.find(gen(rng)));
    ABSL_RAW_CHECK(t.erase(keys.front()), keys.front().c_str());
    keys.pop_front();
    while (true) {
      auto x = t.emplace(gen(rng), gen(rng));
      if (x.second) {
        keys.push_back(x.first->first);
        break;
      }
    }
  }
  state.SetItemsProcessed(state.iterations());
  state.SetLabel(absl::StrFormat("load_factor=%.2f", t.load_factor()));
}

template <typename Benchmark>
void CacheInSteadyStateArgs(Benchmark* bm) {
  // The default.
  const float max_load_factor = 0.875;
  // When the cache is in steady state, the probe sequence will equal
  // capacity if there is no reclamation of deleted slots. Pick a number large
  // enough to make the benchmark slow for that case.
  const size_t capacity = 1 << 10;

  // Check N data points to cover load factors in [0.4, 0.8).
  const size_t kNumPoints = 10;
  for (size_t i = 0; i != kNumPoints; ++i)
    bm->Arg(std::ceil(
        capacity * (max_load_factor + i * max_load_factor / kNumPoints) / 2));
}
BENCHMARK(BM_CacheInSteadyState)->Apply(CacheInSteadyStateArgs);

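// Repeatedly erase and re-insert the same keys while the table stays at a
// fixed size; this exercises how deleted slots are marked and reused.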
void BM_EraseEmplace(benchmark::State& state) {
  IntTable t;
  int64_t size = state.range(0);
  for (int64_t i = 0; i < size; ++i) {
    t.emplace(i);
  }
  while (state.KeepRunningBatch(size)) {
    for (int64_t i = 0; i < size; ++i) {
      benchmark::DoNotOptimize(t);
      t.erase(i);
      t.emplace(i);
    }
  }
}
BENCHMARK(BM_EraseEmplace)->Arg(1)->Arg(2)->Arg(4)->Arg(8)->Arg(16)->Arg(100);

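// Measures the cost of the iterator-vs-end() comparison in isolation; the loop
// body performs no table mutation.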
void BM_EndComparison(benchmark::State& state) {
  StringTable t = {{"a", "a"}, {"b", "b"}};
  auto it = t.begin();
  for (auto i : state) {
    benchmark::DoNotOptimize(t);
    benchmark::DoNotOptimize(it);
    benchmark::DoNotOptimize(it != t.end());
  }
}
BENCHMARK(BM_EndComparison);

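// Measures full-table iteration for various (capacity, size) combinations:
// densely filled, completely empty, and sparse tables (see the argument pairs
// registered below).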
void BM_Iteration(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  string_generator gen{12};
  StringTable t;

  size_t capacity = state.range(0);
  size_t size = state.range(1);
  t.reserve(capacity);

  while (t.size() < size) {
    t.emplace(gen(rng), gen(rng));
  }

  for (auto i : state) {
    benchmark::DoNotOptimize(t);
    for (auto it = t.begin(); it != t.end(); ++it) {
      benchmark::DoNotOptimize(*it);
    }
  }
}

BENCHMARK(BM_Iteration)
    ->ArgPair(1, 1)
    ->ArgPair(2, 2)
    ->ArgPair(4, 4)
    ->ArgPair(7, 7)
    ->ArgPair(10, 10)
    ->ArgPair(15, 15)
    ->ArgPair(16, 16)
    ->ArgPair(54, 54)
    ->ArgPair(100, 100)
    ->ArgPair(400, 400)
    // empty
    ->ArgPair(0, 0)
    ->ArgPair(10, 0)
    ->ArgPair(100, 0)
    ->ArgPair(1000, 0)
    ->ArgPair(10000, 0)
    // sparse
    ->ArgPair(100, 1)
    ->ArgPair(1000, 10);

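// Copy-construction and copy-assignment benchmarks. The "Sparse" variant
// reserves 10x the element count so the source table is mostly empty; the
// others copy tables at their natural load factor.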
void BM_CopyCtorSparseInt(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  IntTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});

  size_t size = state.range(0);
  t.reserve(size * 10);
  while (t.size() < size) {
    t.emplace(dist(rng));
  }

  for (auto i : state) {
    IntTable t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyCtorSparseInt)->Range(128, 4096);

void BM_CopyCtorInt(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  IntTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});

  size_t size = state.range(0);
  while (t.size() < size) {
    t.emplace(dist(rng));
  }

  for (auto i : state) {
    IntTable t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyCtorInt)->Range(128, 4096);

void BM_CopyCtorString(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  StringTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});

  size_t size = state.range(0);
  while (t.size() < size) {
    t.emplace(std::to_string(dist(rng)), std::to_string(dist(rng)));
  }

  for (auto i : state) {
    StringTable t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyCtorString)->Range(128, 4096);

void BM_CopyAssign(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  IntTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});
  while (t.size() < state.range(0)) {
    t.emplace(dist(rng));
  }

  IntTable t2;
  for (auto _ : state) {
    t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyAssign)->Range(128, 4096);

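// Measures construction from an iterator range: a bulk build of a fresh table
// from a vector of random keys.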
void BM_RangeCtor(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});
  std::vector<int> values;
  const size_t desired_size = state.range(0);
  while (values.size() < desired_size) {
    values.emplace_back(dist(rng));
  }

  for (auto unused : state) {
    IntTable t{values.begin(), values.end()};
    benchmark::DoNotOptimize(t);
  }
}
BENCHMARK(BM_RangeCtor)->Range(128, 65536);

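// reserve() to a capacity the table already has should be (nearly) free; these
// measure that no-op path.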
void BM_NoOpReserveIntTable(benchmark::State& state) {
  IntTable t;
  t.reserve(100000);
  for (auto _ : state) {
    benchmark::DoNotOptimize(t);
    t.reserve(100000);
  }
}
BENCHMARK(BM_NoOpReserveIntTable);

void BM_NoOpReserveStringTable(benchmark::State& state) {
  StringTable t;
  t.reserve(100000);
  for (auto _ : state) {
    benchmark::DoNotOptimize(t);
    t.reserve(100000);
  }
}
BENCHMARK(BM_NoOpReserveStringTable);

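// Measures reserve() on freshly constructed tables. The tables are built with
// timing paused, so only the reservation itself is counted; batching amortizes
// the Pause/ResumeTiming overhead.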
void BM_ReserveIntTable(benchmark::State& state) {
  constexpr size_t kBatchSize = 1024;
  size_t reserve_size = static_cast<size_t>(state.range(0));

  std::vector<IntTable> tables;
  while (state.KeepRunningBatch(kBatchSize)) {
    state.PauseTiming();
    tables.clear();
    tables.resize(kBatchSize);
    state.ResumeTiming();
    for (auto& t : tables) {
      benchmark::DoNotOptimize(t);
      t.reserve(reserve_size);
      benchmark::DoNotOptimize(t);
    }
  }
}
BENCHMARK(BM_ReserveIntTable)->Range(1, 64);

void BM_ReserveStringTable(benchmark::State& state) {
  constexpr size_t kBatchSize = 1024;
  size_t reserve_size = static_cast<size_t>(state.range(0));

  std::vector<StringTable> tables;
  while (state.KeepRunningBatch(kBatchSize)) {
    state.PauseTiming();
    tables.clear();
    tables.resize(kBatchSize);
    state.ResumeTiming();
    for (auto& t : tables) {
      benchmark::DoNotOptimize(t);
      t.reserve(reserve_size);
      benchmark::DoNotOptimize(t);
    }
  }
}
BENCHMARK(BM_ReserveStringTable)->Range(1, 64);

// Like std::iota, except that ctrl_t doesn't support operator++.
template <typename CtrlIter>
void Iota(CtrlIter begin, CtrlIter end, int value) {
  for (; begin != end; ++begin, ++value) {
    *begin = static_cast<ctrl_t>(value);
  }
}

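// The BM_Group_* benchmarks below isolate the per-group probe primitives that
// raw_hash_set applies to Group::kWidth control bytes at a time: matching an
// h2_t hash fragment, and masking empty or deleted slots. Iota fills the
// control array with an arbitrary but deterministic byte pattern.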
void BM_Group_Match(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -4);
  Group g{group.data()};
  h2_t h = 1;
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(h);
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.Match(h));
  }
}
BENCHMARK(BM_Group_Match);

void BM_Group_MaskEmpty(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -4);
  Group g{group.data()};
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.MaskEmpty());
  }
}
BENCHMARK(BM_Group_MaskEmpty);

void BM_Group_MaskEmptyOrDeleted(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -4);
  Group g{group.data()};
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.MaskEmptyOrDeleted());
  }
}
BENCHMARK(BM_Group_MaskEmptyOrDeleted);

void BM_Group_CountLeadingEmptyOrDeleted(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -2);
  Group g{group.data()};
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.CountLeadingEmptyOrDeleted());
  }
}
BENCHMARK(BM_Group_CountLeadingEmptyOrDeleted);

void BM_Group_MatchFirstEmptyOrDeleted(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -2);
  Group g{group.data()};
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.MaskEmptyOrDeleted().LowestBitSet());
  }
}
BENCHMARK(BM_Group_MatchFirstEmptyOrDeleted);

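// Measures ConvertDeletedToEmptyAndFullToDeleted over a large control array:
// the control-byte rewrite used when dropping deleted entries without
// resizing. The copy of the control array is made with timing paused.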
void BM_DropDeletes(benchmark::State& state) {
  constexpr size_t capacity = (1 << 20) - 1;
  std::vector<ctrl_t> ctrl(capacity + 1 + Group::kWidth);
  ctrl[capacity] = ctrl_t::kSentinel;
  std::vector<ctrl_t> pattern = {ctrl_t::kEmpty,   static_cast<ctrl_t>(2),
                                 ctrl_t::kDeleted, static_cast<ctrl_t>(2),
                                 ctrl_t::kEmpty,   static_cast<ctrl_t>(1),
                                 ctrl_t::kDeleted};
  for (size_t i = 0; i != capacity; ++i) {
    ctrl[i] = pattern[i % pattern.size()];
  }
  while (state.KeepRunning()) {
    state.PauseTiming();
    std::vector<ctrl_t> ctrl_copy = ctrl;
    state.ResumeTiming();
    ConvertDeletedToEmptyAndFullToDeleted(ctrl_copy.data(), capacity);
    ::benchmark::DoNotOptimize(ctrl_copy[capacity]);
  }
}
BENCHMARK(BM_DropDeletes);

void BM_Resize(benchmark::State& state) {
  // For now just measure a small cheap hash table since we
  // are mostly interested in the overhead of type-erasure
  // in resize().
  constexpr int kElements = 64;
  const int kCapacity = kElements * 2;

  IntTable table;
  for (int i = 0; i < kElements; i++) {
    table.insert(i);
  }
  for (auto unused : state) {
    table.rehash(0);
    table.rehash(kCapacity);
  }
}
BENCHMARK(BM_Resize);

}  // namespace
}  // namespace container_internal
ABSL_NAMESPACE_END
}  // namespace absl

// These methods are here to make it easy to examine the assembly for targeted
// parts of the API.
auto CodegenAbslRawHashSetInt64Find(absl::container_internal::IntTable* table,
                                    int64_t key) -> decltype(table->find(key)) {
  return table->find(key);
}

bool CodegenAbslRawHashSetInt64FindNeEnd(
    absl::container_internal::IntTable* table, int64_t key) {
  return table->find(key) != table->end();
}

// This is useful because the find isn't inlined but the iterator comparison
// is.
bool CodegenAbslRawHashSetStringFindNeEnd(
    absl::container_internal::StringTable* table, const std::string& key) {
  return table->find(key) != table->end();
}

auto CodegenAbslRawHashSetInt64Insert(absl::container_internal::IntTable* table,
                                      int64_t key)
    -> decltype(table->insert(key)) {
  return table->insert(key);
}

bool CodegenAbslRawHashSetInt64Contains(
    absl::container_internal::IntTable* table, int64_t key) {
  return table->contains(key);
}

void CodegenAbslRawHashSetInt64Iterate(
    absl::container_internal::IntTable* table) {
  for (auto x : *table) benchmark::DoNotOptimize(x);
}

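// Forming this tuple ODR-uses the Codegen* functions above, so they are kept
// in the binary (and their assembly stays available for inspection) even
// though nothing else calls them.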
int odr =
    (::benchmark::DoNotOptimize(std::make_tuple(
         &CodegenAbslRawHashSetInt64Find, &CodegenAbslRawHashSetInt64FindNeEnd,
         &CodegenAbslRawHashSetStringFindNeEnd,
         &CodegenAbslRawHashSetInt64Insert, &CodegenAbslRawHashSetInt64Contains,
         &CodegenAbslRawHashSetInt64Iterate)),
     1);