// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <memory>
#include <numeric>
#include <random>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "absl/base/internal/raw_logging.h"
#include "absl/container/internal/hash_function_defaults.h"
#include "absl/container/internal/raw_hash_set.h"
#include "absl/strings/str_format.h"
#include "benchmark/benchmark.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace container_internal {

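// Grants benchmarks access to the private slots_ member of raw_hash_set.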
struct RawHashSetTestOnlyAccess {
  template <typename C>
  static auto GetSlots(const C& c) -> decltype(c.slots_) {
    return c.slots_;
  }
};

namespace {

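// A minimal raw_hash_set policy that stores int64_t values directly in the
// slot; used by IntTable below.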
struct IntPolicy {
  using slot_type = int64_t;
  using key_type = int64_t;
  using init_type = int64_t;

  static void construct(void*, int64_t* slot, int64_t v) { *slot = v; }
  static void destroy(void*, int64_t*) {}
  static void transfer(void*, int64_t* new_slot, int64_t* old_slot) {
    *new_slot = *old_slot;
  }

  static int64_t& element(slot_type* slot) { return *slot; }

  template <class F>
  static auto apply(F&& f, int64_t x) -> decltype(std::forward<F>(f)(x, x)) {
    return std::forward<F>(f)(x, x);
  }
};

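// A map-like policy that stores a std::pair<std::string, std::string> per
// slot, keyed by the first element; used by StringTable below.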
class StringPolicy {
  template <class F, class K, class V,
            class = typename std::enable_if<
                std::is_convertible<const K&, absl::string_view>::value>::type>
  decltype(std::declval<F>()(
      std::declval<const absl::string_view&>(), std::piecewise_construct,
      std::declval<std::tuple<K>>(),
      std::declval<V>())) static apply_impl(F&& f,
                                            std::pair<std::tuple<K>, V> p) {
    const absl::string_view& key = std::get<0>(p.first);
    return std::forward<F>(f)(key, std::piecewise_construct, std::move(p.first),
                              std::move(p.second));
  }

 public:
  struct slot_type {
    struct ctor {};

    template <class... Ts>
    slot_type(ctor, Ts&&... ts) : pair(std::forward<Ts>(ts)...) {}

    std::pair<std::string, std::string> pair;
  };

  using key_type = std::string;
  using init_type = std::pair<std::string, std::string>;

  template <class allocator_type, class... Args>
  static void construct(allocator_type* alloc, slot_type* slot, Args... args) {
    std::allocator_traits<allocator_type>::construct(
        *alloc, slot, typename slot_type::ctor(), std::forward<Args>(args)...);
  }

  template <class allocator_type>
  static void destroy(allocator_type* alloc, slot_type* slot) {
    std::allocator_traits<allocator_type>::destroy(*alloc, slot);
  }

  template <class allocator_type>
  static void transfer(allocator_type* alloc, slot_type* new_slot,
                       slot_type* old_slot) {
    construct(alloc, new_slot, std::move(old_slot->pair));
    destroy(alloc, old_slot);
  }

  static std::pair<std::string, std::string>& element(slot_type* slot) {
    return slot->pair;
  }

  template <class F, class... Args>
  static auto apply(F&& f, Args&&... args)
      -> decltype(apply_impl(std::forward<F>(f),
                             PairArgs(std::forward<Args>(args)...))) {
    return apply_impl(std::forward<F>(f),
                      PairArgs(std::forward<Args>(args)...));
  }
};

struct StringHash : container_internal::hash_default_hash<absl::string_view> {
  using is_transparent = void;
};
struct StringEq : std::equal_to<absl::string_view> {
  using is_transparent = void;
};

struct StringTable
    : raw_hash_set<StringPolicy, StringHash, StringEq, std::allocator<int>> {
  using Base = typename StringTable::raw_hash_set;
  StringTable() {}
  using Base::Base;
};

struct IntTable
    : raw_hash_set<IntPolicy, container_internal::hash_default_hash<int64_t>,
                   std::equal_to<int64_t>, std::allocator<int64_t>> {
  using Base = typename IntTable::raw_hash_set;
  IntTable() {}
  using Base::Base;
};

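// Generates uniformly random printable-ASCII strings of the given size.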
struct string_generator {
  template <class RNG>
  std::string operator()(RNG& rng) const {
    std::string res;
    res.resize(size);
    std::uniform_int_distribution<uint32_t> printable_ascii(0x20, 0x7E);
    std::generate(res.begin(), res.end(),
                  [&] { return printable_ascii(rng); });
    return res;
  }

  size_t size;
};

// Model a cache in steady state.
//
// On a table of size N, keep deleting the LRU entry and adding a random one.
void BM_CacheInSteadyState(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  string_generator gen{12};
  StringTable t;
  std::deque<std::string> keys;
  while (t.size() < state.range(0)) {
    auto x = t.emplace(gen(rng), gen(rng));
    if (x.second) keys.push_back(x.first->first);
  }
  ABSL_RAW_CHECK(state.range(0) >= 10, "");
  while (state.KeepRunning()) {
    // Some cache hits.
    std::deque<std::string>::const_iterator it;
    for (int i = 0; i != 90; ++i) {
      if (i % 10 == 0) it = keys.end();
      ::benchmark::DoNotOptimize(t.find(*--it));
    }
    // Some cache misses.
    for (int i = 0; i != 10; ++i) ::benchmark::DoNotOptimize(t.find(gen(rng)));
    ABSL_RAW_CHECK(t.erase(keys.front()), keys.front().c_str());
    keys.pop_front();
    while (true) {
      auto x = t.emplace(gen(rng), gen(rng));
      if (x.second) {
        keys.push_back(x.first->first);
        break;
      }
    }
  }
  state.SetItemsProcessed(state.iterations());
  state.SetLabel(absl::StrFormat("load_factor=%.2f", t.load_factor()));
}

template <typename Benchmark>
void CacheInSteadyStateArgs(Benchmark* bm) {
  // The default.
  const float max_load_factor = 0.875;
  // When the cache is at the steady state, the probe sequence will equal
  // capacity if there is no reclamation of deleted slots. Pick a number large
  // enough to make the benchmark slow for that case.
  const size_t capacity = 1 << 10;

  // Check N data points to cover load factors in [0.4, 0.8).
  const size_t kNumPoints = 10;
  for (size_t i = 0; i != kNumPoints; ++i)
    bm->Arg(std::ceil(
        capacity * (max_load_factor + i * max_load_factor / kNumPoints) / 2));
}
BENCHMARK(BM_CacheInSteadyState)->Apply(CacheInSteadyStateArgs);

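// Repeatedly erases and re-inserts the same keys in a small table.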
void BM_EraseEmplace(benchmark::State& state) {
  IntTable t;
  int64_t size = state.range(0);
  for (int64_t i = 0; i < size; ++i) {
    t.emplace(i);
  }
  while (state.KeepRunningBatch(size)) {
    for (int64_t i = 0; i < size; ++i) {
      benchmark::DoNotOptimize(t);
      t.erase(i);
      t.emplace(i);
    }
  }
}
BENCHMARK(BM_EraseEmplace)->Arg(1)->Arg(2)->Arg(4)->Arg(8)->Arg(16)->Arg(100);

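// Measures the cost of comparing an iterator against end().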
void BM_EndComparison(benchmark::State& state) {
  StringTable t = {{"a", "a"}, {"b", "b"}};
  auto it = t.begin();
  for (auto i : state) {
    benchmark::DoNotOptimize(t);
    benchmark::DoNotOptimize(it);
    benchmark::DoNotOptimize(it != t.end());
  }
}
BENCHMARK(BM_EndComparison);

void BM_Iteration(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  string_generator gen{12};
  StringTable t;

  size_t capacity = state.range(0);
  size_t size = state.range(1);
  t.reserve(capacity);

  while (t.size() < size) {
    t.emplace(gen(rng), gen(rng));
  }

  for (auto i : state) {
    benchmark::DoNotOptimize(t);
    for (auto it = t.begin(); it != t.end(); ++it) {
      benchmark::DoNotOptimize(*it);
    }
  }
}

BENCHMARK(BM_Iteration)
    ->ArgPair(1, 1)
    ->ArgPair(2, 2)
    ->ArgPair(4, 4)
    ->ArgPair(7, 7)
    ->ArgPair(10, 10)
    ->ArgPair(15, 15)
    ->ArgPair(16, 16)
    ->ArgPair(54, 54)
    ->ArgPair(100, 100)
    ->ArgPair(400, 400)
    // empty
    ->ArgPair(0, 0)
    ->ArgPair(10, 0)
    ->ArgPair(100, 0)
    ->ArgPair(1000, 0)
    ->ArgPair(10000, 0)
    // sparse
    ->ArgPair(100, 1)
    ->ArgPair(1000, 10);

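// Copy-constructs from a sparsely populated table (capacity reserved for
// roughly 10x the number of elements).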
void BM_CopyCtorSparseInt(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  IntTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});

  size_t size = state.range(0);
  t.reserve(size * 10);
  while (t.size() < size) {
    t.emplace(dist(rng));
  }

  for (auto i : state) {
    IntTable t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyCtorSparseInt)->Range(128, 4096);

void BM_CopyCtorInt(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  IntTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});

  size_t size = state.range(0);
  while (t.size() < size) {
    t.emplace(dist(rng));
  }

  for (auto i : state) {
    IntTable t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyCtorInt)->Range(128, 4096);

void BM_CopyCtorString(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  StringTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});

  size_t size = state.range(0);
  while (t.size() < size) {
    t.emplace(std::to_string(dist(rng)), std::to_string(dist(rng)));
  }

  for (auto i : state) {
    StringTable t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyCtorString)->Range(128, 4096);

void BM_CopyAssign(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  IntTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});
  while (t.size() < state.range(0)) {
    t.emplace(dist(rng));
  }

  IntTable t2;
  for (auto _ : state) {
    t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyAssign)->Range(128, 4096);

void BM_RangeCtor(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});
  std::vector<int> values;
  const size_t desired_size = state.range(0);
  while (values.size() < desired_size) {
    values.emplace_back(dist(rng));
  }

  for (auto unused : state) {
    IntTable t{values.begin(), values.end()};
    benchmark::DoNotOptimize(t);
  }
}
BENCHMARK(BM_RangeCtor)->Range(128, 65536);

void BM_NoOpReserveIntTable(benchmark::State& state) {
  IntTable t;
  t.reserve(100000);
  for (auto _ : state) {
    benchmark::DoNotOptimize(t);
    t.reserve(100000);
  }
}
BENCHMARK(BM_NoOpReserveIntTable);

void BM_NoOpReserveStringTable(benchmark::State& state) {
  StringTable t;
  t.reserve(100000);
  for (auto _ : state) {
    benchmark::DoNotOptimize(t);
    t.reserve(100000);
  }
}
BENCHMARK(BM_NoOpReserveStringTable);

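// Measures reserve() on freshly constructed tables; the tables themselves are
// (re)created in batches outside the timed region.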
void BM_ReserveIntTable(benchmark::State& state) {
  constexpr size_t kBatchSize = 1024;
  size_t reserve_size = static_cast<size_t>(state.range(0));

  std::vector<IntTable> tables;
  while (state.KeepRunningBatch(kBatchSize)) {
    state.PauseTiming();
    tables.clear();
    tables.resize(kBatchSize);
    state.ResumeTiming();
    for (auto& t : tables) {
      benchmark::DoNotOptimize(t);
      t.reserve(reserve_size);
      benchmark::DoNotOptimize(t);
    }
  }
}
BENCHMARK(BM_ReserveIntTable)->Range(1, 64);

void BM_ReserveStringTable(benchmark::State& state) {
  constexpr size_t kBatchSize = 1024;
  size_t reserve_size = static_cast<size_t>(state.range(0));

  std::vector<StringTable> tables;
  while (state.KeepRunningBatch(kBatchSize)) {
    state.PauseTiming();
    tables.clear();
    tables.resize(kBatchSize);
    state.ResumeTiming();
    for (auto& t : tables) {
      benchmark::DoNotOptimize(t);
      t.reserve(reserve_size);
      benchmark::DoNotOptimize(t);
    }
  }
}
BENCHMARK(BM_ReserveStringTable)->Range(1, 64);

// Like std::iota, except that ctrl_t doesn't support operator++.
template <typename CtrlIter>
void Iota(CtrlIter begin, CtrlIter end, int value) {
  for (; begin != end; ++begin, ++value) {
    *begin = static_cast<ctrl_t>(value);
  }
}

void BM_Group_Match(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -4);
  Group g{group.data()};
  h2_t h = 1;
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(h);
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.Match(h));
  }
}
BENCHMARK(BM_Group_Match);

void BM_Group_MaskEmpty(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -4);
  Group g{group.data()};
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.MaskEmpty());
  }
}
BENCHMARK(BM_Group_MaskEmpty);

void BM_Group_MaskEmptyOrDeleted(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -4);
  Group g{group.data()};
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.MaskEmptyOrDeleted());
  }
}
BENCHMARK(BM_Group_MaskEmptyOrDeleted);

void BM_Group_CountLeadingEmptyOrDeleted(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -2);
  Group g{group.data()};
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.CountLeadingEmptyOrDeleted());
  }
}
BENCHMARK(BM_Group_CountLeadingEmptyOrDeleted);

void BM_Group_MatchFirstEmptyOrDeleted(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -2);
  Group g{group.data()};
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(g);
    ::benchmark::DoNotOptimize(g.MaskEmptyOrDeleted().LowestBitSet());
  }
}
BENCHMARK(BM_Group_MatchFirstEmptyOrDeleted);

void BM_DropDeletes(benchmark::State& state) {
  constexpr size_t capacity = (1 << 20) - 1;
  std::vector<ctrl_t> ctrl(capacity + 1 + Group::kWidth);
  ctrl[capacity] = ctrl_t::kSentinel;
  std::vector<ctrl_t> pattern = {ctrl_t::kEmpty,   static_cast<ctrl_t>(2),
                                 ctrl_t::kDeleted, static_cast<ctrl_t>(2),
                                 ctrl_t::kEmpty,   static_cast<ctrl_t>(1),
                                 ctrl_t::kDeleted};
  for (size_t i = 0; i != capacity; ++i) {
    ctrl[i] = pattern[i % pattern.size()];
  }
  while (state.KeepRunning()) {
    state.PauseTiming();
    std::vector<ctrl_t> ctrl_copy = ctrl;
    state.ResumeTiming();
    ConvertDeletedToEmptyAndFullToDeleted(ctrl_copy.data(), capacity);
    ::benchmark::DoNotOptimize(ctrl_copy[capacity]);
  }
}
BENCHMARK(BM_DropDeletes);

void BM_Resize(benchmark::State& state) {
  // For now just measure a small cheap hash table since we
  // are mostly interested in the overhead of type-erasure
  // in resize().
  constexpr int kElements = 64;
  const int kCapacity = kElements * 2;

  IntTable table;
  for (int i = 0; i < kElements; i++) {
    table.insert(i);
  }
  for (auto unused : state) {
    table.rehash(0);
    table.rehash(kCapacity);
  }
}
BENCHMARK(BM_Resize);

}  // namespace
}  // namespace container_internal
ABSL_NAMESPACE_END
}  // namespace absl

// These methods are here to make it easy to examine the assembly for targeted
// parts of the API.
auto CodegenAbslRawHashSetInt64Find(absl::container_internal::IntTable* table,
                                    int64_t key) -> decltype(table->find(key)) {
  return table->find(key);
}

bool CodegenAbslRawHashSetInt64FindNeEnd(
    absl::container_internal::IntTable* table, int64_t key) {
  return table->find(key) != table->end();
}

// This is useful because the find isn't inlined but the iterator comparison is.
bool CodegenAbslRawHashSetStringFindNeEnd(
    absl::container_internal::StringTable* table, const std::string& key) {
  return table->find(key) != table->end();
}

auto CodegenAbslRawHashSetInt64Insert(absl::container_internal::IntTable* table,
                                      int64_t key)
    -> decltype(table->insert(key)) {
  return table->insert(key);
}

bool CodegenAbslRawHashSetInt64Contains(
    absl::container_internal::IntTable* table, int64_t key) {
  return table->contains(key);
}

void CodegenAbslRawHashSetInt64Iterate(
    absl::container_internal::IntTable* table) {
  for (auto x : *table) benchmark::DoNotOptimize(x);
}

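// Referencing the functions above keeps them from being optimized away, so
// their generated assembly can be examined.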
int odr =
    (::benchmark::DoNotOptimize(std::make_tuple(
         &CodegenAbslRawHashSetInt64Find, &CodegenAbslRawHashSetInt64FindNeEnd,
         &CodegenAbslRawHashSetStringFindNeEnd,
         &CodegenAbslRawHashSetInt64Insert, &CodegenAbslRawHashSetInt64Contains,
         &CodegenAbslRawHashSetInt64Iterate)),
     1);