• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Generates probe length statistics for many combinations of key types and key
16 // distributions, all using the default hash function for swisstable.
17 
18 #include <memory>
19 #include <regex>  // NOLINT
20 #include <vector>
21 
22 #include "absl/base/no_destructor.h"
23 #include "absl/container/flat_hash_map.h"
24 #include "absl/container/internal/hash_function_defaults.h"
25 #include "absl/container/internal/hashtable_debug.h"
26 #include "absl/container/internal/raw_hash_set.h"
27 #include "absl/random/distributions.h"
28 #include "absl/random/random.h"
29 #include "absl/strings/str_cat.h"
30 #include "absl/strings/str_format.h"
31 #include "absl/strings/string_view.h"
32 #include "absl/strings/strip.h"
33 #include "absl/types/optional.h"
34 
35 namespace {
36 
37 enum class OutputStyle { kRegular, kBenchmark };
38 
39 // The --benchmark command line flag.
40 // This is populated from main().
41 // When run in "benchmark" mode, we have different output. This allows
42 // A/B comparisons with tools like `benchy`.
43 absl::string_view benchmarks;
44 
output()45 OutputStyle output() {
46   return !benchmarks.empty() ? OutputStyle::kBenchmark : OutputStyle::kRegular;
47 }
48 
49 template <class T>
50 struct Policy {
51   using slot_type = T;
52   using key_type = T;
53   using init_type = T;
54 
55   template <class allocator_type, class Arg>
construct__anon5626ca900111::Policy56   static void construct(allocator_type* alloc, slot_type* slot,
57                         const Arg& arg) {
58     std::allocator_traits<allocator_type>::construct(*alloc, slot, arg);
59   }
60 
61   template <class allocator_type>
destroy__anon5626ca900111::Policy62   static void destroy(allocator_type* alloc, slot_type* slot) {
63     std::allocator_traits<allocator_type>::destroy(*alloc, slot);
64   }
65 
element__anon5626ca900111::Policy66   static slot_type& element(slot_type* slot) { return *slot; }
67 
68   template <class F, class... Args>
apply__anon5626ca900111::Policy69   static auto apply(F&& f, const slot_type& arg)
70       -> decltype(std::forward<F>(f)(arg, arg)) {
71     return std::forward<F>(f)(arg, arg);
72   }
73 };
74 
GlobalBitGen()75 absl::BitGen& GlobalBitGen() {
76   static absl::NoDestructor<absl::BitGen> value;
77   return *value;
78 }
79 
80 // Keeps a pool of allocations and randomly gives one out.
81 // This introduces more randomization to the addresses given to swisstable and
82 // should help smooth out this factor from probe length calculation.
83 template <class T>
84 class RandomizedAllocator {
85  public:
86   using value_type = T;
87 
88   RandomizedAllocator() = default;
89   template <typename U>
RandomizedAllocator(RandomizedAllocator<U>)90   RandomizedAllocator(RandomizedAllocator<U>) {}  // NOLINT
91 
allocate(size_t n)92   static T* allocate(size_t n) {
93     auto& pointers = GetPointers(n);
94     // Fill the pool
95     while (pointers.size() < kRandomPool) {
96       pointers.push_back(std::allocator<T>{}.allocate(n));
97     }
98 
99     // Choose a random one.
100     size_t i = absl::Uniform<size_t>(GlobalBitGen(), 0, pointers.size());
101     T* result = pointers[i];
102     pointers[i] = pointers.back();
103     pointers.pop_back();
104     return result;
105   }
106 
deallocate(T * p,size_t n)107   static void deallocate(T* p, size_t n) {
108     // Just put it back on the pool. No need to release the memory.
109     GetPointers(n).push_back(p);
110   }
111 
112  private:
113   // We keep at least kRandomPool allocations for each size.
114   static constexpr size_t kRandomPool = 20;
115 
GetPointers(size_t n)116   static std::vector<T*>& GetPointers(size_t n) {
117     static absl::NoDestructor<absl::flat_hash_map<size_t, std::vector<T*>>> m;
118     return (*m)[n];
119   }
120 };
121 
122 template <class T>
123 struct DefaultHash {
124   using type = absl::container_internal::hash_default_hash<T>;
125 };
126 
127 template <class T>
128 using DefaultHashT = typename DefaultHash<T>::type;
129 
130 template <class T>
131 struct Table : absl::container_internal::raw_hash_set<
132                    Policy<T>, DefaultHashT<T>,
133                    absl::container_internal::hash_default_eq<T>,
134                    RandomizedAllocator<T>> {};
135 
136 struct LoadSizes {
137   size_t min_load;
138   size_t max_load;
139 };
140 
GetMinMaxLoadSizes()141 LoadSizes GetMinMaxLoadSizes() {
142   static const auto sizes = [] {
143     Table<int> t;
144 
145     // First, fill enough to have a good distribution.
146     constexpr size_t kMinSize = 10000;
147     while (t.size() < kMinSize) t.insert(t.size());
148 
149     const auto reach_min_load_factor = [&] {
150       const double lf = t.load_factor();
151       while (lf <= t.load_factor()) t.insert(t.size());
152     };
153 
154     // Then, insert until we reach min load factor.
155     reach_min_load_factor();
156     const size_t min_load_size = t.size();
157 
158     // Keep going until we hit min load factor again, then go back one.
159     t.insert(t.size());
160     reach_min_load_factor();
161 
162     return LoadSizes{min_load_size, t.size() - 1};
163   }();
164   return sizes;
165 }
166 
167 struct Ratios {
168   double min_load;
169   double avg_load;
170   double max_load;
171 };
172 
173 // See absl/container/internal/hashtable_debug.h for details on
174 // probe length calculation.
175 template <class ElemFn>
CollectMeanProbeLengths()176 Ratios CollectMeanProbeLengths() {
177   const auto min_max_sizes = GetMinMaxLoadSizes();
178 
179   ElemFn elem;
180   using Key = decltype(elem());
181   Table<Key> t;
182 
183   Ratios result;
184   while (t.size() < min_max_sizes.min_load) t.insert(elem());
185   result.min_load =
186       absl::container_internal::GetHashtableDebugProbeSummary(t).mean;
187 
188   while (t.size() < (min_max_sizes.min_load + min_max_sizes.max_load) / 2)
189     t.insert(elem());
190   result.avg_load =
191       absl::container_internal::GetHashtableDebugProbeSummary(t).mean;
192 
193   while (t.size() < min_max_sizes.max_load) t.insert(elem());
194   result.max_load =
195       absl::container_internal::GetHashtableDebugProbeSummary(t).mean;
196 
197   return result;
198 }
199 
200 template <int Align>
PointerForAlignment()201 uintptr_t PointerForAlignment() {
202   alignas(Align) static constexpr uintptr_t kInitPointer = 0;
203   return reinterpret_cast<uintptr_t>(&kInitPointer);
204 }
205 
206 // This incomplete type is used for testing hash of pointers of different
207 // alignments.
208 // NOTE: We are generating invalid pointer values on the fly with
209 // reinterpret_cast. There are not "safely derived" pointers so using them is
210 // technically UB. It is unlikely to be a problem, though.
211 template <int Align>
212 struct Ptr;
213 
214 template <int Align>
MakePtr(uintptr_t v)215 Ptr<Align>* MakePtr(uintptr_t v) {
216   if (sizeof(v) == 8) {
217     constexpr int kCopyBits = 16;
218     // Ensure high bits are all the same.
219     v = static_cast<uintptr_t>(static_cast<intptr_t>(v << kCopyBits) >>
220                                kCopyBits);
221   }
222   return reinterpret_cast<Ptr<Align>*>(v);
223 }
224 
225 struct IntIdentity {
226   uint64_t i;
operator ==(IntIdentity a,IntIdentity b)227   friend bool operator==(IntIdentity a, IntIdentity b) { return a.i == b.i; }
operator ++__anon5626ca900111::IntIdentity228   IntIdentity operator++(int) { return IntIdentity{i++}; }
229 };
230 
231 template <int Align>
232 struct PtrIdentity {
PtrIdentity__anon5626ca900111::PtrIdentity233   explicit PtrIdentity(uintptr_t val = PointerForAlignment<Align>()) : i(val) {}
234   uintptr_t i;
operator ==(PtrIdentity a,PtrIdentity b)235   friend bool operator==(PtrIdentity a, PtrIdentity b) { return a.i == b.i; }
operator ++__anon5626ca900111::PtrIdentity236   PtrIdentity operator++(int) {
237     PtrIdentity p(i);
238     i += Align;
239     return p;
240   }
241 };
242 
243 constexpr char kStringFormat[] = "/path/to/file/name-%07d-of-9999999.txt";
244 
245 template <bool small>
246 struct String {
247   std::string value;
Make__anon5626ca900111::String248   static std::string Make(uint32_t v) {
249     return {small ? absl::StrCat(v) : absl::StrFormat(kStringFormat, v)};
250   }
251 };
252 
253 template <>
254 struct DefaultHash<IntIdentity> {
255   struct type {
operator ()__anon5626ca900111::DefaultHash::type256     size_t operator()(IntIdentity t) const { return t.i; }
257   };
258 };
259 
260 template <int Align>
261 struct DefaultHash<PtrIdentity<Align>> {
262   struct type {
operator ()__anon5626ca900111::DefaultHash::type263     size_t operator()(PtrIdentity<Align> t) const { return t.i; }
264   };
265 };
266 
267 template <class T>
268 struct Sequential {
operator ()__anon5626ca900111::Sequential269   T operator()() const { return current++; }
270   mutable T current{};
271 };
272 
273 template <int Align>
274 struct Sequential<Ptr<Align>*> {
operator ()__anon5626ca900111::Sequential275   Ptr<Align>* operator()() const {
276     auto* result = MakePtr<Align>(current);
277     current += Align;
278     return result;
279   }
280   mutable uintptr_t current = PointerForAlignment<Align>();
281 };
282 
283 
284 template <bool small>
285 struct Sequential<String<small>> {
operator ()__anon5626ca900111::Sequential286   std::string operator()() const { return String<small>::Make(current++); }
287   mutable uint32_t current = 0;
288 };
289 
290 template <class T, class U>
291 struct Sequential<std::pair<T, U>> {
292   mutable Sequential<T> tseq;
293   mutable Sequential<U> useq;
294 
295   using RealT = decltype(tseq());
296   using RealU = decltype(useq());
297 
298   mutable std::vector<RealT> ts;
299   mutable std::vector<RealU> us;
300   mutable size_t ti = 0, ui = 0;
301 
operator ()__anon5626ca900111::Sequential302   std::pair<RealT, RealU> operator()() const {
303     std::pair<RealT, RealU> value{get_t(), get_u()};
304     if (ti == 0) {
305       ti = ui + 1;
306       ui = 0;
307     } else {
308       --ti;
309       ++ui;
310     }
311     return value;
312   }
313 
get_t__anon5626ca900111::Sequential314   RealT get_t() const {
315     while (ti >= ts.size()) ts.push_back(tseq());
316     return ts[ti];
317   }
318 
get_u__anon5626ca900111::Sequential319   RealU get_u() const {
320     while (ui >= us.size()) us.push_back(useq());
321     return us[ui];
322   }
323 };
324 
325 template <class T, int percent_skip>
326 struct AlmostSequential {
327   mutable Sequential<T> current;
328 
operator ()__anon5626ca900111::AlmostSequential329   auto operator()() const -> decltype(current()) {
330     while (absl::Uniform(GlobalBitGen(), 0.0, 1.0) <= percent_skip / 100.)
331       current();
332     return current();
333   }
334 };
335 
336 struct Uniform {
337   template <typename T>
operator ()__anon5626ca900111::Uniform338   T operator()(T) const {
339     return absl::Uniform<T>(absl::IntervalClosed, GlobalBitGen(), T{0}, ~T{0});
340   }
341 };
342 
343 struct Gaussian {
344   template <typename T>
operator ()__anon5626ca900111::Gaussian345   T operator()(T) const {
346     double d;
347     do {
348       d = absl::Gaussian<double>(GlobalBitGen(), 1e6, 1e4);
349     } while (d <= 0 || d > std::numeric_limits<T>::max() / 2);
350     return static_cast<T>(d);
351   }
352 };
353 
354 struct Zipf {
355   template <typename T>
operator ()__anon5626ca900111::Zipf356   T operator()(T) const {
357     return absl::Zipf<T>(GlobalBitGen(), std::numeric_limits<T>::max(), 1.6);
358   }
359 };
360 
361 template <class T, class Dist>
362 struct Random {
operator ()__anon5626ca900111::Random363   T operator()() const { return Dist{}(T{}); }
364 };
365 
366 template <class Dist, int Align>
367 struct Random<Ptr<Align>*, Dist> {
operator ()__anon5626ca900111::Random368   Ptr<Align>* operator()() const {
369     return MakePtr<Align>(Random<uintptr_t, Dist>{}() * Align);
370   }
371 };
372 
373 template <class Dist>
374 struct Random<IntIdentity, Dist> {
operator ()__anon5626ca900111::Random375   IntIdentity operator()() const {
376     return IntIdentity{Random<uint64_t, Dist>{}()};
377   }
378 };
379 
380 template <class Dist, int Align>
381 struct Random<PtrIdentity<Align>, Dist> {
operator ()__anon5626ca900111::Random382   PtrIdentity<Align> operator()() const {
383     return PtrIdentity<Align>{Random<uintptr_t, Dist>{}() * Align};
384   }
385 };
386 
387 template <class Dist, bool small>
388 struct Random<String<small>, Dist> {
operator ()__anon5626ca900111::Random389   std::string operator()() const {
390     return String<small>::Make(Random<uint32_t, Dist>{}());
391   }
392 };
393 
394 template <class T, class U, class Dist>
395 struct Random<std::pair<T, U>, Dist> {
operator ()__anon5626ca900111::Random396   auto operator()() const
397       -> decltype(std::make_pair(Random<T, Dist>{}(), Random<U, Dist>{}())) {
398     return std::make_pair(Random<T, Dist>{}(), Random<U, Dist>{}());
399   }
400 };
401 
402 template <typename>
403 std::string Name();
404 
Name(uint32_t *)405 std::string Name(uint32_t*) { return "u32"; }
Name(uint64_t *)406 std::string Name(uint64_t*) { return "u64"; }
Name(IntIdentity *)407 std::string Name(IntIdentity*) { return "IntIdentity"; }
408 
409 template <int Align>
Name(Ptr<Align> **)410 std::string Name(Ptr<Align>**) {
411   return absl::StrCat("Ptr", Align);
412 }
413 
414 template <int Align>
Name(PtrIdentity<Align> *)415 std::string Name(PtrIdentity<Align>*) {
416   return absl::StrCat("PtrIdentity", Align);
417 }
418 
419 template <bool small>
Name(String<small> *)420 std::string Name(String<small>*) {
421   return small ? "StrS" : "StrL";
422 }
423 
424 template <class T, class U>
Name(std::pair<T,U> *)425 std::string Name(std::pair<T, U>*) {
426   if (output() == OutputStyle::kBenchmark)
427     return absl::StrCat("P_", Name<T>(), "_", Name<U>());
428   return absl::StrCat("P<", Name<T>(), ",", Name<U>(), ">");
429 }
430 
431 template <class T>
Name(Sequential<T> *)432 std::string Name(Sequential<T>*) {
433   return "Sequential";
434 }
435 
436 template <class T, int P>
Name(AlmostSequential<T,P> *)437 std::string Name(AlmostSequential<T, P>*) {
438   return absl::StrCat("AlmostSeq_", P);
439 }
440 
441 template <class T>
Name(Random<T,Uniform> *)442 std::string Name(Random<T, Uniform>*) {
443   return "UnifRand";
444 }
445 
446 template <class T>
Name(Random<T,Gaussian> *)447 std::string Name(Random<T, Gaussian>*) {
448   return "GausRand";
449 }
450 
451 template <class T>
Name(Random<T,Zipf> *)452 std::string Name(Random<T, Zipf>*) {
453   return "ZipfRand";
454 }
455 
456 template <typename T>
Name()457 std::string Name() {
458   return Name(static_cast<T*>(nullptr));
459 }
460 
461 constexpr int kNameWidth = 15;
462 constexpr int kDistWidth = 16;
463 
CanRunBenchmark(absl::string_view name)464 bool CanRunBenchmark(absl::string_view name) {
465   static const absl::NoDestructor<absl::optional<std::regex>> filter([] {
466     return benchmarks.empty() || benchmarks == "all"
467                ? absl::nullopt
468                : absl::make_optional(std::regex(std::string(benchmarks)));
469   }());
470   return !filter->has_value() || std::regex_search(std::string(name), **filter);
471 }
472 
473 struct Result {
474   std::string name;
475   std::string dist_name;
476   Ratios ratios;
477 };
478 
479 template <typename T, typename Dist>
RunForTypeAndDistribution(std::vector<Result> & results)480 void RunForTypeAndDistribution(std::vector<Result>& results) {
481   std::string name = absl::StrCat(Name<T>(), "/", Name<Dist>());
482   // We have to check against all three names (min/avg/max) before we run it.
483   // If any of them is enabled, we run it.
484   if (!CanRunBenchmark(absl::StrCat(name, "/min")) &&
485       !CanRunBenchmark(absl::StrCat(name, "/avg")) &&
486       !CanRunBenchmark(absl::StrCat(name, "/max"))) {
487     return;
488   }
489   results.push_back({Name<T>(), Name<Dist>(), CollectMeanProbeLengths<Dist>()});
490 }
491 
492 template <class T>
RunForType(std::vector<Result> & results)493 void RunForType(std::vector<Result>& results) {
494   RunForTypeAndDistribution<T, Sequential<T>>(results);
495   RunForTypeAndDistribution<T, AlmostSequential<T, 20>>(results);
496   RunForTypeAndDistribution<T, AlmostSequential<T, 50>>(results);
497   RunForTypeAndDistribution<T, Random<T, Uniform>>(results);
498 #ifdef NDEBUG
499   // Disable these in non-opt mode because they take too long.
500   RunForTypeAndDistribution<T, Random<T, Gaussian>>(results);
501   RunForTypeAndDistribution<T, Random<T, Zipf>>(results);
502 #endif  // NDEBUG
503 }
504 
505 }  // namespace
506 
main(int argc,char ** argv)507 int main(int argc, char** argv) {
508   // Parse the benchmark flags. Ignore all of them except the regex pattern.
509   for (int i = 1; i < argc; ++i) {
510     absl::string_view arg = argv[i];
511     const auto next = [&] { return argv[std::min(i + 1, argc - 1)]; };
512 
513     if (absl::ConsumePrefix(&arg, "--benchmark_filter")) {
514       if (arg == "") {
515         // --benchmark_filter X
516         benchmarks = next();
517       } else if (absl::ConsumePrefix(&arg, "=")) {
518         // --benchmark_filter=X
519         benchmarks = arg;
520       }
521     }
522 
523     // Any --benchmark flag turns on the mode.
524     if (absl::ConsumePrefix(&arg, "--benchmark")) {
525       if (benchmarks.empty()) benchmarks="all";
526     }
527   }
528 
529   std::vector<Result> results;
530   RunForType<uint64_t>(results);
531   RunForType<IntIdentity>(results);
532   RunForType<Ptr<8>*>(results);
533   RunForType<Ptr<16>*>(results);
534   RunForType<Ptr<32>*>(results);
535   RunForType<Ptr<64>*>(results);
536   RunForType<PtrIdentity<8>>(results);
537   RunForType<PtrIdentity<16>>(results);
538   RunForType<PtrIdentity<32>>(results);
539   RunForType<PtrIdentity<64>>(results);
540   RunForType<std::pair<uint32_t, uint32_t>>(results);
541   RunForType<String<true>>(results);
542   RunForType<String<false>>(results);
543   RunForType<std::pair<uint64_t, String<true>>>(results);
544   RunForType<std::pair<String<true>, uint64_t>>(results);
545   RunForType<std::pair<uint64_t, String<false>>>(results);
546   RunForType<std::pair<String<false>, uint64_t>>(results);
547 
548   switch (output()) {
549     case OutputStyle::kRegular:
550       absl::PrintF("%-*s%-*s       Min       Avg       Max\n%s\n", kNameWidth,
551                    "Type", kDistWidth, "Distribution",
552                    std::string(kNameWidth + kDistWidth + 10 * 3, '-'));
553       for (const auto& result : results) {
554         absl::PrintF("%-*s%-*s  %8.4f  %8.4f  %8.4f\n", kNameWidth, result.name,
555                      kDistWidth, result.dist_name, result.ratios.min_load,
556                      result.ratios.avg_load, result.ratios.max_load);
557       }
558       break;
559     case OutputStyle::kBenchmark: {
560       absl::PrintF("{\n");
561       absl::PrintF("  \"benchmarks\": [\n");
562       absl::string_view comma;
563       for (const auto& result : results) {
564         auto print = [&](absl::string_view stat, double Ratios::*val) {
565           std::string name =
566               absl::StrCat(result.name, "/", result.dist_name, "/", stat);
567           // Check the regex again. We might had have enabled only one of the
568           // stats for the benchmark.
569           if (!CanRunBenchmark(name)) return;
570           absl::PrintF("    %s{\n", comma);
571           absl::PrintF("      \"cpu_time\": %f,\n", 1e9 * result.ratios.*val);
572           absl::PrintF("      \"real_time\": %f,\n", 1e9 * result.ratios.*val);
573           absl::PrintF("      \"iterations\": 1,\n");
574           absl::PrintF("      \"name\": \"%s\",\n", name);
575           absl::PrintF("      \"time_unit\": \"ns\"\n");
576           absl::PrintF("    }\n");
577           comma = ",";
578         };
579         print("min", &Ratios::min_load);
580         print("avg", &Ratios::avg_load);
581         print("max", &Ratios::max_load);
582       }
583       absl::PrintF("  ],\n");
584       absl::PrintF("  \"context\": {\n");
585       absl::PrintF("  }\n");
586       absl::PrintF("}\n");
587       break;
588     }
589   }
590 
591   return 0;
592 }
593