1 // Copyright 2018 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Generates probe length statistics for many combinations of key types and key
16 // distributions, all using the default hash function for swisstable.
17
18 #include <memory>
19 #include <regex> // NOLINT
20 #include <vector>
21
22 #include "absl/base/no_destructor.h"
23 #include "absl/container/flat_hash_map.h"
24 #include "absl/container/internal/hash_function_defaults.h"
25 #include "absl/container/internal/hashtable_debug.h"
26 #include "absl/container/internal/raw_hash_set.h"
27 #include "absl/random/distributions.h"
28 #include "absl/random/random.h"
29 #include "absl/strings/str_cat.h"
30 #include "absl/strings/str_format.h"
31 #include "absl/strings/string_view.h"
32 #include "absl/strings/strip.h"
33 #include "absl/types/optional.h"
34
35 namespace {
36
37 enum class OutputStyle { kRegular, kBenchmark };
38
39 // The --benchmark command line flag.
40 // This is populated from main().
41 // When run in "benchmark" mode, we have different output. This allows
42 // A/B comparisons with tools like `benchy`.
43 absl::string_view benchmarks;
44
output()45 OutputStyle output() {
46 return !benchmarks.empty() ? OutputStyle::kBenchmark : OutputStyle::kRegular;
47 }
48
49 template <class T>
50 struct Policy {
51 using slot_type = T;
52 using key_type = T;
53 using init_type = T;
54
55 template <class allocator_type, class Arg>
construct__anon5626ca900111::Policy56 static void construct(allocator_type* alloc, slot_type* slot,
57 const Arg& arg) {
58 std::allocator_traits<allocator_type>::construct(*alloc, slot, arg);
59 }
60
61 template <class allocator_type>
destroy__anon5626ca900111::Policy62 static void destroy(allocator_type* alloc, slot_type* slot) {
63 std::allocator_traits<allocator_type>::destroy(*alloc, slot);
64 }
65
element__anon5626ca900111::Policy66 static slot_type& element(slot_type* slot) { return *slot; }
67
68 template <class F, class... Args>
apply__anon5626ca900111::Policy69 static auto apply(F&& f, const slot_type& arg)
70 -> decltype(std::forward<F>(f)(arg, arg)) {
71 return std::forward<F>(f)(arg, arg);
72 }
73 };
74
GlobalBitGen()75 absl::BitGen& GlobalBitGen() {
76 static absl::NoDestructor<absl::BitGen> value;
77 return *value;
78 }
79
80 // Keeps a pool of allocations and randomly gives one out.
81 // This introduces more randomization to the addresses given to swisstable and
82 // should help smooth out this factor from probe length calculation.
83 template <class T>
84 class RandomizedAllocator {
85 public:
86 using value_type = T;
87
88 RandomizedAllocator() = default;
89 template <typename U>
RandomizedAllocator(RandomizedAllocator<U>)90 RandomizedAllocator(RandomizedAllocator<U>) {} // NOLINT
91
allocate(size_t n)92 static T* allocate(size_t n) {
93 auto& pointers = GetPointers(n);
94 // Fill the pool
95 while (pointers.size() < kRandomPool) {
96 pointers.push_back(std::allocator<T>{}.allocate(n));
97 }
98
99 // Choose a random one.
100 size_t i = absl::Uniform<size_t>(GlobalBitGen(), 0, pointers.size());
101 T* result = pointers[i];
102 pointers[i] = pointers.back();
103 pointers.pop_back();
104 return result;
105 }
106
deallocate(T * p,size_t n)107 static void deallocate(T* p, size_t n) {
108 // Just put it back on the pool. No need to release the memory.
109 GetPointers(n).push_back(p);
110 }
111
112 private:
113 // We keep at least kRandomPool allocations for each size.
114 static constexpr size_t kRandomPool = 20;
115
GetPointers(size_t n)116 static std::vector<T*>& GetPointers(size_t n) {
117 static absl::NoDestructor<absl::flat_hash_map<size_t, std::vector<T*>>> m;
118 return (*m)[n];
119 }
120 };
121
122 template <class T>
123 struct DefaultHash {
124 using type = absl::container_internal::hash_default_hash<T>;
125 };
126
127 template <class T>
128 using DefaultHashT = typename DefaultHash<T>::type;
129
130 template <class T>
131 struct Table : absl::container_internal::raw_hash_set<
132 Policy<T>, DefaultHashT<T>,
133 absl::container_internal::hash_default_eq<T>,
134 RandomizedAllocator<T>> {};
135
136 struct LoadSizes {
137 size_t min_load;
138 size_t max_load;
139 };
140
GetMinMaxLoadSizes()141 LoadSizes GetMinMaxLoadSizes() {
142 static const auto sizes = [] {
143 Table<int> t;
144
145 // First, fill enough to have a good distribution.
146 constexpr size_t kMinSize = 10000;
147 while (t.size() < kMinSize) t.insert(t.size());
148
149 const auto reach_min_load_factor = [&] {
150 const double lf = t.load_factor();
151 while (lf <= t.load_factor()) t.insert(t.size());
152 };
153
154 // Then, insert until we reach min load factor.
155 reach_min_load_factor();
156 const size_t min_load_size = t.size();
157
158 // Keep going until we hit min load factor again, then go back one.
159 t.insert(t.size());
160 reach_min_load_factor();
161
162 return LoadSizes{min_load_size, t.size() - 1};
163 }();
164 return sizes;
165 }
166
167 struct Ratios {
168 double min_load;
169 double avg_load;
170 double max_load;
171 };
172
173 // See absl/container/internal/hashtable_debug.h for details on
174 // probe length calculation.
175 template <class ElemFn>
CollectMeanProbeLengths()176 Ratios CollectMeanProbeLengths() {
177 const auto min_max_sizes = GetMinMaxLoadSizes();
178
179 ElemFn elem;
180 using Key = decltype(elem());
181 Table<Key> t;
182
183 Ratios result;
184 while (t.size() < min_max_sizes.min_load) t.insert(elem());
185 result.min_load =
186 absl::container_internal::GetHashtableDebugProbeSummary(t).mean;
187
188 while (t.size() < (min_max_sizes.min_load + min_max_sizes.max_load) / 2)
189 t.insert(elem());
190 result.avg_load =
191 absl::container_internal::GetHashtableDebugProbeSummary(t).mean;
192
193 while (t.size() < min_max_sizes.max_load) t.insert(elem());
194 result.max_load =
195 absl::container_internal::GetHashtableDebugProbeSummary(t).mean;
196
197 return result;
198 }
199
200 template <int Align>
PointerForAlignment()201 uintptr_t PointerForAlignment() {
202 alignas(Align) static constexpr uintptr_t kInitPointer = 0;
203 return reinterpret_cast<uintptr_t>(&kInitPointer);
204 }
205
206 // This incomplete type is used for testing hash of pointers of different
207 // alignments.
208 // NOTE: We are generating invalid pointer values on the fly with
209 // reinterpret_cast. There are not "safely derived" pointers so using them is
210 // technically UB. It is unlikely to be a problem, though.
211 template <int Align>
212 struct Ptr;
213
214 template <int Align>
MakePtr(uintptr_t v)215 Ptr<Align>* MakePtr(uintptr_t v) {
216 if (sizeof(v) == 8) {
217 constexpr int kCopyBits = 16;
218 // Ensure high bits are all the same.
219 v = static_cast<uintptr_t>(static_cast<intptr_t>(v << kCopyBits) >>
220 kCopyBits);
221 }
222 return reinterpret_cast<Ptr<Align>*>(v);
223 }
224
225 struct IntIdentity {
226 uint64_t i;
operator ==(IntIdentity a,IntIdentity b)227 friend bool operator==(IntIdentity a, IntIdentity b) { return a.i == b.i; }
operator ++__anon5626ca900111::IntIdentity228 IntIdentity operator++(int) { return IntIdentity{i++}; }
229 };
230
231 template <int Align>
232 struct PtrIdentity {
PtrIdentity__anon5626ca900111::PtrIdentity233 explicit PtrIdentity(uintptr_t val = PointerForAlignment<Align>()) : i(val) {}
234 uintptr_t i;
operator ==(PtrIdentity a,PtrIdentity b)235 friend bool operator==(PtrIdentity a, PtrIdentity b) { return a.i == b.i; }
operator ++__anon5626ca900111::PtrIdentity236 PtrIdentity operator++(int) {
237 PtrIdentity p(i);
238 i += Align;
239 return p;
240 }
241 };
242
243 constexpr char kStringFormat[] = "/path/to/file/name-%07d-of-9999999.txt";
244
245 template <bool small>
246 struct String {
247 std::string value;
Make__anon5626ca900111::String248 static std::string Make(uint32_t v) {
249 return {small ? absl::StrCat(v) : absl::StrFormat(kStringFormat, v)};
250 }
251 };
252
253 template <>
254 struct DefaultHash<IntIdentity> {
255 struct type {
operator ()__anon5626ca900111::DefaultHash::type256 size_t operator()(IntIdentity t) const { return t.i; }
257 };
258 };
259
260 template <int Align>
261 struct DefaultHash<PtrIdentity<Align>> {
262 struct type {
operator ()__anon5626ca900111::DefaultHash::type263 size_t operator()(PtrIdentity<Align> t) const { return t.i; }
264 };
265 };
266
267 template <class T>
268 struct Sequential {
operator ()__anon5626ca900111::Sequential269 T operator()() const { return current++; }
270 mutable T current{};
271 };
272
273 template <int Align>
274 struct Sequential<Ptr<Align>*> {
operator ()__anon5626ca900111::Sequential275 Ptr<Align>* operator()() const {
276 auto* result = MakePtr<Align>(current);
277 current += Align;
278 return result;
279 }
280 mutable uintptr_t current = PointerForAlignment<Align>();
281 };
282
283
284 template <bool small>
285 struct Sequential<String<small>> {
operator ()__anon5626ca900111::Sequential286 std::string operator()() const { return String<small>::Make(current++); }
287 mutable uint32_t current = 0;
288 };
289
290 template <class T, class U>
291 struct Sequential<std::pair<T, U>> {
292 mutable Sequential<T> tseq;
293 mutable Sequential<U> useq;
294
295 using RealT = decltype(tseq());
296 using RealU = decltype(useq());
297
298 mutable std::vector<RealT> ts;
299 mutable std::vector<RealU> us;
300 mutable size_t ti = 0, ui = 0;
301
operator ()__anon5626ca900111::Sequential302 std::pair<RealT, RealU> operator()() const {
303 std::pair<RealT, RealU> value{get_t(), get_u()};
304 if (ti == 0) {
305 ti = ui + 1;
306 ui = 0;
307 } else {
308 --ti;
309 ++ui;
310 }
311 return value;
312 }
313
get_t__anon5626ca900111::Sequential314 RealT get_t() const {
315 while (ti >= ts.size()) ts.push_back(tseq());
316 return ts[ti];
317 }
318
get_u__anon5626ca900111::Sequential319 RealU get_u() const {
320 while (ui >= us.size()) us.push_back(useq());
321 return us[ui];
322 }
323 };
324
325 template <class T, int percent_skip>
326 struct AlmostSequential {
327 mutable Sequential<T> current;
328
operator ()__anon5626ca900111::AlmostSequential329 auto operator()() const -> decltype(current()) {
330 while (absl::Uniform(GlobalBitGen(), 0.0, 1.0) <= percent_skip / 100.)
331 current();
332 return current();
333 }
334 };
335
336 struct Uniform {
337 template <typename T>
operator ()__anon5626ca900111::Uniform338 T operator()(T) const {
339 return absl::Uniform<T>(absl::IntervalClosed, GlobalBitGen(), T{0}, ~T{0});
340 }
341 };
342
343 struct Gaussian {
344 template <typename T>
operator ()__anon5626ca900111::Gaussian345 T operator()(T) const {
346 double d;
347 do {
348 d = absl::Gaussian<double>(GlobalBitGen(), 1e6, 1e4);
349 } while (d <= 0 || d > std::numeric_limits<T>::max() / 2);
350 return static_cast<T>(d);
351 }
352 };
353
354 struct Zipf {
355 template <typename T>
operator ()__anon5626ca900111::Zipf356 T operator()(T) const {
357 return absl::Zipf<T>(GlobalBitGen(), std::numeric_limits<T>::max(), 1.6);
358 }
359 };
360
361 template <class T, class Dist>
362 struct Random {
operator ()__anon5626ca900111::Random363 T operator()() const { return Dist{}(T{}); }
364 };
365
366 template <class Dist, int Align>
367 struct Random<Ptr<Align>*, Dist> {
operator ()__anon5626ca900111::Random368 Ptr<Align>* operator()() const {
369 return MakePtr<Align>(Random<uintptr_t, Dist>{}() * Align);
370 }
371 };
372
373 template <class Dist>
374 struct Random<IntIdentity, Dist> {
operator ()__anon5626ca900111::Random375 IntIdentity operator()() const {
376 return IntIdentity{Random<uint64_t, Dist>{}()};
377 }
378 };
379
380 template <class Dist, int Align>
381 struct Random<PtrIdentity<Align>, Dist> {
operator ()__anon5626ca900111::Random382 PtrIdentity<Align> operator()() const {
383 return PtrIdentity<Align>{Random<uintptr_t, Dist>{}() * Align};
384 }
385 };
386
387 template <class Dist, bool small>
388 struct Random<String<small>, Dist> {
operator ()__anon5626ca900111::Random389 std::string operator()() const {
390 return String<small>::Make(Random<uint32_t, Dist>{}());
391 }
392 };
393
394 template <class T, class U, class Dist>
395 struct Random<std::pair<T, U>, Dist> {
operator ()__anon5626ca900111::Random396 auto operator()() const
397 -> decltype(std::make_pair(Random<T, Dist>{}(), Random<U, Dist>{}())) {
398 return std::make_pair(Random<T, Dist>{}(), Random<U, Dist>{}());
399 }
400 };
401
402 template <typename>
403 std::string Name();
404
Name(uint32_t *)405 std::string Name(uint32_t*) { return "u32"; }
Name(uint64_t *)406 std::string Name(uint64_t*) { return "u64"; }
Name(IntIdentity *)407 std::string Name(IntIdentity*) { return "IntIdentity"; }
408
409 template <int Align>
Name(Ptr<Align> **)410 std::string Name(Ptr<Align>**) {
411 return absl::StrCat("Ptr", Align);
412 }
413
414 template <int Align>
Name(PtrIdentity<Align> *)415 std::string Name(PtrIdentity<Align>*) {
416 return absl::StrCat("PtrIdentity", Align);
417 }
418
419 template <bool small>
Name(String<small> *)420 std::string Name(String<small>*) {
421 return small ? "StrS" : "StrL";
422 }
423
424 template <class T, class U>
Name(std::pair<T,U> *)425 std::string Name(std::pair<T, U>*) {
426 if (output() == OutputStyle::kBenchmark)
427 return absl::StrCat("P_", Name<T>(), "_", Name<U>());
428 return absl::StrCat("P<", Name<T>(), ",", Name<U>(), ">");
429 }
430
431 template <class T>
Name(Sequential<T> *)432 std::string Name(Sequential<T>*) {
433 return "Sequential";
434 }
435
436 template <class T, int P>
Name(AlmostSequential<T,P> *)437 std::string Name(AlmostSequential<T, P>*) {
438 return absl::StrCat("AlmostSeq_", P);
439 }
440
441 template <class T>
Name(Random<T,Uniform> *)442 std::string Name(Random<T, Uniform>*) {
443 return "UnifRand";
444 }
445
446 template <class T>
Name(Random<T,Gaussian> *)447 std::string Name(Random<T, Gaussian>*) {
448 return "GausRand";
449 }
450
451 template <class T>
Name(Random<T,Zipf> *)452 std::string Name(Random<T, Zipf>*) {
453 return "ZipfRand";
454 }
455
456 template <typename T>
Name()457 std::string Name() {
458 return Name(static_cast<T*>(nullptr));
459 }
460
461 constexpr int kNameWidth = 15;
462 constexpr int kDistWidth = 16;
463
CanRunBenchmark(absl::string_view name)464 bool CanRunBenchmark(absl::string_view name) {
465 static const absl::NoDestructor<absl::optional<std::regex>> filter([] {
466 return benchmarks.empty() || benchmarks == "all"
467 ? absl::nullopt
468 : absl::make_optional(std::regex(std::string(benchmarks)));
469 }());
470 return !filter->has_value() || std::regex_search(std::string(name), **filter);
471 }
472
473 struct Result {
474 std::string name;
475 std::string dist_name;
476 Ratios ratios;
477 };
478
479 template <typename T, typename Dist>
RunForTypeAndDistribution(std::vector<Result> & results)480 void RunForTypeAndDistribution(std::vector<Result>& results) {
481 std::string name = absl::StrCat(Name<T>(), "/", Name<Dist>());
482 // We have to check against all three names (min/avg/max) before we run it.
483 // If any of them is enabled, we run it.
484 if (!CanRunBenchmark(absl::StrCat(name, "/min")) &&
485 !CanRunBenchmark(absl::StrCat(name, "/avg")) &&
486 !CanRunBenchmark(absl::StrCat(name, "/max"))) {
487 return;
488 }
489 results.push_back({Name<T>(), Name<Dist>(), CollectMeanProbeLengths<Dist>()});
490 }
491
492 template <class T>
RunForType(std::vector<Result> & results)493 void RunForType(std::vector<Result>& results) {
494 RunForTypeAndDistribution<T, Sequential<T>>(results);
495 RunForTypeAndDistribution<T, AlmostSequential<T, 20>>(results);
496 RunForTypeAndDistribution<T, AlmostSequential<T, 50>>(results);
497 RunForTypeAndDistribution<T, Random<T, Uniform>>(results);
498 #ifdef NDEBUG
499 // Disable these in non-opt mode because they take too long.
500 RunForTypeAndDistribution<T, Random<T, Gaussian>>(results);
501 RunForTypeAndDistribution<T, Random<T, Zipf>>(results);
502 #endif // NDEBUG
503 }
504
505 } // namespace
506
main(int argc,char ** argv)507 int main(int argc, char** argv) {
508 // Parse the benchmark flags. Ignore all of them except the regex pattern.
509 for (int i = 1; i < argc; ++i) {
510 absl::string_view arg = argv[i];
511 const auto next = [&] { return argv[std::min(i + 1, argc - 1)]; };
512
513 if (absl::ConsumePrefix(&arg, "--benchmark_filter")) {
514 if (arg == "") {
515 // --benchmark_filter X
516 benchmarks = next();
517 } else if (absl::ConsumePrefix(&arg, "=")) {
518 // --benchmark_filter=X
519 benchmarks = arg;
520 }
521 }
522
523 // Any --benchmark flag turns on the mode.
524 if (absl::ConsumePrefix(&arg, "--benchmark")) {
525 if (benchmarks.empty()) benchmarks="all";
526 }
527 }
528
529 std::vector<Result> results;
530 RunForType<uint64_t>(results);
531 RunForType<IntIdentity>(results);
532 RunForType<Ptr<8>*>(results);
533 RunForType<Ptr<16>*>(results);
534 RunForType<Ptr<32>*>(results);
535 RunForType<Ptr<64>*>(results);
536 RunForType<PtrIdentity<8>>(results);
537 RunForType<PtrIdentity<16>>(results);
538 RunForType<PtrIdentity<32>>(results);
539 RunForType<PtrIdentity<64>>(results);
540 RunForType<std::pair<uint32_t, uint32_t>>(results);
541 RunForType<String<true>>(results);
542 RunForType<String<false>>(results);
543 RunForType<std::pair<uint64_t, String<true>>>(results);
544 RunForType<std::pair<String<true>, uint64_t>>(results);
545 RunForType<std::pair<uint64_t, String<false>>>(results);
546 RunForType<std::pair<String<false>, uint64_t>>(results);
547
548 switch (output()) {
549 case OutputStyle::kRegular:
550 absl::PrintF("%-*s%-*s Min Avg Max\n%s\n", kNameWidth,
551 "Type", kDistWidth, "Distribution",
552 std::string(kNameWidth + kDistWidth + 10 * 3, '-'));
553 for (const auto& result : results) {
554 absl::PrintF("%-*s%-*s %8.4f %8.4f %8.4f\n", kNameWidth, result.name,
555 kDistWidth, result.dist_name, result.ratios.min_load,
556 result.ratios.avg_load, result.ratios.max_load);
557 }
558 break;
559 case OutputStyle::kBenchmark: {
560 absl::PrintF("{\n");
561 absl::PrintF(" \"benchmarks\": [\n");
562 absl::string_view comma;
563 for (const auto& result : results) {
564 auto print = [&](absl::string_view stat, double Ratios::*val) {
565 std::string name =
566 absl::StrCat(result.name, "/", result.dist_name, "/", stat);
567 // Check the regex again. We might had have enabled only one of the
568 // stats for the benchmark.
569 if (!CanRunBenchmark(name)) return;
570 absl::PrintF(" %s{\n", comma);
571 absl::PrintF(" \"cpu_time\": %f,\n", 1e9 * result.ratios.*val);
572 absl::PrintF(" \"real_time\": %f,\n", 1e9 * result.ratios.*val);
573 absl::PrintF(" \"iterations\": 1,\n");
574 absl::PrintF(" \"name\": \"%s\",\n", name);
575 absl::PrintF(" \"time_unit\": \"ns\"\n");
576 absl::PrintF(" }\n");
577 comma = ",";
578 };
579 print("min", &Ratios::min_load);
580 print("avg", &Ratios::avg_load);
581 print("max", &Ratios::max_load);
582 }
583 absl::PrintF(" ],\n");
584 absl::PrintF(" \"context\": {\n");
585 absl::PrintF(" }\n");
586 absl::PrintF("}\n");
587 break;
588 }
589 }
590
591 return 0;
592 }
593