// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15 #include <cstdint>
16 #include <mutex> // NOLINT(build/c++11)
17 #include <vector>
18
19 #include "absl/base/config.h"
20 #include "absl/base/internal/cycleclock.h"
21 #include "absl/base/internal/spinlock.h"
22 #include "absl/synchronization/blocking_counter.h"
23 #include "absl/synchronization/internal/thread_pool.h"
24 #include "absl/synchronization/mutex.h"
25 #include "benchmark/benchmark.h"
26
27 namespace {
28
BM_Mutex(benchmark::State & state)29 void BM_Mutex(benchmark::State& state) {
30 static absl::Mutex* mu = new absl::Mutex;
31 for (auto _ : state) {
32 absl::MutexLock lock(mu);
33 }
34 }
35 BENCHMARK(BM_Mutex)->UseRealTime()->Threads(1)->ThreadPerCpu();
36
DelayNs(int64_t ns,int * data)37 static void DelayNs(int64_t ns, int* data) {
38 int64_t end = absl::base_internal::CycleClock::Now() +
39 ns * absl::base_internal::CycleClock::Frequency() / 1e9;
40 while (absl::base_internal::CycleClock::Now() < end) {
41 ++(*data);
42 benchmark::DoNotOptimize(*data);
43 }
44 }
45
// Generic RAII guard for lock types exposing Lock()/Unlock() (e.g.
// absl::Mutex, absl::base_internal::SpinLock). Acquires the lock in the
// constructor and releases it in the destructor.
template <typename MutexType>
class RaiiLocker {
 public:
  explicit RaiiLocker(MutexType* mu) : mu_(mu) { mu_->Lock(); }
  ~RaiiLocker() { mu_->Unlock(); }

 private:
  MutexType* mu_;  // Not owned.
};
54
55 template <>
56 class RaiiLocker<std::mutex> {
57 public:
RaiiLocker(std::mutex * mu)58 explicit RaiiLocker(std::mutex* mu) : mu_(mu) { mu_->lock(); }
~RaiiLocker()59 ~RaiiLocker() { mu_->unlock(); }
60 private:
61 std::mutex* mu_;
62 };
63
64 template <typename MutexType>
BM_Contended(benchmark::State & state)65 void BM_Contended(benchmark::State& state) {
66 struct Shared {
67 MutexType mu;
68 int data = 0;
69 };
70 static auto* shared = new Shared;
71 int local = 0;
72 for (auto _ : state) {
73 // Here we model both local work outside of the critical section as well as
74 // some work inside of the critical section. The idea is to capture some
75 // more or less realisitic contention levels.
76 // If contention is too low, the benchmark won't measure anything useful.
77 // If contention is unrealistically high, the benchmark will favor
78 // bad mutex implementations that block and otherwise distract threads
79 // from the mutex and shared state for as much as possible.
80 // To achieve this amount of local work is multiplied by number of threads
81 // to keep ratio between local work and critical section approximately
82 // equal regardless of number of threads.
83 DelayNs(100 * state.threads, &local);
84 RaiiLocker<MutexType> locker(&shared->mu);
85 DelayNs(state.range(0), &shared->data);
86 }
87 }
88
89 BENCHMARK_TEMPLATE(BM_Contended, absl::Mutex)
90 ->UseRealTime()
91 // ThreadPerCpu poorly handles non-power-of-two CPU counts.
92 ->Threads(1)
93 ->Threads(2)
94 ->Threads(4)
95 ->Threads(6)
96 ->Threads(8)
97 ->Threads(12)
98 ->Threads(16)
99 ->Threads(24)
100 ->Threads(32)
101 ->Threads(48)
102 ->Threads(64)
103 ->Threads(96)
104 ->Threads(128)
105 ->Threads(192)
106 ->Threads(256)
107 // Some empirically chosen amounts of work in critical section.
108 // 1 is low contention, 200 is high contention and few values in between.
109 ->Arg(1)
110 ->Arg(20)
111 ->Arg(50)
112 ->Arg(200);
113
114 BENCHMARK_TEMPLATE(BM_Contended, absl::base_internal::SpinLock)
115 ->UseRealTime()
116 // ThreadPerCpu poorly handles non-power-of-two CPU counts.
117 ->Threads(1)
118 ->Threads(2)
119 ->Threads(4)
120 ->Threads(6)
121 ->Threads(8)
122 ->Threads(12)
123 ->Threads(16)
124 ->Threads(24)
125 ->Threads(32)
126 ->Threads(48)
127 ->Threads(64)
128 ->Threads(96)
129 ->Threads(128)
130 ->Threads(192)
131 ->Threads(256)
132 // Some empirically chosen amounts of work in critical section.
133 // 1 is low contention, 200 is high contention and few values in between.
134 ->Arg(1)
135 ->Arg(20)
136 ->Arg(50)
137 ->Arg(200);
138
139 BENCHMARK_TEMPLATE(BM_Contended, std::mutex)
140 ->UseRealTime()
141 // ThreadPerCpu poorly handles non-power-of-two CPU counts.
142 ->Threads(1)
143 ->Threads(2)
144 ->Threads(4)
145 ->Threads(6)
146 ->Threads(8)
147 ->Threads(12)
148 ->Threads(16)
149 ->Threads(24)
150 ->Threads(32)
151 ->Threads(48)
152 ->Threads(64)
153 ->Threads(96)
154 ->Threads(128)
155 ->Threads(192)
156 ->Threads(256)
157 // Some empirically chosen amounts of work in critical section.
158 // 1 is low contention, 200 is high contention and few values in between.
159 ->Arg(1)
160 ->Arg(20)
161 ->Arg(50)
162 ->Arg(200);
163
164 // Measure the overhead of conditions on mutex release (when they must be
165 // evaluated). Mutex has (some) support for equivalence classes allowing
166 // Conditions with the same function/argument to potentially not be multiply
167 // evaluated.
168 //
169 // num_classes==0 is used for the special case of every waiter being distinct.
BM_ConditionWaiters(benchmark::State & state)170 void BM_ConditionWaiters(benchmark::State& state) {
171 int num_classes = state.range(0);
172 int num_waiters = state.range(1);
173
174 struct Helper {
175 static void Waiter(absl::BlockingCounter* init, absl::Mutex* m, int* p) {
176 init->DecrementCount();
177 m->LockWhen(absl::Condition(
178 static_cast<bool (*)(int*)>([](int* v) { return *v == 0; }), p));
179 m->Unlock();
180 }
181 };
182
183 if (num_classes == 0) {
184 // No equivalence classes.
185 num_classes = num_waiters;
186 }
187
188 absl::BlockingCounter init(num_waiters);
189 absl::Mutex mu;
190 std::vector<int> equivalence_classes(num_classes, 1);
191
192 // Must be declared last to be destroyed first.
193 absl::synchronization_internal::ThreadPool pool(num_waiters);
194
195 for (int i = 0; i < num_waiters; i++) {
196 // Mutex considers Conditions with the same function and argument
197 // to be equivalent.
198 pool.Schedule([&, i] {
199 Helper::Waiter(&init, &mu, &equivalence_classes[i % num_classes]);
200 });
201 }
202 init.Wait();
203
204 for (auto _ : state) {
205 mu.Lock();
206 mu.Unlock(); // Each unlock requires Condition evaluation for our waiters.
207 }
208
209 mu.Lock();
210 for (int i = 0; i < num_classes; i++) {
211 equivalence_classes[i] = 0;
212 }
213 mu.Unlock();
214 }
215
216 // Some configurations have higher thread limits than others.
217 #if defined(__linux__) && !defined(ABSL_HAVE_THREAD_SANITIZER)
218 constexpr int kMaxConditionWaiters = 8192;
219 #else
220 constexpr int kMaxConditionWaiters = 1024;
221 #endif
222 BENCHMARK(BM_ConditionWaiters)->RangePair(0, 2, 1, kMaxConditionWaiters);
223
224 } // namespace
225