1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <cstdint>
16 #include <mutex> // NOLINT(build/c++11)
17 #include <vector>
18
19 #include "absl/base/internal/cycleclock.h"
20 #include "absl/base/internal/spinlock.h"
21 #include "absl/synchronization/blocking_counter.h"
22 #include "absl/synchronization/internal/thread_pool.h"
23 #include "absl/synchronization/mutex.h"
24 #include "benchmark/benchmark.h"
25
26 namespace {
27
BM_Mutex(benchmark::State & state)28 void BM_Mutex(benchmark::State& state) {
29 static absl::Mutex* mu = new absl::Mutex;
30 for (auto _ : state) {
31 absl::MutexLock lock(mu);
32 }
33 }
34 BENCHMARK(BM_Mutex)->UseRealTime()->Threads(1)->ThreadPerCpu();
35
DelayNs(int64_t ns,int * data)36 static void DelayNs(int64_t ns, int* data) {
37 int64_t end = absl::base_internal::CycleClock::Now() +
38 ns * absl::base_internal::CycleClock::Frequency() / 1e9;
39 while (absl::base_internal::CycleClock::Now() < end) {
40 ++(*data);
41 benchmark::DoNotOptimize(*data);
42 }
43 }
44
// Scoped lock guard for mutex types that expose Lock()/Unlock()
// (absl::Mutex, absl::base_internal::SpinLock). Holds the lock for the
// guard's lifetime.
template <typename MutexType>
class RaiiLocker {
 public:
  explicit RaiiLocker(MutexType* m) : held_(m) { held_->Lock(); }
  ~RaiiLocker() { held_->Unlock(); }

 private:
  MutexType* held_;
};

// std::mutex spells its operations lock()/unlock(), so it gets a
// dedicated specialization with the same RAII contract.
template <>
class RaiiLocker<std::mutex> {
 public:
  explicit RaiiLocker(std::mutex* m) : held_(m) { held_->lock(); }
  ~RaiiLocker() { held_->unlock(); }

 private:
  std::mutex* held_;
};
62
63 template <typename MutexType>
BM_Contended(benchmark::State & state)64 void BM_Contended(benchmark::State& state) {
65 struct Shared {
66 MutexType mu;
67 int data = 0;
68 };
69 static auto* shared = new Shared;
70 int local = 0;
71 for (auto _ : state) {
72 // Here we model both local work outside of the critical section as well as
73 // some work inside of the critical section. The idea is to capture some
74 // more or less realisitic contention levels.
75 // If contention is too low, the benchmark won't measure anything useful.
76 // If contention is unrealistically high, the benchmark will favor
77 // bad mutex implementations that block and otherwise distract threads
78 // from the mutex and shared state for as much as possible.
79 // To achieve this amount of local work is multiplied by number of threads
80 // to keep ratio between local work and critical section approximately
81 // equal regardless of number of threads.
82 DelayNs(100 * state.threads, &local);
83 RaiiLocker<MutexType> locker(&shared->mu);
84 DelayNs(state.range(0), &shared->data);
85 }
86 }
87
88 BENCHMARK_TEMPLATE(BM_Contended, absl::Mutex)
89 ->UseRealTime()
90 // ThreadPerCpu poorly handles non-power-of-two CPU counts.
91 ->Threads(1)
92 ->Threads(2)
93 ->Threads(4)
94 ->Threads(6)
95 ->Threads(8)
96 ->Threads(12)
97 ->Threads(16)
98 ->Threads(24)
99 ->Threads(32)
100 ->Threads(48)
101 ->Threads(64)
102 ->Threads(96)
103 ->Threads(128)
104 ->Threads(192)
105 ->Threads(256)
106 // Some empirically chosen amounts of work in critical section.
107 // 1 is low contention, 200 is high contention and few values in between.
108 ->Arg(1)
109 ->Arg(20)
110 ->Arg(50)
111 ->Arg(200);
112
113 BENCHMARK_TEMPLATE(BM_Contended, absl::base_internal::SpinLock)
114 ->UseRealTime()
115 // ThreadPerCpu poorly handles non-power-of-two CPU counts.
116 ->Threads(1)
117 ->Threads(2)
118 ->Threads(4)
119 ->Threads(6)
120 ->Threads(8)
121 ->Threads(12)
122 ->Threads(16)
123 ->Threads(24)
124 ->Threads(32)
125 ->Threads(48)
126 ->Threads(64)
127 ->Threads(96)
128 ->Threads(128)
129 ->Threads(192)
130 ->Threads(256)
131 // Some empirically chosen amounts of work in critical section.
132 // 1 is low contention, 200 is high contention and few values in between.
133 ->Arg(1)
134 ->Arg(20)
135 ->Arg(50)
136 ->Arg(200);
137
138 BENCHMARK_TEMPLATE(BM_Contended, std::mutex)
139 ->UseRealTime()
140 // ThreadPerCpu poorly handles non-power-of-two CPU counts.
141 ->Threads(1)
142 ->Threads(2)
143 ->Threads(4)
144 ->Threads(6)
145 ->Threads(8)
146 ->Threads(12)
147 ->Threads(16)
148 ->Threads(24)
149 ->Threads(32)
150 ->Threads(48)
151 ->Threads(64)
152 ->Threads(96)
153 ->Threads(128)
154 ->Threads(192)
155 ->Threads(256)
156 // Some empirically chosen amounts of work in critical section.
157 // 1 is low contention, 200 is high contention and few values in between.
158 ->Arg(1)
159 ->Arg(20)
160 ->Arg(50)
161 ->Arg(200);
162
// Measure the overhead of conditions on mutex release (when they must be
// evaluated). Mutex has (some) support for equivalence classes allowing
// Conditions with the same function/argument to potentially not be multiply
// evaluated.
//
// num_classes==0 is used for the special case of every waiter being distinct.
void BM_ConditionWaiters(benchmark::State& state) {
  int num_classes = state.range(0);
  int num_waiters = state.range(1);

  struct Helper {
    // Blocks on `m` until *p == 0, after signalling via `init` that this
    // waiter thread has started.
    static void Waiter(absl::BlockingCounter* init, absl::Mutex* m, int* p) {
      init->DecrementCount();
      m->LockWhen(absl::Condition(
          static_cast<bool (*)(int*)>([](int* v) { return *v == 0; }), p));
      m->Unlock();
    }
  };

  if (num_classes == 0) {
    // No equivalence classes.
    num_classes = num_waiters;
  }

  absl::BlockingCounter init(num_waiters);
  absl::Mutex mu;
  // One flag per equivalence class; waiters i and j share a Condition
  // (same function and argument) iff i % num_classes == j % num_classes.
  std::vector<int> equivalence_classes(num_classes, 1);

  // Must be declared last to be destroyed first.
  absl::synchronization_internal::ThreadPool pool(num_waiters);

  for (int i = 0; i < num_waiters; i++) {
    // Mutex considers Conditions with the same function and argument
    // to be equivalent.
    pool.Schedule([&, i] {
      Helper::Waiter(&init, &mu, &equivalence_classes[i % num_classes]);
    });
  }
  // Wait until every waiter thread has started before timing begins.
  init.Wait();

  for (auto _ : state) {
    mu.Lock();
    mu.Unlock();  // Each unlock requires Condition evaluation for our waiters.
  }

  // Flip every class's flag to 0 so all waiters' Conditions become true and
  // their threads exit, letting the ThreadPool destructor join them.
  mu.Lock();
  for (int i = 0; i < num_classes; i++) {
    equivalence_classes[i] = 0;
  }
  mu.Unlock();
}
214
// Some configurations have higher thread limits than others.
#if defined(__linux__) && !defined(THREAD_SANITIZER)
constexpr int kMaxConditionWaiters = 8192;
#else
constexpr int kMaxConditionWaiters = 1024;
#endif
// First range is num_classes (state.range(0): 0..2, where 0 means every
// waiter is distinct); second is num_waiters (state.range(1): 1..max).
BENCHMARK(BM_ConditionWaiters)->RangePair(0, 2, 1, kMaxConditionWaiters);
222
223 } // namespace
224