// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

15 #include <cstdint>
16 #include <mutex>  // NOLINT(build/c++11)
17 #include <vector>
18 
19 #include "absl/base/config.h"
20 #include "absl/base/internal/cycleclock.h"
21 #include "absl/base/internal/spinlock.h"
22 #include "absl/synchronization/blocking_counter.h"
23 #include "absl/synchronization/internal/thread_pool.h"
24 #include "absl/synchronization/mutex.h"
25 #include "benchmark/benchmark.h"
26 
27 namespace {
28 
BM_Mutex(benchmark::State & state)29 void BM_Mutex(benchmark::State& state) {
30   static absl::Mutex* mu = new absl::Mutex;
31   for (auto _ : state) {
32     absl::MutexLock lock(mu);
33   }
34 }
35 BENCHMARK(BM_Mutex)->UseRealTime()->Threads(1)->ThreadPerCpu();
36 
DelayNs(int64_t ns,int * data)37 static void DelayNs(int64_t ns, int* data) {
38   int64_t end = absl::base_internal::CycleClock::Now() +
39                 ns * absl::base_internal::CycleClock::Frequency() / 1e9;
40   while (absl::base_internal::CycleClock::Now() < end) {
41     ++(*data);
42     benchmark::DoNotOptimize(*data);
43   }
44 }
45 
// Scoped guard: acquires *mu on construction, releases it on destruction.
// Works with any mutex type exposing Lock()/Unlock() (e.g. absl::Mutex,
// absl::base_internal::SpinLock).
template <typename MutexType>
class RaiiLocker {
 public:
  explicit RaiiLocker(MutexType* mu) : mutex_(mu) { mutex_->Lock(); }
  ~RaiiLocker() { mutex_->Unlock(); }

 private:
  MutexType* mutex_;  // Not owned.
};
54 
55 template <>
56 class RaiiLocker<std::mutex> {
57  public:
RaiiLocker(std::mutex * mu)58   explicit RaiiLocker(std::mutex* mu) : mu_(mu) { mu_->lock(); }
~RaiiLocker()59   ~RaiiLocker() { mu_->unlock(); }
60  private:
61   std::mutex* mu_;
62 };
63 
64 template <typename MutexType>
BM_Contended(benchmark::State & state)65 void BM_Contended(benchmark::State& state) {
66   struct Shared {
67     MutexType mu;
68     int data = 0;
69   };
70   static auto* shared = new Shared;
71   int local = 0;
72   for (auto _ : state) {
73     // Here we model both local work outside of the critical section as well as
74     // some work inside of the critical section. The idea is to capture some
75     // more or less realisitic contention levels.
76     // If contention is too low, the benchmark won't measure anything useful.
77     // If contention is unrealistically high, the benchmark will favor
78     // bad mutex implementations that block and otherwise distract threads
79     // from the mutex and shared state for as much as possible.
80     // To achieve this amount of local work is multiplied by number of threads
81     // to keep ratio between local work and critical section approximately
82     // equal regardless of number of threads.
83     DelayNs(100 * state.threads, &local);
84     RaiiLocker<MutexType> locker(&shared->mu);
85     DelayNs(state.range(0), &shared->data);
86   }
87 }
88 
89 BENCHMARK_TEMPLATE(BM_Contended, absl::Mutex)
90     ->UseRealTime()
91     // ThreadPerCpu poorly handles non-power-of-two CPU counts.
92     ->Threads(1)
93     ->Threads(2)
94     ->Threads(4)
95     ->Threads(6)
96     ->Threads(8)
97     ->Threads(12)
98     ->Threads(16)
99     ->Threads(24)
100     ->Threads(32)
101     ->Threads(48)
102     ->Threads(64)
103     ->Threads(96)
104     ->Threads(128)
105     ->Threads(192)
106     ->Threads(256)
107     // Some empirically chosen amounts of work in critical section.
108     // 1 is low contention, 200 is high contention and few values in between.
109     ->Arg(1)
110     ->Arg(20)
111     ->Arg(50)
112     ->Arg(200);
113 
114 BENCHMARK_TEMPLATE(BM_Contended, absl::base_internal::SpinLock)
115     ->UseRealTime()
116     // ThreadPerCpu poorly handles non-power-of-two CPU counts.
117     ->Threads(1)
118     ->Threads(2)
119     ->Threads(4)
120     ->Threads(6)
121     ->Threads(8)
122     ->Threads(12)
123     ->Threads(16)
124     ->Threads(24)
125     ->Threads(32)
126     ->Threads(48)
127     ->Threads(64)
128     ->Threads(96)
129     ->Threads(128)
130     ->Threads(192)
131     ->Threads(256)
132     // Some empirically chosen amounts of work in critical section.
133     // 1 is low contention, 200 is high contention and few values in between.
134     ->Arg(1)
135     ->Arg(20)
136     ->Arg(50)
137     ->Arg(200);
138 
139 BENCHMARK_TEMPLATE(BM_Contended, std::mutex)
140     ->UseRealTime()
141     // ThreadPerCpu poorly handles non-power-of-two CPU counts.
142     ->Threads(1)
143     ->Threads(2)
144     ->Threads(4)
145     ->Threads(6)
146     ->Threads(8)
147     ->Threads(12)
148     ->Threads(16)
149     ->Threads(24)
150     ->Threads(32)
151     ->Threads(48)
152     ->Threads(64)
153     ->Threads(96)
154     ->Threads(128)
155     ->Threads(192)
156     ->Threads(256)
157     // Some empirically chosen amounts of work in critical section.
158     // 1 is low contention, 200 is high contention and few values in between.
159     ->Arg(1)
160     ->Arg(20)
161     ->Arg(50)
162     ->Arg(200);
163 
164 // Measure the overhead of conditions on mutex release (when they must be
165 // evaluated).  Mutex has (some) support for equivalence classes allowing
166 // Conditions with the same function/argument to potentially not be multiply
167 // evaluated.
168 //
169 // num_classes==0 is used for the special case of every waiter being distinct.
BM_ConditionWaiters(benchmark::State & state)170 void BM_ConditionWaiters(benchmark::State& state) {
171   int num_classes = state.range(0);
172   int num_waiters = state.range(1);
173 
174   struct Helper {
175     static void Waiter(absl::BlockingCounter* init, absl::Mutex* m, int* p) {
176       init->DecrementCount();
177       m->LockWhen(absl::Condition(
178           static_cast<bool (*)(int*)>([](int* v) { return *v == 0; }), p));
179       m->Unlock();
180     }
181   };
182 
183   if (num_classes == 0) {
184     // No equivalence classes.
185     num_classes = num_waiters;
186   }
187 
188   absl::BlockingCounter init(num_waiters);
189   absl::Mutex mu;
190   std::vector<int> equivalence_classes(num_classes, 1);
191 
192   // Must be declared last to be destroyed first.
193   absl::synchronization_internal::ThreadPool pool(num_waiters);
194 
195   for (int i = 0; i < num_waiters; i++) {
196     // Mutex considers Conditions with the same function and argument
197     // to be equivalent.
198     pool.Schedule([&, i] {
199       Helper::Waiter(&init, &mu, &equivalence_classes[i % num_classes]);
200     });
201   }
202   init.Wait();
203 
204   for (auto _ : state) {
205     mu.Lock();
206     mu.Unlock();  // Each unlock requires Condition evaluation for our waiters.
207   }
208 
209   mu.Lock();
210   for (int i = 0; i < num_classes; i++) {
211     equivalence_classes[i] = 0;
212   }
213   mu.Unlock();
214 }
215 
216 // Some configurations have higher thread limits than others.
217 #if defined(__linux__) && !defined(ABSL_HAVE_THREAD_SANITIZER)
218 constexpr int kMaxConditionWaiters = 8192;
219 #else
220 constexpr int kMaxConditionWaiters = 1024;
221 #endif
222 BENCHMARK(BM_ConditionWaiters)->RangePair(0, 2, 1, kMaxConditionWaiters);
223 
224 }  // namespace
225