// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cstdint>
#include <mutex>  // NOLINT(build/c++11)
#include <vector>

#include "absl/base/internal/cycleclock.h"
#include "absl/base/internal/spinlock.h"
#include "absl/synchronization/blocking_counter.h"
#include "absl/synchronization/internal/thread_pool.h"
#include "absl/synchronization/mutex.h"
#include "benchmark/benchmark.h"

namespace {

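// Measures bare lock/unlock cost on a shared absl::Mutex, both single-threaded
// (uncontended) and with one thread per CPU.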
void BM_Mutex(benchmark::State& state) {
  static absl::Mutex* mu = new absl::Mutex;
  for (auto _ : state) {
    absl::MutexLock lock(mu);
  }
}
BENCHMARK(BM_Mutex)->UseRealTime()->Threads(1)->ThreadPerCpu();

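// Busy-waits for approximately `ns` nanoseconds, incrementing `*data` so the
// loop cannot be optimized away.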
static void DelayNs(int64_t ns, int* data) {
  int64_t end = absl::base_internal::CycleClock::Now() +
                ns * absl::base_internal::CycleClock::Frequency() / 1e9;
  while (absl::base_internal::CycleClock::Now() < end) {
    ++(*data);
    benchmark::DoNotOptimize(*data);
  }
}

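// Minimal scoped-lock adapter so the same benchmark template can drive mutex
// types with different interfaces (absl::Mutex, SpinLock, std::mutex).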
template <typename MutexType>
class RaiiLocker {
 public:
  explicit RaiiLocker(MutexType* mu) : mu_(mu) { mu_->Lock(); }
  ~RaiiLocker() { mu_->Unlock(); }
 private:
  MutexType* mu_;
};

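// std::mutex uses lowercase lock()/unlock(), so it gets its own specialization.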
template <>
class RaiiLocker<std::mutex> {
 public:
  explicit RaiiLocker(std::mutex* mu) : mu_(mu) { mu_->lock(); }
  ~RaiiLocker() { mu_->unlock(); }
 private:
  std::mutex* mu_;
};

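// Measures contended acquisition: each thread does some local work outside the
// lock and then holds the shared mutex for roughly state.range(0) nanoseconds.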
template <typename MutexType>
void BM_Contended(benchmark::State& state) {
  struct Shared {
    MutexType mu;
    int data = 0;
  };
  static auto* shared = new Shared;
  int local = 0;
  for (auto _ : state) {
    // Here we model both local work outside of the critical section as well as
    // some work inside of the critical section. The idea is to capture some
    // more or less realistic contention levels.
    // If contention is too low, the benchmark won't measure anything useful.
    // If contention is unrealistically high, the benchmark will favor
    // bad mutex implementations that block and otherwise distract threads
    // from the mutex and shared state for as long as possible.
    // To achieve this, the amount of local work is multiplied by the number of
    // threads so that the ratio between local work and the critical section
    // stays approximately constant regardless of the number of threads.
    DelayNs(100 * state.threads, &local);
    RaiiLocker<MutexType> locker(&shared->mu);
    DelayNs(state.range(0), &shared->data);
  }
}

BENCHMARK_TEMPLATE(BM_Contended, absl::Mutex)
    ->UseRealTime()
    // ThreadPerCpu poorly handles non-power-of-two CPU counts.
    ->Threads(1)
    ->Threads(2)
    ->Threads(4)
    ->Threads(6)
    ->Threads(8)
    ->Threads(12)
    ->Threads(16)
    ->Threads(24)
    ->Threads(32)
    ->Threads(48)
    ->Threads(64)
    ->Threads(96)
    ->Threads(128)
    ->Threads(192)
    ->Threads(256)
    // Some empirically chosen amounts of work in the critical section.
    // 1 is low contention, 200 is high contention, with a few values in between.
    ->Arg(1)
    ->Arg(20)
    ->Arg(50)
    ->Arg(200);

BENCHMARK_TEMPLATE(BM_Contended, absl::base_internal::SpinLock)
    ->UseRealTime()
    // ThreadPerCpu poorly handles non-power-of-two CPU counts.
    ->Threads(1)
    ->Threads(2)
    ->Threads(4)
    ->Threads(6)
    ->Threads(8)
    ->Threads(12)
    ->Threads(16)
    ->Threads(24)
    ->Threads(32)
    ->Threads(48)
    ->Threads(64)
    ->Threads(96)
    ->Threads(128)
    ->Threads(192)
    ->Threads(256)
    // Some empirically chosen amounts of work in the critical section.
    // 1 is low contention, 200 is high contention, with a few values in between.
    ->Arg(1)
    ->Arg(20)
    ->Arg(50)
    ->Arg(200);

BENCHMARK_TEMPLATE(BM_Contended, std::mutex)
    ->UseRealTime()
    // ThreadPerCpu poorly handles non-power-of-two CPU counts.
    ->Threads(1)
    ->Threads(2)
    ->Threads(4)
    ->Threads(6)
    ->Threads(8)
    ->Threads(12)
    ->Threads(16)
    ->Threads(24)
    ->Threads(32)
    ->Threads(48)
    ->Threads(64)
    ->Threads(96)
    ->Threads(128)
    ->Threads(192)
    ->Threads(256)
    // Some empirically chosen amounts of work in the critical section.
    // 1 is low contention, 200 is high contention, with a few values in between.
    ->Arg(1)
    ->Arg(20)
    ->Arg(50)
    ->Arg(200);

// Measure the overhead of conditions on mutex release (when they must be
// evaluated).  Mutex has (some) support for equivalence classes, allowing
// Conditions with the same function/argument to avoid being evaluated more
// than once.
//
// num_classes==0 is used for the special case of every waiter being distinct.
void BM_ConditionWaiters(benchmark::State& state) {
  int num_classes = state.range(0);
  int num_waiters = state.range(1);

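  // Each waiter blocks until its equivalence-class flag drops to zero; waiters
  // sharing a flag use the same Condition function and argument.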
  struct Helper {
    static void Waiter(absl::BlockingCounter* init, absl::Mutex* m, int* p) {
      init->DecrementCount();
      m->LockWhen(absl::Condition(
          static_cast<bool (*)(int*)>([](int* v) { return *v == 0; }), p));
      m->Unlock();
    }
  };

  if (num_classes == 0) {
    // No equivalence classes.
    num_classes = num_waiters;
  }

  absl::BlockingCounter init(num_waiters);
  absl::Mutex mu;
  std::vector<int> equivalence_classes(num_classes, 1);

  // Must be declared last to be destroyed first.
  absl::synchronization_internal::ThreadPool pool(num_waiters);

  for (int i = 0; i < num_waiters; i++) {
    // Mutex considers Conditions with the same function and argument
    // to be equivalent.
    pool.Schedule([&, i] {
      Helper::Waiter(&init, &mu, &equivalence_classes[i % num_classes]);
    });
  }
  init.Wait();

  for (auto _ : state) {
    mu.Lock();
    mu.Unlock();  // Each unlock requires Condition evaluation for our waiters.
  }

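  // Release the waiters by making every equivalence class's condition true.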
  mu.Lock();
  for (int i = 0; i < num_classes; i++) {
    equivalence_classes[i] = 0;
  }
  mu.Unlock();
}

// Some configurations have higher thread limits than others.
#if defined(__linux__) && !defined(THREAD_SANITIZER)
constexpr int kMaxConditionWaiters = 8192;
#else
constexpr int kMaxConditionWaiters = 1024;
#endif
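// The first range is num_classes (0 means every waiter is distinct), the
// second is num_waiters.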
BENCHMARK(BM_ConditionWaiters)->RangePair(0, 2, 1, kMaxConditionWaiters);

}  // namespace