// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_ALLOCATOR_PARTITION_ALLOCATOR_SPINNING_MUTEX_H_
#define BASE_ALLOCATOR_PARTITION_ALLOCATOR_SPINNING_MUTEX_H_

#include <algorithm>
#include <atomic>

#include "base/allocator/partition_allocator/partition_alloc_base/compiler_specific.h"
#include "base/allocator/partition_allocator/partition_alloc_base/component_export.h"
#include "base/allocator/partition_allocator/partition_alloc_base/thread_annotations.h"
#include "base/allocator/partition_allocator/partition_alloc_check.h"
#include "base/allocator/partition_allocator/partition_alloc_config.h"
#include "base/allocator/partition_allocator/yield_processor.h"
#include "build/build_config.h"

#if BUILDFLAG(IS_WIN)
#include "base/allocator/partition_allocator/partition_alloc_base/win/windows_types.h"
#endif

#if BUILDFLAG(IS_POSIX)
#include <errno.h>
#include <pthread.h>
#endif

#if BUILDFLAG(IS_APPLE)
#include <os/lock.h>
#endif  // BUILDFLAG(IS_APPLE)

#if BUILDFLAG(IS_FUCHSIA)
#include <lib/sync/mutex.h>
#endif

namespace partition_alloc::internal {

// The behavior of this class depends on whether PA_HAS_FAST_MUTEX is defined.
// 1. When it is defined:
//
// Simple spinning lock. It will spin in user space a set number of times before
// going into the kernel to sleep.
//
// This is intended to give "the best of both worlds" between a SpinLock and
// base::Lock:
// - SpinLock: Inlined fast path, no external function calls, just
//   compare-and-swap. Short waits do not go into the kernel. Good behavior in
//   low contention cases.
// - base::Lock: Good behavior in case of contention.
//
// We don't rely on base::Lock, which we could make spin (by calling Try() in a
// loop), because its performance is below that of a custom spinlock, as seen
// on high-level benchmarks. Instead this implements a simple non-recursive
// mutex on top of the futex() syscall on Linux, SRWLock on Windows,
// os_unfair_lock on macOS, and pthread_mutex on other POSIX systems. The main
// difference between this and a libc implementation is that it only supports
// the simplest path: private (to a process), non-recursive mutexes with no
// priority inheritance and no timed waits.
//
// As an interesting side-effect to be used in the allocator, this code does not
// make any allocations; locks are small, with a constexpr constructor and no
// destructor.
//
// 2. Otherwise: This is a simple SpinLock, in the sense that it does not have
// any awareness of other threads' behavior.
class PA_LOCKABLE PA_COMPONENT_EXPORT(PARTITION_ALLOC) SpinningMutex {
 public:
  inline constexpr SpinningMutex();
  PA_ALWAYS_INLINE void Acquire() PA_EXCLUSIVE_LOCK_FUNCTION();
  PA_ALWAYS_INLINE void Release() PA_UNLOCK_FUNCTION();
  PA_ALWAYS_INLINE bool Try() PA_EXCLUSIVE_TRYLOCK_FUNCTION(true);
  void AssertAcquired() const {}  // Not supported.
  void Reinit() PA_UNLOCK_FUNCTION();

 private:
  PA_NOINLINE void AcquireSpinThenBlock() PA_EXCLUSIVE_LOCK_FUNCTION();
#if PA_CONFIG(HAS_FAST_MUTEX)
  void LockSlow() PA_EXCLUSIVE_LOCK_FUNCTION();
#else
  PA_ALWAYS_INLINE void LockSlow() PA_EXCLUSIVE_LOCK_FUNCTION();
#endif

  // See below, the latency of PA_YIELD_PROCESSOR can be as high as ~150
  // cycles. Meanwhile, sleeping costs a few us. Spinning 64 times at 3GHz would
  // cost 150 * 64 / 3e9 ~= 3.2us.
  //
  // This applies to Linux kernels, on x86_64. On ARM we might want to spin
  // more.
  static constexpr int kSpinCount = 64;
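  // A minimal sketch, for orientation only, of how kSpinCount is consumed. The
  // real AcquireSpinThenBlock() lives in spinning_mutex.cc; this only shows the
  // shape of the spin-then-block policy the constant tunes:
  //
  //   // Retry the cheap inlined path a bounded number of times, hinting to
  //   // the CPU that this is a spin-wait loop, then block in the kernel.
  //   for (int count = 0; count < kSpinCount; ++count) {
  //     if (Try())
  //       return;
  //     PA_YIELD_PROCESSOR;
  //   }
  //   LockSlow();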

#if PA_CONFIG(HAS_FAST_MUTEX)

#if PA_CONFIG(HAS_LINUX_KERNEL)
  void FutexWait();
  void FutexWake();

  static constexpr int kUnlocked = 0;
  static constexpr int kLockedUncontended = 1;
  static constexpr int kLockedContended = 2;

  std::atomic<int32_t> state_{kUnlocked};
#elif BUILDFLAG(IS_WIN)
  PA_CHROME_SRWLOCK lock_ = SRWLOCK_INIT;
#elif BUILDFLAG(IS_APPLE)
  os_unfair_lock unfair_lock_ = OS_UNFAIR_LOCK_INIT;
#elif BUILDFLAG(IS_POSIX)
  pthread_mutex_t lock_ = PTHREAD_MUTEX_INITIALIZER;
#elif BUILDFLAG(IS_FUCHSIA)
  sync_mutex lock_;
#endif

#else   // PA_CONFIG(HAS_FAST_MUTEX)
  std::atomic<bool> lock_{false};

  // Spinlock-like, fallback.
  PA_ALWAYS_INLINE bool TrySpinLock();
  PA_ALWAYS_INLINE void ReleaseSpinLock();
  void LockSlowSpinLock();
#endif  // PA_CONFIG(HAS_FAST_MUTEX)
};
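// Example use (illustrative only; FreeList and its members are hypothetical
// and not part of PartitionAlloc):
//
//   class FreeList {
//    public:
//     void Push(void* slot) {
//       lock_.Acquire();
//       // ... mutate the list while holding the lock ...
//       lock_.Release();
//     }
//
//    private:
//     SpinningMutex lock_;
//   };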

PA_ALWAYS_INLINE void SpinningMutex::Acquire() {
  // Not marked PA_LIKELY(), as:
  // 1. We don't know how much contention the lock would experience
  // 2. This may lead to weird-looking code layout when inlined into a caller
  // with PA_(UN)LIKELY() annotations.
  if (Try()) {
    return;
  }

  return AcquireSpinThenBlock();
}

inline constexpr SpinningMutex::SpinningMutex() = default;

#if PA_CONFIG(HAS_FAST_MUTEX)

#if PA_CONFIG(HAS_LINUX_KERNEL)

PA_ALWAYS_INLINE bool SpinningMutex::Try() {
  // Using the weak variant of compare_exchange(), which may fail spuriously. On
  // some architectures such as ARM, CAS is typically performed as a LDREX/STREX
  // pair, where the store may fail. In the strong version, there is a loop
  // inserted by the compiler to retry in these cases.
  //
  // Since we are retrying in Acquire() anyway, there is no point having two
  // nested loops.
  int expected = kUnlocked;
  return (state_.load(std::memory_order_relaxed) == expected) &&
         state_.compare_exchange_weak(expected, kLockedUncontended,
                                      std::memory_order_acquire,
                                      std::memory_order_relaxed);
}
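// Summary of the state protocol, as a guide only (the waiter-side transitions
// happen in LockSlow(), which is implemented in spinning_mutex.cc):
//
//   kUnlocked  -- Try() succeeds ------------------> kLockedUncontended
//   kLocked*   -- LockSlow() registers a waiter ----> kLockedContended,
//                 then FutexWait() until woken
//   kLocked*   -- Release() ------------------------> kUnlocked,
//                 plus FutexWake() if the state was kLockedContended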

PA_ALWAYS_INLINE void SpinningMutex::Release() {
  if (PA_UNLIKELY(state_.exchange(kUnlocked, std::memory_order_release) ==
                  kLockedContended)) {
    // |kLockedContended|: there is a waiter to wake up.
    //
    // Here there is a window where the lock is unlocked, since we just set it
    // to |kUnlocked| above, meaning that another thread can grab the lock
    // in-between now and |FutexWake()| waking up a waiter. Aside from
    // potential fairness issues, this is not a problem, as the newly-awakened
    // thread will check that the lock is still free.
    //
    // There is a small pessimization here though: if we have a single waiter,
    // then when it wakes up, the lock will be set to |kLockedContended|, so
    // when this waiter releases the lock, it will needlessly call
    // |FutexWake()|, even though there are no waiters. This is supported by the
    // kernel, and is what bionic (Android's libc) also does.
    FutexWake();
  }
}
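// FutexWait() and FutexWake() are defined in spinning_mutex.cc. As a rough,
// non-authoritative sketch, on Linux they amount to something like the
// following (assuming <sys/syscall.h> and <linux/futex.h>):
//
//   // Sleep only while the lock still looks contended; spurious wakeups are
//   // fine, since the caller re-checks the state in a loop.
//   syscall(SYS_futex, &state_, FUTEX_WAIT | FUTEX_PRIVATE_FLAG,
//           kLockedContended, nullptr, nullptr, 0);
//
//   // Wake at most one waiter, if any.
//   syscall(SYS_futex, &state_, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1, nullptr,
//           nullptr, 0);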

#elif BUILDFLAG(IS_WIN)

PA_ALWAYS_INLINE bool SpinningMutex::Try() {
  return !!::TryAcquireSRWLockExclusive(reinterpret_cast<PSRWLOCK>(&lock_));
}

PA_ALWAYS_INLINE void SpinningMutex::Release() {
  ::ReleaseSRWLockExclusive(reinterpret_cast<PSRWLOCK>(&lock_));
}

#elif BUILDFLAG(IS_APPLE)

PA_ALWAYS_INLINE bool SpinningMutex::Try() {
  return os_unfair_lock_trylock(&unfair_lock_);
}

PA_ALWAYS_INLINE void SpinningMutex::Release() {
  return os_unfair_lock_unlock(&unfair_lock_);
}

#elif BUILDFLAG(IS_POSIX)

PA_ALWAYS_INLINE bool SpinningMutex::Try() {
  int retval = pthread_mutex_trylock(&lock_);
  PA_DCHECK(retval == 0 || retval == EBUSY);
  return retval == 0;
}

PA_ALWAYS_INLINE void SpinningMutex::Release() {
  int retval = pthread_mutex_unlock(&lock_);
  PA_DCHECK(retval == 0);
}

#elif BUILDFLAG(IS_FUCHSIA)

PA_ALWAYS_INLINE bool SpinningMutex::Try() {
  return sync_mutex_trylock(&lock_) == ZX_OK;
}

PA_ALWAYS_INLINE void SpinningMutex::Release() {
  sync_mutex_unlock(&lock_);
}

#endif

#else  // PA_CONFIG(HAS_FAST_MUTEX)

PA_ALWAYS_INLINE bool SpinningMutex::Try() {
  // Possibly faster than an unconditional CAS: with the relaxed load first, a
  // contended cacheline can stay in the shared state instead of being pulled
  // into the exclusive state by the exchange on every attempt.
  return !lock_.load(std::memory_order_relaxed) &&
         !lock_.exchange(true, std::memory_order_acquire);
}

PA_ALWAYS_INLINE void SpinningMutex::Release() {
  lock_.store(false, std::memory_order_release);
}

PA_ALWAYS_INLINE void SpinningMutex::LockSlow() {
  return LockSlowSpinLock();
}
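// LockSlowSpinLock() is defined in spinning_mutex.cc. A minimal sketch of the
// fallback slow path, under the assumption that it simply keeps spinning (the
// real backoff policy may differ):
//
//   // Keep retrying the test-and-test-and-set fast path, hinting to the CPU
//   // that this is a spin-wait loop.
//   while (!Try()) {
//     PA_YIELD_PROCESSOR;
//   }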

#endif  // PA_CONFIG(HAS_FAST_MUTEX)

}  // namespace partition_alloc::internal

#endif  // BASE_ALLOCATOR_PARTITION_ALLOCATOR_SPINNING_MUTEX_H_