1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "absl/base/internal/spinlock.h"
16
17 #include <algorithm>
18 #include <atomic>
19 #include <limits>
20
21 #include "absl/base/attributes.h"
22 #include "absl/base/internal/atomic_hook.h"
23 #include "absl/base/internal/cycleclock.h"
24 #include "absl/base/internal/spinlock_wait.h"
25 #include "absl/base/internal/sysinfo.h" /* For NumCPUs() */
26 #include "absl/base/call_once.h"
27
28 // Description of lock-word:
29 // 31..00: [............................3][2][1][0]
30 //
31 // [0]: kSpinLockHeld
32 // [1]: kSpinLockCooperative
33 // [2]: kSpinLockDisabledScheduling
34 // [31..3]: ONLY kSpinLockSleeper OR
35 // Wait time in cycles >> PROFILE_TIMESTAMP_SHIFT
36 //
37 // Detailed descriptions:
38 //
39 // Bit [0]: The lock is considered held iff kSpinLockHeld is set.
40 //
41 // Bit [1]: Eligible waiters (e.g. Fibers) may co-operatively reschedule when
42 // contended iff kSpinLockCooperative is set.
43 //
44 // Bit [2]: This bit is exclusive from bit [1]. It is used only by a
45 // non-cooperative lock. When set, indicates that scheduling was
46 // successfully disabled when the lock was acquired. May be unset,
47 // even if non-cooperative, if a ThreadIdentity did not yet exist at
48 // time of acquisition.
49 //
50 // Bit [3]: If this is the only upper bit ([31..3]) set then this lock was
51 // acquired without contention, however, at least one waiter exists.
52 //
53 // Otherwise, bits [31..3] represent the time spent by the current lock
54 // holder to acquire the lock. There may be outstanding waiter(s).
55
56 namespace absl {
57 ABSL_NAMESPACE_BEGIN
58 namespace base_internal {
59
// Profiling hook for contended acquisitions. SlowUnlock() invokes it with the
// address of the lock and the decoded wait time (in cycles) of the holder that
// is releasing the lock. The callback is installed through
// RegisterSpinLockProfiler().
ABSL_INTERNAL_ATOMIC_HOOK_ATTRIBUTES static base_internal::AtomicHook<void (*)(
    const void *lock, int64_t wait_cycles)>
    submit_profile_data;
63
// Installs `fn` as the spinlock contention profiler by storing it in the
// submit_profile_data hook.
//
// `fn` receives the address of the contended lock and the number of cycles the
// releasing holder waited to acquire it.
void RegisterSpinLockProfiler(void (*fn)(const void *contendedlock,
                                         int64_t wait_cycles)) {
  submit_profile_data.Store(fn);
}
68
// Uncommon constructors.

// Constructs an unheld lock. The initial lock word is kSpinLockCooperative
// when `mode` is cooperative and zero otherwise.
SpinLock::SpinLock(base_internal::SchedulingMode mode)
    : lockword_(IsCooperative(mode) ? kSpinLockCooperative : 0) {
  // Announce the new mutex to TSan as a non-static one.
  ABSL_TSAN_MUTEX_CREATE(this, __tsan_mutex_not_static);
}
74
// Constructor for linker-initialized (static) locks. The lock word is not
// written here: a non-cooperative lock needs no further setup, and a
// cooperative one only has the cooperative bit added via
// InitLinkerInitializedAndCooperative().
SpinLock::SpinLock(base_internal::LinkerInitialized,
                   base_internal::SchedulingMode mode) {
  ABSL_TSAN_MUTEX_CREATE(this, 0);
  if (IsCooperative(mode)) {
    InitLinkerInitializedAndCooperative();
  }
  // Otherwise, lockword_ is already initialized.
}
83
// Static (linker initialized) spinlocks always start life as functional
// non-cooperative locks.  When their static constructor does run, it will call
// this initializer to augment the lockword with the cooperative bit.  By
// actually taking the lock when we do this we avoid the need for an atomic
// operation in the regular unlock path.
//
// SlowLock() must be careful to re-test for this bit so that any outstanding
// waiters may be upgraded to cooperative status.
void SpinLock::InitLinkerInitializedAndCooperative() {
  // Hold the lock while the bit is added so the update cannot interleave with
  // another holder's unlock.
  Lock();
  // fetch_or only adds kSpinLockCooperative; every other bit of the lock word
  // is left as it was.
  lockword_.fetch_or(kSpinLockCooperative, std::memory_order_relaxed);
  Unlock();
}
97
98 // Monitor the lock to see if its value changes within some time period
99 // (adaptive_spin_count loop iterations). The last value read from the lock
100 // is returned from the method.
SpinLoop()101 uint32_t SpinLock::SpinLoop() {
102 // We are already in the slow path of SpinLock, initialize the
103 // adaptive_spin_count here.
104 ABSL_CONST_INIT static absl::once_flag init_adaptive_spin_count;
105 ABSL_CONST_INIT static int adaptive_spin_count = 0;
106 base_internal::LowLevelCallOnce(&init_adaptive_spin_count, []() {
107 adaptive_spin_count = base_internal::NumCPUs() > 1 ? 1000 : 1;
108 });
109
110 int c = adaptive_spin_count;
111 uint32_t lock_value;
112 do {
113 lock_value = lockword_.load(std::memory_order_relaxed);
114 } while ((lock_value & kSpinLockHeld) != 0 && --c > 0);
115 return lock_value;
116 }
117
SlowLock()118 void SpinLock::SlowLock() {
119 uint32_t lock_value = SpinLoop();
120 lock_value = TryLockInternal(lock_value, 0);
121 if ((lock_value & kSpinLockHeld) == 0) {
122 return;
123 }
124 // The lock was not obtained initially, so this thread needs to wait for
125 // it. Record the current timestamp in the local variable wait_start_time
126 // so the total wait time can be stored in the lockword once this thread
127 // obtains the lock.
128 int64_t wait_start_time = CycleClock::Now();
129 uint32_t wait_cycles = 0;
130 int lock_wait_call_count = 0;
131 while ((lock_value & kSpinLockHeld) != 0) {
132 // If the lock is currently held, but not marked as having a sleeper, mark
133 // it as having a sleeper.
134 if ((lock_value & kWaitTimeMask) == 0) {
135 // Here, just "mark" that the thread is going to sleep. Don't store the
136 // lock wait time in the lock as that will cause the current lock
137 // owner to think it experienced contention.
138 if (lockword_.compare_exchange_strong(
139 lock_value, lock_value | kSpinLockSleeper,
140 std::memory_order_relaxed, std::memory_order_relaxed)) {
141 // Successfully transitioned to kSpinLockSleeper. Pass
142 // kSpinLockSleeper to the SpinLockWait routine to properly indicate
143 // the last lock_value observed.
144 lock_value |= kSpinLockSleeper;
145 } else if ((lock_value & kSpinLockHeld) == 0) {
146 // Lock is free again, so try and acquire it before sleeping. The
147 // new lock state will be the number of cycles this thread waited if
148 // this thread obtains the lock.
149 lock_value = TryLockInternal(lock_value, wait_cycles);
150 continue; // Skip the delay at the end of the loop.
151 }
152 }
153
154 base_internal::SchedulingMode scheduling_mode;
155 if ((lock_value & kSpinLockCooperative) != 0) {
156 scheduling_mode = base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL;
157 } else {
158 scheduling_mode = base_internal::SCHEDULE_KERNEL_ONLY;
159 }
160 // SpinLockDelay() calls into fiber scheduler, we need to see
161 // synchronization there to avoid false positives.
162 ABSL_TSAN_MUTEX_PRE_DIVERT(this, 0);
163 // Wait for an OS specific delay.
164 base_internal::SpinLockDelay(&lockword_, lock_value, ++lock_wait_call_count,
165 scheduling_mode);
166 ABSL_TSAN_MUTEX_POST_DIVERT(this, 0);
167 // Spin again after returning from the wait routine to give this thread
168 // some chance of obtaining the lock.
169 lock_value = SpinLoop();
170 wait_cycles = EncodeWaitCycles(wait_start_time, CycleClock::Now());
171 lock_value = TryLockInternal(lock_value, wait_cycles);
172 }
173 }
174
SlowUnlock(uint32_t lock_value)175 void SpinLock::SlowUnlock(uint32_t lock_value) {
176 base_internal::SpinLockWake(&lockword_,
177 false); // wake waiter if necessary
178
179 // If our acquisition was contended, collect contentionz profile info. We
180 // reserve a unitary wait time to represent that a waiter exists without our
181 // own acquisition having been contended.
182 if ((lock_value & kWaitTimeMask) != kSpinLockSleeper) {
183 const uint64_t wait_cycles = DecodeWaitCycles(lock_value);
184 ABSL_TSAN_MUTEX_PRE_DIVERT(this, 0);
185 submit_profile_data(this, wait_cycles);
186 ABSL_TSAN_MUTEX_POST_DIVERT(this, 0);
187 }
188 }
189
// We use the upper 29 bits of the lock word to store the time spent waiting to
// acquire this lock.  This is reported by contentionz profiling.  Since the
// lower bits of the cycle counter wrap very quickly on high-frequency
// processors we divide to reduce the granularity to 2^PROFILE_TIMESTAMP_SHIFT
// sized units.  On a 4 GHz machine this will lose track of wait times greater
// than (2^29 / 4 GHz) * 128 =~ 17.2 seconds.  Such waits should be extremely
// rare.
enum { PROFILE_TIMESTAMP_SHIFT = 7 };  // Wait times are kept in 128-cycle units.
enum { LOCKWORD_RESERVED_SHIFT = 3 };  // We currently reserve the lower 3 bits.
198
EncodeWaitCycles(int64_t wait_start_time,int64_t wait_end_time)199 uint32_t SpinLock::EncodeWaitCycles(int64_t wait_start_time,
200 int64_t wait_end_time) {
201 static const int64_t kMaxWaitTime =
202 std::numeric_limits<uint32_t>::max() >> LOCKWORD_RESERVED_SHIFT;
203 int64_t scaled_wait_time =
204 (wait_end_time - wait_start_time) >> PROFILE_TIMESTAMP_SHIFT;
205
206 // Return a representation of the time spent waiting that can be stored in
207 // the lock word's upper bits.
208 uint32_t clamped = static_cast<uint32_t>(
209 std::min(scaled_wait_time, kMaxWaitTime) << LOCKWORD_RESERVED_SHIFT);
210
211 if (clamped == 0) {
212 return kSpinLockSleeper; // Just wake waiters, but don't record contention.
213 }
214 // Bump up value if necessary to avoid returning kSpinLockSleeper.
215 const uint32_t kMinWaitTime =
216 kSpinLockSleeper + (1 << LOCKWORD_RESERVED_SHIFT);
217 if (clamped == kSpinLockSleeper) {
218 return kMinWaitTime;
219 }
220 return clamped;
221 }
222
DecodeWaitCycles(uint32_t lock_value)223 uint64_t SpinLock::DecodeWaitCycles(uint32_t lock_value) {
224 // Cast to uint32_t first to ensure bits [63:32] are cleared.
225 const uint64_t scaled_wait_time =
226 static_cast<uint32_t>(lock_value & kWaitTimeMask);
227 return scaled_wait_time
228 << (PROFILE_TIMESTAMP_SHIFT - LOCKWORD_RESERVED_SHIFT);
229 }
230
231 } // namespace base_internal
232 ABSL_NAMESPACE_END
233 } // namespace absl
234