// SPDX-License-Identifier: Apache-2.0 OR MIT

/*
Fallback implementation using global locks.

This implementation uses a seqlock for each global lock.

This is based on the global locks in crossbeam-utils's `AtomicCell`,
but the seqlock is implemented in a way that does not depend on UB
(see the comments in the optimistic_read method in the atomic! macro for details).

Note that we cannot use a lock per atomic type, since the in-memory representation of the atomic
type and the value type must be the same.
*/
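
// Rough sketch of how the seqlock is used below (illustration only; the real reader and
// writer paths are `load`/`store` in the `atomic!` macro, and the lock itself lives in
// seq_lock.rs / seq_lock_wide.rs). `byte_wise_atomic_load`/`byte_wise_atomic_store` stand
// in for the chunked loads/stores done by `optimistic_read`/`write`:
//
//     // optimistic reader
//     if let Some(stamp) = lock.optimistic_read() {
//         let val = byte_wise_atomic_load();  // may observe a torn value
//         if lock.validate_read(stamp) {
//             return val;                     // stamp unchanged, so val was not torn
//         }
//     }
//     // otherwise, fall back to reading under the write lock
//
//     // writer
//     let guard = lock.write();
//     byte_wise_atomic_store(val);
//     drop(guard);                            // releases the lock and increments the stamp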

#![cfg_attr(
    any(
        all(
            target_arch = "x86_64",
            not(portable_atomic_no_outline_atomics),
            not(any(target_env = "sgx", miri)),
        ),
        all(
            target_arch = "powerpc64",
            feature = "fallback",
            not(portable_atomic_no_outline_atomics),
            portable_atomic_outline_atomics, // TODO(powerpc64): currently disabled by default
            any(
                all(
                    target_os = "linux",
                    any(
                        target_env = "gnu",
                        all(
                            any(target_env = "musl", target_env = "ohos"),
                            not(target_feature = "crt-static"),
                        ),
                        portable_atomic_outline_atomics,
                    ),
                ),
                target_os = "android",
                target_os = "freebsd",
                all(target_os = "openbsd", portable_atomic_outline_atomics),
            ),
            not(any(miri, portable_atomic_sanitize_thread)),
        ),
        all(
            target_arch = "riscv32",
            not(any(miri, portable_atomic_sanitize_thread)),
            not(portable_atomic_no_asm),
            not(portable_atomic_pre_llvm_19),
            any(
                target_feature = "experimental-zacas",
                portable_atomic_target_feature = "experimental-zacas",
                all(
                    feature = "fallback",
                    not(portable_atomic_no_outline_atomics),
                    any(test, portable_atomic_outline_atomics), // TODO(riscv): currently disabled by default
                    any(target_os = "linux", target_os = "android"),
                ),
            ),
        ),
        all(
            target_arch = "riscv64",
            not(portable_atomic_no_asm),
            not(portable_atomic_pre_llvm_19),
            any(
                target_feature = "experimental-zacas",
                portable_atomic_target_feature = "experimental-zacas",
                all(
                    feature = "fallback",
                    not(portable_atomic_no_outline_atomics),
                    any(test, portable_atomic_outline_atomics), // TODO(riscv): currently disabled by default
                    any(target_os = "linux", target_os = "android"),
                    not(any(miri, portable_atomic_sanitize_thread)),
                ),
            ),
        ),
        all(
            target_arch = "arm",
            any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
            any(target_os = "linux", target_os = "android"),
            not(portable_atomic_no_outline_atomics),
        ),
    ),
    allow(dead_code)
)]
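
// On the configurations listed above, the outline-atomics (run-time detection) or zacas-based
// implementations may be selected instead, so parts of this fallback can end up unused there;
// allow(dead_code) suppresses the resulting warnings.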

#[macro_use]
pub(crate) mod utils;

// Use the "wide" sequence lock if the pointer width is <= 32, to prevent its counter from
// wrapping around.
//
// On narrow architectures (pointer width <= 16), the counter is still <= 32-bit and may be
// vulnerable to wrap-around. That is mostly okay, since on such primitive hardware the
// counter will not be incremented that fast.
//
// Some 64-bit architectures have an ABI with 32-bit pointer width (e.g., x86_64 X32 ABI,
// AArch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is available and fast,
// so use it to implement the normal sequence lock.
cfg_has_fast_atomic_64! {
    mod seq_lock;
}
cfg_no_fast_atomic_64! {
    #[path = "seq_lock_wide.rs"]
    mod seq_lock;
}

use core::{cell::UnsafeCell, mem, sync::atomic::Ordering};

use seq_lock::{SeqLock, SeqLockWriteGuard};
use utils::CachePadded;

// Some 64-bit architectures have an ABI with 32-bit pointer width (e.g., x86_64 X32 ABI,
// AArch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is fast,
// so use it to reduce the number of chunks in the byte-wise atomic memcpy.
use seq_lock::{AtomicChunk, Chunk};
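// (`Chunk` is the integer type used for the chunked atomic loads/stores in
// `optimistic_read`/`write` below, and `AtomicChunk` is its atomic counterpart;
// presumably u64/AtomicU64 where fast 64-bit atomics exist, usize/AtomicUsize otherwise.)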

// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L969-L1016.
#[inline]
#[must_use]
fn lock(addr: usize) -> &'static SeqLock {
    // The number of locks is a prime number because we want to make sure `addr % LEN` gets
    // dispersed across all locks.
    //
    // crossbeam-utils 0.8.7 uses 97 here but does not use CachePadded,
    // so the actual concurrency level will be smaller.
    const LEN: usize = 67;
    const L: CachePadded<SeqLock> = CachePadded::new(SeqLock::new());
    static LOCKS: [CachePadded<SeqLock>; LEN] = [
        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
        L, L, L, L, L, L, L,
    ];

    // If the modulus is a constant number, the compiler will use crazy math to transform this into
    // a sequence of cheap arithmetic operations rather than using the slow modulo instruction.
    &LOCKS[addr % LEN]
}
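
// For example (illustrative only): with LEN = 67, two atomics whose addresses differ by a
// multiple of 67 share a lock, while addresses that differ by 1 map to different locks:
//
//     assert!(core::ptr::eq(lock(0x1000), lock(0x1000 + 67)));
//     assert!(!core::ptr::eq(lock(0x1000), lock(0x1000 + 1)));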

macro_rules! atomic {
    ($atomic_type:ident, $int_type:ident, $align:literal) => {
        #[repr(C, align($align))]
        pub(crate) struct $atomic_type {
            v: UnsafeCell<$int_type>,
        }

        impl $atomic_type {
            const LEN: usize = mem::size_of::<$int_type>() / mem::size_of::<Chunk>();

            #[inline]
            unsafe fn chunks(&self) -> &[AtomicChunk; Self::LEN] {
                static_assert!($atomic_type::LEN > 1);
                static_assert!(mem::size_of::<$int_type>() % mem::size_of::<Chunk>() == 0);

                // SAFETY: the caller must uphold the safety contract for `chunks`.
                unsafe { &*(self.v.get() as *const $int_type as *const [AtomicChunk; Self::LEN]) }
            }

            #[inline]
            fn optimistic_read(&self) -> $int_type {
                // Using `MaybeUninit<[usize; Self::LEN]>` here doesn't change codegen: https://godbolt.org/z/86f8s733M
                let mut dst: [Chunk; Self::LEN] = [0; Self::LEN];
                // SAFETY:
                // - There are no threads that perform non-atomic concurrent write operations.
                // - There is no writer that updates the value using atomic operations of different granularity.
                //
                // If the atomic operation is not used here, it will cause a data race
                // when `write` performs a concurrent write operation.
                // Such a data race is sometimes considered virtually unproblematic
                // in SeqLock implementations:
                //
                // - https://github.com/Amanieu/seqlock/issues/2
                // - https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L1111-L1116
                // - https://rust-lang.zulipchat.com/#narrow/stream/136281-t-lang.2Fwg-unsafe-code-guidelines/topic/avoiding.20UB.20due.20to.20races.20by.20discarding.20result.3F
                //
                // However, in our use case, the implementation that loads/stores the value as
                // chunks of usize is fast enough and sound, so we use that implementation.
                //
                // See also the atomic-memcpy crate, a generic implementation of this pattern:
                // https://github.com/taiki-e/atomic-memcpy
                let chunks = unsafe { self.chunks() };
                for i in 0..Self::LEN {
                    dst[i] = chunks[i].load(Ordering::Relaxed);
                }
                // SAFETY: integers are plain old data types so we can always transmute to them.
                unsafe { mem::transmute::<[Chunk; Self::LEN], $int_type>(dst) }
            }
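
            // For example (illustrative only): if a writer is concurrently changing the value
            // from all-zeros to all-ones chunk by chunk, optimistic_read may observe some
            // chunks already updated and others not, i.e. a torn value. Callers must therefore
            // either validate the seqlock stamp afterwards (see load) or hold the write lock
            // (see read).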

            #[inline]
            fn read(&self, _guard: &SeqLockWriteGuard<'static>) -> $int_type {
                // This calls optimistic_read, which can return a torn value, but the resulting
                // value is guaranteed not to be torn because we hold the lock to write.
                self.optimistic_read()
            }

            #[inline]
            fn write(&self, val: $int_type, _guard: &SeqLockWriteGuard<'static>) {
                // SAFETY: integers are plain old data types so we can always transmute them to arrays of integers.
                let val = unsafe { mem::transmute::<$int_type, [Chunk; Self::LEN]>(val) };
                // SAFETY:
                // - The guard guarantees that we hold the lock to write.
                // - There are no threads that perform non-atomic concurrent read or write operations.
                //
                // See optimistic_read for the reason that atomic operations are used here.
                let chunks = unsafe { self.chunks() };
                for i in 0..Self::LEN {
                    chunks[i].store(val[i], Ordering::Relaxed);
                }
            }
        }

        // Send is implicitly implemented.
        // SAFETY: any data races are prevented by the lock and atomic operation.
        unsafe impl Sync for $atomic_type {}

        impl_default_no_fetch_ops!($atomic_type, $int_type);
        impl_default_bit_opts!($atomic_type, $int_type);
        impl $atomic_type {
            #[inline]
            pub(crate) const fn new(v: $int_type) -> Self {
                Self { v: UnsafeCell::new(v) }
            }

            #[inline]
            pub(crate) fn is_lock_free() -> bool {
                Self::IS_ALWAYS_LOCK_FREE
            }
            pub(crate) const IS_ALWAYS_LOCK_FREE: bool = false;

            #[inline]
            pub(crate) fn get_mut(&mut self) -> &mut $int_type {
                // SAFETY: the mutable reference guarantees unique ownership.
                // (UnsafeCell::get_mut requires Rust 1.50)
                unsafe { &mut *self.v.get() }
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn load(&self, order: Ordering) -> $int_type {
                crate::utils::assert_load_ordering(order);
                let lock = lock(self.v.get() as usize);

                // Try doing an optimistic read first.
                if let Some(stamp) = lock.optimistic_read() {
                    let val = self.optimistic_read();

                    if lock.validate_read(stamp) {
                        return val;
                    }
                }

                // Grab a regular write lock so that writers don't starve this load.
                let guard = lock.write();
                let val = self.read(&guard);
                // The value hasn't been changed. Drop the guard without incrementing the stamp.
                guard.abort();
                val
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn store(&self, val: $int_type, order: Ordering) {
                crate::utils::assert_store_ordering(order);
                let guard = lock(self.v.get() as usize).write();
                self.write(val, &guard)
            }

            #[inline]
            pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(val, &guard);
                prev
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn compare_exchange(
                &self,
                current: $int_type,
                new: $int_type,
                success: Ordering,
                failure: Ordering,
            ) -> Result<$int_type, $int_type> {
                crate::utils::assert_compare_exchange_ordering(success, failure);
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                if prev == current {
                    self.write(new, &guard);
                    Ok(prev)
                } else {
                    // The value hasn't been changed. Drop the guard without incrementing the stamp.
                    guard.abort();
                    Err(prev)
                }
            }
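
            // For example (illustrative only), callers can build further read-modify-write
            // operations on top of compare_exchange with the usual CAS loop (`f` stands for an
            // arbitrary update function):
            //
            //     let mut old = a.load(Ordering::Relaxed);
            //     loop {
            //         match a.compare_exchange(old, f(old), Ordering::SeqCst, Ordering::Relaxed) {
            //             Ok(_) => break,
            //             Err(now) => old = now,
            //         }
            //     }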

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn compare_exchange_weak(
                &self,
                current: $int_type,
                new: $int_type,
                success: Ordering,
                failure: Ordering,
            ) -> Result<$int_type, $int_type> {
                self.compare_exchange(current, new, success, failure)
            }

            #[inline]
            pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_add(val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_sub(val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev & val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(!(prev & val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev | val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev ^ val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(core::cmp::max(prev, val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(core::cmp::min(prev, val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(!prev, &guard);
                prev
            }
            #[inline]
            pub(crate) fn not(&self, order: Ordering) {
                self.fetch_not(order);
            }

            #[inline]
            pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_neg(), &guard);
                prev
            }
            #[inline]
            pub(crate) fn neg(&self, order: Ordering) {
                self.fetch_neg(order);
            }

            #[inline]
            pub(crate) const fn as_ptr(&self) -> *mut $int_type {
                self.v.get()
            }
        }
    };
}

#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_64)))]
#[cfg_attr(
    not(portable_atomic_no_cfg_target_has_atomic),
    cfg(any(
        test,
        not(any(
            target_has_atomic = "64",
            all(
                target_arch = "riscv32",
                not(any(miri, portable_atomic_sanitize_thread)),
                not(portable_atomic_no_asm),
                any(
                    target_feature = "experimental-zacas",
                    portable_atomic_target_feature = "experimental-zacas",
                ),
            ),
        ))
    ))
)]
cfg_no_fast_atomic_64! {
    atomic!(AtomicI64, i64, 8);
    atomic!(AtomicU64, u64, 8);
}

atomic!(AtomicI128, i128, 16);
atomic!(AtomicU128, u128, 16);
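
// For AtomicU128, for example, the invocation above expands to (roughly):
//
//     #[repr(C, align(16))]
//     pub(crate) struct AtomicU128 {
//         v: UnsafeCell<u128>,
//     }
//
// plus the method impls defined in `atomic!`, with the value read/written as Chunk-sized
// atomic pieces under the corresponding global seqlock.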

#[cfg(test)]
mod tests {
    use super::*;

    cfg_no_fast_atomic_64! {
        test_atomic_int!(i64);
        test_atomic_int!(u64);
    }
    test_atomic_int!(i128);
    test_atomic_int!(u128);

    // load/store/swap implementation is not affected by signedness, so it is
    // enough to test only unsigned types.
    cfg_no_fast_atomic_64! {
        stress_test!(u64);
    }
    stress_test!(u128);
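
    // A minimal usage sanity check of the API generated by `atomic!` above (illustrative
    // only; the macro-generated tests above already cover these operations more thoroughly).
    #[test]
    fn seqlock_fallback_usage() {
        let a = AtomicU128::new(1);
        assert!(!AtomicU128::is_lock_free());
        assert_eq!(a.load(Ordering::SeqCst), 1);
        a.store(2, Ordering::SeqCst);
        assert_eq!(a.swap(3, Ordering::SeqCst), 2);
        assert_eq!(a.compare_exchange(3, 4, Ordering::SeqCst, Ordering::SeqCst), Ok(3));
        assert_eq!(a.compare_exchange(9, 5, Ordering::SeqCst, Ordering::SeqCst), Err(4));
        assert_eq!(a.fetch_add(1, Ordering::SeqCst), 4);
        assert_eq!(a.load(Ordering::SeqCst), 5);
    }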
}