// SPDX-License-Identifier: Apache-2.0 OR MIT

/*
128-bit atomic implementation on PowerPC64.

powerpc64 on pwr8+ supports 128-bit atomics (load/store/LL/SC):
https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445
https://github.com/llvm/llvm-project/blob/llvmorg-19.1.0/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
https://github.com/llvm/llvm-project/blob/llvmorg-19.1.0/llvm/test/CodeGen/PowerPC/atomics-i128.ll

powerpc64le is pwr8+ by default https://github.com/llvm/llvm-project/blob/llvmorg-19.1.0/llvm/lib/Target/PowerPC/PPC.td#L702
See also https://github.com/rust-lang/rust/issues/59932

Note that we do not separate LL and SC into separate functions, but handle
them within a single asm block. This is because it is theoretically possible
for the compiler to insert operations that might clear the reservation between
LL and SC. See aarch64.rs for details.

Note: On Miri and ThreadSanitizer, which do not support inline assembly, we don't use
this module and use intrinsics.rs instead.

Refs:
- Power ISA https://openpowerfoundation.org/specifications/isa
- AIX Assembler language reference https://www.ibm.com/docs/en/aix/7.3?topic=aix-assembler-language-reference
- atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit

Generated asm:
- powerpc64 (pwr8) https://godbolt.org/z/71xGhY9qf
- powerpc64le https://godbolt.org/z/4TexcjGEz
*/

include!("macros.rs");

#[cfg(not(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
)))]
#[path = "../fallback/outline_atomics.rs"]
mod fallback;

// On musl with static linking, it seems that getauxval is not always available.
// See detect/auxv.rs for more.
#[cfg(not(portable_atomic_no_outline_atomics))]
#[cfg(any(test, portable_atomic_outline_atomics))] // TODO(powerpc64): currently disabled by default
#[cfg(any(
    test,
    not(any(
        target_feature = "quadword-atomics",
        portable_atomic_target_feature = "quadword-atomics",
    )),
))]
#[cfg(any(
    all(
        target_os = "linux",
        any(
            target_env = "gnu",
            all(any(target_env = "musl", target_env = "ohos"), not(target_feature = "crt-static")),
            portable_atomic_outline_atomics,
        ),
    ),
    target_os = "android",
    target_os = "freebsd",
    all(target_os = "openbsd", portable_atomic_outline_atomics),
))]
#[path = "../detect/auxv.rs"]
mod detect;

use core::{arch::asm, sync::atomic::Ordering};

use crate::utils::{Pair, U128};

macro_rules! debug_assert_pwr8 {
    () => {
        #[cfg(not(any(
            target_feature = "quadword-atomics",
            portable_atomic_target_feature = "quadword-atomics",
        )))]
        {
            debug_assert!(detect::detect().has_quadword_atomics());
        }
    };
}

// Refs: https://www.ibm.com/docs/en/aix/7.3?topic=ops-machine-pseudo-op
//
// This is similar to #[target_feature(enable = "quadword-atomics")], except that there are
// no compiler guarantees regarding (un)inlining, and the scope is within an asm
// block rather than a function. We use this directive because #[target_feature(enable = "quadword-atomics")]
// is not supported as of Rust 1.70-nightly.
//
// start_pwr8 and end_pwr8 must be used in pairs.
//
// Note: If power8 instructions are not available at compile-time, we must guarantee that
// the function that uses it is not inlined into a function where it is not
// clear whether power8 instructions are available. Otherwise, (even if we checked whether
// power8 instructions are available at run-time) optimizations that reorder its
// instructions across the if condition might introduce undefined behavior.
// (see also https://rust-lang.github.io/rfcs/2045-target-feature.html#safely-inlining-target_feature-functions-on-more-contexts)
// However, our code uses the ifunc helper macro that works with function pointers,
// so we don't have to worry about this unless we call it without the helper macro.
macro_rules! start_pwr8 {
    () => {
        ".machine push\n.machine power8"
    };
}
macro_rules! end_pwr8 {
    () => {
        ".machine pop"
    };
}
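// Every asm block below is written as asm!(start_pwr8!(), ..., end_pwr8!(), ...),
// so that pwr8-only mnemonics (lq, stq, lqarx, stqcx.) assemble even when the
// assembler's default machine level is older than power8.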

macro_rules! atomic_rmw {
    ($op:ident, $order:ident) => {
        match $order {
            Ordering::Relaxed => $op!("", ""),
            Ordering::Acquire => $op!("lwsync", ""),
            Ordering::Release => $op!("", "lwsync"),
            Ordering::AcqRel => $op!("lwsync", "lwsync"),
            Ordering::SeqCst => $op!("lwsync", "sync"),
            _ => unreachable!(),
        }
    };
}
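// For example, atomic_rmw!(swap, Ordering::SeqCst) expands to swap!("lwsync", "sync");
// in the op macros below, the second argument ("sync" here) is emitted before the
// LL/SC loop and the first ("lwsync" here) after it.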

// Extracts and checks the EQ bit of cr0.
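// mfcr copies the whole 32-bit condition register into the low 32 bits of a GPR;
// cr0 occupies the 4 most significant of those bits, in LT, GT, EQ, SO order, so
// the EQ bit of cr0 corresponds to the mask 0x20000000.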
#[inline(always)]
fn extract_cr0(r: u64) -> bool {
    r & 0x20000000 != 0
}

// If quadword-atomics is available at compile-time, we can always use pwr8_fn.
#[cfg(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
))]
use atomic_load_pwr8 as atomic_load;
// Otherwise, we need to do run-time detection and can use pwr8_fn only if quadword-atomics is available.
#[cfg(not(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
)))]
#[inline]
unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 {
    fn_alias! {
        // inline(never) is just a hint and not strictly necessary
        // because we use the ifunc helper macro, but it is used for clarity.
        #[inline(never)]
        unsafe fn(src: *mut u128) -> u128;
        atomic_load_pwr8_relaxed = atomic_load_pwr8(Ordering::Relaxed);
        atomic_load_pwr8_acquire = atomic_load_pwr8(Ordering::Acquire);
        atomic_load_pwr8_seqcst = atomic_load_pwr8(Ordering::SeqCst);
    }
    // SAFETY: the caller must uphold the safety contract.
    // we only call atomic_load_pwr8 if quadword-atomics is available.
    unsafe {
        match order {
            Ordering::Relaxed => {
                ifunc!(unsafe fn(src: *mut u128) -> u128 {
                    if detect::detect().has_quadword_atomics() {
                        atomic_load_pwr8_relaxed
                    } else {
                        fallback::atomic_load_non_seqcst
                    }
                })
            }
            Ordering::Acquire => {
                ifunc!(unsafe fn(src: *mut u128) -> u128 {
                    if detect::detect().has_quadword_atomics() {
                        atomic_load_pwr8_acquire
                    } else {
                        fallback::atomic_load_non_seqcst
                    }
                })
            }
            Ordering::SeqCst => {
                ifunc!(unsafe fn(src: *mut u128) -> u128 {
                    if detect::detect().has_quadword_atomics() {
                        atomic_load_pwr8_seqcst
                    } else {
                        fallback::atomic_load_seqcst
                    }
                })
            }
            _ => unreachable!(),
        }
    }
}
#[inline]
unsafe fn atomic_load_pwr8(src: *mut u128, order: Ordering) -> u128 {
    debug_assert!(src as usize % 16 == 0);
    debug_assert_pwr8!();

    // SAFETY: the caller must uphold the safety contract.
    //
    // Refs: "3.3.4 Fixed Point Load and Store Quadword Instructions" of Power ISA
    unsafe {
        let (out_hi, out_lo);
        macro_rules! atomic_load_acquire {
            ($release:tt) => {
                asm!(
                    start_pwr8!(),
                    $release,
                    "lq %r4, 0({src})",
                    // Lightweight acquire sync
                    // Refs: https://github.com/boostorg/atomic/blob/boost-1.79.0/include/boost/atomic/detail/core_arch_ops_gcc_ppc.hpp#L47-L62
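                    // The register is compared with itself, so the branch is never taken;
                    // the cmp + bne- + isync sequence only creates a control dependency
                    // followed by isync, ordering subsequent loads after the lq.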
                    "cmpd %cr7, %r4, %r4",
                    "bne- %cr7, 2f",
                    "2:",
                    "isync",
                    end_pwr8!(),
                    src = in(reg_nonzero) ptr_reg!(src),
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                    out("r4") out_hi,
                    out("r5") out_lo,
                    out("cr7") _,
                    options(nostack, preserves_flags),
                )
            };
        }
        match order {
            Ordering::Relaxed => {
                asm!(
                    start_pwr8!(),
                    "lq %r4, 0({src})",
                    end_pwr8!(),
                    src = in(reg_nonzero) ptr_reg!(src),
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                    out("r4") out_hi,
                    out("r5") out_lo,
                    options(nostack, preserves_flags),
                );
            }
            Ordering::Acquire => atomic_load_acquire!(""),
            Ordering::SeqCst => atomic_load_acquire!("sync"),
            _ => unreachable!(),
        }
        U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole
    }
}

// If quadword-atomics is available at compile-time, we can always use pwr8_fn.
#[cfg(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
))]
use atomic_store_pwr8 as atomic_store;
// Otherwise, we need to do run-time detection and can use pwr8_fn only if quadword-atomics is available.
#[cfg(not(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
)))]
#[inline]
unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
    fn_alias! {
        // inline(never) is just a hint and not strictly necessary
        // because we use the ifunc helper macro, but it is used for clarity.
        #[inline(never)]
        unsafe fn(dst: *mut u128, val: u128);
        atomic_store_pwr8_relaxed = atomic_store_pwr8(Ordering::Relaxed);
        atomic_store_pwr8_release = atomic_store_pwr8(Ordering::Release);
        atomic_store_pwr8_seqcst = atomic_store_pwr8(Ordering::SeqCst);
    }
    // SAFETY: the caller must uphold the safety contract.
    // we only call atomic_store_pwr8 if quadword-atomics is available.
    unsafe {
        match order {
            Ordering::Relaxed => {
                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
                    if detect::detect().has_quadword_atomics() {
                        atomic_store_pwr8_relaxed
                    } else {
                        fallback::atomic_store_non_seqcst
                    }
                });
            }
            Ordering::Release => {
                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
                    if detect::detect().has_quadword_atomics() {
                        atomic_store_pwr8_release
                    } else {
                        fallback::atomic_store_non_seqcst
                    }
                });
            }
            Ordering::SeqCst => {
                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
                    if detect::detect().has_quadword_atomics() {
                        atomic_store_pwr8_seqcst
                    } else {
                        fallback::atomic_store_seqcst
                    }
                });
            }
            _ => unreachable!(),
        }
    }
}
#[inline]
unsafe fn atomic_store_pwr8(dst: *mut u128, val: u128, order: Ordering) {
    debug_assert!(dst as usize % 16 == 0);
    debug_assert_pwr8!();

    // SAFETY: the caller must uphold the safety contract.
    //
    // Refs: "3.3.4 Fixed Point Load and Store Quadword Instructions" of Power ISA
    unsafe {
        let val = U128 { whole: val };
        macro_rules! atomic_store {
            ($release:tt) => {
                asm!(
                    start_pwr8!(),
                    $release,
                    "stq %r4, 0({dst})",
                    end_pwr8!(),
                    dst = in(reg_nonzero) ptr_reg!(dst),
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                    in("r4") val.pair.hi,
                    in("r5") val.pair.lo,
                    options(nostack, preserves_flags),
                )
            };
        }
        match order {
            Ordering::Relaxed => atomic_store!(""),
            Ordering::Release => atomic_store!("lwsync"),
            Ordering::SeqCst => atomic_store!("sync"),
            _ => unreachable!(),
        }
    }
}

#[inline]
unsafe fn atomic_compare_exchange(
    dst: *mut u128,
    old: u128,
    new: u128,
    success: Ordering,
    failure: Ordering,
) -> Result<u128, u128> {
    let success = crate::utils::upgrade_success_ordering(success, failure);

    #[cfg(any(
        target_feature = "quadword-atomics",
        portable_atomic_target_feature = "quadword-atomics",
    ))]
    // SAFETY: the caller must uphold the safety contract.
    // cfg guarantees that quadword atomics instructions are available at compile-time.
    let (prev, ok) = unsafe { atomic_compare_exchange_pwr8(dst, old, new, success) };
    #[cfg(not(any(
        target_feature = "quadword-atomics",
        portable_atomic_target_feature = "quadword-atomics",
    )))]
    // SAFETY: the caller must uphold the safety contract.
    let (prev, ok) = unsafe { atomic_compare_exchange_ifunc(dst, old, new, success) };
    if ok {
        Ok(prev)
    } else {
        Err(prev)
    }
}
#[inline]
unsafe fn atomic_compare_exchange_pwr8(
    dst: *mut u128,
    old: u128,
    new: u128,
    order: Ordering,
) -> (u128, bool) {
    debug_assert!(dst as usize % 16 == 0);
    debug_assert_pwr8!();

    // SAFETY: the caller must uphold the safety contract.
    //
    // Refs: "4.6.2.2 128-bit Load And Reserve and Store Conditional Instructions" of Power ISA
    unsafe {
        let old = U128 { whole: old };
        let new = U128 { whole: new };
        let (mut prev_hi, mut prev_lo);
        let mut r;
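        // The xor/xor/or. sequence in the asm below implements a 128-bit comparison:
        // each xor is zero iff the corresponding half equals `old`, and or. sets the
        // EQ bit of cr0 iff both halves are equal.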
        macro_rules! cmpxchg {
            ($acquire:tt, $release:tt) => {
                asm!(
                    start_pwr8!(),
                    $release,
                    "2:",
                    "lqarx %r8, 0, {dst}",
                    "xor {tmp_lo}, %r9, {old_lo}",
                    "xor {tmp_hi}, %r8, {old_hi}",
                    "or. {tmp_lo}, {tmp_lo}, {tmp_hi}",
                    "bne %cr0, 3f", // jump if compare failed
                    "stqcx. %r6, 0, {dst}",
                    "bne %cr0, 2b", // continue loop if store failed
                    "3:",
                    // if compare failed EQ bit is cleared, if stqcx succeeds EQ bit is set.
                    "mfcr {tmp_lo}",
                    $acquire,
                    end_pwr8!(),
                    dst = in(reg_nonzero) ptr_reg!(dst),
                    old_hi = in(reg) old.pair.hi,
                    old_lo = in(reg) old.pair.lo,
                    tmp_hi = out(reg) _,
                    tmp_lo = out(reg) r,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                    in("r6") new.pair.hi,
                    in("r7") new.pair.lo,
                    out("r8") prev_hi,
                    out("r9") prev_lo,
                    out("cr0") _,
                    options(nostack, preserves_flags),
                )
            };
        }
        atomic_rmw!(cmpxchg, order);
        (U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole, extract_cr0(r))
    }
}

// Always use strong CAS for outline-atomics.
#[cfg(not(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
)))]
use atomic_compare_exchange as atomic_compare_exchange_weak;
#[cfg(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
))]
#[inline]
unsafe fn atomic_compare_exchange_weak(
    dst: *mut u128,
    old: u128,
    new: u128,
    success: Ordering,
    failure: Ordering,
) -> Result<u128, u128> {
    let success = crate::utils::upgrade_success_ordering(success, failure);

    // SAFETY: the caller must uphold the safety contract.
    // cfg guarantees that quadword atomics instructions are available at compile-time.
    let (prev, ok) = unsafe { atomic_compare_exchange_weak_pwr8(dst, old, new, success) };
    if ok {
        Ok(prev)
    } else {
        Err(prev)
    }
}
#[cfg(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
))]
#[inline]
unsafe fn atomic_compare_exchange_weak_pwr8(
    dst: *mut u128,
    old: u128,
    new: u128,
    order: Ordering,
) -> (u128, bool) {
    debug_assert!(dst as usize % 16 == 0);
    debug_assert_pwr8!();

    // SAFETY: the caller must uphold the safety contract.
    //
    // Refs: "4.6.2.2 128-bit Load And Reserve and Store Conditional Instructions" of Power ISA
    unsafe {
        let old = U128 { whole: old };
        let new = U128 { whole: new };
        let (mut prev_hi, mut prev_lo);
        let mut r;
        macro_rules! cmpxchg_weak {
            ($acquire:tt, $release:tt) => {
                asm!(
                    start_pwr8!(),
                    $release,
                    "lqarx %r8, 0, {dst}",
                    "xor {tmp_lo}, %r9, {old_lo}",
                    "xor {tmp_hi}, %r8, {old_hi}",
                    "or. {tmp_lo}, {tmp_lo}, {tmp_hi}",
                    "bne %cr0, 3f", // jump if compare failed
                    "stqcx. %r6, 0, {dst}",
                    "3:",
                    // if compare or stqcx failed EQ bit is cleared, if stqcx succeeds EQ bit is set.
                    "mfcr {tmp_lo}",
                    $acquire,
                    end_pwr8!(),
                    dst = in(reg_nonzero) ptr_reg!(dst),
                    old_hi = in(reg) old.pair.hi,
                    old_lo = in(reg) old.pair.lo,
                    tmp_hi = out(reg) _,
                    tmp_lo = out(reg) r,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                    in("r6") new.pair.hi,
                    in("r7") new.pair.lo,
                    out("r8") prev_hi,
                    out("r9") prev_lo,
                    out("cr0") _,
                    options(nostack, preserves_flags),
                )
            };
        }
        atomic_rmw!(cmpxchg_weak, order);
        (U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole, extract_cr0(r))
    }
}

// Do not use atomic_rmw_ll_sc_3 because it would need an extra `mr` (register move) to implement swap.
#[inline]
unsafe fn atomic_swap_pwr8(dst: *mut u128, val: u128, order: Ordering) -> u128 {
    debug_assert!(dst as usize % 16 == 0);
    debug_assert_pwr8!();

    // SAFETY: the caller must uphold the safety contract.
    unsafe {
        let val = U128 { whole: val };
        let (mut prev_hi, mut prev_lo);
        macro_rules! swap {
            ($acquire:tt, $release:tt) => {
                asm!(
                    start_pwr8!(),
                    $release,
                    "2:",
                    "lqarx %r6, 0, {dst}",
                    "stqcx. %r8, 0, {dst}",
                    "bne %cr0, 2b",
                    $acquire,
                    end_pwr8!(),
                    dst = in(reg_nonzero) ptr_reg!(dst),
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                    out("r6") prev_hi,
                    out("r7") prev_lo,
                    in("r8") val.pair.hi,
                    in("r9") val.pair.lo,
                    out("cr0") _,
                    options(nostack, preserves_flags),
                )
            };
        }
        atomic_rmw!(swap, order);
        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
    }
}

/// Atomic RMW by LL/SC loop (3 arguments)
/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
///
/// $op can use the following registers:
/// - val_hi/val_lo pair: val argument (read-only for `$op`)
/// - r6/r7 pair: previous value loaded by ll (read-only for `$op`)
/// - r8/r9 pair: new value that will be stored by sc
macro_rules! atomic_rmw_ll_sc_3 {
    ($name:ident, [$($reg:tt)*], $($op:tt)*) => {
        #[inline]
        unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 {
            debug_assert!(dst as usize % 16 == 0);
            debug_assert_pwr8!();
            // SAFETY: the caller must uphold the safety contract.
            unsafe {
                let val = U128 { whole: val };
                let (mut prev_hi, mut prev_lo);
                macro_rules! op {
                    ($acquire:tt, $release:tt) => {
                        asm!(
                            start_pwr8!(),
                            $release,
                            "2:",
                            "lqarx %r6, 0, {dst}",
                            $($op)*
                            "stqcx. %r8, 0, {dst}",
                            "bne %cr0, 2b",
                            $acquire,
                            end_pwr8!(),
                            dst = in(reg_nonzero) ptr_reg!(dst),
                            val_hi = in(reg) val.pair.hi,
                            val_lo = in(reg) val.pair.lo,
                            $($reg)*
                            // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                            // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                            out("r6") prev_hi,
                            out("r7") prev_lo,
                            out("r8") _, // new (hi)
                            out("r9") _, // new (lo)
                            out("cr0") _,
                            options(nostack, preserves_flags),
                        )
                    };
                }
                atomic_rmw!(op, order);
                U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
            }
        }
    };
}
/// Atomic RMW by LL/SC loop (2 arguments)
/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
///
/// $op can use the following registers:
/// - r6/r7 pair: previous value loaded by ll (read-only for `$op`)
/// - r8/r9 pair: new value that will be stored by sc
macro_rules! atomic_rmw_ll_sc_2 {
    ($name:ident, [$($reg:tt)*], $($op:tt)*) => {
        #[inline]
        unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 {
            debug_assert!(dst as usize % 16 == 0);
            debug_assert_pwr8!();
            // SAFETY: the caller must uphold the safety contract.
            unsafe {
                let (mut prev_hi, mut prev_lo);
                macro_rules! op {
                    ($acquire:tt, $release:tt) => {
                        asm!(
                            start_pwr8!(),
                            $release,
                            "2:",
                            "lqarx %r6, 0, {dst}",
                            $($op)*
                            "stqcx. %r8, 0, {dst}",
                            "bne %cr0, 2b",
                            $acquire,
                            end_pwr8!(),
                            dst = in(reg_nonzero) ptr_reg!(dst),
                            $($reg)*
                            // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                            // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                            out("r6") prev_hi,
                            out("r7") prev_lo,
                            out("r8") _, // new (hi)
                            out("r9") _, // new (lo)
                            out("cr0") _,
                            options(nostack, preserves_flags),
                        )
                    };
                }
                atomic_rmw!(op, order);
                U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
            }
        }
    };
}

atomic_rmw_ll_sc_3! {
    atomic_add_pwr8, [out("xer") _,],
    "addc %r9, {val_lo}, %r7",
    "adde %r8, {val_hi}, %r6",
}
atomic_rmw_ll_sc_3! {
    atomic_sub_pwr8, [out("xer") _,],
    "subc %r9, %r7, {val_lo}",
    "subfe %r8, {val_hi}, %r6",
}
atomic_rmw_ll_sc_3! {
    atomic_and_pwr8, [],
    "and %r9, {val_lo}, %r7",
    "and %r8, {val_hi}, %r6",
}
atomic_rmw_ll_sc_3! {
    atomic_nand_pwr8, [],
    "nand %r9, {val_lo}, %r7",
    "nand %r8, {val_hi}, %r6",
}
atomic_rmw_ll_sc_3! {
    atomic_or_pwr8, [],
    "or %r9, {val_lo}, %r7",
    "or %r8, {val_hi}, %r6",
}
atomic_rmw_ll_sc_3! {
    atomic_xor_pwr8, [],
    "xor %r9, {val_lo}, %r7",
    "xor %r8, {val_hi}, %r6",
}
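// 128-bit min/max by 64-bit halves: the hi half of the result is chosen by
// comparing the hi halves (signed for max/min, unsigned for umax/umin); the lo
// half follows that choice unless the hi halves are equal, in which case it is
// chosen by an unsigned comparison of the lo halves.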
atomic_rmw_ll_sc_3! {
    atomic_max_pwr8, [out("cr1") _,],
    "cmpld %r7, {val_lo}",        // (unsigned) compare lo 64-bit, store result to cr0
    "iselgt %r9, %r7, {val_lo}",  // select lo 64-bit based on GT bit in cr0
    "cmpd %cr1, %r6, {val_hi}",   // (signed) compare hi 64-bit, store result to cr1
    "isel %r8, %r7, {val_lo}, 5", // select lo 64-bit based on GT bit in cr1
    "cmpld %r6, {val_hi}",        // (unsigned) compare hi 64-bit, store result to cr0
    "iseleq %r9, %r9, %r8",       // select lo 64-bit based on EQ bit in cr0
    "isel %r8, %r6, {val_hi}, 5", // select hi 64-bit based on GT bit in cr1
}
atomic_rmw_ll_sc_3! {
    atomic_umax_pwr8, [],
    "cmpld %r7, {val_lo}",       // compare lo 64-bit, store result to cr0
    "iselgt %r9, %r7, {val_lo}", // select lo 64-bit based on GT bit in cr0
    "cmpld %r6, {val_hi}",       // compare hi 64-bit, store result to cr0
    "iselgt %r8, %r7, {val_lo}", // select lo 64-bit based on GT bit in cr0
    "iseleq %r9, %r9, %r8",      // select lo 64-bit based on EQ bit in cr0
    "iselgt %r8, %r6, {val_hi}", // select hi 64-bit based on GT bit in cr0
}
atomic_rmw_ll_sc_3! {
    atomic_min_pwr8, [out("cr1") _,],
    "cmpld %r7, {val_lo}",        // (unsigned) compare lo 64-bit, store result to cr0
    "isellt %r9, %r7, {val_lo}",  // select lo 64-bit based on LT bit in cr0
    "cmpd %cr1, %r6, {val_hi}",   // (signed) compare hi 64-bit, store result to cr1
    "isel %r8, %r7, {val_lo}, 4", // select lo 64-bit based on LT bit in cr1
    "cmpld %r6, {val_hi}",        // (unsigned) compare hi 64-bit, store result to cr0
    "iseleq %r9, %r9, %r8",       // select lo 64-bit based on EQ bit in cr0
    "isel %r8, %r6, {val_hi}, 4", // select hi 64-bit based on LT bit in cr1
}
atomic_rmw_ll_sc_3! {
    atomic_umin_pwr8, [],
    "cmpld %r7, {val_lo}",       // compare lo 64-bit, store result to cr0
    "isellt %r9, %r7, {val_lo}", // select lo 64-bit based on LT bit in cr0
    "cmpld %r6, {val_hi}",       // compare hi 64-bit, store result to cr0
    "isellt %r8, %r7, {val_lo}", // select lo 64-bit based on LT bit in cr0
    "iseleq %r9, %r9, %r8",      // select lo 64-bit based on EQ bit in cr0
    "isellt %r8, %r6, {val_hi}", // select hi 64-bit based on LT bit in cr0
}

#[inline]
unsafe fn atomic_not_pwr8(dst: *mut u128, order: Ordering) -> u128 {
    // SAFETY: the caller must uphold the safety contract.
    unsafe { atomic_xor_pwr8(dst, !0, order) }
}

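// 128-bit negation in two instructions: subtract the lo half from zero (which
// also sets the carry), then subtract the hi half from zero while propagating
// the borrow (subfze computes ~RA + CA).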
#[cfg(not(portable_atomic_pre_llvm_16))]
atomic_rmw_ll_sc_2! {
    atomic_neg_pwr8, [out("xer") _,],
    "subfic %r9, %r7, 0",
    "subfze %r8, %r6",
}
// LLVM 15 miscompiles subfic.
#[cfg(portable_atomic_pre_llvm_16)]
atomic_rmw_ll_sc_2! {
    atomic_neg_pwr8, [zero = in(reg) 0_u64, out("xer") _,],
    "subc %r9, {zero}, %r7",
    "subfze %r8, %r6",
}

macro_rules! select_atomic_rmw {
    (
        unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)?;
        pwr8 = $pwr8_fn:ident;
        non_seqcst_fallback = $non_seqcst_fallback_fn:ident;
        seqcst_fallback = $seqcst_fallback_fn:ident;
    ) => {
        // If quadword-atomics is available at compile-time, we can always use pwr8_fn.
        #[cfg(any(
            target_feature = "quadword-atomics",
            portable_atomic_target_feature = "quadword-atomics",
        ))]
        use $pwr8_fn as $name;
        // Otherwise, we need to do run-time detection and can use pwr8_fn only if quadword-atomics is available.
        #[cfg(not(any(
            target_feature = "quadword-atomics",
            portable_atomic_target_feature = "quadword-atomics",
        )))]
        #[inline]
        unsafe fn $name($($arg)*, order: Ordering) $(-> $ret_ty)? {
            fn_alias! {
                // inline(never) is just a hint and not strictly necessary
                // because we use the ifunc helper macro, but it is used for clarity.
                #[inline(never)]
                unsafe fn($($arg)*) $(-> $ret_ty)?;
                pwr8_relaxed_fn = $pwr8_fn(Ordering::Relaxed);
                pwr8_acquire_fn = $pwr8_fn(Ordering::Acquire);
                pwr8_release_fn = $pwr8_fn(Ordering::Release);
                pwr8_acqrel_fn = $pwr8_fn(Ordering::AcqRel);
                pwr8_seqcst_fn = $pwr8_fn(Ordering::SeqCst);
            }
            // SAFETY: the caller must uphold the safety contract.
            // we only call pwr8_fn if quadword-atomics is available.
            unsafe {
                match order {
                    Ordering::Relaxed => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_relaxed_fn
                            } else {
                                fallback::$non_seqcst_fallback_fn
                            }
                        })
                    }
                    Ordering::Acquire => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_acquire_fn
                            } else {
                                fallback::$non_seqcst_fallback_fn
                            }
                        })
                    }
                    Ordering::Release => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_release_fn
                            } else {
                                fallback::$non_seqcst_fallback_fn
                            }
                        })
                    }
                    Ordering::AcqRel => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_acqrel_fn
                            } else {
                                fallback::$non_seqcst_fallback_fn
                            }
                        })
                    }
                    Ordering::SeqCst => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_seqcst_fn
                            } else {
                                fallback::$seqcst_fallback_fn
                            }
                        })
                    }
                    _ => unreachable!(),
                }
            }
        }
    };
}

#[cfg(not(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
)))]
select_atomic_rmw! {
    unsafe fn atomic_compare_exchange_ifunc(dst: *mut u128, old: u128, new: u128) -> (u128, bool);
    pwr8 = atomic_compare_exchange_pwr8;
    non_seqcst_fallback = atomic_compare_exchange_non_seqcst;
    seqcst_fallback = atomic_compare_exchange_seqcst;
}
select_atomic_rmw! {
    unsafe fn atomic_swap(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_swap_pwr8;
    non_seqcst_fallback = atomic_swap_non_seqcst;
    seqcst_fallback = atomic_swap_seqcst;
}
select_atomic_rmw! {
    unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_add_pwr8;
    non_seqcst_fallback = atomic_add_non_seqcst;
    seqcst_fallback = atomic_add_seqcst;
}
select_atomic_rmw! {
    unsafe fn atomic_sub(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_sub_pwr8;
    non_seqcst_fallback = atomic_sub_non_seqcst;
    seqcst_fallback = atomic_sub_seqcst;
}
select_atomic_rmw! {
    unsafe fn atomic_and(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_and_pwr8;
    non_seqcst_fallback = atomic_and_non_seqcst;
    seqcst_fallback = atomic_and_seqcst;
}
select_atomic_rmw! {
    unsafe fn atomic_nand(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_nand_pwr8;
    non_seqcst_fallback = atomic_nand_non_seqcst;
    seqcst_fallback = atomic_nand_seqcst;
}
select_atomic_rmw! {
    unsafe fn atomic_or(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_or_pwr8;
    non_seqcst_fallback = atomic_or_non_seqcst;
    seqcst_fallback = atomic_or_seqcst;
}
select_atomic_rmw! {
    unsafe fn atomic_xor(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_xor_pwr8;
    non_seqcst_fallback = atomic_xor_non_seqcst;
    seqcst_fallback = atomic_xor_seqcst;
}
select_atomic_rmw! {
    unsafe fn atomic_max(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_max_pwr8;
    non_seqcst_fallback = atomic_max_non_seqcst;
    seqcst_fallback = atomic_max_seqcst;
}
select_atomic_rmw! {
    unsafe fn atomic_umax(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_umax_pwr8;
    non_seqcst_fallback = atomic_umax_non_seqcst;
    seqcst_fallback = atomic_umax_seqcst;
}
select_atomic_rmw! {
    unsafe fn atomic_min(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_min_pwr8;
    non_seqcst_fallback = atomic_min_non_seqcst;
    seqcst_fallback = atomic_min_seqcst;
}
select_atomic_rmw! {
    unsafe fn atomic_umin(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_umin_pwr8;
    non_seqcst_fallback = atomic_umin_non_seqcst;
    seqcst_fallback = atomic_umin_seqcst;
}
select_atomic_rmw! {
    unsafe fn atomic_not(dst: *mut u128) -> u128;
    pwr8 = atomic_not_pwr8;
    non_seqcst_fallback = atomic_not_non_seqcst;
    seqcst_fallback = atomic_not_seqcst;
}
select_atomic_rmw! {
    unsafe fn atomic_neg(dst: *mut u128) -> u128;
    pwr8 = atomic_neg_pwr8;
    non_seqcst_fallback = atomic_neg_non_seqcst;
    seqcst_fallback = atomic_neg_seqcst;
}

#[inline]
fn is_lock_free() -> bool {
    #[cfg(any(
        target_feature = "quadword-atomics",
        portable_atomic_target_feature = "quadword-atomics",
    ))]
    {
        // lqarx and stqcx. instructions are statically available.
        true
    }
    #[cfg(not(any(
        target_feature = "quadword-atomics",
        portable_atomic_target_feature = "quadword-atomics",
    )))]
    {
        detect::detect().has_quadword_atomics()
    }
}
const IS_ALWAYS_LOCK_FREE: bool = cfg!(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
));

atomic128!(AtomicI128, i128, atomic_max, atomic_min);
atomic128!(AtomicU128, u128, atomic_umax, atomic_umin);

#[cfg(test)]
mod tests {
    use super::*;

    test_atomic_int!(i128);
    test_atomic_int!(u128);

    // load/store/swap implementation is not affected by signedness, so it is
    // enough to test only unsigned types.
    stress_test!(u128);
}