• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0 OR MIT
2 
3 /*
4 128-bit atomic implementation on PowerPC64.
5 
6 powerpc64 on pwr8+ supports 128-bit atomics (load/store/LL/SC):
7 https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445
8 https://github.com/llvm/llvm-project/blob/llvmorg-19.1.0/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
9 https://github.com/llvm/llvm-project/blob/llvmorg-19.1.0/llvm/test/CodeGen/PowerPC/atomics-i128.ll
10 
11 powerpc64le is pwr8+ by default https://github.com/llvm/llvm-project/blob/llvmorg-19.1.0/llvm/lib/Target/PowerPC/PPC.td#L702
12 See also https://github.com/rust-lang/rust/issues/59932
13 
14 Note that we do not separate LL and SC into separate functions, but handle
15 them within a single asm block. This is because it is theoretically possible
16 for the compiler to insert operations that might clear the reservation between
17 LL and SC. See aarch64.rs for details.
18 
19 Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use
20 this module and use intrinsics.rs instead.
21 
22 Refs:
23 - Power ISA https://openpowerfoundation.org/specifications/isa
24 - AIX Assembler language reference https://www.ibm.com/docs/en/aix/7.3?topic=aix-assembler-language-reference
25 - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
26 
27 Generated asm:
28 - powerpc64 (pwr8) https://godbolt.org/z/71xGhY9qf
29 - powerpc64le https://godbolt.org/z/4TexcjGEz
30 */
31 
32 include!("macros.rs");
33 
34 #[cfg(not(any(
35     target_feature = "quadword-atomics",
36     portable_atomic_target_feature = "quadword-atomics",
37 )))]
38 #[path = "../fallback/outline_atomics.rs"]
39 mod fallback;
40 
41 // On musl with static linking, it seems that getauxval is not always available.
42 // See detect/auxv.rs for more.
43 #[cfg(not(portable_atomic_no_outline_atomics))]
44 #[cfg(any(test, portable_atomic_outline_atomics))] // TODO(powerpc64): currently disabled by default
45 #[cfg(any(
46     test,
47     not(any(
48         target_feature = "quadword-atomics",
49         portable_atomic_target_feature = "quadword-atomics",
50     )),
51 ))]
52 #[cfg(any(
53     all(
54         target_os = "linux",
55         any(
56             target_env = "gnu",
57             all(any(target_env = "musl", target_env = "ohos"), not(target_feature = "crt-static")),
58             portable_atomic_outline_atomics,
59         ),
60     ),
61     target_os = "android",
62     target_os = "freebsd",
63     all(target_os = "openbsd", portable_atomic_outline_atomics),
64 ))]
65 #[path = "../detect/auxv.rs"]
66 mod detect;
67 
68 use core::{arch::asm, sync::atomic::Ordering};
69 
70 use crate::utils::{Pair, U128};
71 
// Debug-build sanity check invoked by the asm-based `*_pwr8` functions in this file.
//
// If quadword-atomics is not enabled at compile time, this expands to a
// `debug_assert!` that run-time detection reports quadword atomics as available.
// If it is enabled at compile time, the cfg removes the block and nothing is checked.
72 macro_rules! debug_assert_pwr8 {
73     () => {
74         #[cfg(not(any(
75             target_feature = "quadword-atomics",
76             portable_atomic_target_feature = "quadword-atomics",
77         )))]
78         {
79             debug_assert!(detect::detect().has_quadword_atomics());
80         }
81     };
82 }
83 
84 // Refs: https://www.ibm.com/docs/en/aix/7.3?topic=ops-machine-pseudo-op
85 //
86 // This is similar to #[target_feature(enable = "quadword-atomics")], except that there are
87 // no compiler guarantees regarding (un)inlining, and the scope is within an asm
88 // block rather than a function. We use this directive because #[target_feature(enable = "quadword-atomics")]
89 // is not supported as of Rust 1.70-nightly.
90 //
91 // start_pwr8 and end_pwr8 must be used in pairs.
92 //
93 // Note: If power8 instructions are not available at compile-time, we must guarantee that
94 // the function that uses it is not inlined into a function where it is not
95 // clear whether power8 instructions are available. Otherwise, (even if we checked whether
96 // power8 instructions are available at run-time) optimizations that reorder its
97 // instructions across the if condition might introduce undefined behavior.
98 // (see also https://rust-lang.github.io/rfcs/2045-target-feature.html#safely-inlining-target_feature-functions-on-more-contexts)
99 // However, our code uses the ifunc helper macro that works with function pointers,
100 // so we don't have to worry about this unless calling without helper macro.
// Expands to the assembler directives that open a power8 region inside an asm
// block: push the current `.machine` setting, then select power8.
// Every use must be closed by a matching end_pwr8!().
101 macro_rules! start_pwr8 {
102     () => {
103         ".machine push\n.machine power8"
104     };
105 }
// Expands to the assembler directive that closes a region opened by
// start_pwr8!(): pops the `.machine` setting pushed there.
106 macro_rules! end_pwr8 {
107     () => {
108         ".machine pop"
109     };
110 }
111 
// Dispatches on a run-time `Ordering` value and invokes the asm-emitting macro
// `$op` as `$op!(acquire, release)`, passing the fence instruction strings to use.
//
// In the macros passed to this one in this file, the second string (`$release`)
// is placed before the LL/SC sequence and the first string (`$acquire`) after it.
// An empty string emits no fence instruction.
// The `_` arm covers any `Ordering` value other than the five listed.
112 macro_rules! atomic_rmw {
113     ($op:ident, $order:ident) => {
114         match $order {
115             Ordering::Relaxed => $op!("", ""),
116             Ordering::Acquire => $op!("lwsync", ""),
117             Ordering::Release => $op!("", "lwsync"),
118             Ordering::AcqRel => $op!("lwsync", "lwsync"),
119             Ordering::SeqCst => $op!("lwsync", "sync"),
120             _ => unreachable!(),
121         }
122     };
123 }
124 
125 // Extracts and checks the EQ bit of cr0.
//
// `r` is the value that the compare-exchange asm blocks in this file obtain
// with `mfcr`. Returns true if bit 0x20000000 of `r` is set.
126 #[inline(always)]
extract_cr0(r: u64) -> bool127 fn extract_cr0(r: u64) -> bool {
128     r & 0x20000000 != 0
129 }
130 
131 // If quadword-atomics is available at compile-time, we can always use pwr8_fn.
132 #[cfg(any(
133     target_feature = "quadword-atomics",
134     portable_atomic_target_feature = "quadword-atomics",
135 ))]
136 use atomic_load_pwr8 as atomic_load;
137 // Otherwise, we need to do run-time detection and can use pwr8_fn only if quadword-atomics is available.
// Run-time dispatched 128-bit atomic load; compiled only when quadword-atomics
// is not enabled at compile time.
//
// For each supported ordering, `ifunc!` is given a selector that picks the
// `atomic_load_pwr8` alias for that ordering when
// `detect::detect().has_quadword_atomics()` is true, and a function from the
// `fallback` module otherwise (Relaxed/Acquire share `atomic_load_non_seqcst`).
// Supported orderings are Relaxed, Acquire and SeqCst; any other value reaches
// `unreachable!()`.
138 #[cfg(not(any(
139     target_feature = "quadword-atomics",
140     portable_atomic_target_feature = "quadword-atomics",
141 )))]
142 #[inline]
atomic_load(src: *mut u128, order: Ordering) -> u128143 unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 {
    // NOTE(review): fn_alias! is defined outside this view; presumably it generates
    // one wrapper per listed name with the `order` argument fixed -- confirm.
144     fn_alias! {
145         // inline(never) is just a hint and also not strictly necessary
146         // because we use ifunc helper macro, but used for clarity.
147         #[inline(never)]
148         unsafe fn(src: *mut u128) -> u128;
149         atomic_load_pwr8_relaxed = atomic_load_pwr8(Ordering::Relaxed);
150         atomic_load_pwr8_acquire = atomic_load_pwr8(Ordering::Acquire);
151         atomic_load_pwr8_seqcst = atomic_load_pwr8(Ordering::SeqCst);
152     }
153     // SAFETY: the caller must uphold the safety contract.
154     // we only call atomic_load_pwr8 if quadword-atomics is available.
155     unsafe {
156         match order {
157             Ordering::Relaxed => {
158                 ifunc!(unsafe fn(src: *mut u128) -> u128 {
159                     if detect::detect().has_quadword_atomics() {
160                         atomic_load_pwr8_relaxed
161                     } else {
162                         fallback::atomic_load_non_seqcst
163                     }
164                 })
165             }
166             Ordering::Acquire => {
167                 ifunc!(unsafe fn(src: *mut u128) -> u128 {
168                     if detect::detect().has_quadword_atomics() {
169                         atomic_load_pwr8_acquire
170                     } else {
171                         fallback::atomic_load_non_seqcst
172                     }
173                 })
174             }
175             Ordering::SeqCst => {
176                 ifunc!(unsafe fn(src: *mut u128) -> u128 {
177                     if detect::detect().has_quadword_atomics() {
178                         atomic_load_pwr8_seqcst
179                     } else {
180                         fallback::atomic_load_seqcst
181                     }
182                 })
183             }
184             _ => unreachable!(),
185         }
186     }
187 }
// 128-bit atomic load implemented with the `lq` instruction.
//
// `src` is debug-asserted to be 16-byte aligned, and quadword-atomics
// availability is debug-asserted via debug_assert_pwr8!().
// - Relaxed: a bare `lq`.
// - Acquire: `lq` followed by the cmpd / bne- / isync sequence.
// - SeqCst: `sync`, then the same sequence as Acquire.
// Any other ordering reaches `unreachable!()`.
// The result is assembled from r4 (high half) and r5 (low half).
188 #[inline]
atomic_load_pwr8(src: *mut u128, order: Ordering) -> u128189 unsafe fn atomic_load_pwr8(src: *mut u128, order: Ordering) -> u128 {
190     debug_assert!(src as usize % 16 == 0);
191     debug_assert_pwr8!();
192 
193     // SAFETY: the caller must uphold the safety contract.
194     //
195     // Refs: "3.3.4 Fixed Point Load and Store Quadword Instructions" of Power ISA
196     unsafe {
197         let (out_hi, out_lo);
            // `$release` is the instruction string emitted before the load
            // ("" for Acquire, "sync" for SeqCst).
198         macro_rules! atomic_load_acquire {
199             ($release:tt) => {
200                 asm!(
201                     start_pwr8!(),
202                     $release,
203                     "lq %r4, 0({src})",
204                     // Lightweight acquire sync
205                     // Refs: https://github.com/boostorg/atomic/blob/boost-1.79.0/include/boost/atomic/detail/core_arch_ops_gcc_ppc.hpp#L47-L62
                        // The compare is of the loaded register with itself and the
                        // branch target is the label on the next line, so control
                        // always continues at `isync`.
206                     "cmpd %cr7, %r4, %r4",
207                     "bne- %cr7, 2f",
208                     "2:",
209                     "isync",
210                     end_pwr8!(),
211                     src = in(reg_nonzero) ptr_reg!(src),
212                     // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
213                     // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
214                     out("r4") out_hi,
215                     out("r5") out_lo,
216                     out("cr7") _,
217                     options(nostack, preserves_flags),
218                 )
219             };
220         }
221         match order {
222             Ordering::Relaxed => {
223                 asm!(
224                     start_pwr8!(),
225                     "lq %r4, 0({src})",
226                     end_pwr8!(),
227                     src = in(reg_nonzero) ptr_reg!(src),
228                     // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
229                     // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
230                     out("r4") out_hi,
231                     out("r5") out_lo,
232                     options(nostack, preserves_flags),
233                 );
234             }
235             Ordering::Acquire => atomic_load_acquire!(""),
236             Ordering::SeqCst => atomic_load_acquire!("sync"),
237             _ => unreachable!(),
238         }
239         U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole
240     }
241 }
242 
243 // If quadword-atomics is available at compile-time, we can always use pwr8_fn.
244 #[cfg(any(
245     target_feature = "quadword-atomics",
246     portable_atomic_target_feature = "quadword-atomics",
247 ))]
248 use atomic_store_pwr8 as atomic_store;
249 // Otherwise, we need to do run-time detection and can use pwr8_fn only if quadword-atomics is available.
// Run-time dispatched 128-bit atomic store; compiled only when quadword-atomics
// is not enabled at compile time.
//
// For each supported ordering, `ifunc!` is given a selector that picks the
// `atomic_store_pwr8` alias for that ordering when
// `detect::detect().has_quadword_atomics()` is true, and a function from the
// `fallback` module otherwise (Relaxed/Release share `atomic_store_non_seqcst`).
// Supported orderings are Relaxed, Release and SeqCst; any other value reaches
// `unreachable!()`.
250 #[cfg(not(any(
251     target_feature = "quadword-atomics",
252     portable_atomic_target_feature = "quadword-atomics",
253 )))]
254 #[inline]
atomic_store(dst: *mut u128, val: u128, order: Ordering)255 unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
    // NOTE(review): fn_alias! is defined outside this view; presumably it generates
    // one wrapper per listed name with the `order` argument fixed -- confirm.
256     fn_alias! {
257         // inline(never) is just a hint and also not strictly necessary
258         // because we use ifunc helper macro, but used for clarity.
259         #[inline(never)]
260         unsafe fn(dst: *mut u128, val: u128);
261         atomic_store_pwr8_relaxed = atomic_store_pwr8(Ordering::Relaxed);
262         atomic_store_pwr8_release = atomic_store_pwr8(Ordering::Release);
263         atomic_store_pwr8_seqcst = atomic_store_pwr8(Ordering::SeqCst);
264     }
265     // SAFETY: the caller must uphold the safety contract.
266     // we only call atomic_store_pwr8 if quadword-atomics is available.
267     unsafe {
268         match order {
269             Ordering::Relaxed => {
270                 ifunc!(unsafe fn(dst: *mut u128, val: u128) {
271                     if detect::detect().has_quadword_atomics() {
272                         atomic_store_pwr8_relaxed
273                     } else {
274                         fallback::atomic_store_non_seqcst
275                     }
276                 });
277             }
278             Ordering::Release => {
279                 ifunc!(unsafe fn(dst: *mut u128, val: u128) {
280                     if detect::detect().has_quadword_atomics() {
281                         atomic_store_pwr8_release
282                     } else {
283                         fallback::atomic_store_non_seqcst
284                     }
285                 });
286             }
287             Ordering::SeqCst => {
288                 ifunc!(unsafe fn(dst: *mut u128, val: u128) {
289                     if detect::detect().has_quadword_atomics() {
290                         atomic_store_pwr8_seqcst
291                     } else {
292                         fallback::atomic_store_seqcst
293                     }
294                 });
295             }
296             _ => unreachable!(),
297         }
298     }
299 }
// 128-bit atomic store implemented with the `stq` instruction.
//
// `dst` is debug-asserted to be 16-byte aligned, and quadword-atomics
// availability is debug-asserted via debug_assert_pwr8!().
// The value is passed in r4 (high half) and r5 (low half). The only difference
// between orderings is the instruction emitted before `stq`:
// nothing for Relaxed, `lwsync` for Release, `sync` for SeqCst.
// Any other ordering reaches `unreachable!()`.
300 #[inline]
atomic_store_pwr8(dst: *mut u128, val: u128, order: Ordering)301 unsafe fn atomic_store_pwr8(dst: *mut u128, val: u128, order: Ordering) {
302     debug_assert!(dst as usize % 16 == 0);
303     debug_assert_pwr8!();
304 
305     // SAFETY: the caller must uphold the safety contract.
306     //
307     // Refs: "3.3.4 Fixed Point Load and Store Quadword Instructions" of Power ISA
308     unsafe {
309         let val = U128 { whole: val };
            // `$release` is the instruction string emitted before the store.
310         macro_rules! atomic_store {
311             ($release:tt) => {
312                 asm!(
313                     start_pwr8!(),
314                     $release,
315                     "stq %r4, 0({dst})",
316                     end_pwr8!(),
317                     dst = in(reg_nonzero) ptr_reg!(dst),
318                     // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
319                     // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
320                     in("r4") val.pair.hi,
321                     in("r5") val.pair.lo,
322                     options(nostack, preserves_flags),
323                 )
324             };
325         }
326         match order {
327             Ordering::Relaxed => atomic_store!(""),
328             Ordering::Release => atomic_store!("lwsync"),
329             Ordering::SeqCst => atomic_store!("sync"),
330             _ => unreachable!(),
331         }
332     }
333 }
334 
// Strong 128-bit compare-and-exchange entry point.
//
// The success ordering is first passed through
// `crate::utils::upgrade_success_ordering(success, failure)`; only the resulting
// single ordering is forwarded. (Presumably this strengthens `success` so it also
// covers `failure` -- the helper is outside this view, confirm there.)
// With quadword-atomics enabled at compile time the asm implementation is called
// directly; otherwise `atomic_compare_exchange_ifunc` (defined outside this view)
// is used.
// Returns `Ok(previous)` if the exchange happened and `Err(previous)` otherwise.
335 #[inline]
atomic_compare_exchange( dst: *mut u128, old: u128, new: u128, success: Ordering, failure: Ordering, ) -> Result<u128, u128>336 unsafe fn atomic_compare_exchange(
337     dst: *mut u128,
338     old: u128,
339     new: u128,
340     success: Ordering,
341     failure: Ordering,
342 ) -> Result<u128, u128> {
343     let success = crate::utils::upgrade_success_ordering(success, failure);
344 
345     #[cfg(any(
346         target_feature = "quadword-atomics",
347         portable_atomic_target_feature = "quadword-atomics",
348     ))]
349     // SAFETY: the caller must uphold the safety contract.
350     // cfg guarantees that quadword atomics instructions are available at compile-time.
351     let (prev, ok) = unsafe { atomic_compare_exchange_pwr8(dst, old, new, success) };
352     #[cfg(not(any(
353         target_feature = "quadword-atomics",
354         portable_atomic_target_feature = "quadword-atomics",
355     )))]
356     // SAFETY: the caller must uphold the safety contract.
357     let (prev, ok) = unsafe { atomic_compare_exchange_ifunc(dst, old, new, success) };
358     if ok {
359         Ok(prev)
360     } else {
361         Err(prev)
362     }
363 }
// Strong 128-bit compare-and-exchange implemented as an `lqarx` / `stqcx.` loop.
//
// `dst` is debug-asserted to be 16-byte aligned, and quadword-atomics
// availability is debug-asserted via debug_assert_pwr8!().
// Returns `(previous value, success flag)`. The flag is the EQ bit of cr0 as it
// stands when control reaches label 3, read back with `mfcr`.
// The loop retries only when the conditional store fails; a failed comparison
// leaves the loop immediately.
364 #[inline]
atomic_compare_exchange_pwr8( dst: *mut u128, old: u128, new: u128, order: Ordering, ) -> (u128, bool)365 unsafe fn atomic_compare_exchange_pwr8(
366     dst: *mut u128,
367     old: u128,
368     new: u128,
369     order: Ordering,
370 ) -> (u128, bool) {
371     debug_assert!(dst as usize % 16 == 0);
372     debug_assert_pwr8!();
373 
374     // SAFETY: the caller must uphold the safety contract.
375     //
376     // Refs: "4.6.2.2 128-bit Load And Reserve and Store Conditional Instructions" of Power ISA
377     unsafe {
378         let old = U128 { whole: old };
379         let new = U128 { whole: new };
380         let (mut prev_hi, mut prev_lo);
381         let mut r;
            // `$release` is emitted before the loop and `$acquire` after the `mfcr`.
382         macro_rules! cmpxchg {
383             ($acquire:tt, $release:tt) => {
384                 asm!(
385                     start_pwr8!(),
386                     $release,
387                     "2:",
                            // Load the current value into r8 (hi) / r9 (lo).
388                         "lqarx %r8, 0, {dst}",
                            // XOR each half with `old` and OR the results together:
                            // tmp_lo is zero only if both halves match, and `or.`
                            // records that in cr0.
389                         "xor {tmp_lo}, %r9, {old_lo}",
390                         "xor {tmp_hi}, %r8, {old_hi}",
391                         "or. {tmp_lo}, {tmp_lo}, {tmp_hi}",
392                         "bne %cr0, 3f", // jump if compare failed
                            // Conditionally store the new value held in r6 (hi) / r7 (lo).
393                         "stqcx. %r6, 0, {dst}",
394                         "bne %cr0, 2b", // continue loop if store failed
395                     "3:",
396                     // if compare failed EQ bit is cleared, if stqcx succeeds EQ bit is set.
397                     "mfcr {tmp_lo}",
398                     $acquire,
399                     end_pwr8!(),
400                     dst = in(reg_nonzero) ptr_reg!(dst),
401                     old_hi = in(reg) old.pair.hi,
402                     old_lo = in(reg) old.pair.lo,
403                     tmp_hi = out(reg) _,
                        // tmp_lo is scratch during the compare and finally carries
                        // the `mfcr` result out as `r`.
404                     tmp_lo = out(reg) r,
405                     // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
406                     // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
407                     in("r6") new.pair.hi,
408                     in("r7") new.pair.lo,
409                     out("r8") prev_hi,
410                     out("r9") prev_lo,
411                     out("cr0") _,
412                     options(nostack, preserves_flags),
413                 )
414             };
415         }
416         atomic_rmw!(cmpxchg, order);
417         (U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole, extract_cr0(r))
418     }
419 }
420 
421 // Always use strong CAS for outline-atomics.
422 #[cfg(not(any(
423     target_feature = "quadword-atomics",
424     portable_atomic_target_feature = "quadword-atomics",
425 )))]
426 use atomic_compare_exchange as atomic_compare_exchange_weak;
// Weak 128-bit compare-and-exchange entry point; compiled only when
// quadword-atomics is enabled at compile time (otherwise the strong version is
// re-exported under this name).
//
// The success ordering is first passed through
// `crate::utils::upgrade_success_ordering(success, failure)`; only the resulting
// single ordering is forwarded to `atomic_compare_exchange_weak_pwr8`.
// Returns `Ok(previous)` if the exchange happened and `Err(previous)` otherwise.
427 #[cfg(any(
428     target_feature = "quadword-atomics",
429     portable_atomic_target_feature = "quadword-atomics",
430 ))]
431 #[inline]
atomic_compare_exchange_weak( dst: *mut u128, old: u128, new: u128, success: Ordering, failure: Ordering, ) -> Result<u128, u128>432 unsafe fn atomic_compare_exchange_weak(
433     dst: *mut u128,
434     old: u128,
435     new: u128,
436     success: Ordering,
437     failure: Ordering,
438 ) -> Result<u128, u128> {
439     let success = crate::utils::upgrade_success_ordering(success, failure);
440 
441     // SAFETY: the caller must uphold the safety contract.
442     // cfg guarantees that quadword atomics instructions are available at compile-time.
443     let (prev, ok) = unsafe { atomic_compare_exchange_weak_pwr8(dst, old, new, success) };
444     if ok {
445         Ok(prev)
446     } else {
447         Err(prev)
448     }
449 }
// Weak 128-bit compare-and-exchange: a single `lqarx` / `stqcx.` attempt.
//
// Same register layout as `atomic_compare_exchange_pwr8`, but there is no
// backward branch: if the conditional store fails, control falls through to
// label 3 and the failure is reported to the caller instead of being retried.
// Returns `(previous value, success flag)`, the flag being the EQ bit of cr0
// read back with `mfcr`.
450 #[cfg(any(
451     target_feature = "quadword-atomics",
452     portable_atomic_target_feature = "quadword-atomics",
453 ))]
454 #[inline]
atomic_compare_exchange_weak_pwr8( dst: *mut u128, old: u128, new: u128, order: Ordering, ) -> (u128, bool)455 unsafe fn atomic_compare_exchange_weak_pwr8(
456     dst: *mut u128,
457     old: u128,
458     new: u128,
459     order: Ordering,
460 ) -> (u128, bool) {
461     debug_assert!(dst as usize % 16 == 0);
462     debug_assert_pwr8!();
463 
464     // SAFETY: the caller must uphold the safety contract.
465     //
466     // Refs: "4.6.2.2 128-bit Load And Reserve and Store Conditional Instructions" of Power ISA
467     unsafe {
468         let old = U128 { whole: old };
469         let new = U128 { whole: new };
470         let (mut prev_hi, mut prev_lo);
471         let mut r;
            // `$release` is emitted before the load and `$acquire` after the `mfcr`.
472         macro_rules! cmpxchg_weak {
473             ($acquire:tt, $release:tt) => {
474                 asm!(
475                     start_pwr8!(),
476                     $release,
                        // Load the current value into r8 (hi) / r9 (lo).
477                     "lqarx %r8, 0, {dst}",
                        // tmp_lo becomes zero only if both halves equal `old`;
                        // `or.` records that in cr0.
478                     "xor {tmp_lo}, %r9, {old_lo}",
479                     "xor {tmp_hi}, %r8, {old_hi}",
480                     "or. {tmp_lo}, {tmp_lo}, {tmp_hi}",
481                     "bne %cr0, 3f", // jump if compare failed
                        // Single store attempt of r6 (hi) / r7 (lo); no retry.
482                     "stqcx. %r6, 0, {dst}",
483                     "3:",
484                     // if compare or stqcx failed EQ bit is cleared, if stqcx succeeds EQ bit is set.
485                     "mfcr {tmp_lo}",
486                     $acquire,
487                     end_pwr8!(),
488                     dst = in(reg_nonzero) ptr_reg!(dst),
489                     old_hi = in(reg) old.pair.hi,
490                     old_lo = in(reg) old.pair.lo,
491                     tmp_hi = out(reg) _,
492                     tmp_lo = out(reg) r,
493                     // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
494                     // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
495                     in("r6") new.pair.hi,
496                     in("r7") new.pair.lo,
497                     out("r8") prev_hi,
498                     out("r9") prev_lo,
499                     out("cr0") _,
500                     options(nostack, preserves_flags),
501                 )
502             };
503         }
504         atomic_rmw!(cmpxchg_weak, order);
505         (U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole, extract_cr0(r))
506     }
507 }
508 
509 // Do not use atomic_rmw_ll_sc_3 because it needs extra MR to implement swap.
//
// 128-bit atomic swap: loops `lqarx` / `stqcx.` until the conditional store
// succeeds and returns the value that was loaded.
// The new value is passed directly in r8 (hi) / r9 (lo), the pair that `stqcx.`
// stores from, so no instruction is needed between the load and the store.
// `dst` is debug-asserted to be 16-byte aligned.
510 #[inline]
atomic_swap_pwr8(dst: *mut u128, val: u128, order: Ordering) -> u128511 unsafe fn atomic_swap_pwr8(dst: *mut u128, val: u128, order: Ordering) -> u128 {
512     debug_assert!(dst as usize % 16 == 0);
513     debug_assert_pwr8!();
514 
515     // SAFETY: the caller must uphold the safety contract.
516     unsafe {
517         let val = U128 { whole: val };
518         let (mut prev_hi, mut prev_lo);
            // `$release` is emitted before the loop and `$acquire` after it.
519         macro_rules! swap {
520             ($acquire:tt, $release:tt) => {
521                 asm!(
522                     start_pwr8!(),
523                     $release,
524                     "2:",
                            // Load the previous value into r6 (hi) / r7 (lo).
525                         "lqarx %r6, 0, {dst}",
                            // Try to store `val` from r8/r9; retry if the store failed.
526                         "stqcx. %r8, 0, {dst}",
527                         "bne %cr0, 2b",
528                     $acquire,
529                     end_pwr8!(),
530                     dst = in(reg_nonzero) ptr_reg!(dst),
531                     // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
532                     // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
533                     out("r6") prev_hi,
534                     out("r7") prev_lo,
535                     in("r8") val.pair.hi,
536                     in("r9") val.pair.lo,
537                     out("cr0") _,
538                     options(nostack, preserves_flags),
539                 )
540             };
541         }
542         atomic_rmw!(swap, order);
543         U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
544     }
545 }
546 
547 /// Atomic RMW by LL/SC loop (3 arguments)
548 /// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
549 ///
550 /// $op can use the following registers:
551 /// - val_hi/val_lo pair: val argument (read-only for `$op`)
552 /// - r6/r7 pair: previous value loaded by ll (read-only for `$op`)
553 /// - r8/r9 pair: new value that will be stored by sc
///
/// Macro arguments:
/// - `$name`: name of the generated function.
/// - `[$($reg)*]`: extra asm operands (e.g. additional clobbers) spliced into the operand list.
/// - `$($op)*`: asm template strings placed between `lqarx` and `stqcx.`; they run
///   again on every retry of the loop.
///
/// The generated function returns the value loaded by `lqarx` on the iteration
/// whose `stqcx.` succeeded.
554 macro_rules! atomic_rmw_ll_sc_3 {
555     ($name:ident, [$($reg:tt)*], $($op:tt)*) => {
556         #[inline]
557         unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 {
558             debug_assert!(dst as usize % 16 == 0);
559             debug_assert_pwr8!();
560             // SAFETY: the caller must uphold the safety contract.
561             unsafe {
562                 let val = U128 { whole: val };
563                 let (mut prev_hi, mut prev_lo);
                    // `$release` is emitted before the loop and `$acquire` after it.
564                 macro_rules! op {
565                     ($acquire:tt, $release:tt) => {
566                         asm!(
567                             start_pwr8!(),
568                             $release,
569                             "2:",
570                                 "lqarx %r6, 0, {dst}",
571                                 $($op)*
572                                 "stqcx. %r8, 0, {dst}",
573                                 "bne %cr0, 2b",
574                             $acquire,
575                             end_pwr8!(),
576                             dst = in(reg_nonzero) ptr_reg!(dst),
577                             val_hi = in(reg) val.pair.hi,
578                             val_lo = in(reg) val.pair.lo,
579                             $($reg)*
580                             // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
581                             // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
582                             out("r6") prev_hi,
583                             out("r7") prev_lo,
584                             out("r8") _, // new (hi)
585                             out("r9") _, // new (lo)
586                             out("cr0") _,
587                             options(nostack, preserves_flags),
588                         )
589                     };
590                 }
591                 atomic_rmw!(op, order);
592                 U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
593             }
594         }
595     };
596 }
597 /// Atomic RMW by LL/SC loop (2 arguments)
598 /// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
599 ///
600 /// $op can use the following registers:
601 /// - r6/r7 pair: previous value loaded by ll (read-only for `$op`)
602 /// - r8/r9 pair: new value that will be stored by sc
///
/// Macro arguments:
/// - `$name`: name of the generated function.
/// - `[$($reg)*]`: extra asm operands (e.g. additional inputs or clobbers) spliced into the operand list.
/// - `$($op)*`: asm template strings placed between `lqarx` and `stqcx.`; they run
///   again on every retry of the loop.
///
/// Unlike `atomic_rmw_ll_sc_3`, the generated function takes no `val` argument,
/// so no `val_hi`/`val_lo` operands exist. It returns the value loaded by `lqarx`
/// on the iteration whose `stqcx.` succeeded.
603 macro_rules! atomic_rmw_ll_sc_2 {
604     ($name:ident, [$($reg:tt)*], $($op:tt)*) => {
605         #[inline]
606         unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 {
607             debug_assert!(dst as usize % 16 == 0);
608             debug_assert_pwr8!();
609             // SAFETY: the caller must uphold the safety contract.
610             unsafe {
611                 let (mut prev_hi, mut prev_lo);
                    // `$release` is emitted before the loop and `$acquire` after it.
612                 macro_rules! op {
613                     ($acquire:tt, $release:tt) => {
614                         asm!(
615                             start_pwr8!(),
616                             $release,
617                             "2:",
618                                 "lqarx %r6, 0, {dst}",
619                                 $($op)*
620                                 "stqcx. %r8, 0, {dst}",
621                                 "bne %cr0, 2b",
622                             $acquire,
623                             end_pwr8!(),
624                             dst = in(reg_nonzero) ptr_reg!(dst),
625                             $($reg)*
626                             // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
627                             // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
628                             out("r6") prev_hi,
629                             out("r7") prev_lo,
630                             out("r8") _, // new (hi)
631                             out("r9") _, // new (lo)
632                             out("cr0") _,
633                             options(nostack, preserves_flags),
634                         )
635                     };
636                 }
637                 atomic_rmw!(op, order);
638                 U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
639             }
640         }
641     };
642 }
643 
// 128-bit fetch-add (new = prev + val), done as two 64-bit additions: the low
// halves with `addc`, then the high halves with `adde` so the carry from the low
// half is included. xer is declared as clobbered because these instructions
// update the carry bit.
644 atomic_rmw_ll_sc_3! {
645     atomic_add_pwr8, [out("xer") _,],
646     "addc %r9, {val_lo}, %r7",
647     "adde %r8, {val_hi}, %r6",
648 }
// 128-bit fetch-sub (new = prev - val), done as two 64-bit subtractions: the low
// halves with `subc`, then the high halves with `subfe` so the borrow from the
// low half is included. Note the operand order differs between the two
// mnemonics. xer is declared as clobbered because these instructions update the
// carry bit.
649 atomic_rmw_ll_sc_3! {
650     atomic_sub_pwr8, [out("xer") _,],
651     "subc %r9, %r7, {val_lo}",
652     "subfe %r8, {val_hi}, %r6",
653 }
// 128-bit fetch-and (new = prev & val), applied to the low and high halves
// independently. No extra operands or clobbers are needed.
654 atomic_rmw_ll_sc_3! {
655     atomic_and_pwr8, [],
656     "and %r9, {val_lo}, %r7",
657     "and %r8, {val_hi}, %r6",
658 }
// 128-bit fetch-nand (new = !(prev & val)), applied to the low and high halves
// independently. No extra operands or clobbers are needed.
659 atomic_rmw_ll_sc_3! {
660     atomic_nand_pwr8, [],
661     "nand %r9, {val_lo}, %r7",
662     "nand %r8, {val_hi}, %r6",
663 }
// 128-bit fetch-or (new = prev | val), applied to the low and high halves
// independently. No extra operands or clobbers are needed.
664 atomic_rmw_ll_sc_3! {
665     atomic_or_pwr8, [],
666     "or %r9, {val_lo}, %r7",
667     "or %r8, {val_hi}, %r6",
668 }
// 128-bit fetch-xor (new = prev ^ val), applied to the low and high halves
// independently. Also used by atomic_not_pwr8 with an all-ones operand.
669 atomic_rmw_ll_sc_3! {
670     atomic_xor_pwr8, [],
671     "xor %r9, {val_lo}, %r7",
672     "xor %r8, {val_hi}, %r6",
673 }
// 128-bit signed fetch-max, built from 64-bit compares and `isel` selects.
// The high halves are compared as signed (result in cr1) and the low halves as
// unsigned (result in cr0). The low-half choice from the low compare is kept
// only when the high halves are equal; otherwise the high compare decides both
// halves. cr1 is declared as clobbered in addition to the macro's cr0.
// r8 is used as a temporary for a low-half candidate before receiving the final
// high half on the last line.
674 atomic_rmw_ll_sc_3! {
675     atomic_max_pwr8, [out("cr1") _,],
676     "cmpld %r7, {val_lo}",        // (unsigned) compare lo 64-bit, store result to cr0
677     "iselgt %r9, %r7, {val_lo}",  // select lo 64-bit based on GT bit in cr0
678     "cmpd %cr1, %r6, {val_hi}",   // (signed) compare hi 64-bit, store result to cr1
679     "isel %r8, %r7, {val_lo}, 5", // select lo 64-bit based on GT bit in cr1
680     "cmpld %r6, {val_hi}",        // (unsigned) compare hi 64-bit, store result to cr0
681     "iseleq %r9, %r9, %r8",       // select lo 64-bit based on EQ bit in cr0
682     "isel %r8, %r6, {val_hi}, 5", // select hi 64-bit based on GT bit in cr1
683 }
// 128-bit unsigned fetch-max, built from 64-bit unsigned compares and `isel`
// selects. Only cr0 is used: it first holds the low-half comparison and is then
// overwritten by the high-half comparison. The low-half choice from the low
// compare is kept only when the high halves are equal; otherwise the high
// compare decides both halves.
// r8 is used as a temporary for a low-half candidate before receiving the final
// high half on the last line.
684 atomic_rmw_ll_sc_3! {
685     atomic_umax_pwr8, [],
686     "cmpld %r7, {val_lo}",       // compare lo 64-bit, store result to cr0
687     "iselgt %r9, %r7, {val_lo}", // select lo 64-bit based on GT bit in cr0
688     "cmpld %r6, {val_hi}",       // compare hi 64-bit, store result to cr0
689     "iselgt %r8, %r7, {val_lo}", // select lo 64-bit based on GT bit in cr0
690     "iseleq %r9, %r9, %r8",      // select lo 64-bit based on EQ bit in cr0
691     "iselgt %r8, %r6, {val_hi}", // select hi 64-bit based on GT bit in cr0
692 }
// 128-bit signed fetch-min. Same structure as atomic_max_pwr8, but the selects
// test the LT bit (isel condition index 4 for cr1) instead of the GT bit.
// cr1 is declared as clobbered in addition to the macro's cr0.
// r8 is used as a temporary for a low-half candidate before receiving the final
// high half on the last line.
693 atomic_rmw_ll_sc_3! {
694     atomic_min_pwr8, [out("cr1") _,],
695     "cmpld %r7, {val_lo}",        // (unsigned) compare lo 64-bit, store result to cr0
696     "isellt %r9, %r7, {val_lo}",  // select lo 64-bit based on LT bit in cr0
697     "cmpd %cr1, %r6, {val_hi}",   // (signed) compare hi 64-bit, store result to cr1
698     "isel %r8, %r7, {val_lo}, 4", // select lo 64-bit based on LT bit in cr1
699     "cmpld %r6, {val_hi}",        // (unsigned) compare hi 64-bit, store result to cr0
700     "iseleq %r9, %r9, %r8",       // select lo 64-bit based on EQ bit in cr0
701     "isel %r8, %r6, {val_hi}, 4", // select hi 64-bit based on LT bit in cr1
702 }
// 128-bit unsigned fetch-min. Same structure as atomic_umax_pwr8, but the
// selects test the LT bit of cr0 instead of the GT bit.
// r8 is used as a temporary for a low-half candidate before receiving the final
// high half on the last line.
703 atomic_rmw_ll_sc_3! {
704     atomic_umin_pwr8, [],
705     "cmpld %r7, {val_lo}",       // compare lo 64-bit, store result to cr0
706     "isellt %r9, %r7, {val_lo}", // select lo 64-bit based on LT bit in cr0
707     "cmpld %r6, {val_hi}",       // compare hi 64-bit, store result to cr0
708     "isellt %r8, %r7, {val_lo}", // select lo 64-bit based on LT bit in cr0
709     "iseleq %r9, %r9, %r8",      // select lo 64-bit based on EQ bit in cr0
710     "isellt %r8, %r6, {val_hi}", // select hi 64-bit based on LT bit in cr0
711 }
712 
// 128-bit fetch-not: implemented as fetch-xor with an all-ones operand (`!0`),
// which flips every bit. Returns whatever atomic_xor_pwr8 returns.
713 #[inline]
atomic_not_pwr8(dst: *mut u128, order: Ordering) -> u128714 unsafe fn atomic_not_pwr8(dst: *mut u128, order: Ordering) -> u128 {
715     // SAFETY: the caller must uphold the safety contract.
716     unsafe { atomic_xor_pwr8(dst, !0, order) }
717 }
718 
// 128-bit fetch-neg (new = 0 - prev) for builds where
// `portable_atomic_pre_llvm_16` is not set.
// `subfic` computes 0 - lo, and `subfze` then negates the high half taking the
// borrow from the low half into account. xer is declared as clobbered because
// these instructions update the carry bit.
719 #[cfg(not(portable_atomic_pre_llvm_16))]
720 atomic_rmw_ll_sc_2! {
721     atomic_neg_pwr8, [out("xer") _,],
722     "subfic %r9, %r7, 0",
723     "subfze %r8, %r6",
724 }
725 // LLVM 15 miscompiles subfic.
//
// Alternative 128-bit fetch-neg used when `portable_atomic_pre_llvm_16` is set:
// instead of `subfic` with an immediate 0, a zero is passed in as a register
// operand (`{zero}`) and subtracted from with `subc`. The high half is handled
// by the same `subfze` as in the other variant.
726 #[cfg(portable_atomic_pre_llvm_16)]
727 atomic_rmw_ll_sc_2! {
728     atomic_neg_pwr8, [zero = in(reg) 0_u64, out("xer") _,],
729     "subc %r9, {zero}, %r7",
730     "subfze %r8, %r6",
731 }
732 
// Defines the function `$name` that performs a 128-bit atomic RMW operation,
// choosing between the pwr8 (quadword-atomics) implementation and the fallback.
//
// Inputs:
// - `unsafe fn $name($arg...) -> $ret_ty;`: signature of the function to define,
//   written *without* the trailing `order: Ordering` parameter.
// - `pwr8`: implementation used when quadword-atomics is available. It takes the
//   same arguments followed by `order`.
// - `non_seqcst_fallback`: function in the `fallback` module used for
//   Relaxed/Acquire/Release/AcqRel when quadword-atomics is not available.
// - `seqcst_fallback`: function in the `fallback` module used for SeqCst when
//   quadword-atomics is not available.
macro_rules! select_atomic_rmw {
    (
        unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)?;
        pwr8 = $pwr8_fn:ident;
        non_seqcst_fallback = $non_seqcst_fallback_fn:ident;
        seqcst_fallback = $seqcst_fallback_fn:ident;
    ) => {
        // If quadword-atomics is available at compile-time, we can always use pwr8_fn.
        #[cfg(any(
            target_feature = "quadword-atomics",
            portable_atomic_target_feature = "quadword-atomics",
        ))]
        use $pwr8_fn as $name;
        // Otherwise, we need to do run-time detection and can use pwr8_fn only if quadword-atomics is available.
        #[cfg(not(any(
            target_feature = "quadword-atomics",
            portable_atomic_target_feature = "quadword-atomics",
        )))]
        #[inline]
        unsafe fn $name($($arg)*, order: Ordering) $(-> $ret_ty)? {
            // One alias of pwr8_fn per ordering, each with the ordering fixed, so
            // that every alias has the same order-less signature as the fallbacks.
            fn_alias! {
                // inline(never) is just a hint and also not strictly necessary
                // because we use ifunc helper macro, but used for clarity.
                #[inline(never)]
                unsafe fn($($arg)*) $(-> $ret_ty)?;
                pwr8_relaxed_fn = $pwr8_fn(Ordering::Relaxed);
                pwr8_acquire_fn = $pwr8_fn(Ordering::Acquire);
                pwr8_release_fn = $pwr8_fn(Ordering::Release);
                pwr8_acqrel_fn = $pwr8_fn(Ordering::AcqRel);
                pwr8_seqcst_fn = $pwr8_fn(Ordering::SeqCst);
            }
            // SAFETY: the caller must uphold the safety contract.
            // We only call pwr8_fn if quadword-atomics is available.
            unsafe {
                match order {
                    Ordering::Relaxed => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_relaxed_fn
                            } else {
                                fallback::$non_seqcst_fallback_fn
                            }
                        })
                    }
                    Ordering::Acquire => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_acquire_fn
                            } else {
                                fallback::$non_seqcst_fallback_fn
                            }
                        })
                    }
                    Ordering::Release => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_release_fn
                            } else {
                                fallback::$non_seqcst_fallback_fn
                            }
                        })
                    }
                    Ordering::AcqRel => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_acqrel_fn
                            } else {
                                fallback::$non_seqcst_fallback_fn
                            }
                        })
                    }
                    // SeqCst is the only ordering that uses the dedicated seqcst fallback.
                    Ordering::SeqCst => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_seqcst_fn
                            } else {
                                fallback::$seqcst_fallback_fn
                            }
                        })
                    }
                    // `Ordering` is non-exhaustive, so a wildcard arm is required.
                    _ => unreachable!(),
                }
            }
        }
    };
}
819 
820 #[cfg(not(any(
821     target_feature = "quadword-atomics",
822     portable_atomic_target_feature = "quadword-atomics",
823 )))]
824 select_atomic_rmw! {
825     unsafe fn atomic_compare_exchange_ifunc(dst: *mut u128, old: u128, new: u128) -> (u128, bool);
826     pwr8 = atomic_compare_exchange_pwr8;
827     non_seqcst_fallback = atomic_compare_exchange_non_seqcst;
828     seqcst_fallback = atomic_compare_exchange_seqcst;
829 }
830 select_atomic_rmw! {
831     unsafe fn atomic_swap(dst: *mut u128, val: u128) -> u128;
832     pwr8 = atomic_swap_pwr8;
833     non_seqcst_fallback = atomic_swap_non_seqcst;
834     seqcst_fallback = atomic_swap_seqcst;
835 }
836 select_atomic_rmw! {
837     unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128;
838     pwr8 = atomic_add_pwr8;
839     non_seqcst_fallback = atomic_add_non_seqcst;
840     seqcst_fallback = atomic_add_seqcst;
841 }
842 select_atomic_rmw! {
843     unsafe fn atomic_sub(dst: *mut u128, val: u128) -> u128;
844     pwr8 = atomic_sub_pwr8;
845     non_seqcst_fallback = atomic_sub_non_seqcst;
846     seqcst_fallback = atomic_sub_seqcst;
847 }
848 select_atomic_rmw! {
849     unsafe fn atomic_and(dst: *mut u128, val: u128) -> u128;
850     pwr8 = atomic_and_pwr8;
851     non_seqcst_fallback = atomic_and_non_seqcst;
852     seqcst_fallback = atomic_and_seqcst;
853 }
854 select_atomic_rmw! {
855     unsafe fn atomic_nand(dst: *mut u128, val: u128) -> u128;
856     pwr8 = atomic_nand_pwr8;
857     non_seqcst_fallback = atomic_nand_non_seqcst;
858     seqcst_fallback = atomic_nand_seqcst;
859 }
860 select_atomic_rmw! {
861     unsafe fn atomic_or(dst: *mut u128, val: u128) -> u128;
862     pwr8 = atomic_or_pwr8;
863     non_seqcst_fallback = atomic_or_non_seqcst;
864     seqcst_fallback = atomic_or_seqcst;
865 }
866 select_atomic_rmw! {
867     unsafe fn atomic_xor(dst: *mut u128, val: u128) -> u128;
868     pwr8 = atomic_xor_pwr8;
869     non_seqcst_fallback = atomic_xor_non_seqcst;
870     seqcst_fallback = atomic_xor_seqcst;
871 }
872 select_atomic_rmw! {
873     unsafe fn atomic_max(dst: *mut u128, val: u128) -> u128;
874     pwr8 = atomic_max_pwr8;
875     non_seqcst_fallback = atomic_max_non_seqcst;
876     seqcst_fallback = atomic_max_seqcst;
877 }
878 select_atomic_rmw! {
879     unsafe fn atomic_umax(dst: *mut u128, val: u128) -> u128;
880     pwr8 = atomic_umax_pwr8;
881     non_seqcst_fallback = atomic_umax_non_seqcst;
882     seqcst_fallback = atomic_umax_seqcst;
883 }
884 select_atomic_rmw! {
885     unsafe fn atomic_min(dst: *mut u128, val: u128) -> u128;
886     pwr8 = atomic_min_pwr8;
887     non_seqcst_fallback = atomic_min_non_seqcst;
888     seqcst_fallback = atomic_min_seqcst;
889 }
890 select_atomic_rmw! {
891     unsafe fn atomic_umin(dst: *mut u128, val: u128) -> u128;
892     pwr8 = atomic_umin_pwr8;
893     non_seqcst_fallback = atomic_umin_non_seqcst;
894     seqcst_fallback = atomic_umin_seqcst;
895 }
896 select_atomic_rmw! {
897     unsafe fn atomic_not(dst: *mut u128) -> u128;
898     pwr8 = atomic_not_pwr8;
899     non_seqcst_fallback = atomic_not_non_seqcst;
900     seqcst_fallback = atomic_not_seqcst;
901 }
902 select_atomic_rmw! {
903     unsafe fn atomic_neg(dst: *mut u128) -> u128;
904     pwr8 = atomic_neg_pwr8;
905     non_seqcst_fallback = atomic_neg_non_seqcst;
906     seqcst_fallback = atomic_neg_seqcst;
907 }
908 
909 #[inline]
is_lock_free() -> bool910 fn is_lock_free() -> bool {
911     #[cfg(any(
912         target_feature = "quadword-atomics",
913         portable_atomic_target_feature = "quadword-atomics",
914     ))]
915     {
916         // lqarx and stqcx. instructions are statically available.
917         true
918     }
919     #[cfg(not(any(
920         target_feature = "quadword-atomics",
921         portable_atomic_target_feature = "quadword-atomics",
922     )))]
923     {
924         detect::detect().has_quadword_atomics()
925     }
926 }
// `true` only when quadword-atomics is enabled at compile-time; when it can
// only be discovered at run-time, this is `false`.
const IS_ALWAYS_LOCK_FREE: bool = cfg!(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
));
931 
932 atomic128!(AtomicI128, i128, atomic_max, atomic_min);
933 atomic128!(AtomicU128, u128, atomic_umax, atomic_umin);
934 
#[cfg(test)]
mod tests {
    use super::*;

    // Functional tests for both signed and unsigned 128-bit atomics.
    test_atomic_int!(i128);
    test_atomic_int!(u128);

    // load/store/swap implementation is not affected by signedness, so it is
    // enough to test only unsigned types.
    stress_test!(u128);
}
946