; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32
; This is already checked for in Atomics-64.ll
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC64

; We first check loads, for all sizes from i8 to i64.
; We also vary orderings to check for barriers.
define i8 @load_i8_unordered(i8* %mem) {
; CHECK-LABEL: load_i8_unordered:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lbz r3, 0(r3)
; CHECK-NEXT:    blr
  %val = load atomic i8, i8* %mem unordered, align 1
  ret i8 %val
}
define i16 @load_i16_monotonic(i16* %mem) {
; CHECK-LABEL: load_i16_monotonic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lhz r3, 0(r3)
; CHECK-NEXT:    blr
  %val = load atomic i16, i16* %mem monotonic, align 2
  ret i16 %val
}
; An acquire load is followed by lwsync on PPC32; on PPC64 it instead uses
; the cheaper cmpd/bne-/isync sequence, which orders the load via a control
; dependency plus isync.
define i32 @load_i32_acquire(i32* %mem) {
; PPC32-LABEL: load_i32_acquire:
; PPC32:       # %bb.0:
; PPC32-NEXT:    lwz r3, 0(r3)
; PPC32-NEXT:    lwsync
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_i32_acquire:
; PPC64:       # %bb.0:
; PPC64-NEXT:    lwz r3, 0(r3)
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    blr
  %val = load atomic i32, i32* %mem acquire, align 4
  ret i32 %val
}
; A seq_cst load additionally needs a full sync before it. PPC32 has no
; lock-free 64-bit atomics, so i64 goes through the __atomic_load_8 libcall
; (r4 = 5 is the C11 seq_cst memory-order value).
define i64 @load_i64_seq_cst(i64* %mem) {
; PPC32-LABEL: load_i64_seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stw r0, 4(r1)
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r4, 5
; PPC32-NEXT:    bl __atomic_load_8
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_i64_seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    sync
; PPC64-NEXT:    ld r3, 0(r3)
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    blr
  %val = load atomic i64, i64* %mem seq_cst, align 8
  ret i64 %val
}

; Stores
define void @store_i8_unordered(i8* %mem) {
; CHECK-LABEL: store_i8_unordered:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li r4, 42
; CHECK-NEXT:    stb r4, 0(r3)
; CHECK-NEXT:    blr
  store atomic i8 42, i8* %mem unordered, align 1
  ret void
}
define void @store_i16_monotonic(i16* %mem) {
; CHECK-LABEL: store_i16_monotonic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li r4, 42
; CHECK-NEXT:    sth r4, 0(r3)
; CHECK-NEXT:    blr
  store atomic i16 42, i16* %mem monotonic, align 2
  ret void
}
define void @store_i32_release(i32* %mem) {
; CHECK-LABEL: store_i32_release:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li r4, 42
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    stw r4, 0(r3)
; CHECK-NEXT:    blr
  store atomic i32 42, i32* %mem release, align 4
  ret void
}
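; A seq_cst store needs a full sync before the store itself; as with the
; i64 load above, PPC32 falls back to the __atomic_store_8 libcall (r7 = 5
; again being the C11 seq_cst order value) because it has no lock-free
; 64-bit atomics.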
define void @store_i64_seq_cst(i64* %mem) {
; PPC32-LABEL: store_i64_seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stw r0, 4(r1)
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r5, 0
; PPC32-NEXT:    li r6, 42
; PPC32-NEXT:    li r7, 5
; PPC32-NEXT:    bl __atomic_store_8
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: store_i64_seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    li r4, 42
; PPC64-NEXT:    sync
; PPC64-NEXT:    std r4, 0(r3)
; PPC64-NEXT:    blr
  store atomic i64 42, i64* %mem seq_cst, align 8
  ret void
}

; Atomic CmpXchg
define i8 @cas_strong_i8_sc_sc(i8* %mem) {
; PPC32-LABEL: cas_strong_i8_sc_sc:
; PPC32:       # %bb.0:
; PPC32-NEXT:    rlwinm r8, r3, 3, 27, 28
; PPC32-NEXT:    li r5, 1
; PPC32-NEXT:    li r6, 0
; PPC32-NEXT:    li r7, 255
; PPC32-NEXT:    rlwinm r4, r3, 0, 0, 29
; PPC32-NEXT:    xori r3, r8, 24
; PPC32-NEXT:    slw r5, r5, r3
; PPC32-NEXT:    slw r8, r6, r3
; PPC32-NEXT:    slw r6, r7, r3
; PPC32-NEXT:    and r7, r5, r6
; PPC32-NEXT:    and r8, r8, r6
; PPC32-NEXT:    sync
; PPC32-NEXT:  .LBB8_1:
; PPC32-NEXT:    lwarx r9, 0, r4
; PPC32-NEXT:    and r5, r9, r6
; PPC32-NEXT:    cmpw r5, r8
; PPC32-NEXT:    bne cr0, .LBB8_3
; PPC32-NEXT:  # %bb.2:
; PPC32-NEXT:    andc r9, r9, r6
; PPC32-NEXT:    or r9, r9, r7
; PPC32-NEXT:    stwcx. r9, 0, r4
; PPC32-NEXT:    bne cr0, .LBB8_1
; PPC32-NEXT:    b .LBB8_4
; PPC32-NEXT:  .LBB8_3:
; PPC32-NEXT:    stwcx. r9, 0, r4
; PPC32-NEXT:  .LBB8_4:
; PPC32-NEXT:    srw r3, r5, r3
; PPC32-NEXT:    lwsync
; PPC32-NEXT:    blr
;
; PPC64-LABEL: cas_strong_i8_sc_sc:
; PPC64:       # %bb.0:
; PPC64-NEXT:    rlwinm r8, r3, 3, 27, 28
; PPC64-NEXT:    li r5, 1
; PPC64-NEXT:    li r6, 0
; PPC64-NEXT:    li r7, 255
; PPC64-NEXT:    rldicr r4, r3, 0, 61
; PPC64-NEXT:    xori r3, r8, 24
; PPC64-NEXT:    slw r5, r5, r3
; PPC64-NEXT:    slw r8, r6, r3
; PPC64-NEXT:    slw r6, r7, r3
; PPC64-NEXT:    and r7, r5, r6
; PPC64-NEXT:    and r8, r8, r6
; PPC64-NEXT:    sync
; PPC64-NEXT:  .LBB8_1:
; PPC64-NEXT:    lwarx r9, 0, r4
; PPC64-NEXT:    and r5, r9, r6
; PPC64-NEXT:    cmpw r5, r8
; PPC64-NEXT:    bne cr0, .LBB8_3
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    andc r9, r9, r6
; PPC64-NEXT:    or r9, r9, r7
; PPC64-NEXT:    stwcx. r9, 0, r4
; PPC64-NEXT:    bne cr0, .LBB8_1
; PPC64-NEXT:    b .LBB8_4
; PPC64-NEXT:  .LBB8_3:
; PPC64-NEXT:    stwcx. r9, 0, r4
; PPC64-NEXT:  .LBB8_4:
; PPC64-NEXT:    srw r3, r5, r3
; PPC64-NEXT:    lwsync
; PPC64-NEXT:    blr
  %val = cmpxchg i8* %mem, i8 0, i8 1 seq_cst seq_cst
  %loaded = extractvalue { i8, i1 } %val, 0
  ret i8 %loaded
}
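; i8 and i16 cmpxchg: there is no byte/halfword ll/sc instruction on the
; subtargets targeted here, so the operation is expanded to a word-sized
; lwarx/stwcx. loop. The pointer is rounded down to a word boundary
; (rlwinm/rldicr) and the expected and new values are shifted and masked
; into the right lane of the word. Note that the failed-compare path still
; executes a stwcx. of the value it just loaded, to clear the reservation.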
define i16 @cas_weak_i16_acquire_acquire(i16* %mem) {
; PPC32-LABEL: cas_weak_i16_acquire_acquire:
; PPC32:       # %bb.0:
; PPC32-NEXT:    li r6, 0
; PPC32-NEXT:    rlwinm r4, r3, 3, 27, 27
; PPC32-NEXT:    li r5, 1
; PPC32-NEXT:    ori r7, r6, 65535
; PPC32-NEXT:    xori r4, r4, 16
; PPC32-NEXT:    slw r8, r5, r4
; PPC32-NEXT:    slw r9, r6, r4
; PPC32-NEXT:    slw r5, r7, r4
; PPC32-NEXT:    rlwinm r3, r3, 0, 0, 29
; PPC32-NEXT:    and r6, r8, r5
; PPC32-NEXT:    and r8, r9, r5
; PPC32-NEXT:  .LBB9_1:
; PPC32-NEXT:    lwarx r9, 0, r3
; PPC32-NEXT:    and r7, r9, r5
; PPC32-NEXT:    cmpw r7, r8
; PPC32-NEXT:    bne cr0, .LBB9_3
; PPC32-NEXT:  # %bb.2:
; PPC32-NEXT:    andc r9, r9, r5
; PPC32-NEXT:    or r9, r9, r6
; PPC32-NEXT:    stwcx. r9, 0, r3
; PPC32-NEXT:    bne cr0, .LBB9_1
; PPC32-NEXT:    b .LBB9_4
; PPC32-NEXT:  .LBB9_3:
; PPC32-NEXT:    stwcx. r9, 0, r3
; PPC32-NEXT:  .LBB9_4:
; PPC32-NEXT:    srw r3, r7, r4
; PPC32-NEXT:    lwsync
; PPC32-NEXT:    blr
;
; PPC64-LABEL: cas_weak_i16_acquire_acquire:
; PPC64:       # %bb.0:
; PPC64-NEXT:    li r6, 0
; PPC64-NEXT:    rlwinm r4, r3, 3, 27, 27
; PPC64-NEXT:    li r5, 1
; PPC64-NEXT:    ori r7, r6, 65535
; PPC64-NEXT:    xori r4, r4, 16
; PPC64-NEXT:    slw r8, r5, r4
; PPC64-NEXT:    slw r9, r6, r4
; PPC64-NEXT:    slw r5, r7, r4
; PPC64-NEXT:    rldicr r3, r3, 0, 61
; PPC64-NEXT:    and r6, r8, r5
; PPC64-NEXT:    and r8, r9, r5
; PPC64-NEXT:  .LBB9_1:
; PPC64-NEXT:    lwarx r9, 0, r3
; PPC64-NEXT:    and r7, r9, r5
; PPC64-NEXT:    cmpw r7, r8
; PPC64-NEXT:    bne cr0, .LBB9_3
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    andc r9, r9, r5
; PPC64-NEXT:    or r9, r9, r6
; PPC64-NEXT:    stwcx. r9, 0, r3
; PPC64-NEXT:    bne cr0, .LBB9_1
; PPC64-NEXT:    b .LBB9_4
; PPC64-NEXT:  .LBB9_3:
; PPC64-NEXT:    stwcx. r9, 0, r3
; PPC64-NEXT:  .LBB9_4:
; PPC64-NEXT:    srw r3, r7, r4
; PPC64-NEXT:    lwsync
; PPC64-NEXT:    blr
  %val = cmpxchg weak i16* %mem, i16 0, i16 1 acquire acquire
  %loaded = extractvalue { i16, i1 } %val, 0
  ret i16 %loaded
}
define i32 @cas_strong_i32_acqrel_acquire(i32* %mem) {
; CHECK-LABEL: cas_strong_i32_acqrel_acquire:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li r5, 1
; CHECK-NEXT:    li r6, 0
; CHECK-NEXT:    lwsync
; CHECK-NEXT:  .LBB10_1:
; CHECK-NEXT:    lwarx r4, 0, r3
; CHECK-NEXT:    cmpw r6, r4
; CHECK-NEXT:    bne cr0, .LBB10_3
; CHECK-NEXT:  # %bb.2:
; CHECK-NEXT:    stwcx. r5, 0, r3
; CHECK-NEXT:    bne cr0, .LBB10_1
; CHECK-NEXT:    b .LBB10_4
; CHECK-NEXT:  .LBB10_3:
; CHECK-NEXT:    stwcx. r4, 0, r3
; CHECK-NEXT:  .LBB10_4:
; CHECK-NEXT:    mr r3, r4
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    blr
  %val = cmpxchg i32* %mem, i32 0, i32 1 acq_rel acquire
  %loaded = extractvalue { i32, i1 } %val, 0
  ret i32 %loaded
}
define i64 @cas_weak_i64_release_monotonic(i64* %mem) {
; PPC32-LABEL: cas_weak_i64_release_monotonic:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stw r0, 4(r1)
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r4, 0
; PPC32-NEXT:    stw r4, 12(r1)
; PPC32-NEXT:    li r5, 0
; PPC32-NEXT:    stw r4, 8(r1)
; PPC32-NEXT:    addi r4, r1, 8
; PPC32-NEXT:    li r6, 1
; PPC32-NEXT:    li r7, 3
; PPC32-NEXT:    li r8, 0
; PPC32-NEXT:    bl __atomic_compare_exchange_8
; PPC32-NEXT:    lwz r4, 12(r1)
; PPC32-NEXT:    lwz r3, 8(r1)
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: cas_weak_i64_release_monotonic:
; PPC64:       # %bb.0:
; PPC64-NEXT:    li r5, 1
; PPC64-NEXT:    li r6, 0
; PPC64-NEXT:    lwsync
; PPC64-NEXT:  .LBB11_1:
; PPC64-NEXT:    ldarx r4, 0, r3
; PPC64-NEXT:    cmpd r6, r4
; PPC64-NEXT:    bne cr0, .LBB11_4
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    stdcx. r5, 0, r3
; PPC64-NEXT:    bne cr0, .LBB11_1
; PPC64-NEXT:  # %bb.3:
; PPC64-NEXT:    mr r3, r4
; PPC64-NEXT:    blr
; PPC64-NEXT:  .LBB11_4:
; PPC64-NEXT:    stdcx. r4, 0, r3
; PPC64-NEXT:    mr r3, r4
; PPC64-NEXT:    blr
  %val = cmpxchg weak i64* %mem, i64 0, i64 1 release monotonic
  %loaded = extractvalue { i64, i1 } %val, 0
  ret i64 %loaded
}
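; For the release/monotonic pair above, PPC32 passes the success and
; failure orders to __atomic_compare_exchange_8 as the two trailing
; arguments (r7 = 3 and r8 = 0, i.e. release and relaxed in the C11
; encoding). PPC64 emits the loop inline: lwsync before it for release, and
; no trailing barrier, since neither ordering includes acquire.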

; AtomicRMW
define i8 @add_i8_monotonic(i8* %mem, i8 %operand) {
; PPC32-LABEL: add_i8_monotonic:
; PPC32:       # %bb.0:
; PPC32-NEXT:    rlwinm r7, r3, 3, 27, 28
; PPC32-NEXT:    li r6, 255
; PPC32-NEXT:    rlwinm r5, r3, 0, 0, 29
; PPC32-NEXT:    xori r3, r7, 24
; PPC32-NEXT:    slw r4, r4, r3
; PPC32-NEXT:    slw r6, r6, r3
; PPC32-NEXT:  .LBB12_1:
; PPC32-NEXT:    lwarx r7, 0, r5
; PPC32-NEXT:    add r8, r4, r7
; PPC32-NEXT:    andc r9, r7, r6
; PPC32-NEXT:    and r8, r8, r6
; PPC32-NEXT:    or r8, r8, r9
; PPC32-NEXT:    stwcx. r8, 0, r5
; PPC32-NEXT:    bne cr0, .LBB12_1
; PPC32-NEXT:  # %bb.2:
; PPC32-NEXT:    srw r3, r7, r3
; PPC32-NEXT:    blr
;
; PPC64-LABEL: add_i8_monotonic:
; PPC64:       # %bb.0:
; PPC64-NEXT:    rlwinm r7, r3, 3, 27, 28
; PPC64-NEXT:    li r6, 255
; PPC64-NEXT:    rldicr r5, r3, 0, 61
; PPC64-NEXT:    xori r3, r7, 24
; PPC64-NEXT:    slw r4, r4, r3
; PPC64-NEXT:    slw r6, r6, r3
; PPC64-NEXT:  .LBB12_1:
; PPC64-NEXT:    lwarx r7, 0, r5
; PPC64-NEXT:    add r8, r4, r7
; PPC64-NEXT:    andc r9, r7, r6
; PPC64-NEXT:    and r8, r8, r6
; PPC64-NEXT:    or r8, r8, r9
; PPC64-NEXT:    stwcx. r8, 0, r5
; PPC64-NEXT:    bne cr0, .LBB12_1
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    srw r3, r7, r3
; PPC64-NEXT:    blr
  %val = atomicrmw add i8* %mem, i8 %operand monotonic
  ret i8 %val
}
define i16 @xor_i16_seq_cst(i16* %mem, i16 %operand) {
; PPC32-LABEL: xor_i16_seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    li r6, 0
; PPC32-NEXT:    rlwinm r7, r3, 3, 27, 27
; PPC32-NEXT:    rlwinm r5, r3, 0, 0, 29
; PPC32-NEXT:    ori r6, r6, 65535
; PPC32-NEXT:    xori r3, r7, 16
; PPC32-NEXT:    slw r4, r4, r3
; PPC32-NEXT:    slw r6, r6, r3
; PPC32-NEXT:    sync
; PPC32-NEXT:  .LBB13_1:
; PPC32-NEXT:    lwarx r7, 0, r5
; PPC32-NEXT:    xor r8, r4, r7
; PPC32-NEXT:    andc r9, r7, r6
; PPC32-NEXT:    and r8, r8, r6
; PPC32-NEXT:    or r8, r8, r9
; PPC32-NEXT:    stwcx. r8, 0, r5
; PPC32-NEXT:    bne cr0, .LBB13_1
; PPC32-NEXT:  # %bb.2:
; PPC32-NEXT:    srw r3, r7, r3
; PPC32-NEXT:    lwsync
; PPC32-NEXT:    blr
;
; PPC64-LABEL: xor_i16_seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    li r6, 0
; PPC64-NEXT:    rlwinm r7, r3, 3, 27, 27
; PPC64-NEXT:    rldicr r5, r3, 0, 61
; PPC64-NEXT:    ori r6, r6, 65535
; PPC64-NEXT:    xori r3, r7, 16
; PPC64-NEXT:    slw r4, r4, r3
; PPC64-NEXT:    slw r6, r6, r3
; PPC64-NEXT:    sync
; PPC64-NEXT:  .LBB13_1:
; PPC64-NEXT:    lwarx r7, 0, r5
; PPC64-NEXT:    xor r8, r4, r7
; PPC64-NEXT:    andc r9, r7, r6
; PPC64-NEXT:    and r8, r8, r6
; PPC64-NEXT:    or r8, r8, r9
; PPC64-NEXT:    stwcx. r8, 0, r5
; PPC64-NEXT:    bne cr0, .LBB13_1
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    srw r3, r7, r3
; PPC64-NEXT:    lwsync
; PPC64-NEXT:    blr
  %val = atomicrmw xor i16* %mem, i16 %operand seq_cst
  ret i16 %val
}
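; atomicrmw expands to the same kind of ll/sc loop, with barrier placement
; determined by the ordering: nothing for monotonic, sync before and lwsync
; after for seq_cst, lwsync on the appropriate side(s) otherwise. An
; acq_rel xchg brackets the loop with lwsync on both sides, and has the
; simplest loop body: stwcx. stores the operand directly, with no ALU op
; between the load and the store.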
define i32 @xchg_i32_acq_rel(i32* %mem, i32 %operand) {
; CHECK-LABEL: xchg_i32_acq_rel:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lwsync
; CHECK-NEXT:  .LBB14_1:
; CHECK-NEXT:    lwarx r5, 0, r3
; CHECK-NEXT:    stwcx. r4, 0, r3
; CHECK-NEXT:    bne cr0, .LBB14_1
; CHECK-NEXT:  # %bb.2:
; CHECK-NEXT:    mr r3, r5
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    blr
  %val = atomicrmw xchg i32* %mem, i32 %operand acq_rel
  ret i32 %val
}
define i64 @and_i64_release(i64* %mem, i64 %operand) {
; PPC32-LABEL: and_i64_release:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stw r0, 4(r1)
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r7, 3
; PPC32-NEXT:    bl __atomic_fetch_and_8
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: and_i64_release:
; PPC64:       # %bb.0:
; PPC64-NEXT:    lwsync
; PPC64-NEXT:  .LBB15_1:
; PPC64-NEXT:    ldarx r5, 0, r3
; PPC64-NEXT:    and r6, r4, r5
; PPC64-NEXT:    stdcx. r6, 0, r3
; PPC64-NEXT:    bne cr0, .LBB15_1
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    mr r3, r5
; PPC64-NEXT:    blr
  %val = atomicrmw and i64* %mem, i64 %operand release
  ret i64 %val
}
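; In and_i64_release above, PPC32 again falls back to a libcall
; (__atomic_fetch_and_8 with r7 = 3, the C11 release value), while PPC64
; inlines an ldarx/stdcx. loop with only the leading lwsync that release
; ordering requires.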