; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s

@var = global i128 0

; Due to the scheduling right after isel for cmpxchg, and given that the
; machine scheduler and copy coalescer do not disturb physical register
; live ranges, we end up with a useless copy.
define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
; CHECK-LABEL: val_compare_and_swap:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rcx, %r9
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    movq %r9, %rbx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
  %val = extractvalue { i128, i1 } %pair, 0
  ret i128 %val
}

define void @fetch_and_nand(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_nand:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB1_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    andq %r8, %rcx
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    andq %rsi, %rbx
; CHECK-NEXT:    notq %rbx
; CHECK-NEXT:    notq %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB1_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw nand i128* %p, i128 %bits release
  store i128 %val, i128* @var, align 16
  ret void
}
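
; The LBB1_1 loop above is the machine form of the compare-exchange loop that
; the IR-level atomic expansion builds for atomicrmw operations with no direct
; 128-bit instruction: cmpxchg16b expects the current value in RDX:RAX and the
; replacement in RCX:RBX, and sets ZF on success. A sketch of the expanded IR
; for the nand case (value names are illustrative; the block names match the
; %atomicrmw.start / %atomicrmw.end comments in the checks above):
;
;   %init = load i128, i128* %p, align 16
;   br label %atomicrmw.start
; atomicrmw.start:
;   %loaded = phi i128 [ %init, %0 ], [ %newloaded, %atomicrmw.start ]
;   %and = and i128 %loaded, %bits
;   %new = xor i128 %and, -1          ; nand is not(loaded & bits)
;   %pair = cmpxchg i128* %p, i128 %loaded, i128 %new release monotonic
;   %newloaded = extractvalue { i128, i1 } %pair, 0
;   %success = extractvalue { i128, i1 } %pair, 1
;   br i1 %success, label %atomicrmw.end, label %atomicrmw.start
; atomicrmw.end: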

define void @fetch_and_or(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_or:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB2_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    orq %rsi, %rbx
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    orq %r8, %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB2_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw or i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}

define void @fetch_and_add(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_add:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB3_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    addq %rsi, %rbx
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    adcq %r8, %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB3_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw add i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}

define void @fetch_and_sub(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_sub:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB4_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    subq %rsi, %rbx
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    sbbq %r8, %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB4_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw sub i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}

define void @fetch_and_min(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_min:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB5_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    cmpq %rax, %rsi
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    sbbq %rdx, %rcx
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    cmovgeq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    cmovgeq %rax, %rbx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB5_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw min i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}

define void @fetch_and_max(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_max:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB6_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    cmpq %rsi, %rax
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    sbbq %r8, %rcx
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    cmovgeq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    cmovgeq %rax, %rbx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB6_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw max i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}
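
; None of the min/max flavours has a 128-bit compare instruction to use, so the
; expansion computes the new value with a compare and select before the
; cmpxchg. A sketch of the loop body for the signed-min case (value names are
; illustrative):
;
;   %cmp = icmp sle i128 %loaded, %bits
;   %new = select i1 %cmp, i128 %loaded, i128 %bits
;
; The icmp lowers to the cmpq/sbbq pair (a 128-bit subtract performed only for
; its flags) and the select to the cmov pair; the unsigned variants below
; differ only in the predicate, hence cmovae/cmovb instead of cmovge.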

define void @fetch_and_umin(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_umin:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB7_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    cmpq %rax, %rsi
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    sbbq %rdx, %rcx
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    cmovaeq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    cmovaeq %rax, %rbx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB7_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw umin i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}

define void @fetch_and_umax(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_umax:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB8_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    cmpq %rax, %rsi
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    sbbq %rdx, %rcx
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    cmovbq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    cmovbq %rax, %rbx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB8_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw umax i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}

define i128 @atomic_load_seq_cst(i128* %p) {
; CHECK-LABEL: atomic_load_seq_cst:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    xorl %ebx, %ebx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %r = load atomic i128, i128* %p seq_cst, align 16
  ret i128 %r
}

define i128 @atomic_load_relaxed(i128* %p) {
; CHECK-LABEL: atomic_load_relaxed:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    xorl %ebx, %ebx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %r = load atomic i128, i128* %p monotonic, align 16
  ret i128 %r
}
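
; With only cx16 available there is no plain 16-byte atomic load, so both loads
; above are a lock cmpxchg16b with the expected value (RDX:RAX) and the
; replacement (RCX:RBX) all zeroed: if the location holds zero it is rewritten
; with the same zero, otherwise the exchange fails, and in either case RDX:RAX
; ends up holding the current contents. Roughly equivalent IR (a sketch, not
; what the backend literally emits):
;
;   %pair = cmpxchg i128* %p, i128 0, i128 0 seq_cst seq_cst
;   %r = extractvalue { i128, i1 } %pair, 0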

define void @atomic_store_seq_cst(i128* %p, i128 %in) {
; CHECK-LABEL: atomic_store_seq_cst:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB11_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB11_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  store atomic i128 %in, i128* %p seq_cst, align 16
  ret void
}

define void @atomic_store_release(i128* %p, i128 %in) {
; CHECK-LABEL: atomic_store_release:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB12_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB12_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  store atomic i128 %in, i128* %p release, align 16
  ret void
}

define void @atomic_store_relaxed(i128* %p, i128 %in) {
; CHECK-LABEL: atomic_store_relaxed:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB13_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB13_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  store atomic i128 %in, i128* %p unordered, align 16
  ret void
}
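
; 16-byte atomic stores have no single-instruction lowering under cx16 either:
; each store above becomes an exchange that loops on cmpxchg16b, reusing the
; freshly loaded value as the next expected value until the exchange succeeds.
; Roughly equivalent IR (a sketch; value names are illustrative):
;
;   %old = load i128, i128* %p
;   br label %atomicrmw.start
; atomicrmw.start:
;   %cur = phi i128 [ %old, %0 ], [ %seen, %atomicrmw.start ]
;   %pair = cmpxchg i128* %p, i128 %cur, i128 %in seq_cst seq_cst
;   %seen = extractvalue { i128, i1 } %pair, 0
;   %ok = extractvalue { i128, i1 } %pair, 1
;   br i1 %ok, label %atomicrmw.end, label %atomicrmw.start
; atomicrmw.end: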