1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+adx < %s | FileCheck %s --check-prefix=CHECK 3; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=-adx < %s | FileCheck %s --check-prefix=CHECK 4 5target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" 6target triple = "x86_64-unknown-unknown" 7 8; Stack reload folding tests. 9; 10; By including a nop call with sideeffects we can force a partial register spill of the 11; relevant registers and check that the reload is correctly folded into the instruction. 12 13define i8 @stack_fold_addcarry_u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) { 14; CHECK-LABEL: stack_fold_addcarry_u32: 15; CHECK: # %bb.0: 16; CHECK-NEXT: pushq %rbp 17; CHECK-NEXT: .cfi_def_cfa_offset 16 18; CHECK-NEXT: pushq %r15 19; CHECK-NEXT: .cfi_def_cfa_offset 24 20; CHECK-NEXT: pushq %r14 21; CHECK-NEXT: .cfi_def_cfa_offset 32 22; CHECK-NEXT: pushq %r13 23; CHECK-NEXT: .cfi_def_cfa_offset 40 24; CHECK-NEXT: pushq %r12 25; CHECK-NEXT: .cfi_def_cfa_offset 48 26; CHECK-NEXT: pushq %rbx 27; CHECK-NEXT: .cfi_def_cfa_offset 56 28; CHECK-NEXT: .cfi_offset %rbx, -56 29; CHECK-NEXT: .cfi_offset %r12, -48 30; CHECK-NEXT: .cfi_offset %r13, -40 31; CHECK-NEXT: .cfi_offset %r14, -32 32; CHECK-NEXT: .cfi_offset %r15, -24 33; CHECK-NEXT: .cfi_offset %rbp, -16 34; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 35; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 36; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 37; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 38; CHECK-NEXT: #APP 39; CHECK-NEXT: nop 40; CHECK-NEXT: #NO_APP 41; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 42; CHECK-NEXT: addb $-1, %al 43; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload 44; CHECK-NEXT: adcl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload 45; CHECK-NEXT: setb %al 46; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload 47; CHECK-NEXT: movl %edx, (%rcx) 48; CHECK-NEXT: popq %rbx 49; CHECK-NEXT: .cfi_def_cfa_offset 48 50; CHECK-NEXT: popq %r12 51; CHECK-NEXT: .cfi_def_cfa_offset 40 52; CHECK-NEXT: popq %r13 53; CHECK-NEXT: .cfi_def_cfa_offset 32 54; CHECK-NEXT: popq %r14 55; CHECK-NEXT: .cfi_def_cfa_offset 24 56; CHECK-NEXT: popq %r15 57; CHECK-NEXT: .cfi_def_cfa_offset 16 58; CHECK-NEXT: popq %rbp 59; CHECK-NEXT: .cfi_def_cfa_offset 8 60; CHECK-NEXT: retq 61 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 62 %2 = call { i8, i32 } @llvm.x86.addcarry.32(i8 %a0, i32 %a1, i32 %a2) 63 %3 = extractvalue { i8, i32 } %2, 1 64 %4 = bitcast i8* %a3 to i32* 65 store i32 %3, i32* %4, align 1 66 %5 = extractvalue { i8, i32 } %2, 0 67 ret i8 %5 68} 69 70define i8 @stack_fold_addcarry_u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) { 71; CHECK-LABEL: stack_fold_addcarry_u64: 72; CHECK: # %bb.0: 73; CHECK-NEXT: pushq %rbp 74; CHECK-NEXT: .cfi_def_cfa_offset 16 75; CHECK-NEXT: pushq %r15 76; CHECK-NEXT: .cfi_def_cfa_offset 24 77; CHECK-NEXT: pushq %r14 78; CHECK-NEXT: .cfi_def_cfa_offset 32 79; CHECK-NEXT: pushq %r13 80; CHECK-NEXT: .cfi_def_cfa_offset 40 81; CHECK-NEXT: pushq %r12 82; CHECK-NEXT: .cfi_def_cfa_offset 48 83; CHECK-NEXT: pushq %rbx 84; CHECK-NEXT: .cfi_def_cfa_offset 56 85; CHECK-NEXT: .cfi_offset %rbx, -56 86; CHECK-NEXT: .cfi_offset %r12, -48 87; CHECK-NEXT: .cfi_offset %r13, -40 88; CHECK-NEXT: .cfi_offset %r14, -32 89; CHECK-NEXT: .cfi_offset %r15, -24 90; CHECK-NEXT: .cfi_offset %rbp, -16 91; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 92; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 93; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 94; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 95; CHECK-NEXT: #APP 96; CHECK-NEXT: nop 97; CHECK-NEXT: #NO_APP 98; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 99; CHECK-NEXT: addb $-1, %al 100; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload 101; CHECK-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload 102; CHECK-NEXT: setb %al 103; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload 104; CHECK-NEXT: movq %rdx, (%rcx) 105; CHECK-NEXT: popq %rbx 106; CHECK-NEXT: .cfi_def_cfa_offset 48 107; CHECK-NEXT: popq %r12 108; CHECK-NEXT: .cfi_def_cfa_offset 40 109; CHECK-NEXT: popq %r13 110; CHECK-NEXT: .cfi_def_cfa_offset 32 111; CHECK-NEXT: popq %r14 112; CHECK-NEXT: .cfi_def_cfa_offset 24 113; CHECK-NEXT: popq %r15 114; CHECK-NEXT: .cfi_def_cfa_offset 16 115; CHECK-NEXT: popq %rbp 116; CHECK-NEXT: .cfi_def_cfa_offset 8 117; CHECK-NEXT: retq 118 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 119 %2 = call { i8, i64 } @llvm.x86.addcarry.64(i8 %a0, i64 %a1, i64 %a2) 120 %3 = extractvalue { i8, i64 } %2, 1 121 %4 = bitcast i8* %a3 to i64* 122 store i64 %3, i64* %4, align 1 123 %5 = extractvalue { i8, i64 } %2, 0 124 ret i8 %5 125} 126 127define i8 @stack_fold_addcarryx_u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) { 128; CHECK-LABEL: stack_fold_addcarryx_u32: 129; CHECK: # %bb.0: 130; CHECK-NEXT: pushq %rbp 131; CHECK-NEXT: .cfi_def_cfa_offset 16 132; CHECK-NEXT: pushq %r15 133; CHECK-NEXT: .cfi_def_cfa_offset 24 134; CHECK-NEXT: pushq %r14 135; CHECK-NEXT: .cfi_def_cfa_offset 32 136; CHECK-NEXT: pushq %r13 137; CHECK-NEXT: .cfi_def_cfa_offset 40 138; CHECK-NEXT: pushq %r12 139; CHECK-NEXT: .cfi_def_cfa_offset 48 140; CHECK-NEXT: pushq %rbx 141; CHECK-NEXT: .cfi_def_cfa_offset 56 142; CHECK-NEXT: .cfi_offset %rbx, -56 143; CHECK-NEXT: .cfi_offset %r12, -48 144; CHECK-NEXT: .cfi_offset %r13, -40 145; CHECK-NEXT: .cfi_offset %r14, -32 146; CHECK-NEXT: .cfi_offset %r15, -24 147; CHECK-NEXT: .cfi_offset %rbp, -16 148; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 149; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 150; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 151; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 152; CHECK-NEXT: #APP 153; CHECK-NEXT: nop 154; CHECK-NEXT: #NO_APP 155; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 156; CHECK-NEXT: addb $-1, %al 157; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload 158; CHECK-NEXT: adcl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload 159; CHECK-NEXT: setb %al 160; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload 161; CHECK-NEXT: movl %edx, (%rcx) 162; CHECK-NEXT: popq %rbx 163; CHECK-NEXT: .cfi_def_cfa_offset 48 164; CHECK-NEXT: popq %r12 165; CHECK-NEXT: .cfi_def_cfa_offset 40 166; CHECK-NEXT: popq %r13 167; CHECK-NEXT: .cfi_def_cfa_offset 32 168; CHECK-NEXT: popq %r14 169; CHECK-NEXT: .cfi_def_cfa_offset 24 170; CHECK-NEXT: popq %r15 171; CHECK-NEXT: .cfi_def_cfa_offset 16 172; CHECK-NEXT: popq %rbp 173; CHECK-NEXT: .cfi_def_cfa_offset 8 174; CHECK-NEXT: retq 175 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 176 %2 = call { i8, i32 } @llvm.x86.addcarry.32(i8 %a0, i32 %a1, i32 %a2) 177 %3 = extractvalue { i8, i32 } %2, 1 178 %4 = bitcast i8* %a3 to i32* 179 store i32 %3, i32* %4, align 1 180 %5 = extractvalue { i8, i32 } %2, 0 181 ret i8 %5 182} 183 184define i8 @stack_fold_addcarryx_u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) { 185; CHECK-LABEL: stack_fold_addcarryx_u64: 186; CHECK: # %bb.0: 187; CHECK-NEXT: pushq %rbp 188; CHECK-NEXT: .cfi_def_cfa_offset 16 189; CHECK-NEXT: pushq %r15 190; CHECK-NEXT: .cfi_def_cfa_offset 24 191; CHECK-NEXT: pushq %r14 192; CHECK-NEXT: .cfi_def_cfa_offset 32 193; CHECK-NEXT: pushq %r13 194; CHECK-NEXT: .cfi_def_cfa_offset 40 195; CHECK-NEXT: pushq %r12 196; CHECK-NEXT: .cfi_def_cfa_offset 48 197; CHECK-NEXT: pushq %rbx 198; CHECK-NEXT: .cfi_def_cfa_offset 56 199; CHECK-NEXT: .cfi_offset %rbx, -56 200; CHECK-NEXT: .cfi_offset %r12, -48 201; CHECK-NEXT: .cfi_offset %r13, -40 202; CHECK-NEXT: .cfi_offset %r14, -32 203; CHECK-NEXT: .cfi_offset %r15, -24 204; CHECK-NEXT: .cfi_offset %rbp, -16 205; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 206; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 207; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 208; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 209; CHECK-NEXT: #APP 210; CHECK-NEXT: nop 211; CHECK-NEXT: #NO_APP 212; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 213; CHECK-NEXT: addb $-1, %al 214; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload 215; CHECK-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload 216; CHECK-NEXT: setb %al 217; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload 218; CHECK-NEXT: movq %rdx, (%rcx) 219; CHECK-NEXT: popq %rbx 220; CHECK-NEXT: .cfi_def_cfa_offset 48 221; CHECK-NEXT: popq %r12 222; CHECK-NEXT: .cfi_def_cfa_offset 40 223; CHECK-NEXT: popq %r13 224; CHECK-NEXT: .cfi_def_cfa_offset 32 225; CHECK-NEXT: popq %r14 226; CHECK-NEXT: .cfi_def_cfa_offset 24 227; CHECK-NEXT: popq %r15 228; CHECK-NEXT: .cfi_def_cfa_offset 16 229; CHECK-NEXT: popq %rbp 230; CHECK-NEXT: .cfi_def_cfa_offset 8 231; CHECK-NEXT: retq 232 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 233 %2 = call { i8, i64 } @llvm.x86.addcarry.64(i8 %a0, i64 %a1, i64 %a2) 234 %3 = extractvalue { i8, i64 } %2, 1 235 %4 = bitcast i8* %a3 to i64* 236 store i64 %3, i64* %4, align 1 237 %5 = extractvalue { i8, i64 } %2, 0 238 ret i8 %5 239} 240 241define i8 @stack_fold_subborrow_u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) { 242; CHECK-LABEL: stack_fold_subborrow_u32: 243; CHECK: # %bb.0: 244; CHECK-NEXT: pushq %rbp 245; CHECK-NEXT: .cfi_def_cfa_offset 16 246; CHECK-NEXT: pushq %r15 247; CHECK-NEXT: .cfi_def_cfa_offset 24 248; CHECK-NEXT: pushq %r14 249; CHECK-NEXT: .cfi_def_cfa_offset 32 250; CHECK-NEXT: pushq %r13 251; CHECK-NEXT: .cfi_def_cfa_offset 40 252; CHECK-NEXT: pushq %r12 253; CHECK-NEXT: .cfi_def_cfa_offset 48 254; CHECK-NEXT: pushq %rbx 255; CHECK-NEXT: .cfi_def_cfa_offset 56 256; CHECK-NEXT: .cfi_offset %rbx, -56 257; CHECK-NEXT: .cfi_offset %r12, -48 258; CHECK-NEXT: .cfi_offset %r13, -40 259; CHECK-NEXT: .cfi_offset %r14, -32 260; CHECK-NEXT: .cfi_offset %r15, -24 261; CHECK-NEXT: .cfi_offset %rbp, -16 262; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 263; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 264; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 265; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 266; CHECK-NEXT: #APP 267; CHECK-NEXT: nop 268; CHECK-NEXT: #NO_APP 269; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 270; CHECK-NEXT: addb $-1, %al 271; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload 272; CHECK-NEXT: sbbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload 273; CHECK-NEXT: setb %al 274; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload 275; CHECK-NEXT: movl %edx, (%rcx) 276; CHECK-NEXT: popq %rbx 277; CHECK-NEXT: .cfi_def_cfa_offset 48 278; CHECK-NEXT: popq %r12 279; CHECK-NEXT: .cfi_def_cfa_offset 40 280; CHECK-NEXT: popq %r13 281; CHECK-NEXT: .cfi_def_cfa_offset 32 282; CHECK-NEXT: popq %r14 283; CHECK-NEXT: .cfi_def_cfa_offset 24 284; CHECK-NEXT: popq %r15 285; CHECK-NEXT: .cfi_def_cfa_offset 16 286; CHECK-NEXT: popq %rbp 287; CHECK-NEXT: .cfi_def_cfa_offset 8 288; CHECK-NEXT: retq 289 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 290 %2 = call { i8, i32 } @llvm.x86.subborrow.32(i8 %a0, i32 %a1, i32 %a2) 291 %3 = extractvalue { i8, i32 } %2, 1 292 %4 = bitcast i8* %a3 to i32* 293 store i32 %3, i32* %4, align 1 294 %5 = extractvalue { i8, i32 } %2, 0 295 ret i8 %5 296} 297 298define i8 @stack_fold_subborrow_u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) { 299; CHECK-LABEL: stack_fold_subborrow_u64: 300; CHECK: # %bb.0: 301; CHECK-NEXT: pushq %rbp 302; CHECK-NEXT: .cfi_def_cfa_offset 16 303; CHECK-NEXT: pushq %r15 304; CHECK-NEXT: .cfi_def_cfa_offset 24 305; CHECK-NEXT: pushq %r14 306; CHECK-NEXT: .cfi_def_cfa_offset 32 307; CHECK-NEXT: pushq %r13 308; CHECK-NEXT: .cfi_def_cfa_offset 40 309; CHECK-NEXT: pushq %r12 310; CHECK-NEXT: .cfi_def_cfa_offset 48 311; CHECK-NEXT: pushq %rbx 312; CHECK-NEXT: .cfi_def_cfa_offset 56 313; CHECK-NEXT: .cfi_offset %rbx, -56 314; CHECK-NEXT: .cfi_offset %r12, -48 315; CHECK-NEXT: .cfi_offset %r13, -40 316; CHECK-NEXT: .cfi_offset %r14, -32 317; CHECK-NEXT: .cfi_offset %r15, -24 318; CHECK-NEXT: .cfi_offset %rbp, -16 319; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 320; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 321; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 322; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 323; CHECK-NEXT: #APP 324; CHECK-NEXT: nop 325; CHECK-NEXT: #NO_APP 326; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 327; CHECK-NEXT: addb $-1, %al 328; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload 329; CHECK-NEXT: sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload 330; CHECK-NEXT: setb %al 331; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload 332; CHECK-NEXT: movq %rdx, (%rcx) 333; CHECK-NEXT: popq %rbx 334; CHECK-NEXT: .cfi_def_cfa_offset 48 335; CHECK-NEXT: popq %r12 336; CHECK-NEXT: .cfi_def_cfa_offset 40 337; CHECK-NEXT: popq %r13 338; CHECK-NEXT: .cfi_def_cfa_offset 32 339; CHECK-NEXT: popq %r14 340; CHECK-NEXT: .cfi_def_cfa_offset 24 341; CHECK-NEXT: popq %r15 342; CHECK-NEXT: .cfi_def_cfa_offset 16 343; CHECK-NEXT: popq %rbp 344; CHECK-NEXT: .cfi_def_cfa_offset 8 345; CHECK-NEXT: retq 346 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 347 %2 = call { i8, i64 } @llvm.x86.subborrow.64(i8 %a0, i64 %a1, i64 %a2) 348 %3 = extractvalue { i8, i64 } %2, 1 349 %4 = bitcast i8* %a3 to i64* 350 store i64 %3, i64* %4, align 1 351 %5 = extractvalue { i8, i64 } %2, 0 352 ret i8 %5 353} 354 355declare { i8, i32 } @llvm.x86.addcarry.32(i8, i32, i32) 356declare { i8, i64 } @llvm.x86.addcarry.64(i8, i64, i64) 357declare { i8, i32 } @llvm.x86.subborrow.32(i8, i32, i32) 358declare { i8, i64 } @llvm.x86.subborrow.64(i8, i64, i64) 359