; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.

; ANDN (xor -1 + and): one operand is folded as a 4-byte reload from the spill slot.
define i32 @stack_fold_andn_u32(i32 %a0, i32 %a1) {
; CHECK-LABEL: stack_fold_andn_u32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT: andnl {{[-0-9]+}}(%r{{[sb]}}p), %eax, %eax # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = xor i32 %a0, -1
  %3 = and i32 %a1, %2
  ret i32 %3
}

; 64-bit ANDN: same pattern as above, folded as an 8-byte reload.
define i64 @stack_fold_andn_u64(i64 %a0, i64 %a1) {
; CHECK-LABEL: stack_fold_andn_u64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT: andnq {{[-0-9]+}}(%r{{[sb]}}p), %rax, %rax # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = xor i64 %a0, -1
  %3 = and i64 %a1, %2
  ret i64 %3
}

; BEXTR intrinsic: the source operand is folded as a 4-byte reload.
define i32 @stack_fold_bextr_u32(i32 %a0, i32 %a1) {
; CHECK-LABEL: stack_fold_bextr_u32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT: bextrl %eax, {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a0, i32 %a1)
  ret i32 %2
}
declare i32 @llvm.x86.bmi.bextr.32(i32, i32)

; 64-bit BEXTR intrinsic, folded as an 8-byte reload.
define i64 @stack_fold_bextr_u64(i64 %a0, i64 %a1) {
; CHECK-LABEL: stack_fold_bextr_u64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT: bextrq %rax, {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a0, i64 %a1)
  ret i64 %2
}
declare i64 @llvm.x86.bmi.bextr.64(i64, i64)

; BLSI (neg + and): single-source, so only %edi is spilled; folded 4-byte reload.
define i32 @stack_fold_blsi_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blsi_u32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blsil {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i32 0, %a0
  %3 = and i32 %2, %a0
  ret i32 %3
}

; 64-bit BLSI, folded 8-byte reload.
define i64 @stack_fold_blsi_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blsi_u64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blsiq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i64 0, %a0
  %3 = and i64 %2, %a0
  ret i64 %3
}

; BLSMSK (dec + xor): folded 4-byte reload.
define i32 @stack_fold_blsmsk_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blsmsk_u32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blsmskl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i32 %a0, 1
  %3 = xor i32 %2, %a0
  ret i32 %3
}

; 64-bit BLSMSK, folded 8-byte reload.
define i64 @stack_fold_blsmsk_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blsmsk_u64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blsmskq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i64 %a0, 1
  %3 = xor i64 %2, %a0
  ret i64 %3
}

; BLSR (dec + and): folded 4-byte reload.
define i32 @stack_fold_blsr_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blsr_u32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blsrl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i32 %a0, 1
  %3 = and i32 %2, %a0
  ret i32 %3
}

; 64-bit BLSR, folded 8-byte reload.
define i64 @stack_fold_blsr_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blsr_u64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blsrq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i64 %a0, 1
  %3 = and i64 %2, %a0
  ret i64 %3
}

;TODO stack_fold_tzcnt_u16

; TZCNT via llvm.cttz (zero is defined): folded 4-byte reload.
define i32 @stack_fold_tzcnt_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_tzcnt_u32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: tzcntl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i32 @llvm.cttz.i32(i32 %a0, i1 0)
  ret i32 %2
}
declare i32 @llvm.cttz.i32(i32, i1)

; 64-bit TZCNT, folded 8-byte reload.
define i64 @stack_fold_tzcnt_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_tzcnt_u64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: .cfi_offset %rbx, -56
; CHECK-NEXT: .cfi_offset %r12, -48
; CHECK-NEXT: .cfi_offset %r13, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: tzcntq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r13
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i64 @llvm.cttz.i64(i64 %a0, i1 0)
  ret i64 %2
}
declare i64 @llvm.cttz.i64(i64, i1)