; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+bmi,+tbm < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests for the TBM instructions.
;
; Every function begins with a side-effecting inline-asm "nop" whose clobber
; list names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15. The incoming
; argument therefore has to be spilled before the asm, and the assertions
; verify that the reload is folded into the memory operand of the instruction
; under test ("Folded Reload") instead of being emitted as a separate load.

; bextri, 32-bit: intrinsic call with control immediate 3841 (0xF01).
define i32 @stack_fold_bextri_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_bextri_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    bextrl $3841, {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT:    # imm = 0xF01
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %res = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a0, i32 3841)
  ret i32 %res
}
declare i32 @llvm.x86.tbm.bextri.u32(i32, i32)

; bextri, 64-bit: intrinsic call with control immediate 3841 (0xF01).
define i64 @stack_fold_bextri_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_bextri_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    bextrq $3841, {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT:    # imm = 0xF01
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %res = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a0, i64 3841)
  ret i64 %res
}
declare i64 @llvm.x86.tbm.bextri.u64(i64, i64)

; blcfill, 32-bit: a0 & (a0 + 1).
define i32 @stack_fold_blcfill_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blcfill_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blcfilll {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %inc = add i32 %a0, 1
  %res = and i32 %a0, %inc
  ret i32 %res
}

; blcfill, 64-bit: a0 & (a0 + 1).
define i64 @stack_fold_blcfill_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blcfill_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blcfillq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %inc = add i64 %a0, 1
  %res = and i64 %a0, %inc
  ret i64 %res
}

; blci, 32-bit: a0 | ~(a0 + 1).
define i32 @stack_fold_blci_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blci_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blcil {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %inc = add i32 %a0, 1
  %not_inc = xor i32 %inc, -1
  %res = or i32 %a0, %not_inc
  ret i32 %res
}

; blci, 64-bit: a0 | ~(a0 + 1).
define i64 @stack_fold_blci_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blci_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blciq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %inc = add i64 %a0, 1
  %not_inc = xor i64 %inc, -1
  %res = or i64 %a0, %not_inc
  ret i64 %res
}

; blcic, 32-bit: (a0 + 1) & ~a0.
define i32 @stack_fold_blcic_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blcic_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blcicl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %inc = add i32 %a0, 1
  %not = xor i32 %a0, -1
  %res = and i32 %inc, %not
  ret i32 %res
}

; blcic, 64-bit: (a0 + 1) & ~a0.
define i64 @stack_fold_blcic_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blcic_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blcicq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %inc = add i64 %a0, 1
  %not = xor i64 %a0, -1
  %res = and i64 %inc, %not
  ret i64 %res
}

; blcmsk, 32-bit: a0 ^ (a0 + 1).
define i32 @stack_fold_blcmsk_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blcmsk_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blcmskl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %inc = add i32 %a0, 1
  %res = xor i32 %a0, %inc
  ret i32 %res
}

; blcmsk, 64-bit: a0 ^ (a0 + 1).
define i64 @stack_fold_blcmsk_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blcmsk_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blcmskq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %inc = add i64 %a0, 1
  %res = xor i64 %a0, %inc
  ret i64 %res
}

; blcs, 32-bit: a0 | (a0 + 1).
define i32 @stack_fold_blcs_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blcs_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blcsl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %inc = add i32 %a0, 1
  %res = or i32 %a0, %inc
  ret i32 %res
}

; blcs, 64-bit: a0 | (a0 + 1).
define i64 @stack_fold_blcs_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blcs_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blcsq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %inc = add i64 %a0, 1
  %res = or i64 %a0, %inc
  ret i64 %res
}

; blsfill, 32-bit: a0 | (a0 - 1).
define i32 @stack_fold_blsfill_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blsfill_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blsfilll {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %dec = sub i32 %a0, 1
  %res = or i32 %a0, %dec
  ret i32 %res
}

; blsfill, 64-bit: a0 | (a0 - 1).
define i64 @stack_fold_blsfill_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blsfill_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blsfillq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %dec = sub i64 %a0, 1
  %res = or i64 %a0, %dec
  ret i64 %res
}

; blsic, 32-bit: (a0 - 1) | ~a0.
define i32 @stack_fold_blsic_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_blsic_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blsicl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %dec = sub i32 %a0, 1
  %not = xor i32 %a0, -1
  %res = or i32 %dec, %not
  ret i32 %res
}

; blsic, 64-bit: (a0 - 1) | ~a0.
define i64 @stack_fold_blsic_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_blsic_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    blsicq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %dec = sub i64 %a0, 1
  %not = xor i64 %a0, -1
  %res = or i64 %dec, %not
  ret i64 %res
}

; t1mskc, 32-bit: (a0 + 1) | ~a0.
define i32 @stack_fold_t1mskc_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_t1mskc_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    t1mskcl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %inc = add i32 %a0, 1
  %not = xor i32 %a0, -1
  %res = or i32 %inc, %not
  ret i32 %res
}

; t1mskc, 64-bit: (a0 + 1) | ~a0.
define i64 @stack_fold_t1mskc_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_t1mskc_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    t1mskcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %inc = add i64 %a0, 1
  %not = xor i64 %a0, -1
  %res = or i64 %inc, %not
  ret i64 %res
}

; tzmsk, 32-bit: (a0 - 1) & ~a0.
define i32 @stack_fold_tzmsk_u32(i32 %a0) {
; CHECK-LABEL: stack_fold_tzmsk_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    tzmskl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %dec = sub i32 %a0, 1
  %not = xor i32 %a0, -1
  %res = and i32 %dec, %not
  ret i32 %res
}

; tzmsk, 64-bit: (a0 - 1) & ~a0.
define i64 @stack_fold_tzmsk_u64(i64 %a0) {
; CHECK-LABEL: stack_fold_tzmsk_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    tzmskq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %nop = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %dec = sub i64 %a0, 1
  %not = xor i64 %a0, -1
  %res = and i64 %dec, %not
  ret i64 %res
}