; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.
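;
; The asm returns its result in an XMM register ("=x") and clobbers every
; general purpose register except the stack pointer, so live GPR values must
; be spilled across it and reloaded afterwards, creating the folding
; opportunity the CHECK lines look for.

; ANDN: computes ~x & y in a single instruction (here ~%a0 & %a1).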
define i32 @stack_fold_andn_u32(i32 %a0, i32 %a1) {
  ;CHECK-LABEL: stack_fold_andn_u32
  ;CHECK:       andnl {{-?[0-9]*}}(%rsp), %eax, %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = xor i32 %a0, -1
  %3 = and i32 %a1, %2
  ret i32 %3
}

define i64 @stack_fold_andn_u64(i64 %a0, i64 %a1) {
  ;CHECK-LABEL: stack_fold_andn_u64
  ;CHECK:       andnq {{-?[0-9]*}}(%rsp), %rax, %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = xor i64 %a0, -1
  %3 = and i64 %a1, %2
  ret i64 %3
}
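; BEXTR: extracts a contiguous bit field from the first operand; the control
; operand holds the start position in bits 7:0 and the length in bits 15:8.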
define i32 @stack_fold_bextr_u32(i32 %a0, i32 %a1) {
  ;CHECK-LABEL: stack_fold_bextr_u32
  ;CHECK:       # %bb.0:
  ;CHECK:       bextrl %eax, {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a0, i32 %a1)
  ret i32 %2
}
declare i32 @llvm.x86.bmi.bextr.32(i32, i32)

define i64 @stack_fold_bextr_u64(i64 %a0, i64 %a1) {
  ;CHECK-LABEL: stack_fold_bextr_u64
  ;CHECK:       # %bb.0:
  ;CHECK:       bextrq %rax, {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a0, i64 %a1)
  ret i64 %2
}
declare i64 @llvm.x86.bmi.bextr.64(i64, i64)
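; BLSI: isolates the lowest set bit, equivalent to x & -x.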
define i32 @stack_fold_blsi_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blsi_u32
  ;CHECK:       blsil {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i32 0, %a0
  %3 = and i32 %2, %a0
  ret i32 %3
}

define i64 @stack_fold_blsi_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blsi_u64
  ;CHECK:       blsiq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i64 0, %a0
  %3 = and i64 %2, %a0
  ret i64 %3
}
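; BLSMSK: builds a mask up to and including the lowest set bit, x ^ (x - 1).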
define i32 @stack_fold_blsmsk_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blsmsk_u32
  ;CHECK:       blsmskl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i32 %a0, 1
  %3 = xor i32 %2, %a0
  ret i32 %3
}

define i64 @stack_fold_blsmsk_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blsmsk_u64
  ;CHECK:       blsmskq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i64 %a0, 1
  %3 = xor i64 %2, %a0
  ret i64 %3
}
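; BLSR: clears the lowest set bit, equivalent to x & (x - 1).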
define i32 @stack_fold_blsr_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blsr_u32
  ;CHECK:       blsrl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i32 %a0, 1
  %3 = and i32 %2, %a0
  ret i32 %3
}

define i64 @stack_fold_blsr_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blsr_u64
  ;CHECK:       blsrq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i64 %a0, 1
  %3 = and i64 %2, %a0
  ret i64 %3
}
;TODO stack_fold_tzcnt_u16
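; TZCNT: counts trailing zero bits; the 'i1 0' (false) argument to llvm.cttz
; keeps the result defined for a zero input, matching TZCNT's behaviour of
; returning the operand width.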
define i32 @stack_fold_tzcnt_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_tzcnt_u32
  ;CHECK:       tzcntl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i32 @llvm.cttz.i32(i32 %a0, i1 0)
  ret i32 %2
}
declare i32 @llvm.cttz.i32(i32, i1)

define i64 @stack_fold_tzcnt_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_tzcnt_u64
  ;CHECK:       tzcntq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i64 @llvm.cttz.i64(i64 %a0, i1 0)
  ret i64 %2
}
declare i64 @llvm.cttz.i64(i64, i1)