; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+bmi,+tbm < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with side effects, we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.

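; BEXTRI: bit field extract via an immediate control. 3841 = 0x0F01 encodes a
; start position of 1 (low byte) and a length of 15 bits (high byte).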
define i32 @stack_fold_bextri_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_bextri_u32
  ;CHECK:       # %bb.0:
  ;CHECK:       bextrl $3841, {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a0, i32 3841)
  ret i32 %2
}
declare i32 @llvm.x86.tbm.bextri.u32(i32, i32)

define i64 @stack_fold_bextri_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_bextri_u64
  ;CHECK:       # %bb.0:
  ;CHECK:       bextrq $3841, {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a0, i64 3841)
  ret i64 %2
}
declare i64 @llvm.x86.tbm.bextri.u64(i64, i64)

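; BLCFILL: x & (x + 1) clears the trailing one bits; there is no intrinsic, so
; the tests below rely on the idiom being matched.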
define i32 @stack_fold_blcfill_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blcfill_u32
  ;CHECK:       blcfilll {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i32 %a0, 1
  %3 = and i32 %a0, %2
  ret i32 %3
}

define i64 @stack_fold_blcfill_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blcfill_u64
  ;CHECK:       blcfillq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i64 %a0, 1
  %3 = and i64 %a0, %2
  ret i64 %3
}

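; BLCI: x | ~(x + 1) sets every bit except the lowest clear bit.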
define i32 @stack_fold_blci_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blci_u32
  ;CHECK:       blcil {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i32 %a0, 1
  %3 = xor i32 %2, -1
  %4 = or i32 %a0, %3
  ret i32 %4
}

define i64 @stack_fold_blci_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blci_u64
  ;CHECK:       blciq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i64 %a0, 1
  %3 = xor i64 %2, -1
  %4 = or i64 %a0, %3
  ret i64 %4
}

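; BLCIC: ~x & (x + 1) isolates the lowest clear bit as a set bit.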
define i32 @stack_fold_blcic_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blcic_u32
  ;CHECK:       blcicl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i32 %a0, 1
  %3 = xor i32 %a0, -1
  %4 = and i32 %2, %3
  ret i32 %4
}

define i64 @stack_fold_blcic_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blcic_u64
  ;CHECK:       blcicq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i64 %a0, 1
  %3 = xor i64 %a0, -1
  %4 = and i64 %2, %3
  ret i64 %4
}

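; BLCMSK: x ^ (x + 1) masks the lowest clear bit and all bits below it.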
define i32 @stack_fold_blcmsk_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blcmsk_u32
  ;CHECK:       blcmskl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i32 %a0, 1
  %3 = xor i32 %a0, %2
  ret i32 %3
}

define i64 @stack_fold_blcmsk_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blcmsk_u64
  ;CHECK:       blcmskq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i64 %a0, 1
  %3 = xor i64 %a0, %2
  ret i64 %3
}

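; BLCS: x | (x + 1) sets the lowest clear bit.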
define i32 @stack_fold_blcs_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blcs_u32
  ;CHECK:       blcsl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i32 %a0, 1
  %3 = or i32 %a0, %2
  ret i32 %3
}

define i64 @stack_fold_blcs_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blcs_u64
  ;CHECK:       blcsq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i64 %a0, 1
  %3 = or i64 %a0, %2
  ret i64 %3
}

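; BLSFILL: x | (x - 1) fills the trailing zero bits below the lowest set bit.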
define i32 @stack_fold_blsfill_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blsfill_u32
  ;CHECK:       blsfilll {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i32 %a0, 1
  %3 = or i32 %a0, %2
  ret i32 %3
}

define i64 @stack_fold_blsfill_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blsfill_u64
  ;CHECK:       blsfillq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i64 %a0, 1
  %3 = or i64 %a0, %2
  ret i64 %3
}

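; BLSIC: ~x | (x - 1) sets every bit except the lowest set bit.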
define i32 @stack_fold_blsic_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blsic_u32
  ;CHECK:       blsicl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i32 %a0, 1
  %3 = xor i32 %a0, -1
  %4 = or i32 %2, %3
  ret i32 %4
}

define i64 @stack_fold_blsic_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blsic_u64
  ;CHECK:       blsicq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i64 %a0, 1
  %3 = xor i64 %a0, -1
  %4 = or i64 %2, %3
  ret i64 %4
}

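; T1MSKC: ~x | (x + 1) builds the inverse mask of the trailing ones.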
define i32 @stack_fold_t1mskc_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_t1mskc_u32
  ;CHECK:       t1mskcl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i32 %a0, 1
  %3 = xor i32 %a0, -1
  %4 = or i32 %2, %3
  ret i32 %4
}

define i64 @stack_fold_t1mskc_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_t1mskc_u64
  ;CHECK:       t1mskcq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i64 %a0, 1
  %3 = xor i64 %a0, -1
  %4 = or i64 %2, %3
  ret i64 %4
}

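; TZMSK: ~x & (x - 1) builds a mask of the trailing zero bits.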
define i32 @stack_fold_tzmsk_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_tzmsk_u32
  ;CHECK:       tzmskl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i32 %a0, 1
  %3 = xor i32 %a0, -1
  %4 = and i32 %2, %3
  ret i32 %4
}

define i64 @stack_fold_tzmsk_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_tzmsk_u64
  ;CHECK:       tzmskq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i64 %a0, 1
  %3 = xor i64 %a0, -1
  %4 = and i64 %2, %3
  ret i64 %4
}