; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.

; FP add folding: packed, scalar, and scalar-intrinsic forms.

define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addpd
  ;CHECK:       addpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addps
  ;CHECK:       addps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <4 x float> %a0, %a1
  ret <4 x float> %2
}

define double @stack_fold_addsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_addsd
  ;CHECK:       addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsd_int
  ;CHECK:       addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_addss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_addss
  ;CHECK:       addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addss_int
  ;CHECK:       addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
60
; SSE3 addsub folding.

define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsubpd
  ;CHECK:       addsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addsubps
  ;CHECK:       addsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
78
; Bitwise and/andn folding; the trailing fadd pins the FP execution domain.

define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andnpd
  ;CHECK:       andnpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <2 x double>
  ; fadd forces execution domain
  %7 = fadd <2 x double> %6, <double 0x0, double 0x0>
  ret <2 x double> %7
}

define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andnps
  ;CHECK:       andnps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <4 x float>
  ; fadd forces execution domain
  %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %7
}

define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andpd
  ;CHECK:       andpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <2 x double>
  ; fadd forces execution domain
  %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
  ret <2 x double> %6
}

define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andps
  ;CHECK:       andps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <4 x float>
  ; fadd forces execution domain
  %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %6
}
132
; SSE4.1 blend (immediate) and blendv (variable-mask) folding. The blendv
; tests only clobber xmm3+ so the implicit xmm0 mask operand stays live.

define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_blendpd
  ;CHECK:       blendpd $2, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_blendps
  ;CHECK:       blendps $6, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1
  ret <4 x float> %2
}

define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) {
  ;CHECK-LABEL: stack_fold_blendvpd
  ;CHECK:       blendvpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) {
  ;CHECK-LABEL: stack_fold_blendvps
  ;CHECK:       blendvps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
166
; FP compare folding (predicate 0 == EQ, hence cmpeq* in the checks).

define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmppd
  ;CHECK:       cmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpps
  ;CHECK:       cmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone

define i32 @stack_fold_cmpsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_cmpsd
  ;CHECK:       cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq double %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmpsd_int
  ;CHECK:       cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define i32 @stack_fold_cmpss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_cmpss
  ;CHECK:       cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq float %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpss_int
  ;CHECK:       cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
220
; TODO stack_fold_comisd

define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_comisd_int
  ;CHECK:       comisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

; TODO stack_fold_comiss

define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_comiss_int
  ;CHECK:       comiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
242
; Packed int/FP conversion folding. These clobber xmm1-15 (the input lives in
; xmm0) so the single source register is forced to spill.

define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2pd
  ;CHECK:       cvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone

define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2ps
  ;CHECK:       cvtdq2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sitofp <4 x i32> %a0 to <4 x float>
  ret <4 x float> %2
}

define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2dq
  ;CHECK:       cvtpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2ps
  ;CHECK:       cvtpd2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc <2 x double> %a0 to <2 x float>
  ret <2 x float> %2
}

define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2dq
  ;CHECK:       cvtps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2pd
  ;CHECK:       cvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
294
; TODO stack_fold_cvtsd2si

define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtsd2si_int
  ;CHECK:       cvtsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone

; TODO stack_fold_cvtsd2si64

define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtsd2si64_int
  ;CHECK:       cvtsd2siq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone

define float @stack_fold_cvtsd2ss(double %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtsd2ss
  ;CHECK:       cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc double %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsd2ss_int
  ;CHECK:       cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, <2 x double> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
333
; int->FP conversion folding. These clobber the GPRs (not the xmm regs) so the
; integer source is forced to spill instead.

define double @stack_fold_cvtsi2sd(i32 %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtsi2sd
  ;CHECK:       cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi2sd_int
  ;CHECK:       cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double 0x0, double 0x0>, i32 %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone

define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi642sd
  ;CHECK:       cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi642sd_int
  ;CHECK:       cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> <double 0x0, double 0x0>, i64 %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone

define float @stack_fold_cvtsi2ss(i32 %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtsi2ss
  ;CHECK:       cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi2ss_int
  ;CHECK:  cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i32 %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone

define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi642ss
  ;CHECK:       cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi642ss_int
  ;CHECK:  cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i64 %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
401
define double @stack_fold_cvtss2sd(float %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtss2sd
  ;CHECK:       cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fpext float %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtss2sd_int
  ;CHECK:       cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone

; TODO stack_fold_cvtss2si

define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2si_int
  ;CHECK:       cvtss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

; TODO stack_fold_cvtss2si64

define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2si64_int
  ;CHECK:       cvtss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
440
; Packed truncating FP->int conversion folding.

define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttpd2dq
  ;CHECK:       cvttpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone

define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttps2dq
  ;CHECK:       cvttps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi <4 x float> %a0 to <4 x i32>
  ret <4 x i32> %2
}
457
; Scalar truncating FP->int conversion folding (IR fptosi and intrinsic forms).

define i32 @stack_fold_cvttsd2si(double %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si
  ;CHECK:       cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi double %a0 to i32
  ret i32 %2
}

define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si_int
  ;CHECK:       cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone

define i64 @stack_fold_cvttsd2si64(double %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si64
  ;CHECK:       cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi double %a0 to i64
  ret i64 %2
}

define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si64_int
  ;CHECK:       cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone

define i32 @stack_fold_cvttss2si(float %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si
  ;CHECK:       cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi float %a0 to i32
  ret i32 %2
}

define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si_int
  ;CHECK:       cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone

define i64 @stack_fold_cvttss2si64(float %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si64
  ;CHECK:       cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi float %a0 to i64
  ret i64 %2
}

define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si64_int
  ;CHECK:       cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
525
; FP divide folding: packed, scalar, and scalar-intrinsic forms.

define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divpd
  ;CHECK:       divpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divps
  ;CHECK:       divps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <4 x float> %a0, %a1
  ret <4 x float> %2
}

define double @stack_fold_divsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_divsd
  ;CHECK:       divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divsd_int
  ;CHECK:       divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_divss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_divss
  ;CHECK:       divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv float %a0, %a1
  ret float %2
}
566
567define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
568  ;CHECK-LABEL: stack_fold_divss_int
569  ;CHECK:       divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
570  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
571  %2 = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
572  ret <4 x float> %2
573}
574declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
575
; SSE4.1 dot-product instructions with an immediate mask (7): the reload of
; the spilled second operand must fold into dppd/dpps.
define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_dppd
  ;CHECK:       dppd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_dpps
  ;CHECK:       dpps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
593
; Spill direction test: extractps stores lane 1 straight to the stack (folded
; spill), and the value is later reloaded into %eax. Here the asm clobbers all
; general-purpose registers (not xmm), forcing the i32 result across the call.
define i32 @stack_fold_extractps(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_extractps
  ;CHECK:       extractps $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
  ;CHECK:       movl    {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
  %1 = extractelement <4 x float> %a0, i32 1
  %2 = bitcast float %1 to i32
  %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i32 %2
}
603
; SSE3 horizontal add/subtract family: the spilled second operand's reload
; must fold into the hadd/hsub instruction (16-byte reload in each case).
define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_haddpd
  ;CHECK:       haddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_haddps
  ;CHECK:       haddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_hsubpd
  ;CHECK:       hsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_hsubps
  ;CHECK:       hsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
639
; SSE4.1 insertps with immediate 209 (0xD1): when the source operand comes
; from memory the immediate is re-encoded, so the emitted form is $17 and the
; shuffle decode comment on the next line documents the resulting lanes.
define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_insertps
  ;CHECK:       insertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ;CHECK-NEXT:                                                        {{.*#+}} xmm0 = zero,mem[0],xmm0[2,3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
649
; Max family. The scalar (non-_int) tests build max from fcmp ogt + select so
; the backend must recognize the pattern as maxsd/maxss; the _int tests use
; the intrinsics directly with full-vector (16-byte) spill slots.
define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_maxpd
  ;CHECK:       maxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_maxps
  ;CHECK:       maxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone

; Scalar max via compare+select; 8-byte folded reload.
define double @stack_fold_maxsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_maxsd
  ;CHECK:       maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp ogt double %a0, %a1
  %3 = select i1 %2, double %a0, double %a1
  ret double %3
}

define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_maxsd_int
  ;CHECK:       maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone

; Scalar float max via compare+select; 4-byte folded reload.
define float @stack_fold_maxss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_maxss
  ;CHECK:       maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp ogt float %a0, %a1
  %3 = select i1 %2, float %a0, float %a1
  ret float %3
}

define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_maxss_int
  ;CHECK:       maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
703
; Min family, mirroring the max tests above: scalar tests use fcmp olt +
; select so the backend must form minsd/minss; _int tests use the intrinsics.
define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_minpd
  ;CHECK:       minpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_minps
  ;CHECK:       minps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone

; Scalar min via compare+select; 8-byte folded reload.
define double @stack_fold_minsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_minsd
  ;CHECK:       minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp olt double %a0, %a1
  %3 = select i1 %2, double %a0, double %a1
  ret double %3
}

define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_minsd_int
  ;CHECK:       minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone

; Scalar float min via compare+select; 4-byte folded reload.
define float @stack_fold_minss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_minss
  ;CHECK:       minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp olt float %a0, %a1
  %3 = select i1 %2, float %a0, float %a1
  ret float %3
}

define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_minss_int
  ;CHECK:       minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
757
; Duplicate-shuffle family: each shufflevector pattern must be selected as the
; corresponding mov*dup instruction reading directly from the spill slot.
; These are unary tests, so the asm clobbers xmm1-xmm15.
define <2 x double> @stack_fold_movddup(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_movddup
  ;CHECK:   movddup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %2
}
; TODO stack_fold_movhpd (load / store)
; TODO stack_fold_movhps (load / store)

; TODO stack_fold_movlpd (load / store)
; TODO stack_fold_movlps (load / store)

; Duplicate odd lanes (1,1,3,3) -> movshdup.
define <4 x float> @stack_fold_movshdup(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_movshdup
  ;CHECK:       movshdup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %2
}

; Duplicate even lanes (0,0,2,2) -> movsldup.
define <4 x float> @stack_fold_movsldup(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_movsldup
  ;CHECK:       movsldup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %2
}
786
; Multiply family: packed/scalar fmul plus the scalar intrinsic forms, with
; the reload of the spilled second operand folded into the multiply.
define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_mulpd
  ;CHECK:       mulpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fmul <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_mulps
  ;CHECK:       mulps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fmul <4 x float> %a0, %a1
  ret <4 x float> %2
}

; Scalar double multiply; 8-byte folded reload.
define double @stack_fold_mulsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_mulsd
  ;CHECK:       mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fmul double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_mulsd_int
  ;CHECK:       mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone

; Scalar float multiply; 4-byte folded reload.
define float @stack_fold_mulss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_mulss
  ;CHECK:       mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fmul float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_mulss_int
  ;CHECK:       mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
836
; Bitwise OR in the floating-point domain: the `or` is done on <2 x i64>
; bitcasts, and a trailing fadd of zero keeps the value in the FP execution
; domain so orpd/orps (not por) is selected.
define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_orpd
  ;CHECK:       orpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = or <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <2 x double>
  ; fadd forces execution domain
  %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
  ret <2 x double> %6
}

define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_orps
  ;CHECK:       orps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = or <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <4 x float>
  ; fadd forces execution domain
  %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %6
}
862
863; TODO stack_fold_rcpps
864
; Packed reciprocal approximation (intrinsic form): unary, so the asm clobbers
; xmm1-xmm15 and the 16-byte reload of %a0 must fold into rcpps.
define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_rcpps_int
  ;CHECK:       rcpps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
873
874; TODO stack_fold_rcpss
875; TODO stack_fold_rcpss_int
876
; SSE4.1 rounding family. Packed forms use the round intrinsics with imm 7;
; the scalar forms use llvm.floor (lowered to roundsd/roundss $9) and carry
; optsize/minsize, under which the folded-memory form is chosen.
define <2 x double> @stack_fold_roundpd(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_roundpd
  ;CHECK:       roundpd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone

define <4 x float> @stack_fold_roundps(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_roundps
  ;CHECK:       roundps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone

; floor(double) -> roundsd $9 with an 8-byte folded reload (optsize).
define double @stack_fold_roundsd(double %a0) optsize {
  ;CHECK-LABEL: stack_fold_roundsd
  ;CHECK:       roundsd $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call double @llvm.floor.f64(double %a0)
  ret double %2
}
declare double @llvm.floor.f64(double) nounwind readnone

; TODO stack_fold_roundsd_int
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone

; floor(float) -> roundss $9 with a 4-byte folded reload (minsize).
define float @stack_fold_roundss(float %a0) minsize {
  ;CHECK-LABEL: stack_fold_roundss
  ;CHECK:       roundss $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call float @llvm.floor.f32(float %a0)
  ret float %2
}
declare float @llvm.floor.f32(float) nounwind readnone

; TODO stack_fold_roundss_int
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
918
919; TODO stack_fold_rsqrtps
920
; Packed reciprocal square-root approximation (intrinsic form); unary test
; with a 16-byte folded reload into rsqrtps.
define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_rsqrtps_int
  ;CHECK:       rsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
929
930; TODO stack_fold_rsqrtss
931; TODO stack_fold_rsqrtss_int
932
; Two-input shuffles lowered to shufpd/shufps with the expected immediate,
; folding the reload of the spilled second operand.
define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_shufpd
  ;CHECK:       shufpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
  ret <2 x double> %2
}

; Mask <0,2,4,7> encodes as shufps immediate 200 (0b11001000).
define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_shufps
  ;CHECK:       shufps $200, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7>
  ret <4 x float> %2
}
948
; Square-root family: packed intrinsics, and scalar llvm.sqrt under
; optsize/minsize so the memory-operand form of sqrtsd/sqrtss is used.
define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_sqrtpd
  ;CHECK:       sqrtpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone

define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_sqrtps
  ;CHECK:       sqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone

; Scalar double sqrt; 8-byte folded reload (optsize).
define double @stack_fold_sqrtsd(double %a0) optsize {
  ;CHECK-LABEL: stack_fold_sqrtsd
  ;CHECK:       sqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call double @llvm.sqrt.f64(double %a0)
  ret double %2
}
declare double @llvm.sqrt.f64(double) nounwind readnone

; TODO stack_fold_sqrtsd_int
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone

; Scalar float sqrt; 4-byte folded reload (minsize).
define float @stack_fold_sqrtss(float %a0) minsize {
  ;CHECK-LABEL: stack_fold_sqrtss
  ;CHECK:       sqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call float @llvm.sqrt.f32(float %a0)
  ret float %2
}
declare float @llvm.sqrt.f32(float) nounwind readnone

; TODO stack_fold_sqrtss_int
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
990
; Subtract family: packed/scalar fsub plus the scalar intrinsic forms, with
; the reload of the spilled second operand folded into the subtract.
define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_subpd
  ;CHECK:       subpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_subps
  ;CHECK:       subps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub <4 x float> %a0, %a1
  ret <4 x float> %2
}

; Scalar double subtract; 8-byte folded reload.
define double @stack_fold_subsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_subsd
  ;CHECK:       subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_subsd_int
  ;CHECK:       subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone

; Scalar float subtract; 4-byte folded reload.
define float @stack_fold_subss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_subss
  ;CHECK:       subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_subss_int
  ;CHECK:       subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
1040
; Unordered compare family: fcmp ueq + select (or the ucomieq intrinsics)
; must lower to ucomisd/ucomiss with the second operand reloaded from its
; spill slot.
define i32 @stack_fold_ucomisd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_ucomisd
  ;CHECK:       ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp ueq double %a0, %a1
  %3 = select i1 %2, i32 1, i32 -1
  ret i32 %3
}

define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_ucomisd_int
  ;CHECK:       ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @stack_fold_ucomiss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_ucomiss
  ;CHECK:       ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp ueq float %a0, %a1
  %3 = select i1 %2, i32 1, i32 -1
  ret i32 %3
}

define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_ucomiss_int
  ;CHECK:       ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
1076
define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_unpckhpd
  ;CHECK:       unpckhpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; Clobber xmm2-xmm15 so one shuffle operand is reloaded from the stack.
  %spill = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Interleave the high elements of %a0 and %a1 (indices 1 and 3).
  %hi = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
  ; Add zero so the shuffle is kept in the FP execution domain (unpckhpd).
  %res = fadd <2 x double> %hi, <double 0x0, double 0x0>
  ret <2 x double> %res
}
1086
define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_unpckhps
  ;CHECK:       unpckhps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; Clobber xmm2-xmm15 so one shuffle operand is reloaded from the stack.
  %spill = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Interleave the high halves of %a0 and %a1 (indices 2,6,3,7).
  %hi = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ; Add zero so the shuffle is kept in the FP execution domain (unpckhps).
  %res = fadd <4 x float> %hi, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %res
}
1096
define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_unpcklpd
  ;CHECK:       unpcklpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; Clobber xmm2-xmm15 so one shuffle operand is reloaded from the stack.
  %spill = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Interleave the low elements of %a0 and %a1 (indices 0 and 2).
  %lo = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
  ; Add zero so the shuffle is kept in the FP execution domain (unpcklpd).
  %res = fadd <2 x double> %lo, <double 0x0, double 0x0>
  ret <2 x double> %res
}
1106
define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_unpcklps
  ;CHECK:       unpcklps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; Clobber xmm2-xmm15 so one shuffle operand is reloaded from the stack.
  %spill = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Interleave the low halves of %a0 and %a1 (indices 0,4,1,5).
  %lo = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ; Add zero so the shuffle is kept in the FP execution domain (unpcklps).
  %res = fadd <4 x float> %lo, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %res
}
1116
define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_xorpd
  ;CHECK:       xorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; Clobber xmm2-xmm15 so one xor operand is reloaded from the stack.
  %spill = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; XOR the raw bit patterns of the two vectors, then cast back to doubles.
  %bits0 = bitcast <2 x double> %a0 to <2 x i64>
  %bits1 = bitcast <2 x double> %a1 to <2 x i64>
  %xored = xor <2 x i64> %bits0, %bits1
  %asfp  = bitcast <2 x i64> %xored to <2 x double>
  ; Add zero so the xor is kept in the FP execution domain (xorpd, not pxor).
  %res = fadd <2 x double> %asfp, <double 0x0, double 0x0>
  ret <2 x double> %res
}
1129
define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_xorps
  ;CHECK:       xorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ; Clobber xmm2-xmm15 so one xor operand is reloaded from the stack.
  %spill = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; XOR the raw bit patterns of the two vectors, then cast back to floats.
  %bits0 = bitcast <4 x float> %a0 to <2 x i64>
  %bits1 = bitcast <4 x float> %a1 to <2 x i64>
  %xored = xor <2 x i64> %bits0, %bits1
  %asfp  = bitcast <2 x i64> %xored to <4 x float>
  ; Add zero so the xor is kept in the FP execution domain (xorps, not pxor).
  %res = fadd <4 x float> %asfp, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %res
}
1142