; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s

; add.sd: expect `addsd`; the label anchor pins the match to this function.
define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_add_sd:
  ; CHECK: addsd
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone


; cmp.pd with predicate immediate 7: expect the `cmpordpd` spelling; the label
; anchor pins the match to this function.
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_cmp_pd:
  ; CHECK: cmpordpd
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone


; cmp.sd with predicate immediate 7: expect the `cmpordsd` spelling; the label
; anchor pins the match to this function.
define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_cmp_sd:
  ; CHECK: cmpordsd
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone


; comieq.sd: expect `comisd`, then `sete`, then `movzbl`, in that order and
; inside this function (label anchor).
define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_comieq_sd:
  ; CHECK: comisd
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone


; comige.sd: expect `comisd`, then `setae`, then `movzbl`, in that order and
; inside this function (label anchor).
define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_comige_sd:
  ; CHECK: comisd
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone


; comigt.sd: expect `comisd`, then `seta`, then `movzbl`, in that order and
; inside this function (label anchor).
define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_comigt_sd:
  ; CHECK: comisd
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone


; comile.sd: expect `comisd`, then `setbe`, then `movzbl`, in that order and
; inside this function (label anchor).
define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_comile_sd:
  ; CHECK: comisd
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone


; comilt.sd: expect `comisd`, then an `sbbl %eax, %eax` / `andl $1, %eax` pair,
; in that order and inside this function (label anchor).
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_comilt_sd:
  ; CHECK: comisd
  ; CHECK: sbbl    %eax, %eax
  ; CHECK: andl    $1, %eax
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone


; comineq.sd: expect `comisd`, then `setne`, then `movzbl`, in that order and
; inside this function (label anchor).
define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_comineq_sd:
  ; CHECK: comisd
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone


; cvtdq2pd: expect `cvtdq2pd`; the label anchor pins the match to this function.
define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
  ; CHECK: cvtdq2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone


; cvtdq2ps: expect `cvtdq2ps`; the label anchor pins the match to this function.
define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvtdq2ps:
  ; CHECK: cvtdq2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone


; cvtpd2dq: expect `cvtpd2dq`; the label anchor pins the match to this function.
define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvtpd2dq:
  ; CHECK: cvtpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone


; cvtpd2ps: expect `cvtpd2ps`; the label anchor pins the match to this function.
define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvtpd2ps:
  ; CHECK: cvtpd2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone


; cvtps2dq: expect `cvtps2dq`; the label anchor pins the match to this function.
define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvtps2dq:
  ; CHECK: cvtps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone


; cvtps2pd: expect `cvtps2pd`; the label anchor pins the match to this function.
define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvtps2pd:
  ; CHECK: cvtps2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone


; cvtsd2si: expect `cvtsd2si`; the label anchor pins the match to this function.
define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvtsd2si:
  ; CHECK: cvtsd2si
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone


; cvtsd2ss: expect `cvtsd2ss` in both runs. The SSE-prefixed negative directive
; additionally rejects a three-register-operand form in the SSE2-only run.
; The label anchor pins the positive match to this function.
define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_cvtsd2ss:
  ; CHECK: cvtsd2ss
  ; SSE-NOT: cvtsd2ss %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone


; cvtsi2sd with a constant i32 operand: expect a `movl` followed by `cvtsi2sd`.
; `movl` is a very common instruction, so the label anchor matters here to keep
; the match from landing in an earlier function.
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvtsi2sd:
  ; CHECK: movl
  ; CHECK: cvtsi2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone


; cvtss2sd: expect `cvtss2sd`; the label anchor pins the match to this function.
define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse2_cvtss2sd:
  ; CHECK: cvtss2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone


; cvttpd2dq: expect `cvttpd2dq`; the label anchor pins the match to this function.
define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvttpd2dq:
  ; CHECK: cvttpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone


; cvttps2dq: expect `cvttps2dq`; the label anchor pins the match to this function.
define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvttps2dq:
  ; CHECK: cvttps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone


; cvttsd2si: expect `cvttsd2si`; the label anchor pins the match to this function.
define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvttsd2si:
  ; CHECK: cvttsd2si
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone


; div.sd: expect `divsd`; the label anchor pins the match to this function.
define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_div_sd:
  ; CHECK: divsd
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone



; max.pd: expect `maxpd`; the label anchor pins the match to this function.
define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_max_pd:
  ; CHECK: maxpd
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone


; max.sd: expect `maxsd`; the label anchor pins the match to this function.
define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_max_sd:
  ; CHECK: maxsd
  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone


; min.pd: expect `minpd`; the label anchor pins the match to this function.
define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_min_pd:
  ; CHECK: minpd
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone


; min.sd: expect `minsd`; the label anchor pins the match to this function.
define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_min_sd:
  ; CHECK: minsd
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone


; movmsk.pd: expect `movmskpd`; the label anchor pins the match to this function.
define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
  ; CHECK-LABEL: test_x86_sse2_movmsk_pd:
  ; CHECK: movmskpd
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone




; mul.sd: expect `mulsd` after this function's label. The name match is a
; label anchor (with the trailing colon) so it binds to the label line itself
; rather than to any earlier mention of the symbol.
define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_mul_sd:
  ; CHECK: mulsd
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone


; packssdw.128: expect `packssdw`; the label anchor pins the match to this function.
define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_x86_sse2_packssdw_128:
  ; CHECK: packssdw
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone


; packsswb.128: expect `packsswb`; the label anchor pins the match to this function.
define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_packsswb_128:
  ; CHECK: packsswb
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone


; packuswb.128: expect `packuswb`; the label anchor pins the match to this function.
define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_packuswb_128:
  ; CHECK: packuswb
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone


; padds.b: expect `paddsb`; the label anchor pins the match to this function.
define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_x86_sse2_padds_b:
  ; CHECK: paddsb
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone


; padds.w: expect `paddsw`; the label anchor pins the match to this function.
define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_padds_w:
  ; CHECK: paddsw
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone


; paddus.b: expect `paddusb`; the label anchor pins the match to this function.
define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_x86_sse2_paddus_b:
  ; CHECK: paddusb
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone


; paddus.w: expect `paddusw`; the label anchor pins the match to this function.
define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_paddus_w:
  ; CHECK: paddusw
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone


; pavg.b: expect `pavgb`; the label anchor pins the match to this function.
define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pavg_b:
  ; CHECK: pavgb
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone


; pavg.w: expect `pavgw`; the label anchor pins the match to this function.
define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pavg_w:
  ; CHECK: pavgw
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone


; pmadd.wd: expect `pmaddwd`; the label anchor pins the match to this function.
define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pmadd_wd:
  ; CHECK: pmaddwd
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone


; pmaxs.w: expect `pmaxsw`; the label anchor pins the match to this function.
define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pmaxs_w:
  ; CHECK: pmaxsw
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone


; pmaxu.b: expect `pmaxub`; the label anchor pins the match to this function.
define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pmaxu_b:
  ; CHECK: pmaxub
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone


; pmins.w: expect `pminsw`; the label anchor pins the match to this function.
define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pmins_w:
  ; CHECK: pminsw
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone


; pminu.b: expect `pminub`; the label anchor pins the match to this function.
define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pminu_b:
  ; CHECK: pminub
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone


; pmovmskb.128: expect `pmovmskb`; the label anchor pins the match to this function.
define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
  ; CHECK-LABEL: test_x86_sse2_pmovmskb_128:
  ; CHECK: pmovmskb
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone


; pmulh.w: expect `pmulhw`; the label anchor pins the match to this function.
define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pmulh_w:
  ; CHECK: pmulhw
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone


; pmulhu.w: expect `pmulhuw`; the label anchor pins the match to this function.
define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pmulhu_w:
  ; CHECK: pmulhuw
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone


; pmulu.dq: expect `pmuludq`; the label anchor pins the match to this function.
define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pmulu_dq:
  ; CHECK: pmuludq
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone


; psad.bw: expect `psadbw`; the label anchor pins the match to this function.
define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psad_bw:
  ; CHECK: psadbw
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone


; psll.d (vector shift count): expect `pslld`. The immediate-count test checks
; the same mnemonic, so the label anchor is what ties this match to this function.
define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psll_d:
  ; CHECK: pslld
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone


; psll.q (vector shift count): expect `psllq`. The immediate-count test checks
; the same mnemonic, so the label anchor is what ties this match to this function.
define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psll_q:
  ; CHECK: psllq
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone


; psll.w (vector shift count): expect `psllw`. The immediate-count test checks
; the same mnemonic, so the label anchor is what ties this match to this function.
define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psll_w:
  ; CHECK: psllw
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone


; pslli.d (immediate shift count 7): expect `pslld`. The vector-count test checks
; the same mnemonic, so the label anchor is what ties this match to this function.
define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
  ; CHECK-LABEL: test_x86_sse2_pslli_d:
  ; CHECK: pslld
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone


; pslli.q (immediate shift count 7): expect `psllq`. The vector-count test checks
; the same mnemonic, so the label anchor is what ties this match to this function.
define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
  ; CHECK-LABEL: test_x86_sse2_pslli_q:
  ; CHECK: psllq
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone


; pslli.w (immediate shift count 7): expect `psllw`. The vector-count test checks
; the same mnemonic, so the label anchor is what ties this match to this function.
define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
  ; CHECK-LABEL: test_x86_sse2_pslli_w:
  ; CHECK: psllw
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone


; psra.d (vector shift count): expect `psrad`. The immediate-count test checks
; the same mnemonic, so the label anchor is what ties this match to this function.
define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psra_d:
  ; CHECK: psrad
  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone


; psra.w (vector shift count): expect `psraw`. The immediate-count test checks
; the same mnemonic, so the label anchor is what ties this match to this function.
define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psra_w:
  ; CHECK: psraw
  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone


; psrai.d (immediate shift count 7): expect `psrad`. The vector-count test checks
; the same mnemonic, so the label anchor is what ties this match to this function.
define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
  ; CHECK-LABEL: test_x86_sse2_psrai_d:
  ; CHECK: psrad
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone


; psrai.w (immediate shift count 7): expect `psraw`. The vector-count test checks
; the same mnemonic, so the label anchor is what ties this match to this function.
define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
  ; CHECK-LABEL: test_x86_sse2_psrai_w:
  ; CHECK: psraw
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone


; psrl.d (vector shift count): expect `psrld`. The immediate-count test checks
; the same mnemonic, so the label anchor is what ties this match to this function.
define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psrl_d:
  ; CHECK: psrld
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone


; psrl.q (vector shift count): expect `psrlq`. The immediate-count test checks
; the same mnemonic, so the label anchor is what ties this match to this function.
define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psrl_q:
  ; CHECK: psrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone


; psrl.w (vector shift count): expect `psrlw`. The immediate-count test checks
; the same mnemonic, so the label anchor is what ties this match to this function.
define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psrl_w:
  ; CHECK: psrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone


; psrli.d (immediate shift count 7): expect `psrld`. The vector-count test checks
; the same mnemonic, so the label anchor is what ties this match to this function.
define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
  ; CHECK-LABEL: test_x86_sse2_psrli_d:
  ; CHECK: psrld
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone


; psrli.q (immediate shift count 7): expect `psrlq`. The vector-count test checks
; the same mnemonic, so the label anchor is what ties this match to this function.
define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
  ; CHECK-LABEL: test_x86_sse2_psrli_q:
  ; CHECK: psrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone


; psrli.w (immediate shift count 7): expect `psrlw`. The vector-count test checks
; the same mnemonic, so the label anchor is what ties this match to this function.
define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
  ; CHECK-LABEL: test_x86_sse2_psrli_w:
  ; CHECK: psrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone


; psubs.b: expect `psubsb`; the label anchor pins the match to this function.
define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psubs_b:
  ; CHECK: psubsb
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone


; psubs.w: expect `psubsw`; the label anchor pins the match to this function.
define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psubs_w:
  ; CHECK: psubsw
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone


; psubus.b: expect `psubusb`; the label anchor pins the match to this function.
define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psubus_b:
  ; CHECK: psubusb
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone


; psubus.w: expect `psubusw`; the label anchor pins the match to this function.
define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psubus_w:
  ; CHECK: psubusw
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone


; sqrt.pd: expect `sqrtpd`; the label anchor pins the match to this function.
define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
  ; CHECK-LABEL: test_x86_sse2_sqrt_pd:
  ; CHECK: sqrtpd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone


; sqrt.sd: expect `sqrtsd`; the label anchor pins the match to this function.
define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
  ; CHECK-LABEL: test_x86_sse2_sqrt_sd:
  ; CHECK: sqrtsd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone


; storel.dq: after this function's label, expect a `movl` and then `movlps`.
; The name match is a label anchor (with the trailing colon) so it binds to
; the label line itself rather than to any earlier mention of the symbol.
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_x86_sse2_storel_dq:
  ; CHECK: movl
  ; CHECK: movlps
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind


; storeu.dq: after this function's label, expect a `movl` and then `movdqu`.
; The name match is a label anchor (with the trailing colon) so it binds to
; the label line itself rather than to any earlier mention of the symbol.
define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_x86_sse2_storeu_dq:
  ; CHECK: movl
  ; CHECK: movdqu
  ; add operation forces the execution domain.
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind


; storeu.pd: after this function's label, expect a `movl` and then `movupd`.
; The name match is a label anchor (with the trailing colon) so it binds to
; the label line itself rather than to any earlier mention of the symbol.
define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_storeu_pd:
  ; CHECK: movl
  ; CHECK: movupd
  ; fadd operation forces the execution domain.
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind


; sub.sd: expect `subsd` after this function's label. The name match is a
; label anchor (with the trailing colon) so it binds to the label line itself
; rather than to any earlier mention of the symbol.
define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_sub_sd:
  ; CHECK: subsd
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone


; ucomieq.sd: expect `ucomisd`, then `sete`, then `movzbl`, in that order and
; inside this function (label anchor).
define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
  ; CHECK: ucomisd
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone


; ucomige.sd: expect `ucomisd`, then `setae`, then `movzbl`, in that order and
; inside this function (label anchor).
define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_ucomige_sd:
  ; CHECK: ucomisd
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone


; ucomigt.sd: expect `ucomisd`, then `seta`, then `movzbl`, in that order and
; inside this function (label anchor).
define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_ucomigt_sd:
  ; CHECK: ucomisd
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone


; ucomile.sd: expect `ucomisd`, then `setbe`, then `movzbl`, in that order and
; inside this function (label anchor).
define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_ucomile_sd:
  ; CHECK: ucomisd
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone


; ucomilt.sd: expect `ucomisd` followed by an `sbbl`, inside this function
; (label anchor).
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
  ; CHECK: ucomisd
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone


; ucomineq.sd: expect `ucomisd`, then `setne`, then `movzbl`, in that order and
; inside this function (label anchor).
define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
  ; CHECK: ucomisd
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone

; pause: expect `pause`; the label anchor pins the match to this function.
define void @test_x86_sse2_pause() {
  ; CHECK-LABEL: test_x86_sse2_pause:
  ; CHECK: pause
  tail call void @llvm.x86.sse2.pause()
  ret void
}
declare void @llvm.x86.sse2.pause() nounwind

; pshuf.d with immediate 27: expect `pshufd $27` after this function's label.
define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
; CHECK-LABEL: test_x86_sse2_pshuf_d:
; CHECK: pshufd $27
entry:
   %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
   ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone

; pshufl.w with immediate 27: expect `pshuflw $27` after this function's label.
define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
; CHECK-LABEL: test_x86_sse2_pshufl_w:
; CHECK: pshuflw $27
entry:
   %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
   ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone

; pshufh.w with immediate 27: expect `pshufhw $27` after this function's label.
define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
; CHECK-LABEL: test_x86_sse2_pshufh_w:
; CHECK: pshufhw $27
entry:
   %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
   ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone
