; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s
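; Checks that the AES-NI and SSE/SSE2/SSE3/SSE4.1/SSE4.2 intrinsics below select
; the VEX-encoded (v-prefixed) AVX instruction forms when AVX is enabled.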
2
3define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
4  ; CHECK: vaesdec
5  %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
6  ret <2 x i64> %res
7}
8declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone
9
10
11define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
12  ; CHECK: vaesdeclast
13  %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
14  ret <2 x i64> %res
15}
16declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone
17
18
19define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
20  ; CHECK: vaesenc
21  %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
22  ret <2 x i64> %res
23}
24declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone
25
26
27define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
28  ; CHECK: vaesenclast
29  %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
30  ret <2 x i64> %res
31}
32declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone
33
34
35define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
36  ; CHECK: vaesimc
37  %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
38  ret <2 x i64> %res
39}
40declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone
41
42
43define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
44  ; CHECK: vaeskeygenassist
45  %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
46  ret <2 x i64> %res
47}
48declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
49
50
51define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
52  ; CHECK: vaddsd
53  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
54  ret <2 x double> %res
55}
56declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
57
58
59define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
60  ; CHECK: vcmpordpd
61  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
62  ret <2 x double> %res
63}
64declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
65
66
67define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
68  ; CHECK: vcmpordsd
69  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
70  ret <2 x double> %res
71}
72declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
73
74
75define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
76  ; CHECK: vcomisd
77  ; CHECK: sete
78  ; CHECK: movzbl
79  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
80  ret i32 %res
81}
82declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
83
84
85define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
86  ; CHECK: vcomisd
87  ; CHECK: setae
88  ; CHECK: movzbl
89  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
90  ret i32 %res
91}
92declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
93
94
95define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
96  ; CHECK: vcomisd
97  ; CHECK: seta
98  ; CHECK: movzbl
99  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
100  ret i32 %res
101}
102declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
103
104
105define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
106  ; CHECK: vcomisd
107  ; CHECK: setbe
108  ; CHECK: movzbl
109  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
110  ret i32 %res
111}
112declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
113
114
115define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
116  ; CHECK: vcomisd
117  ; CHECK: sbbl    %eax, %eax
118  ; CHECK: andl    $1, %eax
119  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
120  ret i32 %res
121}
122declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
123
124
125define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
126  ; CHECK: vcomisd
127  ; CHECK: setne
128  ; CHECK: movzbl
129  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
130  ret i32 %res
131}
132declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
133
134
135define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
136  ; CHECK: vcvtdq2pd
137  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
138  ret <2 x double> %res
139}
140declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
141
142
143define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
144  ; CHECK: vcvtdq2ps
145  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
146  ret <4 x float> %res
147}
148declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
149
150
151define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
152  ; CHECK: vcvtpd2dq
153  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
154  ret <4 x i32> %res
155}
156declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
157
158
159define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
160  ; CHECK: vcvtpd2ps
161  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
162  ret <4 x float> %res
163}
164declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
165
166
167define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
168  ; CHECK: vcvtps2dq
169  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
170  ret <4 x i32> %res
171}
172declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
173
174
175define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
176  ; CHECK: vcvtps2pd
177  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
178  ret <2 x double> %res
179}
180declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
181
182
183define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
184  ; CHECK: vcvtsd2si
185  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
186  ret i32 %res
187}
188declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
189
190
191define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
192  ; CHECK: vcvtsd2ss
193  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
194  ret <4 x float> %res
195}
196declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
197
198
199define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
200  ; CHECK: movl
201  ; CHECK: vcvtsi2sd
202  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
203  ret <2 x double> %res
204}
205declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
206
207
208define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
209  ; CHECK: vcvtss2sd
210  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
211  ret <2 x double> %res
212}
213declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
214
215
216define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
217  ; CHECK: vcvttpd2dq
218  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
219  ret <4 x i32> %res
220}
221declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
222
223
224define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
225  ; CHECK: vcvttps2dq
226  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
227  ret <4 x i32> %res
228}
229declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
230
231
232define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
233  ; CHECK: vcvttsd2si
234  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
235  ret i32 %res
236}
237declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
238
239
240define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
241  ; CHECK: vdivsd
242  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
243  ret <2 x double> %res
244}
245declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
246
247
248define <16 x i8> @test_x86_sse2_loadu_dq(i8* %a0) {
249  ; CHECK: movl
250  ; CHECK: vmovups
251  %res = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
252  ret <16 x i8> %res
253}
254declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readonly
255
256
257define <2 x double> @test_x86_sse2_loadu_pd(i8* %a0) {
258  ; CHECK: movl
259  ; CHECK: vmovups
260  %res = call <2 x double> @llvm.x86.sse2.loadu.pd(i8* %a0) ; <<2 x double>> [#uses=1]
261  ret <2 x double> %res
262}
263declare <2 x double> @llvm.x86.sse2.loadu.pd(i8*) nounwind readonly
264
265
266define void @test_x86_sse2_maskmov_dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) {
267  ; CHECK: pushl
268  ; CHECK: movl
269  ; CHECK: vmaskmovdqu
270  ; CHECK: popl
271  call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2)
272  ret void
273}
274declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
275
276
277define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
278  ; CHECK: vmaxpd
279  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
280  ret <2 x double> %res
281}
282declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
283
284
285define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
286  ; CHECK: vmaxsd
287  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
288  ret <2 x double> %res
289}
290declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
291
292
293define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
294  ; CHECK: vminpd
295  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
296  ret <2 x double> %res
297}
298declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
299
300
301define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
302  ; CHECK: vminsd
303  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
304  ret <2 x double> %res
305}
306declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
307
308
309define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
310  ; CHECK: vmovmskpd
311  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
312  ret i32 %res
313}
314declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
315
316
317define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) {
318  ; CHECK: movl
319  ; CHECK: vmovntdq
320  call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1)
321  ret void
322}
323declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind
324
325
326define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) {
327  ; CHECK: movl
328  ; CHECK: vmovntpd
329  call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1)
330  ret void
331}
332declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind
333
334
335define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
336  ; CHECK: vmulsd
337  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
338  ret <2 x double> %res
339}
340declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
341
342
343define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
344  ; CHECK: vpackssdw
345  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
346  ret <8 x i16> %res
347}
348declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
349
350
351define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
352  ; CHECK: vpacksswb
353  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
354  ret <16 x i8> %res
355}
356declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
357
358
359define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
360  ; CHECK: vpackuswb
361  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
362  ret <16 x i8> %res
363}
364declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
365
366
367define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
368  ; CHECK: vpaddsb
369  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
370  ret <16 x i8> %res
371}
372declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
373
374
375define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
376  ; CHECK: vpaddsw
377  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
378  ret <8 x i16> %res
379}
380declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
381
382
383define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
384  ; CHECK: vpaddusb
385  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
386  ret <16 x i8> %res
387}
388declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
389
390
391define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
392  ; CHECK: vpaddusw
393  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
394  ret <8 x i16> %res
395}
396declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
397
398
399define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
400  ; CHECK: vpavgb
401  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
402  ret <16 x i8> %res
403}
404declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
405
406
407define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
408  ; CHECK: vpavgw
409  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
410  ret <8 x i16> %res
411}
412declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
413
414
415define <16 x i8> @test_x86_sse2_pcmpeq_b(<16 x i8> %a0, <16 x i8> %a1) {
416  ; CHECK: vpcmpeqb
417  %res = call <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
418  ret <16 x i8> %res
419}
420declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone
421
422
423define <4 x i32> @test_x86_sse2_pcmpeq_d(<4 x i32> %a0, <4 x i32> %a1) {
424  ; CHECK: vpcmpeqd
425  %res = call <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
426  ret <4 x i32> %res
427}
428declare <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32>, <4 x i32>) nounwind readnone
429
430
431define <8 x i16> @test_x86_sse2_pcmpeq_w(<8 x i16> %a0, <8 x i16> %a1) {
432  ; CHECK: vpcmpeqw
433  %res = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
434  ret <8 x i16> %res
435}
436declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone
437
438
439define <16 x i8> @test_x86_sse2_pcmpgt_b(<16 x i8> %a0, <16 x i8> %a1) {
440  ; CHECK: vpcmpgtb
441  %res = call <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
442  ret <16 x i8> %res
443}
444declare <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8>, <16 x i8>) nounwind readnone
445
446
447define <4 x i32> @test_x86_sse2_pcmpgt_d(<4 x i32> %a0, <4 x i32> %a1) {
448  ; CHECK: vpcmpgtd
449  %res = call <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
450  ret <4 x i32> %res
451}
452declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>) nounwind readnone
453
454
455define <8 x i16> @test_x86_sse2_pcmpgt_w(<8 x i16> %a0, <8 x i16> %a1) {
456  ; CHECK: vpcmpgtw
457  %res = call <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
458  ret <8 x i16> %res
459}
460declare <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16>, <8 x i16>) nounwind readnone
461
462
463define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
464  ; CHECK: vpmaddwd
465  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
466  ret <4 x i32> %res
467}
468declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
469
470
471define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
472  ; CHECK: vpmaxsw
473  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
474  ret <8 x i16> %res
475}
476declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
477
478
479define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
480  ; CHECK: vpmaxub
481  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
482  ret <16 x i8> %res
483}
484declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
485
486
487define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
488  ; CHECK: vpminsw
489  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
490  ret <8 x i16> %res
491}
492declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
493
494
495define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
496  ; CHECK: vpminub
497  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
498  ret <16 x i8> %res
499}
500declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
501
502
503define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
504  ; CHECK: vpmovmskb
505  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
506  ret i32 %res
507}
508declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
509
510
511define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
512  ; CHECK: vpmulhw
513  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
514  ret <8 x i16> %res
515}
516declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
517
518
519define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
520  ; CHECK: vpmulhuw
521  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
522  ret <8 x i16> %res
523}
524declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
525
526
527define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
528  ; CHECK: vpmuludq
529  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
530  ret <2 x i64> %res
531}
532declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
533
534
535define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
536  ; CHECK: vpsadbw
537  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
538  ret <2 x i64> %res
539}
540declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
541
542
543define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
544  ; CHECK: vpslld
545  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
546  ret <4 x i32> %res
547}
548declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
549
550
551define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
552  ; CHECK: vpslldq
553  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
554  ret <2 x i64> %res
555}
556declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
557
558
559define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
560  ; CHECK: vpslldq
561  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
562  ret <2 x i64> %res
563}
564declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
565
566
567define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
568  ; CHECK: vpsllq
569  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
570  ret <2 x i64> %res
571}
572declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
573
574
575define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
576  ; CHECK: vpsllw
577  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
578  ret <8 x i16> %res
579}
580declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
581
582
583define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
584  ; CHECK: vpslld
585  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
586  ret <4 x i32> %res
587}
588declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
589
590
591define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
592  ; CHECK: vpsllq
593  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
594  ret <2 x i64> %res
595}
596declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
597
598
599define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
600  ; CHECK: vpsllw
601  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
602  ret <8 x i16> %res
603}
604declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
605
606
607define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
608  ; CHECK: vpsrad
609  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
610  ret <4 x i32> %res
611}
612declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
613
614
615define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
616  ; CHECK: vpsraw
617  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
618  ret <8 x i16> %res
619}
620declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
621
622
623define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
624  ; CHECK: vpsrad
625  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
626  ret <4 x i32> %res
627}
628declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
629
630
631define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
632  ; CHECK: vpsraw
633  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
634  ret <8 x i16> %res
635}
636declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
637
638
639define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
640  ; CHECK: vpsrld
641  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
642  ret <4 x i32> %res
643}
644declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
645
646
647define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
648  ; CHECK: vpsrldq
649  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
650  ret <2 x i64> %res
651}
652declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
653
654
655define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
656  ; CHECK: vpsrldq
657  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
658  ret <2 x i64> %res
659}
660declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
661
662
663define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
664  ; CHECK: vpsrlq
665  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
666  ret <2 x i64> %res
667}
668declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
669
670
671define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
672  ; CHECK: vpsrlw
673  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
674  ret <8 x i16> %res
675}
676declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
677
678
679define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
680  ; CHECK: vpsrld
681  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
682  ret <4 x i32> %res
683}
684declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
685
686
687define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
688  ; CHECK: vpsrlq
689  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
690  ret <2 x i64> %res
691}
692declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
693
694
695define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
696  ; CHECK: vpsrlw
697  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
698  ret <8 x i16> %res
699}
700declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
701
702
703define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
704  ; CHECK: vpsubsb
705  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
706  ret <16 x i8> %res
707}
708declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
709
710
711define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
712  ; CHECK: vpsubsw
713  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
714  ret <8 x i16> %res
715}
716declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
717
718
719define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
720  ; CHECK: vpsubusb
721  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
722  ret <16 x i8> %res
723}
724declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
725
726
727define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
728  ; CHECK: vpsubusw
729  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
730  ret <8 x i16> %res
731}
732declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
733
734
735define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
736  ; CHECK: vsqrtpd
737  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
738  ret <2 x double> %res
739}
740declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
741
742
743define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
744  ; CHECK: vsqrtsd
745  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
746  ret <2 x double> %res
747}
748declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
749
750
751define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
752  ; CHECK: movl
753  ; CHECK: vmovq
754  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
755  ret void
756}
757declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
758
759
760define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
761  ; CHECK: movl
762  ; CHECK: vmovdqu
763  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1)
764  ret void
765}
766declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
767
768
769define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
770  ; CHECK: movl
771  ; CHECK: vmovupd
772  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1)
773  ret void
774}
775declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
776
777
778define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
779  ; CHECK: vsubsd
780  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
781  ret <2 x double> %res
782}
783declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
784
785
786define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
787  ; CHECK: vucomisd
788  ; CHECK: sete
789  ; CHECK: movzbl
790  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
791  ret i32 %res
792}
793declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
794
795
796define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
797  ; CHECK: vucomisd
798  ; CHECK: setae
799  ; CHECK: movzbl
800  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
801  ret i32 %res
802}
803declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
804
805
806define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
807  ; CHECK: vucomisd
808  ; CHECK: seta
809  ; CHECK: movzbl
810  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
811  ret i32 %res
812}
813declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
814
815
816define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
817  ; CHECK: vucomisd
818  ; CHECK: setbe
819  ; CHECK: movzbl
820  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
821  ret i32 %res
822}
823declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
824
825
826define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
827  ; CHECK: vucomisd
828  ; CHECK: sbbl
829  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
830  ret i32 %res
831}
832declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
833
834
835define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
836  ; CHECK: vucomisd
837  ; CHECK: setne
838  ; CHECK: movzbl
839  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
840  ret i32 %res
841}
842declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
843
844
845define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
846  ; CHECK: vaddsubpd
847  %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
848  ret <2 x double> %res
849}
850declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
851
852
853define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
854  ; CHECK: vaddsubps
855  %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
856  ret <4 x float> %res
857}
858declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
859
860
861define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
862  ; CHECK: vhaddpd
863  %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
864  ret <2 x double> %res
865}
866declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
867
868
869define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
870  ; CHECK: vhaddps
871  %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
872  ret <4 x float> %res
873}
874declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
875
876
877define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
878  ; CHECK: vhsubpd
879  %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
880  ret <2 x double> %res
881}
882declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
883
884
885define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
886  ; CHECK: vhsubps
887  %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
888  ret <4 x float> %res
889}
890declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
891
892
893define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
894  ; CHECK: movl
895  ; CHECK: vlddqu
896  %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
897  ret <16 x i8> %res
898}
899declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
900
901
902define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
903  ; CHECK: vblendpd
904  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
905  ret <2 x double> %res
906}
907declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
908
909
910define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
911  ; CHECK: vblendps
912  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
913  ret <4 x float> %res
914}
915declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
916
917
918define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
919  ; CHECK: vblendvpd
920  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
921  ret <2 x double> %res
922}
923declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
924
925
926define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
927  ; CHECK: vblendvps
928  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
929  ret <4 x float> %res
930}
931declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
932
933
934define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
935  ; CHECK: vdppd
936  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
937  ret <2 x double> %res
938}
939declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
940
941
942define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
943  ; CHECK: vdpps
944  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
945  ret <4 x float> %res
946}
947declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
948
949
950define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
951  ; CHECK: vinsertps
952  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
953  ret <4 x float> %res
954}
955declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
956
957
958define <2 x i64> @test_x86_sse41_movntdqa(i8* %a0) {
959  ; CHECK: movl
960  ; CHECK: vmovntdqa
961  %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0) ; <<2 x i64>> [#uses=1]
962  ret <2 x i64> %res
963}
964declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readonly
965
966
967define <16 x i8> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
968  ; CHECK: vmpsadbw
969  %res = call <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<16 x i8>> [#uses=1]
970  ret <16 x i8> %res
971}
972declare <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
973
974
975define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
976  ; CHECK: vpackusdw
977  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
978  ret <8 x i16> %res
979}
980declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
981
982
983define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
984  ; CHECK: vpblendvb
985  %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
986  ret <16 x i8> %res
987}
988declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
989
990
991define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
992  ; CHECK: vpblendw
993  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
994  ret <8 x i16> %res
995}
996declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
997
998
999define <2 x i64> @test_x86_sse41_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) {
1000  ; CHECK: vpcmpeqq
1001  %res = call <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
1002  ret <2 x i64> %res
1003}
1004declare <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64>, <2 x i64>) nounwind readnone
1005
1006
1007define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
1008  ; CHECK: vphminposuw
1009  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
1010  ret <8 x i16> %res
1011}
1012declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
1013
1014
1015define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
1016  ; CHECK: vpmaxsb
1017  %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
1018  ret <16 x i8> %res
1019}
1020declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
1021
1022
1023define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
1024  ; CHECK: vpmaxsd
1025  %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1026  ret <4 x i32> %res
1027}
1028declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
1029
1030
1031define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
1032  ; CHECK: vpmaxud
1033  %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1034  ret <4 x i32> %res
1035}
1036declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
1037
1038
1039define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
1040  ; CHECK: vpmaxuw
1041  %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1042  ret <8 x i16> %res
1043}
1044declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
1045
1046
1047define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
1048  ; CHECK: vpminsb
1049  %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
1050  ret <16 x i8> %res
1051}
1052declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
1053
1054
1055define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
1056  ; CHECK: vpminsd
1057  %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1058  ret <4 x i32> %res
1059}
1060declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
1061
1062
1063define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
1064  ; CHECK: vpminud
1065  %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1066  ret <4 x i32> %res
1067}
1068declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
1069
1070
1071define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
1072  ; CHECK: vpminuw
1073  %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1074  ret <8 x i16> %res
1075}
1076declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
1077
1078
1079define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
1080  ; CHECK: vpmovsxbd
1081  %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
1082  ret <4 x i32> %res
1083}
1084declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
1085
1086
1087define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
1088  ; CHECK: vpmovsxbq
1089  %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
1090  ret <2 x i64> %res
1091}
1092declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
1093
1094
1095define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
1096  ; CHECK: vpmovsxbw
1097  %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
1098  ret <8 x i16> %res
1099}
1100declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
1101
1102
1103define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
1104  ; CHECK: vpmovsxdq
1105  %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
1106  ret <2 x i64> %res
1107}
1108declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
1109
1110
1111define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
1112  ; CHECK: vpmovsxwd
1113  %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
1114  ret <4 x i32> %res
1115}
1116declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
1117
1118
1119define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
1120  ; CHECK: vpmovsxwq
1121  %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
1122  ret <2 x i64> %res
1123}
1124declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
1125
1126
1127define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
1128  ; CHECK: vpmovzxbd
1129  %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
1130  ret <4 x i32> %res
1131}
1132declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
1133
1134
1135define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
1136  ; CHECK: vpmovzxbq
1137  %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
1138  ret <2 x i64> %res
1139}
1140declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
1141
1142
1143define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
1144  ; CHECK: vpmovzxbw
1145  %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
1146  ret <8 x i16> %res
1147}
1148declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
1149
1150
1151define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
1152  ; CHECK: vpmovzxdq
1153  %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
1154  ret <2 x i64> %res
1155}
1156declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
1157
1158
1159define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
1160  ; CHECK: vpmovzxwd
1161  %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
1162  ret <4 x i32> %res
1163}
1164declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
1165
1166
1167define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
1168  ; CHECK: vpmovzxwq
1169  %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
1170  ret <2 x i64> %res
1171}
1172declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
1173
1174
1175define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
1176  ; CHECK: vpmuldq
1177  %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
1178  ret <2 x i64> %res
1179}
1180declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
1181
1182
1183define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) {
1184  ; CHECK: vptest
1185  ; CHECK: sbbl
1186  %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1187  ret i32 %res
1188}
1189declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
1190
1191
1192define i32 @test_x86_sse41_ptestnzc(<4 x float> %a0, <4 x float> %a1) {
1193  ; CHECK: vptest
1194  ; CHECK: seta
1195  ; CHECK: movzbl
1196  %res = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1197  ret i32 %res
1198}
1199declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
1200
1201
1202define i32 @test_x86_sse41_ptestz(<4 x float> %a0, <4 x float> %a1) {
1203  ; CHECK: vptest
1204  ; CHECK: sete
1205  ; CHECK: movzbl
1206  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1207  ret i32 %res
1208}
1209declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
1210
1211
1212define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
1213  ; CHECK: vroundpd
1214  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
1215  ret <2 x double> %res
1216}
1217declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
1218
1219
1220define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
1221  ; CHECK: vroundps
1222  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
1223  ret <4 x float> %res
1224}
1225declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
1226
1227
1228define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
1229  ; CHECK: vroundsd
1230  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
1231  ret <2 x double> %res
1232}
1233declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
1234
1235
1236define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
1237  ; CHECK: vroundss
1238  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
1239  ret <4 x float> %res
1240}
1241declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
1242
1243
1244define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
1245  ; CHECK: movl
1246  ; CHECK: movl
1247  ; CHECK: vpcmpestri
1248  ; CHECK: movl
1249  %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1250  ret i32 %res
1251}
1252declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1253
1254
1255define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
1256  ; CHECK: movl
1257  ; CHECK: movl
1258  ; CHECK: vpcmpestri
1259  ; CHECK: movl
1260  %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1261  ret i32 %res
1262}
1263declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1264
1265
1266define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
1267  ; CHECK: movl
1268  ; CHECK: movl
1269  ; CHECK: vpcmpestri
1270  ; CHECK: movl
1271  %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1272  ret i32 %res
1273}
1274declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1275
1276
1277define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
1278  ; CHECK: movl
1279  ; CHECK: movl
1280  ; CHECK: vpcmpestri
1281  ; CHECK: movl
1282  %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1283  ret i32 %res
1284}
1285declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1286
1287
1288define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
1289  ; CHECK: movl
1290  ; CHECK: movl
1291  ; CHECK: vpcmpestri
1292  ; CHECK: movl
1293  %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1294  ret i32 %res
1295}
1296declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1297
1298
1299define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
1300  ; CHECK: movl
1301  ; CHECK: movl
1302  ; CHECK: vpcmpestri
1303  ; CHECK: movl
1304  %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1305  ret i32 %res
1306}
1307declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1308
1309
1310define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
1311  ; CHECK: movl
1312  ; CHECK: movl
1313  ; CHECK: vpcmpestrm
1314  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
1315  ret <16 x i8> %res
1316}
1317declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1318
1319
1320define <2 x i64> @test_x86_sse42_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) {
1321  ; CHECK: vpcmpgtq
1322  %res = call <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
1323  ret <2 x i64> %res
1324}
1325declare <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64>, <2 x i64>) nounwind readnone
1326
1327
1328define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
1329  ; CHECK: vpcmpistri
1330  ; CHECK: movl
1331  %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1332  ret i32 %res
1333}
1334declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1335
1336
1337define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
1338  ; CHECK: vpcmpistri
1339  ; CHECK: movl
1340  %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1341  ret i32 %res
1342}
1343declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1344
1345
1346define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
1347  ; CHECK: vpcmpistri
1348  ; CHECK: movl
1349  %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1350  ret i32 %res
1351}
1352declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1353
1354
1355define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
1356  ; CHECK: vpcmpistri
1357  ; CHECK: movl
1358  %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1359  ret i32 %res
1360}
1361declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1362
1363
1364define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
1365  ; CHECK: vpcmpistri
1366  ; CHECK: movl
1367  %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1368  ret i32 %res
1369}
1370declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1371
1372
1373define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
1374  ; CHECK: vpcmpistri
1375  ; CHECK: movl
1376  %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1377  ret i32 %res
1378}
1379declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1380
1381
1382define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
1383  ; CHECK: vpcmpistrm
1384  %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
1385  ret <16 x i8> %res
1386}
1387declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1388
1389
define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vaddss
  %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcmpordps
  %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone


define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcmpordss
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone


define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: sbb
  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
  ; CHECK: movl
  ; CHECK: vcvtsi2ss
  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone


define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
  ; CHECK: vcvtss2si
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone


define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
  ; CHECK: vcvttss2si
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vdivss
  %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone


define void @test_x86_sse_ldmxcsr(i8* %a0) {
  ; CHECK: movl
  ; CHECK: vldmxcsr
  call void @llvm.x86.sse.ldmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind


define <4 x float> @test_x86_sse_loadu_ps(i8* %a0) {
  ; CHECK: movl
  ; CHECK: vmovups
  %res = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readonly


define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vmaxps
  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vmaxss
  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vminps
  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vminss
  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
  ; CHECK: vmovmskps
  %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone


define void @test_x86_sse_movnt_ps(i8* %a0, <4 x float> %a1) {
  ; CHECK: movl
  ; CHECK: vmovntps
  call void @llvm.x86.sse.movnt.ps(i8* %a0, <4 x float> %a1)
  ret void
}
declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind


define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vmulss
  %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
  ; CHECK: vrcpps
  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
  ; CHECK: vrcpss
  %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
  ; CHECK: vrsqrtps
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
  ; CHECK: vrsqrtss
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
  ; CHECK: vsqrtps
  %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
  ; CHECK: vsqrtss
  %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone


define void @test_x86_sse_stmxcsr(i8* %a0) {
  ; CHECK: movl
  ; CHECK: vstmxcsr
  call void @llvm.x86.sse.stmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.stmxcsr(i8*) nounwind


define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
  ; CHECK: movl
  ; CHECK: vmovups
  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
  ret void
}
declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind


define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vsubss
  %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone


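; SSSE3 128-bit intrinsics; the CHECK lines below expect the VEX-encoded (vp*) forms.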
define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
  ; CHECK: vpabsb
  %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone


define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
  ; CHECK: vpabsd
  %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone


define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
  ; CHECK: vpabsw
  %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone


define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vphaddd
  %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone


define <4 x i32> @test_x86_ssse3_phadd_sw_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vphaddsw
  %res = call <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vphaddw
  %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vphsubd
  %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vphsubsw
  %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vphsubw
  %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmaddubsw
  %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmulhrsw
  %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpshufb
  %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone


define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpsignb
  %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsignd
  %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsignw
  %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone


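; AVX intrinsics.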
define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vaddsubpd
  %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vaddsubps
  %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vblendpd
  %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone


define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vblendps
  %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone


define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
  ; CHECK: vblendvpd
  %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
  ; CHECK: vblendvps
  %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vcmpordpd
  %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone


define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vcmpordps
  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone


define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
  ; CHECK: vcvtpd2psy
  %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone


define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
  ; CHECK: vcvtpd2dqy
  %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone


define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
  ; CHECK: vcvtps2pd
  %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone


define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
  ; CHECK: vcvtps2dq
  %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
  ; CHECK: vcvtdq2pd
  %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone


define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
  ; CHECK: vcvtdq2ps
  %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone


define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
  ; CHECK: vcvttpd2dqy
  %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone


define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
  ; CHECK: vcvttps2dq
  %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone


define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vdpps
  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone


define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vhaddpd
  %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vhaddps
  %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vhsubpd
  %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vhsubps
  %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
  ; CHECK: vlddqu
  %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly


define <32 x i8> @test_x86_avx_loadu_dq_256(i8* %a0) {
  ; CHECK: vmovdqu
  %res = call <32 x i8> @llvm.x86.avx.loadu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx.loadu.dq.256(i8*) nounwind readonly


define <4 x double> @test_x86_avx_loadu_pd_256(i8* %a0) {
  ; CHECK: vmovupd
  %res = call <4 x double> @llvm.x86.avx.loadu.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.loadu.pd.256(i8*) nounwind readonly


define <8 x float> @test_x86_avx_loadu_ps_256(i8* %a0) {
  ; CHECK: vmovups
  %res = call <8 x float> @llvm.x86.avx.loadu.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.loadu.ps.256(i8*) nounwind readonly


define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
  ; CHECK: vmaskmovpd
  %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly


define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) {
  ; CHECK: vmaskmovpd
  %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly


define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) {
  ; CHECK: vmaskmovps
  %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly


define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) {
  ; CHECK: vmaskmovps
  %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly


define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK: vmaskmovpd
  call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind


define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) {
  ; CHECK: vmaskmovpd
  call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind


define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK: vmaskmovps
  call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind


define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) {
  ; CHECK: vmaskmovps
  call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind


define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vmaxpd
  %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vmaxps
  %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vminpd
  %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vminps
  %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone


define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
  ; CHECK: vmovmskpd
  %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone


define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
  ; CHECK: vmovmskps
  %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone


define void @test_x86_avx_movnt_dq_256(i8* %a0, <4 x i64> %a1) {
  ; CHECK: vmovntdq
  call void @llvm.x86.avx.movnt.dq.256(i8* %a0, <4 x i64> %a1)
  ret void
}
declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind


define void @test_x86_avx_movnt_pd_256(i8* %a0, <4 x double> %a1) {
  ; CHECK: vmovntpd
  call void @llvm.x86.avx.movnt.pd.256(i8* %a0, <4 x double> %a1)
  ret void
}
declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind


define void @test_x86_avx_movnt_ps_256(i8* %a0, <8 x float> %a1) {
  ; CHECK: vmovntps
  call void @llvm.x86.avx.movnt.ps.256(i8* %a0, <8 x float> %a1)
  ret void
}
declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind


define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vptest
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone


define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vptest
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone


define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vptest
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone


define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
  ; CHECK: vrcpps
  %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
  ; CHECK: vroundpd
  %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone


define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
  ; CHECK: vroundps
  %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone


define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
  ; CHECK: vrsqrtps
  %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
  ; CHECK: vsqrtpd
  %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
  ; CHECK: vsqrtps
  %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone


define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
  ; CHECK: vmovdqu
  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a1)
  ret void
}
declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind


define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
  ; CHECK: vmovupd
  call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a1)
  ret void
}
declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind


define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
  ; CHECK: vmovups
  call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
  ret void
}
declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind


define <4 x double> @test_x86_avx_vbroadcast_sd_256(i8* %a0) {
  ; CHECK: vbroadcastsd
  %res = call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) nounwind readonly


define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
  ; CHECK: vbroadcastf128
  %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly


define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
  ; CHECK: vbroadcastf128
  %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly


define <4 x float> @test_x86_avx_vbroadcastss(i8* %a0) {
  ; CHECK: vbroadcastss
  %res = call <4 x float> @llvm.x86.avx.vbroadcastss(i8* %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vbroadcastss(i8*) nounwind readonly


define <8 x float> @test_x86_avx_vbroadcastss_256(i8* %a0) {
  ; CHECK: vbroadcastss
  %res = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vbroadcastss.256(i8*) nounwind readonly


define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
  ; CHECK: vextractf128
  %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone


define <4 x float> @test_x86_avx_vextractf128_ps_256(<8 x float> %a0) {
  ; CHECK: vextractf128
  %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone


define <4 x i32> @test_x86_avx_vextractf128_si_256(<8 x i32> %a0) {
  ; CHECK: vextractf128
  %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone


define <4 x double> @test_x86_avx_vinsertf128_pd_256(<4 x double> %a0, <2 x double> %a1) {
  ; CHECK: vinsertf128
  %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone


define <8 x float> @test_x86_avx_vinsertf128_ps_256(<8 x float> %a0, <4 x float> %a1) {
  ; CHECK: vinsertf128
  %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone


define <8 x i32> @test_x86_avx_vinsertf128_si_256(<8 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vinsertf128
  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone


define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vperm2f128
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone


define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vperm2f128
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone


define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vperm2f128
  %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone


define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
  ; CHECK: vpermilpd
  %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone


define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
  ; CHECK: vpermilpd
  %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone


define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
  ; CHECK: vpermilps
  %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone


define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
  ; CHECK: vpermilps
  %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone


define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
  ; CHECK: vpermilpd
  %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone


define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
  ; CHECK: vpermilpd
  %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone


define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
  ; CHECK: vpermilps
  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone


define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
  ; CHECK: vpermilps
  %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone


define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vtestpd
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vtestpd
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone


define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vtestps
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vtestps
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vtestpd
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vtestpd
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vtestps
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vtestps
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone


define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vtestpd
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vtestpd
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone


define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vtestps
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vtestps
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone


define void @test_x86_avx_vzeroall() {
  ; CHECK: vzeroall
  call void @llvm.x86.avx.vzeroall()
  ret void
}
declare void @llvm.x86.avx.vzeroall() nounwind


define void @test_x86_avx_vzeroupper() {
  ; CHECK: vzeroupper
  call void @llvm.x86.avx.vzeroupper()
  ret void
}
declare void @llvm.x86.avx.vzeroupper() nounwind
