; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx,aes,pclmul | FileCheck %s
3
; Lowering test: a call to @llvm.x86.aesni.aesdec is expected to emit vaesdec.
declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesdec:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaesdec %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %out
}
13
14
; Lowering test: a call to @llvm.x86.aesni.aesdeclast is expected to emit vaesdeclast.
declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesdeclast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaesdeclast %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %out
}
24
25
; Lowering test: a call to @llvm.x86.aesni.aesenc is expected to emit vaesenc.
declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesenc:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaesenc %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %out
}
35
36
; Lowering test: a call to @llvm.x86.aesni.aesenclast is expected to emit vaesenclast.
declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesenclast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaesenclast %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %out
}
46
47
; Lowering test: a call to @llvm.x86.aesni.aesimc is expected to emit the
; two-operand vaesimc.
declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone

define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_aesni_aesimc:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaesimc %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0)
  ret <2 x i64> %out
}
57
58
; Lowering test: @llvm.x86.aesni.aeskeygenassist with the constant i8 7 is
; expected to emit vaeskeygenassist carrying $7 as its immediate.
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone

define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_aesni_aeskeygenassist:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaeskeygenassist $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
  ret <2 x i64> %out
}
68
69
; Lowering test: a call to @llvm.x86.sse2.add.sd is expected to emit vaddsd.
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_add_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %out
}
79
80
; Lowering test: @llvm.x86.sse2.cmp.pd with predicate immediate 7 is expected
; to be printed as vcmpordpd.
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cmp_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcmpordpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %out
}
90
91
; Lowering test: @llvm.x86.sse2.cmp.sd with predicate immediate 7 is expected
; to be printed as vcmpordsd.
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cmp_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcmpordsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %out
}
101
102
; Lowering test: @llvm.x86.sse2.comieq.sd is expected to emit vcomisd, with the
; i32 result built by sete + movzbl.
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comieq_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcomisd %xmm1, %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %out
}
114
115
; Lowering test: @llvm.x86.sse2.comige.sd is expected to emit vcomisd, with the
; i32 result built by setae + movzbl.
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comige_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcomisd %xmm1, %xmm0
; CHECK-NEXT:    setae %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %out
}
127
128
; Lowering test: @llvm.x86.sse2.comigt.sd is expected to emit vcomisd, with the
; i32 result built by seta + movzbl.
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comigt_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcomisd %xmm1, %xmm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %out
}
140
141
; Lowering test: @llvm.x86.sse2.comile.sd is expected to emit vcomisd, with the
; i32 result built by setbe + movzbl.
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comile_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcomisd %xmm1, %xmm0
; CHECK-NEXT:    setbe %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %out
}
153
154
; Lowering test: @llvm.x86.sse2.comilt.sd is expected to emit vcomisd, with the
; i32 result built by an sbbl/andl $1 pair rather than a setcc.
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comilt_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcomisd %xmm1, %xmm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %out
}
166
167
; Lowering test: @llvm.x86.sse2.comineq.sd is expected to emit vcomisd, with
; the i32 result built by setne + movzbl.
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comineq_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcomisd %xmm1, %xmm0
; CHECK-NEXT:    setne %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %out
}
179
180
; Lowering test: a call to @llvm.x86.sse2.cvtdq2pd is expected to emit vcvtdq2pd.
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone

define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtdq2pd %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
  ret <2 x double> %out
}
190
191
; Lowering test: a call to @llvm.x86.sse2.cvtdq2ps is expected to emit vcvtdq2ps.
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone

define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0)
  ret <4 x float> %out
}
201
202
; Lowering test: a call to @llvm.x86.sse2.cvtpd2dq is expected to emit vcvtpd2dq.
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtpd2dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtpd2dq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %out
}
212
213
; Lowering test: a call to @llvm.x86.sse2.cvtpd2ps is expected to emit vcvtpd2ps.
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone

define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtpd2ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtpd2ps %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %out
}
223
224
; Lowering test: a call to @llvm.x86.sse2.cvtps2dq is expected to emit vcvtps2dq.
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtps2dq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %out
}
234
235
; Lowering test: a call to @llvm.x86.sse2.cvtps2pd is expected to emit vcvtps2pd.
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone

define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtps2pd %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
  ret <2 x double> %out
}
245
246
; Lowering test: a call to @llvm.x86.sse2.cvtsd2si is expected to emit
; vcvtsd2si writing its i32 result to %eax.
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone

define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsd2si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtsd2si %xmm0, %eax
; CHECK-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %out
}
256
257
; Lowering test: a call to @llvm.x86.sse2.cvtsd2ss is expected to emit vcvtsd2ss.
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone

define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cvtsd2ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %out
}
267
268
; Lowering test: @llvm.x86.sse2.cvtsi2sd with the constant i32 7 is expected to
; first move $7 into %eax and then emit vcvtsi2sdl from that register.
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone

define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsi2sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    vcvtsi2sdl %eax, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7)
  ret <2 x double> %out
}
279
280
; Lowering test: a call to @llvm.x86.sse2.cvtss2sd is expected to emit vcvtss2sd.
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone

define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse2_cvtss2sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1)
  ret <2 x double> %out
}
290
291
; Lowering test: a call to @llvm.x86.sse2.cvttpd2dq is expected to emit vcvttpd2dq.
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone

define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttpd2dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvttpd2dq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %out
}
301
302
; Lowering test: a call to @llvm.x86.sse2.cvttps2dq is expected to emit vcvttps2dq.
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone

define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttps2dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
  ret <4 x i32> %out
}
312
313
; Lowering test: a call to @llvm.x86.sse2.cvttsd2si is expected to emit
; vcvttsd2si writing its i32 result to %eax.
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone

define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttsd2si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvttsd2si %xmm0, %eax
; CHECK-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %out
}
323
324
; Lowering test: a call to @llvm.x86.sse2.div.sd is expected to emit vdivsd.
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_div_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %out
}
334
335
336
; Lowering test: a call to @llvm.x86.sse2.max.pd is expected to emit vmaxpd.
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_max_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %out
}
346
347
; Lowering test: a call to @llvm.x86.sse2.max.sd is expected to emit vmaxsd.
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_max_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %out
}
357
358
; Lowering test: a call to @llvm.x86.sse2.min.pd is expected to emit vminpd.
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_min_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %out
}
368
369
; Lowering test: a call to @llvm.x86.sse2.min.sd is expected to emit vminsd.
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_min_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %out
}
379
380
; Lowering test: a call to @llvm.x86.sse2.movmsk.pd is expected to emit
; vmovmskpd writing its i32 result to %eax.
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone

define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_movmsk_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovmskpd %xmm0, %eax
; CHECK-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ret i32 %out
}
390
391
392
393
; Lowering test: a call to @llvm.x86.sse2.mul.sd is expected to emit vmulsd.
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_mul_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %out
}
403
404
; Lowering test: a call to @llvm.x86.sse2.packssdw.128 is expected to emit vpackssdw.
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_packssdw_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %out
}
414
415
; Lowering test: a call to @llvm.x86.sse2.packsswb.128 is expected to emit vpacksswb.
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_packsswb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %out
}
425
426
; Lowering test: a call to @llvm.x86.sse2.packuswb.128 is expected to emit vpackuswb.
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_packuswb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %out
}
436
437
; Lowering test: a call to @llvm.x86.sse2.padds.b is expected to emit vpaddsb.
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone

define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_padds_b:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %out
}
447
448
; Lowering test: a call to @llvm.x86.sse2.padds.w is expected to emit vpaddsw.
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_padds_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
458
459
; Lowering test: a call to @llvm.x86.sse2.paddus.b is expected to emit vpaddusb.
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_paddus_b:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %out
}
469
470
; Lowering test: a call to @llvm.x86.sse2.paddus.w is expected to emit vpaddusw.
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_paddus_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
480
481
; Lowering test: a call to @llvm.x86.sse2.pavg.b is expected to emit vpavgb.
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone

define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pavg_b:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %out
}
491
492
; Lowering test: a call to @llvm.x86.sse2.pavg.w is expected to emit vpavgw.
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pavg_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
502
503
; Lowering test: a call to @llvm.x86.sse2.pmadd.wd is expected to emit vpmaddwd.
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmadd_wd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
  ret <4 x i32> %out
}
513
514
; Lowering test: a call to @llvm.x86.sse2.pmaxs.w is expected to emit vpmaxsw.
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmaxs_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
524
525
; Lowering test: a call to @llvm.x86.sse2.pmaxu.b is expected to emit vpmaxub.
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pmaxu_b:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %out
}
535
536
; Lowering test: a call to @llvm.x86.sse2.pmins.w is expected to emit vpminsw.
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmins_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
546
547
; Lowering test: a call to @llvm.x86.sse2.pminu.b is expected to emit vpminub.
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pminu_b:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %out
}
557
558
; Lowering test: a call to @llvm.x86.sse2.pmovmskb.128 is expected to emit
; vpmovmskb writing its i32 result to %eax.
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone

define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse2_pmovmskb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovmskb %xmm0, %eax
; CHECK-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  ret i32 %out
}
568
569
; Lowering test: a call to @llvm.x86.sse2.pmulh.w is expected to emit vpmulhw.
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulh_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
579
580
; Lowering test: a call to @llvm.x86.sse2.pmulhu.w is expected to emit vpmulhuw.
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulhu_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
590
591
; Lowering test: a call to @llvm.x86.sse2.pmulu.dq is expected to emit vpmuludq.
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulu_dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %out
}
601
602
; Lowering test: a call to @llvm.x86.sse2.psad.bw is expected to emit vpsadbw.
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psad_bw:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
  ret <2 x i64> %out
}
612
613
; Lowering test: @llvm.x86.sse2.psll.d with a vector shift count is expected to
; emit the register form of vpslld.
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpslld %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %out
}
623
624
; Lowering test: @llvm.x86.sse2.psll.q with a vector shift count is expected to
; emit the register form of vpsllq.
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_q:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %out
}
634
635
; Lowering test: @llvm.x86.sse2.psll.w with a vector shift count is expected to
; emit the register form of vpsllw.
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
645
646
; Lowering test: @llvm.x86.sse2.pslli.d with the constant count i32 7 is
; expected to emit vpslld with $7 as an immediate.
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone

define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpslld $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %out
}
656
657
; Lowering test: @llvm.x86.sse2.pslli.q with the constant count i32 7 is
; expected to emit vpsllq with $7 as an immediate.
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_q:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllq $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %out
}
667
668
; Lowering test: @llvm.x86.sse2.pslli.w with the constant count i32 7 is
; expected to emit vpsllw with $7 as an immediate.
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone

define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %out
}
678
679
; Lowering test: @llvm.x86.sse2.psra.d with a vector shift count is expected to
; emit the register form of vpsrad.
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psra_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %out
}
689
690
; Lowering test: @llvm.x86.sse2.psra.w with a vector shift count is expected to
; emit the register form of vpsraw.
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psra_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
700
701
; Lowering test: @llvm.x86.sse2.psrai.d with the constant count i32 7 is
; expected to emit vpsrad with $7 as an immediate.
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone

define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_psrai_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrad $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %out
}
711
712
; Lowering test: @llvm.x86.sse2.psrai.w with the constant count i32 7 is
; expected to emit vpsraw with $7 as an immediate.
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone

define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_psrai_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsraw $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %out
}
722
723
; Lowering test: @llvm.x86.sse2.psrl.d with a vector shift count is expected to
; emit the register form of vpsrld.
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %out
}
733
734
; Lowering test: @llvm.x86.sse2.psrl.q with a vector shift count is expected to
; emit the register form of vpsrlq.
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_q:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %out
}
744
745
; Lowering test: @llvm.x86.sse2.psrl.w with a vector shift count is expected to
; emit the register form of vpsrlw.
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
755
756
; Lowering test: @llvm.x86.sse2.psrli.d with the constant count i32 7 is
; expected to emit vpsrld with $7 as an immediate.
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone

define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrld $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %out
}
766
767
; Lowering test: @llvm.x86.sse2.psrli.q with the constant count i32 7 is
; expected to emit vpsrlq with $7 as an immediate.
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_q:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrlq $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %out
}
777
778
; Lowering test: @llvm.x86.sse2.psrli.w with the constant count i32 7 is
; expected to emit vpsrlw with $7 as an immediate.
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone

define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrlw $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %out
}
788
789
; Lowering test: a call to @llvm.x86.sse2.psubs.b is expected to emit vpsubsb.
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone

define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psubs_b:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %out
}
799
800
; Lowering test: a call to @llvm.x86.sse2.psubs.w is expected to emit vpsubsw.
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psubs_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
810
811
; Lowering test: a call to @llvm.x86.sse2.psubus.b is expected to emit vpsubusb.
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psubus_b:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %out = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %out
}
821
822
823define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
824; CHECK-LABEL: test_x86_sse2_psubus_w:
825; CHECK:       ## BB#0:
826; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0
827; CHECK-NEXT:    retl
828  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; expected to select a single vpsubusw on xmm0/xmm1
829  ret <8 x i16> %res
830}
831declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
832
833
834define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
835; CHECK-LABEL: test_x86_sse2_sqrt_pd:
836; CHECK:       ## BB#0:
837; CHECK-NEXT:    vsqrtpd %xmm0, %xmm0
838; CHECK-NEXT:    retl
839  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; expected to select vsqrtpd with xmm0 as source and destination
840  ret <2 x double> %res
841}
842declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
843
844
845define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
846; CHECK-LABEL: test_x86_sse2_sqrt_sd:
847; CHECK:       ## BB#0:
848; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
849; CHECK-NEXT:    retl
850  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; expected to select vsqrtsd with xmm0 used for every operand
851  ret <2 x double> %res
852}
853declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
854
855
856define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
857; CHECK-LABEL: test_x86_sse2_storel_dq:
858; CHECK:       ## BB#0:
859; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
860; CHECK-NEXT:    vmovlps %xmm0, (%eax)
861; CHECK-NEXT:    retl
862  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1) ; pointer is reloaded from the stack, then the store is expected as vmovlps
863  ret void
864}
865declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
866
867
868define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
869  ; The add pins the execution domain, so the store below is checked as vmovdqu.
870; CHECK-LABEL: test_x86_sse2_storeu_dq:
871; CHECK:       ## BB#0:
872; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
873; CHECK-NEXT:    vpaddb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
874; CHECK-NEXT:    vmovdqu %xmm0, (%eax)
875; CHECK-NEXT:    retl
876  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> ; the constant-pool label is matched by regex because its number depends on this function's position in the file
877  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
878  ret void
879}
880declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
881
882
883define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
884  ; The fadd pins the execution domain, so the store below is checked as vmovupd.
885; CHECK-LABEL: test_x86_sse2_storeu_pd:
886; CHECK:       ## BB#0:
887; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
888; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
889; CHECK-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
890; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
891; CHECK-NEXT:    vmovupd %xmm0, (%eax)
892; CHECK-NEXT:    retl
893  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000> ; CHECK lines expect this constant to be built with vmovsd + vpslldq
894  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
895  ret void
896}
897declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
898
899
900define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
901; CHECK-LABEL: test_x86_sse2_sub_sd:
902; CHECK:       ## BB#0:
903; CHECK-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
904; CHECK-NEXT:    retl
905  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; expected to select a single vsubsd on xmm0/xmm1
906  ret <2 x double> %res
907}
908declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
909
910
911define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
912; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
913; CHECK:       ## BB#0:
914; CHECK-NEXT:    vucomisd %xmm1, %xmm0
915; CHECK-NEXT:    sete %al
916; CHECK-NEXT:    movzbl %al, %eax
917; CHECK-NEXT:    retl
918  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; i32 result is expected via vucomisd, sete, movzbl
919  ret i32 %res
920}
921declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
922
923
924define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
925; CHECK-LABEL: test_x86_sse2_ucomige_sd:
926; CHECK:       ## BB#0:
927; CHECK-NEXT:    vucomisd %xmm1, %xmm0
928; CHECK-NEXT:    setae %al
929; CHECK-NEXT:    movzbl %al, %eax
930; CHECK-NEXT:    retl
931  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; i32 result is expected via vucomisd, setae, movzbl
932  ret i32 %res
933}
934declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
935
936
937define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
938; CHECK-LABEL: test_x86_sse2_ucomigt_sd:
939; CHECK:       ## BB#0:
940; CHECK-NEXT:    vucomisd %xmm1, %xmm0
941; CHECK-NEXT:    seta %al
942; CHECK-NEXT:    movzbl %al, %eax
943; CHECK-NEXT:    retl
944  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; i32 result is expected via vucomisd, seta, movzbl
945  ret i32 %res
946}
947declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
948
949
950define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
951; CHECK-LABEL: test_x86_sse2_ucomile_sd:
952; CHECK:       ## BB#0:
953; CHECK-NEXT:    vucomisd %xmm1, %xmm0
954; CHECK-NEXT:    setbe %al
955; CHECK-NEXT:    movzbl %al, %eax
956; CHECK-NEXT:    retl
957  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; i32 result is expected via vucomisd, setbe, movzbl
958  ret i32 %res
959}
960declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
961
962
963define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
964; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
965; CHECK:       ## BB#0:
966; CHECK-NEXT:    vucomisd %xmm1, %xmm0
967; CHECK-NEXT:    sbbl %eax, %eax
968; CHECK-NEXT:    andl $1, %eax
969; CHECK-NEXT:    retl
970  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; unlike the setcc variants, this one is checked as vucomisd, sbbl, andl $1
971  ret i32 %res
972}
973declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
974
975
976define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
977; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
978; CHECK:       ## BB#0:
979; CHECK-NEXT:    vucomisd %xmm1, %xmm0
980; CHECK-NEXT:    setne %al
981; CHECK-NEXT:    movzbl %al, %eax
982; CHECK-NEXT:    retl
983  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; i32 result is expected via vucomisd, setne, movzbl
984  ret i32 %res
985}
986declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
987
988
989define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
990; CHECK-LABEL: test_x86_sse3_addsub_pd:
991; CHECK:       ## BB#0:
992; CHECK-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0
993; CHECK-NEXT:    retl
994  %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; expected to select a single vaddsubpd on xmm0/xmm1
995  ret <2 x double> %res
996}
997declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
998
999
1000define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
1001; CHECK-LABEL: test_x86_sse3_addsub_ps:
1002; CHECK:       ## BB#0:
1003; CHECK-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0
1004; CHECK-NEXT:    retl
1005  %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; expected to select a single vaddsubps on xmm0/xmm1
1006  ret <4 x float> %res
1007}
1008declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
1009
1010
1011define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
1012; CHECK-LABEL: test_x86_sse3_hadd_pd:
1013; CHECK:       ## BB#0:
1014; CHECK-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
1015; CHECK-NEXT:    retl
1016  %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; expected to select a single vhaddpd on xmm0/xmm1
1017  ret <2 x double> %res
1018}
1019declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
1020
1021
1022define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
1023; CHECK-LABEL: test_x86_sse3_hadd_ps:
1024; CHECK:       ## BB#0:
1025; CHECK-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
1026; CHECK-NEXT:    retl
1027  %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; expected to select a single vhaddps on xmm0/xmm1
1028  ret <4 x float> %res
1029}
1030declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
1031
1032
1033define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
1034; CHECK-LABEL: test_x86_sse3_hsub_pd:
1035; CHECK:       ## BB#0:
1036; CHECK-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
1037; CHECK-NEXT:    retl
1038  %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; expected to select a single vhsubpd on xmm0/xmm1
1039  ret <2 x double> %res
1040}
1041declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
1042
1043
1044define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
1045; CHECK-LABEL: test_x86_sse3_hsub_ps:
1046; CHECK:       ## BB#0:
1047; CHECK-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
1048; CHECK-NEXT:    retl
1049  %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; expected to select a single vhsubps on xmm0/xmm1
1050  ret <4 x float> %res
1051}
1052declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
1053
1054
1055define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
1056; CHECK-LABEL: test_x86_sse3_ldu_dq:
1057; CHECK:       ## BB#0:
1058; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1059; CHECK-NEXT:    vlddqu (%eax), %xmm0
1060; CHECK-NEXT:    retl
1061  %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; pointer is reloaded from the stack, then the load is expected as vlddqu
1062  ret <16 x i8> %res
1063}
1064declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
1065
1066
1067define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
1068; CHECK-LABEL: test_x86_sse41_blendvpd:
1069; CHECK:       ## BB#0:
1070; CHECK-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
1071; CHECK-NEXT:    retl
1072  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; expected to select a single vblendvpd reading xmm0, xmm1 and xmm2
1073  ret <2 x double> %res
1074}
1075declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
1076
1077
1078define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
1079; CHECK-LABEL: test_x86_sse41_blendvps:
1080; CHECK:       ## BB#0:
1081; CHECK-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
1082; CHECK-NEXT:    retl
1083  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; expected to select a single vblendvps reading xmm0, xmm1 and xmm2
1084  ret <4 x float> %res
1085}
1086declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
1087
1088
1089define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
1090; CHECK-LABEL: test_x86_sse41_dppd:
1091; CHECK:       ## BB#0:
1092; CHECK-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0
1093; CHECK-NEXT:    retl
1094  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; the i8 7 operand is expected to appear as the vdppd immediate
1095  ret <2 x double> %res
1096}
1097declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
1098
1099
1100define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
1101; CHECK-LABEL: test_x86_sse41_dpps:
1102; CHECK:       ## BB#0:
1103; CHECK-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0
1104; CHECK-NEXT:    retl
1105  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; the i8 7 operand is expected to appear as the vdpps immediate
1106  ret <4 x float> %res
1107}
1108declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
1109
1110
1111define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
1112; CHECK-LABEL: test_x86_sse41_insertps:
1113; CHECK:       ## BB#0:
1114; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[3]
1115; CHECK-NEXT:    retl
1116  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; with control byte 7 the checked result layout is zero,zero,zero,xmm0[3]
1117  ret <4 x float> %res
1118}
1119declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
1120
1121
1122
1123define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
1124; CHECK-LABEL: test_x86_sse41_mpsadbw:
1125; CHECK:       ## BB#0:
1126; CHECK-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0
1127; CHECK-NEXT:    retl
1128  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; the i8 7 operand is expected to appear as the vmpsadbw immediate
1129  ret <8 x i16> %res
1130}
1131declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
1132
1133
1134define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
1135; CHECK-LABEL: test_x86_sse41_packusdw:
1136; CHECK:       ## BB#0:
1137; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
1138; CHECK-NEXT:    retl
1139  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; expected to select a single vpackusdw on xmm0/xmm1
1140  ret <8 x i16> %res
1141}
1142declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
1143
1144
1145define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
1146; CHECK-LABEL: test_x86_sse41_pblendvb:
1147; CHECK:       ## BB#0:
1148; CHECK-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
1149; CHECK-NEXT:    retl
1150  %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; expected to select a single vpblendvb reading xmm0, xmm1 and xmm2
1151  ret <16 x i8> %res
1152}
1153declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
1154
1155
1156define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
1157; CHECK-LABEL: test_x86_sse41_phminposuw:
1158; CHECK:       ## BB#0:
1159; CHECK-NEXT:    vphminposuw %xmm0, %xmm0
1160; CHECK-NEXT:    retl
1161  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; expected to select vphminposuw with xmm0 as source and destination
1162  ret <8 x i16> %res
1163}
1164declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
1165
1166
1167define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
1168; CHECK-LABEL: test_x86_sse41_pmaxsb:
1169; CHECK:       ## BB#0:
1170; CHECK-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
1171; CHECK-NEXT:    retl
1172  %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; expected to select a single vpmaxsb on xmm0/xmm1
1173  ret <16 x i8> %res
1174}
1175declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
1176
1177
1178define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
1179; CHECK-LABEL: test_x86_sse41_pmaxsd:
1180; CHECK:       ## BB#0:
1181; CHECK-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
1182; CHECK-NEXT:    retl
1183  %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; expected to select a single vpmaxsd on xmm0/xmm1
1184  ret <4 x i32> %res
1185}
1186declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
1187
1188
1189define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
1190; CHECK-LABEL: test_x86_sse41_pmaxud:
1191; CHECK:       ## BB#0:
1192; CHECK-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
1193; CHECK-NEXT:    retl
1194  %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; expected to select a single vpmaxud on xmm0/xmm1
1195  ret <4 x i32> %res
1196}
1197declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
1198
1199
1200define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
1201; CHECK-LABEL: test_x86_sse41_pmaxuw:
1202; CHECK:       ## BB#0:
1203; CHECK-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
1204; CHECK-NEXT:    retl
1205  %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; expected to select a single vpmaxuw on xmm0/xmm1
1206  ret <8 x i16> %res
1207}
1208declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
1209
1210
1211define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
1212; CHECK-LABEL: test_x86_sse41_pminsb:
1213; CHECK:       ## BB#0:
1214; CHECK-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
1215; CHECK-NEXT:    retl
1216  %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; expected to select a single vpminsb on xmm0/xmm1
1217  ret <16 x i8> %res
1218}
1219declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
1220
1221
1222define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
1223; CHECK-LABEL: test_x86_sse41_pminsd:
1224; CHECK:       ## BB#0:
1225; CHECK-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
1226; CHECK-NEXT:    retl
1227  %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; expected to select a single vpminsd on xmm0/xmm1
1228  ret <4 x i32> %res
1229}
1230declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
1231
1232
1233define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
1234; CHECK-LABEL: test_x86_sse41_pminud:
1235; CHECK:       ## BB#0:
1236; CHECK-NEXT:    vpminud %xmm1, %xmm0, %xmm0
1237; CHECK-NEXT:    retl
1238  %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; expected to select a single vpminud on xmm0/xmm1
1239  ret <4 x i32> %res
1240}
1241declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
1242
1243
1244define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
1245; CHECK-LABEL: test_x86_sse41_pminuw:
1246; CHECK:       ## BB#0:
1247; CHECK-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
1248; CHECK-NEXT:    retl
1249  %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; expected to select a single vpminuw on xmm0/xmm1
1250  ret <8 x i16> %res
1251}
1252declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
1253
1254
1255define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
1256; CHECK-LABEL: test_x86_sse41_pmovzxbd:
1257; CHECK:       ## BB#0:
1258; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1259; CHECK-NEXT:    retl
1260  %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; expected to select vpmovzxbd; the element layout after the mnemonic is part of the check
1261  ret <4 x i32> %res
1262}
1263declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
1264
1265
1266define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
1267; CHECK-LABEL: test_x86_sse41_pmovzxbq:
1268; CHECK:       ## BB#0:
1269; CHECK-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1270; CHECK-NEXT:    retl
1271  %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; expected to select vpmovzxbq; the element layout after the mnemonic is part of the check
1272  ret <2 x i64> %res
1273}
1274declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
1275
1276
1277define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
1278; CHECK-LABEL: test_x86_sse41_pmovzxbw:
1279; CHECK:       ## BB#0:
1280; CHECK-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1281; CHECK-NEXT:    retl
1282  %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; expected to select vpmovzxbw; the element layout after the mnemonic is part of the check
1283  ret <8 x i16> %res
1284}
1285declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
1286
1287
1288define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
1289; CHECK-LABEL: test_x86_sse41_pmovzxdq:
1290; CHECK:       ## BB#0:
1291; CHECK-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1292; CHECK-NEXT:    retl
1293  %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; expected to select vpmovzxdq; the element layout after the mnemonic is part of the check
1294  ret <2 x i64> %res
1295}
1296declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
1297
1298
1299define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
1300; CHECK-LABEL: test_x86_sse41_pmovzxwd:
1301; CHECK:       ## BB#0:
1302; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1303; CHECK-NEXT:    retl
1304  %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; expected to select vpmovzxwd; the element layout after the mnemonic is part of the check
1305  ret <4 x i32> %res
1306}
1307declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
1308
1309
1310define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
1311; CHECK-LABEL: test_x86_sse41_pmovzxwq:
1312; CHECK:       ## BB#0:
1313; CHECK-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1314; CHECK-NEXT:    retl
1315  %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; expected to select vpmovzxwq; the element layout after the mnemonic is part of the check
1316  ret <2 x i64> %res
1317}
1318declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
1319
1320
1321define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
1322; CHECK-LABEL: test_x86_sse41_pmuldq:
1323; CHECK:       ## BB#0:
1324; CHECK-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0
1325; CHECK-NEXT:    retl
1326  %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; expected to select a single vpmuldq on xmm0/xmm1
1327  ret <2 x i64> %res
1328}
1329declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
1330
1331
1332define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
1333; CHECK-LABEL: test_x86_sse41_ptestc:
1334; CHECK:       ## BB#0:
1335; CHECK-NEXT:    vptest %xmm1, %xmm0
1336; CHECK-NEXT:    sbbl %eax, %eax
1337; CHECK-NEXT:    andl $1, %eax
1338; CHECK-NEXT:    retl
1339  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; i32 result is expected via vptest, sbbl, andl $1
1340  ret i32 %res
1341}
1342declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
1343
1344
1345define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
1346; CHECK-LABEL: test_x86_sse41_ptestnzc:
1347; CHECK:       ## BB#0:
1348; CHECK-NEXT:    vptest %xmm1, %xmm0
1349; CHECK-NEXT:    seta %al
1350; CHECK-NEXT:    movzbl %al, %eax
1351; CHECK-NEXT:    retl
1352  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; i32 result is expected via vptest, seta, movzbl
1353  ret i32 %res
1354}
1355declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
1356
1357
1358define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
1359; CHECK-LABEL: test_x86_sse41_ptestz:
1360; CHECK:       ## BB#0:
1361; CHECK-NEXT:    vptest %xmm1, %xmm0
1362; CHECK-NEXT:    sete %al
1363; CHECK-NEXT:    movzbl %al, %eax
1364; CHECK-NEXT:    retl
1365  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; i32 result is expected via vptest, sete, movzbl
1366  ret i32 %res
1367}
1368declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
1369
1370
1371define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
1372; CHECK-LABEL: test_x86_sse41_round_pd:
1373; CHECK:       ## BB#0:
1374; CHECK-NEXT:    vroundpd $7, %xmm0, %xmm0
1375; CHECK-NEXT:    retl
1376  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; the i32 7 operand is expected to appear as the vroundpd immediate
1377  ret <2 x double> %res
1378}
1379declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
1380
1381
1382define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
1383; CHECK-LABEL: test_x86_sse41_round_ps:
1384; CHECK:       ## BB#0:
1385; CHECK-NEXT:    vroundps $7, %xmm0, %xmm0
1386; CHECK-NEXT:    retl
1387  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; the i32 7 operand is expected to appear as the vroundps immediate
1388  ret <4 x float> %res
1389}
1390declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
1391
1392
1393define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
1394; CHECK-LABEL: test_x86_sse41_round_sd:
1395; CHECK:       ## BB#0:
1396; CHECK-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm0
1397; CHECK-NEXT:    retl
1398  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; the i32 7 operand is expected to appear as the vroundsd immediate
1399  ret <2 x double> %res
1400}
1401declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
1402
1403
1404define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
1405; CHECK-LABEL: test_x86_sse41_round_ss:
1406; CHECK:       ## BB#0:
1407; CHECK-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm0
1408; CHECK-NEXT:    retl
1409  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; the i32 7 operand is expected to appear as the vroundss immediate
1410  ret <4 x float> %res
1411}
1412declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
1413
1414
1415define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
1416; CHECK-LABEL: test_x86_sse42_pcmpestri128:
1417; CHECK:       ## BB#0:
1418; CHECK-NEXT:    movl $7, %eax
1419; CHECK-NEXT:    movl $7, %edx
1420; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
1421; CHECK-NEXT:    movl %ecx, %eax
1422; CHECK-NEXT:    retl
1423  %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; both i32 7 operands are expected in eax/edx; the result is copied out of ecx
1424  ret i32 %res
1425}
1426declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1427
1428
1429define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
1430; CHECK-LABEL: test_x86_sse42_pcmpestri128_load:
1431; CHECK:       ## BB#0:
1432; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1433; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1434; CHECK-NEXT:    vmovdqa (%eax), %xmm0
1435; CHECK-NEXT:    movl $7, %eax
1436; CHECK-NEXT:    movl $7, %edx
1437; CHECK-NEXT:    vpcmpestri $7, (%ecx), %xmm0
1438; CHECK-NEXT:    movl %ecx, %eax
1439; CHECK-NEXT:    retl
1440  %1 = load <16 x i8>, <16 x i8>* %a0
1441  %2 = load <16 x i8>, <16 x i8>* %a2
1442  %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; CHECK lines expect one load as vmovdqa and the other folded into vpcmpestri as a memory operand
1443  ret i32 %res
1444}
1445
1446
1447define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
1448; CHECK-LABEL: test_x86_sse42_pcmpestria128:
1449; CHECK:       ## BB#0:
1450; CHECK-NEXT:    movl $7, %eax
1451; CHECK-NEXT:    movl $7, %edx
1452; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
1453; CHECK-NEXT:    seta %al
1454; CHECK-NEXT:    movzbl %al, %eax
1455; CHECK-NEXT:    retl
1456  %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; i32 result is expected via seta + movzbl after vpcmpestri
1457  ret i32 %res
1458}
1459declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1460
1461
1462define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
1463; CHECK-LABEL: test_x86_sse42_pcmpestric128:
1464; CHECK:       ## BB#0:
1465; CHECK-NEXT:    movl $7, %eax
1466; CHECK-NEXT:    movl $7, %edx
1467; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
1468; CHECK-NEXT:    sbbl %eax, %eax
1469; CHECK-NEXT:    andl $1, %eax
1470; CHECK-NEXT:    retl
1471  %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; i32 result is expected via sbbl + andl $1 after vpcmpestri
1472  ret i32 %res
1473}
1474declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1475
1476
1477define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
1478; CHECK-LABEL: test_x86_sse42_pcmpestrio128:
1479; CHECK:       ## BB#0:
1480; CHECK-NEXT:    movl $7, %eax
1481; CHECK-NEXT:    movl $7, %edx
1482; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
1483; CHECK-NEXT:    seto %al
1484; CHECK-NEXT:    movzbl %al, %eax
1485; CHECK-NEXT:    retl
1486  %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; i32 result is expected via seto + movzbl after vpcmpestri
1487  ret i32 %res
1488}
1489declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1490
1491
1492define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
1493; CHECK-LABEL: test_x86_sse42_pcmpestris128:
1494; CHECK:       ## BB#0:
1495; CHECK-NEXT:    movl $7, %eax
1496; CHECK-NEXT:    movl $7, %edx
1497; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
1498; CHECK-NEXT:    sets %al
1499; CHECK-NEXT:    movzbl %al, %eax
1500; CHECK-NEXT:    retl
1501  %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; i32 result is expected via sets + movzbl after vpcmpestri
1502  ret i32 %res
1503}
1504declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1505
1506
1507define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
1508; CHECK-LABEL: test_x86_sse42_pcmpestriz128:
1509; CHECK:       ## BB#0:
1510; CHECK-NEXT:    movl $7, %eax
1511; CHECK-NEXT:    movl $7, %edx
1512; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
1513; CHECK-NEXT:    sete %al
1514; CHECK-NEXT:    movzbl %al, %eax
1515; CHECK-NEXT:    retl
1516  %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; i32 result is expected via sete + movzbl after vpcmpestri
1517  ret i32 %res
1518}
1519declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1520
1521
1522define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
1523; CHECK-LABEL: test_x86_sse42_pcmpestrm128:
1524; CHECK:       ## BB#0:
1525; CHECK-NEXT:    movl $7, %eax
1526; CHECK-NEXT:    movl $7, %edx
1527; CHECK-NEXT:    vpcmpestrm $7, %xmm1, %xmm0
1528; CHECK-NEXT:    retl
1529  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; both i32 7 operands are expected in eax/edx ahead of vpcmpestrm
1530  ret <16 x i8> %res
1531}
1532declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1533
1534
1535define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) {
1536; CHECK-LABEL: test_x86_sse42_pcmpestrm128_load:
1537; CHECK:       ## BB#0:
1538; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1539; CHECK-NEXT:    movl $7, %eax
1540; CHECK-NEXT:    movl $7, %edx
1541; CHECK-NEXT:    vpcmpestrm $7, (%ecx), %xmm0
1542; CHECK-NEXT:    retl
1543  %1 = load <16 x i8>, <16 x i8>* %a2 ; expected to fold into vpcmpestrm as the (%ecx) memory operand
1544  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; both i32 7 operands are expected in eax/edx ahead of vpcmpestrm
1545  ret <16 x i8> %res
1546}
1547
1548
1549define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
1550; CHECK-LABEL: test_x86_sse42_pcmpistri128:
1551; CHECK:       ## BB#0:
1552; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
1553; CHECK-NEXT:    movl %ecx, %eax
1554; CHECK-NEXT:    retl
1555  %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; the result is expected to be copied from ecx to eax after vpcmpistri
1556  ret i32 %res
1557}
1558declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1559
1560
1561define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
1562; CHECK-LABEL: test_x86_sse42_pcmpistri128_load:
1563; CHECK:       ## BB#0:
1564; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1565; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1566; CHECK-NEXT:    vmovdqa (%ecx), %xmm0
1567; CHECK-NEXT:    vpcmpistri $7, (%eax), %xmm0
1568; CHECK-NEXT:    movl %ecx, %eax
1569; CHECK-NEXT:    retl
1570  %1 = load <16 x i8>, <16 x i8>* %a0
1571  %2 = load <16 x i8>, <16 x i8>* %a1
1572  %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; CHECK lines expect one load as vmovdqa and the other folded into vpcmpistri as a memory operand
1573  ret i32 %res
1574}
1575
1576
1577define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
1578; CHECK-LABEL: test_x86_sse42_pcmpistria128:
1579; CHECK:       ## BB#0:
1580; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
1581; CHECK-NEXT:    seta %al
1582; CHECK-NEXT:    movzbl %al, %eax
1583; CHECK-NEXT:    retl
1584  %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; i32 result is expected via seta + movzbl after vpcmpistri
1585  ret i32 %res
1586}
1587declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1588
1589
; Calls llvm.x86.sse42.pcmpistric128 with control byte 7. The expected assembly
; is `vpcmpistri $7` followed by `sbbl %eax, %eax` and `andl $1, %eax` to
; produce the i32 result.
1590define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
1591; CHECK-LABEL: test_x86_sse42_pcmpistric128:
1592; CHECK:       ## BB#0:
1593; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
1594; CHECK-NEXT:    sbbl %eax, %eax
1595; CHECK-NEXT:    andl $1, %eax
1596; CHECK-NEXT:    retl
1597  %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1598  ret i32 %res
1599}
1600declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1601
1602
; Calls llvm.x86.sse42.pcmpistrio128 with control byte 7. The expected assembly
; is `vpcmpistri $7` followed by `seto %al` and a zero-extension of %al into
; %eax for the i32 result.
1603define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
1604; CHECK-LABEL: test_x86_sse42_pcmpistrio128:
1605; CHECK:       ## BB#0:
1606; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
1607; CHECK-NEXT:    seto %al
1608; CHECK-NEXT:    movzbl %al, %eax
1609; CHECK-NEXT:    retl
1610  %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1611  ret i32 %res
1612}
1613declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1614
1615
; Calls llvm.x86.sse42.pcmpistris128 with control byte 7. The expected assembly
; is `vpcmpistri $7` followed by `sets %al` and a zero-extension of %al into
; %eax for the i32 result.
1616define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
1617; CHECK-LABEL: test_x86_sse42_pcmpistris128:
1618; CHECK:       ## BB#0:
1619; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
1620; CHECK-NEXT:    sets %al
1621; CHECK-NEXT:    movzbl %al, %eax
1622; CHECK-NEXT:    retl
1623  %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1624  ret i32 %res
1625}
1626declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1627
1628
; Calls llvm.x86.sse42.pcmpistriz128 with control byte 7. The expected assembly
; is `vpcmpistri $7` followed by `sete %al` and a zero-extension of %al into
; %eax for the i32 result.
1629define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
1630; CHECK-LABEL: test_x86_sse42_pcmpistriz128:
1631; CHECK:       ## BB#0:
1632; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
1633; CHECK-NEXT:    sete %al
1634; CHECK-NEXT:    movzbl %al, %eax
1635; CHECK-NEXT:    retl
1636  %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1637  ret i32 %res
1638}
1639declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1640
1641
; Calls llvm.x86.sse42.pcmpistrm128 on two register operands with control
; byte 7. The expected assembly is a single `vpcmpistrm $7, %xmm1, %xmm0`
; before the return.
1642define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
1643; CHECK-LABEL: test_x86_sse42_pcmpistrm128:
1644; CHECK:       ## BB#0:
1645; CHECK-NEXT:    vpcmpistrm $7, %xmm1, %xmm0
1646; CHECK-NEXT:    retl
1647  %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
1648  ret <16 x i8> %res
1649}
1650declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1651
1652
; Memory-operand variant of the pcmpistrm128 test: the second vector is loaded
; from %a1. The expected assembly reads the pointer from the stack and uses it
; directly as the memory operand of `vpcmpistrm $7`.
1653define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) {
1654; CHECK-LABEL: test_x86_sse42_pcmpistrm128_load:
1655; CHECK:       ## BB#0:
1656; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1657; CHECK-NEXT:    vpcmpistrm $7, (%eax), %xmm0
1658; CHECK-NEXT:    retl
1659  %1 = load <16 x i8>, <16 x i8>* %a1
1660  %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
1661  ret <16 x i8> %res
1662}
1663
1664
; Calls llvm.x86.sse.add.ss on two <4 x float> arguments; the expected assembly
; is a single `vaddss %xmm1, %xmm0, %xmm0` before the return.
1665define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
1666; CHECK-LABEL: test_x86_sse_add_ss:
1667; CHECK:       ## BB#0:
1668; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
1669; CHECK-NEXT:    retl
1670  %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1671  ret <4 x float> %res
1672}
1673declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
1674
1675
; Calls llvm.x86.sse.cmp.ps with predicate immediate 7; the expected assembly
; prints that predicate through the `vcmpordps` mnemonic.
1676define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
1677; CHECK-LABEL: test_x86_sse_cmp_ps:
1678; CHECK:       ## BB#0:
1679; CHECK-NEXT:    vcmpordps %xmm1, %xmm0, %xmm0
1680; CHECK-NEXT:    retl
1681  %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
1682  ret <4 x float> %res
1683}
1684declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
1685
1686
; Calls llvm.x86.sse.cmp.ss with predicate immediate 7; the expected assembly
; prints that predicate through the `vcmpordss` mnemonic.
1687define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
1688; CHECK-LABEL: test_x86_sse_cmp_ss:
1689; CHECK:       ## BB#0:
1690; CHECK-NEXT:    vcmpordss %xmm1, %xmm0, %xmm0
1691; CHECK-NEXT:    retl
1692  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
1693  ret <4 x float> %res
1694}
1695declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
1696
1697
; Calls llvm.x86.sse.comieq.ss; the expected assembly is `vcomiss` followed by
; `sete %al` and a zero-extension of %al into %eax for the i32 result.
1698define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
1699; CHECK-LABEL: test_x86_sse_comieq_ss:
1700; CHECK:       ## BB#0:
1701; CHECK-NEXT:    vcomiss %xmm1, %xmm0
1702; CHECK-NEXT:    sete %al
1703; CHECK-NEXT:    movzbl %al, %eax
1704; CHECK-NEXT:    retl
1705  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1706  ret i32 %res
1707}
1708declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
1709
1710
; Calls llvm.x86.sse.comige.ss; the expected assembly is `vcomiss` followed by
; `setae %al` and a zero-extension of %al into %eax for the i32 result.
1711define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
1712; CHECK-LABEL: test_x86_sse_comige_ss:
1713; CHECK:       ## BB#0:
1714; CHECK-NEXT:    vcomiss %xmm1, %xmm0
1715; CHECK-NEXT:    setae %al
1716; CHECK-NEXT:    movzbl %al, %eax
1717; CHECK-NEXT:    retl
1718  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1719  ret i32 %res
1720}
1721declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone
1722
1723
; Calls llvm.x86.sse.comigt.ss; the expected assembly is `vcomiss` followed by
; `seta %al` and a zero-extension of %al into %eax for the i32 result.
1724define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
1725; CHECK-LABEL: test_x86_sse_comigt_ss:
1726; CHECK:       ## BB#0:
1727; CHECK-NEXT:    vcomiss %xmm1, %xmm0
1728; CHECK-NEXT:    seta %al
1729; CHECK-NEXT:    movzbl %al, %eax
1730; CHECK-NEXT:    retl
1731  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1732  ret i32 %res
1733}
1734declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone
1735
1736
; Calls llvm.x86.sse.comile.ss; the expected assembly is `vcomiss` followed by
; `setbe %al` and a zero-extension of %al into %eax for the i32 result.
1737define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
1738; CHECK-LABEL: test_x86_sse_comile_ss:
1739; CHECK:       ## BB#0:
1740; CHECK-NEXT:    vcomiss %xmm1, %xmm0
1741; CHECK-NEXT:    setbe %al
1742; CHECK-NEXT:    movzbl %al, %eax
1743; CHECK-NEXT:    retl
1744  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1745  ret i32 %res
1746}
1747declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
1748
1749
; Calls llvm.x86.sse.comilt.ss; the expected assembly is `vcomiss` followed by
; `sbbl %eax, %eax` and `andl $1, %eax` to produce the i32 result.
1750define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
1751; CHECK-LABEL: test_x86_sse_comilt_ss:
1752; CHECK:       ## BB#0:
1753; CHECK-NEXT:    vcomiss %xmm1, %xmm0
1754; CHECK-NEXT:    sbbl %eax, %eax
1755; CHECK-NEXT:    andl $1, %eax
1756; CHECK-NEXT:    retl
1757  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1758  ret i32 %res
1759}
1760declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
1761
1762
; Calls llvm.x86.sse.comineq.ss; the expected assembly is `vcomiss` followed by
; `setne %al` and a zero-extension of %al into %eax for the i32 result.
1763define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
1764; CHECK-LABEL: test_x86_sse_comineq_ss:
1765; CHECK:       ## BB#0:
1766; CHECK-NEXT:    vcomiss %xmm1, %xmm0
1767; CHECK-NEXT:    setne %al
1768; CHECK-NEXT:    movzbl %al, %eax
1769; CHECK-NEXT:    retl
1770  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1771  ret i32 %res
1772}
1773declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone
1774
1775
; Calls llvm.x86.sse.cvtsi2ss with the constant integer 7. The expected
; assembly moves 7 into %eax and then issues `vcvtsi2ssl %eax, %xmm0, %xmm0`.
1776define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
1777; CHECK-LABEL: test_x86_sse_cvtsi2ss:
1778; CHECK:       ## BB#0:
1779; CHECK-NEXT:    movl $7, %eax
1780; CHECK-NEXT:    vcvtsi2ssl %eax, %xmm0, %xmm0
1781; CHECK-NEXT:    retl
1782  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
1783  ret <4 x float> %res
1784}
1785declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
1786
1787
; Calls llvm.x86.sse.cvtss2si; the expected assembly is a single
; `vcvtss2si %xmm0, %eax` before the return.
1788define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
1789; CHECK-LABEL: test_x86_sse_cvtss2si:
1790; CHECK:       ## BB#0:
1791; CHECK-NEXT:    vcvtss2si %xmm0, %eax
1792; CHECK-NEXT:    retl
1793  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
1794  ret i32 %res
1795}
1796declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
1797
1798
; Calls llvm.x86.sse.cvttss2si; the expected assembly is a single
; `vcvttss2si %xmm0, %eax` before the return.
1799define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
1800; CHECK-LABEL: test_x86_sse_cvttss2si:
1801; CHECK:       ## BB#0:
1802; CHECK-NEXT:    vcvttss2si %xmm0, %eax
1803; CHECK-NEXT:    retl
1804  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
1805  ret i32 %res
1806}
1807declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
1808
1809
; Calls llvm.x86.sse.div.ss on two <4 x float> arguments; the expected assembly
; is a single `vdivss %xmm1, %xmm0, %xmm0` before the return.
1810define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
1811; CHECK-LABEL: test_x86_sse_div_ss:
1812; CHECK:       ## BB#0:
1813; CHECK-NEXT:    vdivss %xmm1, %xmm0, %xmm0
1814; CHECK-NEXT:    retl
1815  %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1816  ret <4 x float> %res
1817}
1818declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
1819
1820
; Calls llvm.x86.sse.ldmxcsr with the pointer argument %a0. The expected
; assembly reads the pointer from the stack into %eax and issues
; `vldmxcsr (%eax)`.
1821define void @test_x86_sse_ldmxcsr(i8* %a0) {
1822; CHECK-LABEL: test_x86_sse_ldmxcsr:
1823; CHECK:       ## BB#0:
1824; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1825; CHECK-NEXT:    vldmxcsr (%eax)
1826; CHECK-NEXT:    retl
1827  call void @llvm.x86.sse.ldmxcsr(i8* %a0)
1828  ret void
1829}
1830declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
1831
1832
1833
; Calls llvm.x86.sse.max.ps on two <4 x float> arguments; the expected assembly
; is a single `vmaxps %xmm1, %xmm0, %xmm0` before the return.
1834define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
1835; CHECK-LABEL: test_x86_sse_max_ps:
1836; CHECK:       ## BB#0:
1837; CHECK-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
1838; CHECK-NEXT:    retl
1839  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1840  ret <4 x float> %res
1841}
1842declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
1843
1844
; Calls llvm.x86.sse.max.ss on two <4 x float> arguments; the expected assembly
; is a single `vmaxss %xmm1, %xmm0, %xmm0` before the return.
1845define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
1846; CHECK-LABEL: test_x86_sse_max_ss:
1847; CHECK:       ## BB#0:
1848; CHECK-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
1849; CHECK-NEXT:    retl
1850  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1851  ret <4 x float> %res
1852}
1853declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
1854
1855
; Calls llvm.x86.sse.min.ps on two <4 x float> arguments; the expected assembly
; is a single `vminps %xmm1, %xmm0, %xmm0` before the return.
1856define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
1857; CHECK-LABEL: test_x86_sse_min_ps:
1858; CHECK:       ## BB#0:
1859; CHECK-NEXT:    vminps %xmm1, %xmm0, %xmm0
1860; CHECK-NEXT:    retl
1861  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1862  ret <4 x float> %res
1863}
1864declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
1865
1866
; Calls llvm.x86.sse.min.ss on two <4 x float> arguments; the expected assembly
; is a single `vminss %xmm1, %xmm0, %xmm0` before the return.
1867define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
1868; CHECK-LABEL: test_x86_sse_min_ss:
1869; CHECK:       ## BB#0:
1870; CHECK-NEXT:    vminss %xmm1, %xmm0, %xmm0
1871; CHECK-NEXT:    retl
1872  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1873  ret <4 x float> %res
1874}
1875declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
1876
1877
; Calls llvm.x86.sse.movmsk.ps; the expected assembly is a single
; `vmovmskps %xmm0, %eax` before the return.
1878define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
1879; CHECK-LABEL: test_x86_sse_movmsk_ps:
1880; CHECK:       ## BB#0:
1881; CHECK-NEXT:    vmovmskps %xmm0, %eax
1882; CHECK-NEXT:    retl
1883  %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
1884  ret i32 %res
1885}
1886declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
1887
1888
1889
; Calls llvm.x86.sse.mul.ss on two <4 x float> arguments; the expected assembly
; is a single `vmulss %xmm1, %xmm0, %xmm0` before the return.
1890define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
1891; CHECK-LABEL: test_x86_sse_mul_ss:
1892; CHECK:       ## BB#0:
1893; CHECK-NEXT:    vmulss %xmm1, %xmm0, %xmm0
1894; CHECK-NEXT:    retl
1895  %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1896  ret <4 x float> %res
1897}
1898declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
1899
1900
; Calls llvm.x86.sse.rcp.ps on one <4 x float> argument; the expected assembly
; is a single `vrcpps %xmm0, %xmm0` before the return.
1901define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
1902; CHECK-LABEL: test_x86_sse_rcp_ps:
1903; CHECK:       ## BB#0:
1904; CHECK-NEXT:    vrcpps %xmm0, %xmm0
1905; CHECK-NEXT:    retl
1906  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
1907  ret <4 x float> %res
1908}
1909declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
1910
1911
; Calls llvm.x86.sse.rcp.ss on one <4 x float> argument; the expected assembly
; is the three-operand form `vrcpss %xmm0, %xmm0, %xmm0` before the return.
1912define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
1913; CHECK-LABEL: test_x86_sse_rcp_ss:
1914; CHECK:       ## BB#0:
1915; CHECK-NEXT:    vrcpss %xmm0, %xmm0, %xmm0
1916; CHECK-NEXT:    retl
1917  %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
1918  ret <4 x float> %res
1919}
1920declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
1921
1922
; Calls llvm.x86.sse.rsqrt.ps on one <4 x float> argument; the expected
; assembly is a single `vrsqrtps %xmm0, %xmm0` before the return.
1923define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
1924; CHECK-LABEL: test_x86_sse_rsqrt_ps:
1925; CHECK:       ## BB#0:
1926; CHECK-NEXT:    vrsqrtps %xmm0, %xmm0
1927; CHECK-NEXT:    retl
1928  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
1929  ret <4 x float> %res
1930}
1931declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
1932
1933
; Calls llvm.x86.sse.rsqrt.ss on one <4 x float> argument; the expected
; assembly is the three-operand form `vrsqrtss %xmm0, %xmm0, %xmm0`.
1934define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
1935; CHECK-LABEL: test_x86_sse_rsqrt_ss:
1936; CHECK:       ## BB#0:
1937; CHECK-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0
1938; CHECK-NEXT:    retl
1939  %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
1940  ret <4 x float> %res
1941}
1942declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
1943
1944
; Calls llvm.x86.sse.sqrt.ps on one <4 x float> argument; the expected assembly
; is a single `vsqrtps %xmm0, %xmm0` before the return.
1945define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
1946; CHECK-LABEL: test_x86_sse_sqrt_ps:
1947; CHECK:       ## BB#0:
1948; CHECK-NEXT:    vsqrtps %xmm0, %xmm0
1949; CHECK-NEXT:    retl
1950  %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
1951  ret <4 x float> %res
1952}
1953declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
1954
1955
; Calls llvm.x86.sse.sqrt.ss on one <4 x float> argument; the expected assembly
; is the three-operand form `vsqrtss %xmm0, %xmm0, %xmm0` before the return.
1956define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
1957; CHECK-LABEL: test_x86_sse_sqrt_ss:
1958; CHECK:       ## BB#0:
1959; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
1960; CHECK-NEXT:    retl
1961  %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
1962  ret <4 x float> %res
1963}
1964declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
1965
1966
; Calls llvm.x86.sse.stmxcsr with the pointer argument %a0. The expected
; assembly reads the pointer from the stack into %eax and issues
; `vstmxcsr (%eax)`.
1967define void @test_x86_sse_stmxcsr(i8* %a0) {
1968; CHECK-LABEL: test_x86_sse_stmxcsr:
1969; CHECK:       ## BB#0:
1970; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1971; CHECK-NEXT:    vstmxcsr (%eax)
1972; CHECK-NEXT:    retl
1973  call void @llvm.x86.sse.stmxcsr(i8* %a0)
1974  ret void
1975}
1976declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
1977
1978
; Calls llvm.x86.sse.storeu.ps to store %a1 through the pointer %a0. The
; expected assembly reads the pointer from the stack into %eax and issues
; `vmovups %xmm0, (%eax)`.
1979define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
1980; CHECK-LABEL: test_x86_sse_storeu_ps:
1981; CHECK:       ## BB#0:
1982; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1983; CHECK-NEXT:    vmovups %xmm0, (%eax)
1984; CHECK-NEXT:    retl
1985  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
1986  ret void
1987}
1988declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
1989
1990
; Calls llvm.x86.sse.sub.ss on two <4 x float> arguments; the expected assembly
; is a single `vsubss %xmm1, %xmm0, %xmm0` before the return.
1991define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
1992; CHECK-LABEL: test_x86_sse_sub_ss:
1993; CHECK:       ## BB#0:
1994; CHECK-NEXT:    vsubss %xmm1, %xmm0, %xmm0
1995; CHECK-NEXT:    retl
1996  %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1997  ret <4 x float> %res
1998}
1999declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
2000
2001
; Calls llvm.x86.sse.ucomieq.ss; the expected assembly is `vucomiss` followed
; by `sete %al` and a zero-extension of %al into %eax for the i32 result.
2002define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
2003; CHECK-LABEL: test_x86_sse_ucomieq_ss:
2004; CHECK:       ## BB#0:
2005; CHECK-NEXT:    vucomiss %xmm1, %xmm0
2006; CHECK-NEXT:    sete %al
2007; CHECK-NEXT:    movzbl %al, %eax
2008; CHECK-NEXT:    retl
2009  %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2010  ret i32 %res
2011}
2012declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
2013
2014
; Calls llvm.x86.sse.ucomige.ss; the expected assembly is `vucomiss` followed
; by `setae %al` and a zero-extension of %al into %eax for the i32 result.
2015define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
2016; CHECK-LABEL: test_x86_sse_ucomige_ss:
2017; CHECK:       ## BB#0:
2018; CHECK-NEXT:    vucomiss %xmm1, %xmm0
2019; CHECK-NEXT:    setae %al
2020; CHECK-NEXT:    movzbl %al, %eax
2021; CHECK-NEXT:    retl
2022  %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2023  ret i32 %res
2024}
2025declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
2026
2027
; Calls llvm.x86.sse.ucomigt.ss; the expected assembly is `vucomiss` followed
; by `seta %al` and a zero-extension of %al into %eax for the i32 result.
2028define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
2029; CHECK-LABEL: test_x86_sse_ucomigt_ss:
2030; CHECK:       ## BB#0:
2031; CHECK-NEXT:    vucomiss %xmm1, %xmm0
2032; CHECK-NEXT:    seta %al
2033; CHECK-NEXT:    movzbl %al, %eax
2034; CHECK-NEXT:    retl
2035  %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2036  ret i32 %res
2037}
2038declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
2039
2040
; Calls llvm.x86.sse.ucomile.ss; the expected assembly is `vucomiss` followed
; by `setbe %al` and a zero-extension of %al into %eax for the i32 result.
2041define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
2042; CHECK-LABEL: test_x86_sse_ucomile_ss:
2043; CHECK:       ## BB#0:
2044; CHECK-NEXT:    vucomiss %xmm1, %xmm0
2045; CHECK-NEXT:    setbe %al
2046; CHECK-NEXT:    movzbl %al, %eax
2047; CHECK-NEXT:    retl
2048  %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2049  ret i32 %res
2050}
2051declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
2052
2053
; Calls llvm.x86.sse.ucomilt.ss; the expected assembly is `vucomiss` followed
; by `sbbl %eax, %eax` and `andl $1, %eax` to produce the i32 result.
2054define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
2055; CHECK-LABEL: test_x86_sse_ucomilt_ss:
2056; CHECK:       ## BB#0:
2057; CHECK-NEXT:    vucomiss %xmm1, %xmm0
2058; CHECK-NEXT:    sbbl %eax, %eax
2059; CHECK-NEXT:    andl $1, %eax
2060; CHECK-NEXT:    retl
2061  %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2062  ret i32 %res
2063}
2064declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
2065
2066
; Calls llvm.x86.sse.ucomineq.ss; the expected assembly is `vucomiss` followed
; by `setne %al` and a zero-extension of %al into %eax for the i32 result.
2067define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
2068; CHECK-LABEL: test_x86_sse_ucomineq_ss:
2069; CHECK:       ## BB#0:
2070; CHECK-NEXT:    vucomiss %xmm1, %xmm0
2071; CHECK-NEXT:    setne %al
2072; CHECK-NEXT:    movzbl %al, %eax
2073; CHECK-NEXT:    retl
2074  %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2075  ret i32 %res
2076}
2077declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
2078
2079
; Calls llvm.x86.ssse3.pabs.b.128 on a <16 x i8> argument; the expected
; assembly is a single `vpabsb %xmm0, %xmm0` before the return.
2080define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
2081; CHECK-LABEL: test_x86_ssse3_pabs_b_128:
2082; CHECK:       ## BB#0:
2083; CHECK-NEXT:    vpabsb %xmm0, %xmm0
2084; CHECK-NEXT:    retl
2085  %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
2086  ret <16 x i8> %res
2087}
2088declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
2089
2090
; Calls llvm.x86.ssse3.pabs.d.128 on a <4 x i32> argument; the expected
; assembly is a single `vpabsd %xmm0, %xmm0` before the return.
2091define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
2092; CHECK-LABEL: test_x86_ssse3_pabs_d_128:
2093; CHECK:       ## BB#0:
2094; CHECK-NEXT:    vpabsd %xmm0, %xmm0
2095; CHECK-NEXT:    retl
2096  %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
2097  ret <4 x i32> %res
2098}
2099declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
2100
2101
; Calls llvm.x86.ssse3.pabs.w.128 on an <8 x i16> argument; the expected
; assembly is a single `vpabsw %xmm0, %xmm0` before the return.
2102define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
2103; CHECK-LABEL: test_x86_ssse3_pabs_w_128:
2104; CHECK:       ## BB#0:
2105; CHECK-NEXT:    vpabsw %xmm0, %xmm0
2106; CHECK-NEXT:    retl
2107  %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
2108  ret <8 x i16> %res
2109}
2110declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
2111
2112
; Calls llvm.x86.ssse3.phadd.d.128 on two <4 x i32> arguments; the expected
; assembly is a single `vphaddd %xmm1, %xmm0, %xmm0` before the return.
2113define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
2114; CHECK-LABEL: test_x86_ssse3_phadd_d_128:
2115; CHECK:       ## BB#0:
2116; CHECK-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
2117; CHECK-NEXT:    retl
2118  %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
2119  ret <4 x i32> %res
2120}
2121declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
2122
2123
; Calls llvm.x86.ssse3.phadd.sw.128 on two <8 x i16> arguments; the expected
; assembly is a single `vphaddsw %xmm1, %xmm0, %xmm0` before the return.
2124define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
2125; CHECK-LABEL: test_x86_ssse3_phadd_sw_128:
2126; CHECK:       ## BB#0:
2127; CHECK-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0
2128; CHECK-NEXT:    retl
2129  %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2130  ret <8 x i16> %res
2131}
2132declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
2133
2134
; Calls llvm.x86.ssse3.phadd.w.128 on two <8 x i16> arguments; the expected
; assembly is a single `vphaddw %xmm1, %xmm0, %xmm0` before the return.
2135define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
2136; CHECK-LABEL: test_x86_ssse3_phadd_w_128:
2137; CHECK:       ## BB#0:
2138; CHECK-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
2139; CHECK-NEXT:    retl
2140  %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2141  ret <8 x i16> %res
2142}
2143declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
2144
2145
; Calls llvm.x86.ssse3.phsub.d.128 on two <4 x i32> arguments; the expected
; assembly is a single `vphsubd %xmm1, %xmm0, %xmm0` before the return.
2146define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
2147; CHECK-LABEL: test_x86_ssse3_phsub_d_128:
2148; CHECK:       ## BB#0:
2149; CHECK-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
2150; CHECK-NEXT:    retl
2151  %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
2152  ret <4 x i32> %res
2153}
2154declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
2155
2156
; Calls llvm.x86.ssse3.phsub.sw.128 on two <8 x i16> arguments; the expected
; assembly is a single `vphsubsw %xmm1, %xmm0, %xmm0` before the return.
2157define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
2158; CHECK-LABEL: test_x86_ssse3_phsub_sw_128:
2159; CHECK:       ## BB#0:
2160; CHECK-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0
2161; CHECK-NEXT:    retl
2162  %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2163  ret <8 x i16> %res
2164}
2165declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
2166
2167
; Calls llvm.x86.ssse3.phsub.w.128 on two <8 x i16> arguments; the expected
; assembly is a single `vphsubw %xmm1, %xmm0, %xmm0` before the return.
2168define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
2169; CHECK-LABEL: test_x86_ssse3_phsub_w_128:
2170; CHECK:       ## BB#0:
2171; CHECK-NEXT:    vphsubw %xmm1, %xmm0, %xmm0
2172; CHECK-NEXT:    retl
2173  %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2174  ret <8 x i16> %res
2175}
2176declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
2177
2178
; Calls llvm.x86.ssse3.pmadd.ub.sw.128, which takes two <16 x i8> arguments and
; returns <8 x i16>; the expected assembly is a single
; `vpmaddubsw %xmm1, %xmm0, %xmm0` before the return.
2179define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
2180; CHECK-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
2181; CHECK:       ## BB#0:
2182; CHECK-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0
2183; CHECK-NEXT:    retl
2184  %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
2185  ret <8 x i16> %res
2186}
2187declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
2188
2189
; Calls llvm.x86.ssse3.pmul.hr.sw.128 on two <8 x i16> arguments; the expected
; assembly is a single `vpmulhrsw %xmm1, %xmm0, %xmm0` before the return.
2190define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
2191; CHECK-LABEL: test_x86_ssse3_pmul_hr_sw_128:
2192; CHECK:       ## BB#0:
2193; CHECK-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0
2194; CHECK-NEXT:    retl
2195  %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2196  ret <8 x i16> %res
2197}
2198declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
2199
2200
; Calls llvm.x86.ssse3.pshuf.b.128 on two <16 x i8> arguments; the expected
; assembly is a single `vpshufb %xmm1, %xmm0, %xmm0` before the return.
2201define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
2202; CHECK-LABEL: test_x86_ssse3_pshuf_b_128:
2203; CHECK:       ## BB#0:
2204; CHECK-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
2205; CHECK-NEXT:    retl
2206  %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
2207  ret <16 x i8> %res
2208}
2209declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
2210
2211
; Calls llvm.x86.ssse3.psign.b.128 on two <16 x i8> arguments; the expected
; assembly is a single `vpsignb %xmm1, %xmm0, %xmm0` before the return.
2212define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
2213; CHECK-LABEL: test_x86_ssse3_psign_b_128:
2214; CHECK:       ## BB#0:
2215; CHECK-NEXT:    vpsignb %xmm1, %xmm0, %xmm0
2216; CHECK-NEXT:    retl
2217  %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
2218  ret <16 x i8> %res
2219}
2220declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
2221
2222
; Calls llvm.x86.ssse3.psign.d.128 on two <4 x i32> arguments; the expected
; assembly is a single `vpsignd %xmm1, %xmm0, %xmm0` before the return.
2223define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
2224; CHECK-LABEL: test_x86_ssse3_psign_d_128:
2225; CHECK:       ## BB#0:
2226; CHECK-NEXT:    vpsignd %xmm1, %xmm0, %xmm0
2227; CHECK-NEXT:    retl
2228  %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
2229  ret <4 x i32> %res
2230}
2231declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
2232
2233
; Calls llvm.x86.ssse3.psign.w.128 on two <8 x i16> arguments; the expected
; assembly is a single `vpsignw %xmm1, %xmm0, %xmm0` before the return.
2234define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
2235; CHECK-LABEL: test_x86_ssse3_psign_w_128:
2236; CHECK:       ## BB#0:
2237; CHECK-NEXT:    vpsignw %xmm1, %xmm0, %xmm0
2238; CHECK-NEXT:    retl
2239  %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2240  ret <8 x i16> %res
2241}
2242declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
2243
2244
; Calls llvm.x86.avx.addsub.pd.256 on two <4 x double> arguments; the expected
; assembly is a single 256-bit `vaddsubpd %ymm1, %ymm0, %ymm0` before the return.
2245define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
2246; CHECK-LABEL: test_x86_avx_addsub_pd_256:
2247; CHECK:       ## BB#0:
2248; CHECK-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0
2249; CHECK-NEXT:    retl
2250  %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2251  ret <4 x double> %res
2252}
2253declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
2254
2255
; Calls llvm.x86.avx.addsub.ps.256 on two <8 x float> arguments; the expected
; assembly is a single 256-bit `vaddsubps %ymm1, %ymm0, %ymm0` before the return.
2256define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
2257; CHECK-LABEL: test_x86_avx_addsub_ps_256:
2258; CHECK:       ## BB#0:
2259; CHECK-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0
2260; CHECK-NEXT:    retl
2261  %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2262  ret <8 x float> %res
2263}
2264declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
2265
2266
; Calls llvm.x86.avx.blendv.pd.256 on three <4 x double> arguments; the
; expected assembly is a single four-operand
; `vblendvpd %ymm2, %ymm1, %ymm0, %ymm0` before the return.
2267define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
2268; CHECK-LABEL: test_x86_avx_blendv_pd_256:
2269; CHECK:       ## BB#0:
2270; CHECK-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
2271; CHECK-NEXT:    retl
2272  %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
2273  ret <4 x double> %res
2274}
2275declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
2276
2277
; Calls llvm.x86.avx.blendv.ps.256 on three <8 x float> arguments; the
; expected assembly is a single four-operand
; `vblendvps %ymm2, %ymm1, %ymm0, %ymm0` before the return.
2278define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
2279; CHECK-LABEL: test_x86_avx_blendv_ps_256:
2280; CHECK:       ## BB#0:
2281; CHECK-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0
2282; CHECK-NEXT:    retl
2283  %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
2284  ret <8 x float> %res
2285}
2286declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
2287
2288
; Calls llvm.x86.avx.cmp.pd.256 with predicate immediate 7; the expected
; assembly prints that predicate through the `vcmpordpd` mnemonic on ymm
; registers.
2289define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
2290; CHECK-LABEL: test_x86_avx_cmp_pd_256:
2291; CHECK:       ## BB#0:
2292; CHECK-NEXT:    vcmpordpd %ymm1, %ymm0, %ymm0
2293; CHECK-NEXT:    retl
2294  %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
2295  ret <4 x double> %res
2296}
2297declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
2298
2299
; Calls llvm.x86.avx.cmp.ps.256 with predicate immediate 7; the expected
; assembly prints that predicate through the `vcmpordps` mnemonic on ymm
; registers.
2300define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
2301; CHECK-LABEL: test_x86_avx_cmp_ps_256:
2302; CHECK:       ## BB#0:
2303; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %ymm0
2304; CHECK-NEXT:    retl
2305  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
2306  ret <8 x float> %res
2307}
2308
2309define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
2310; CHECK-LABEL: test_x86_avx_cmp_ps_256_pseudo_op:
2311; CHECK:       ## BB#0:
2312; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1
2313; CHECK-NEXT:    vcmpltps %ymm1, %ymm0, %ymm1
2314; CHECK-NEXT:    vcmpleps %ymm1, %ymm0, %ymm1
2315; CHECK-NEXT:    vcmpunordps %ymm1, %ymm0, %ymm1
2316; CHECK-NEXT:    vcmpneqps %ymm1, %ymm0, %ymm1
2317; CHECK-NEXT:    vcmpnltps %ymm1, %ymm0, %ymm1
2318; CHECK-NEXT:    vcmpnleps %ymm1, %ymm0, %ymm1
2319; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %ymm1
2320; CHECK-NEXT:    vcmpeq_uqps %ymm1, %ymm0, %ymm1
2321; CHECK-NEXT:    vcmpngeps %ymm1, %ymm0, %ymm1
2322; CHECK-NEXT:    vcmpngtps %ymm1, %ymm0, %ymm1
2323; CHECK-NEXT:    vcmpfalseps %ymm1, %ymm0, %ymm1
2324; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %ymm1
2325; CHECK-NEXT:    vcmpgeps %ymm1, %ymm0, %ymm1
2326; CHECK-NEXT:    vcmpgtps %ymm1, %ymm0, %ymm1
2327; CHECK-NEXT:    vcmptrueps %ymm1, %ymm0, %ymm1
2328; CHECK-NEXT:    vcmpeq_osps %ymm1, %ymm0, %ymm1
2329; CHECK-NEXT:    vcmplt_oqps %ymm1, %ymm0, %ymm1
2330; CHECK-NEXT:    vcmple_oqps %ymm1, %ymm0, %ymm1
2331; CHECK-NEXT:    vcmpunord_sps %ymm1, %ymm0, %ymm1
2332; CHECK-NEXT:    vcmpneq_usps %ymm1, %ymm0, %ymm1
2333; CHECK-NEXT:    vcmpnlt_uqps %ymm1, %ymm0, %ymm1
2334; CHECK-NEXT:    vcmpnle_uqps %ymm1, %ymm0, %ymm1
2335; CHECK-NEXT:    vcmpord_sps %ymm1, %ymm0, %ymm1
2336; CHECK-NEXT:    vcmpeq_usps %ymm1, %ymm0, %ymm1
2337; CHECK-NEXT:    vcmpnge_uqps %ymm1, %ymm0, %ymm1
2338; CHECK-NEXT:    vcmpngt_uqps %ymm1, %ymm0, %ymm1
2339; CHECK-NEXT:    vcmpfalse_osps %ymm1, %ymm0, %ymm1
2340; CHECK-NEXT:    vcmpneq_osps %ymm1, %ymm0, %ymm1
2341; CHECK-NEXT:    vcmpge_oqps %ymm1, %ymm0, %ymm1
2342; CHECK-NEXT:    vcmpgt_oqps %ymm1, %ymm0, %ymm1
2343; CHECK-NEXT:    vcmptrue_usps %ymm1, %ymm0, %ymm0
2344; CHECK-NEXT:    retl
2345  %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
2346  %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
2347  %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
2348  %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
2349  %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
2350  %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
2351  %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
2352  %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
2353  %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
2354  %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
2355  %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
2356  %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
2357  %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
2358  %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
2359  %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
2360  %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
2361  %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
2362  %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
2363  %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
2364  %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
2365  %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
2366  %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
2367  %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
2368  %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
2369  %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
2370  %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
2371  %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
2372  %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
2373  %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
2374  %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
2375  %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
2376  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
2377  ret <8 x float> %res
2378}
2379declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
2380
2381
; Lowering test for llvm.x86.avx.cvt.pd2.ps.256: the <4 x double> to <4 x float>
; conversion is expected to select vcvtpd2psy (ymm source, xmm destination),
; followed by vzeroupper before the return.
2382define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
2383; CHECK-LABEL: test_x86_avx_cvt_pd2_ps_256:
2384; CHECK:       ## BB#0:
2385; CHECK-NEXT:    vcvtpd2psy %ymm0, %xmm0
2386; CHECK-NEXT:    vzeroupper
2387; CHECK-NEXT:    retl
2388  %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
2389  ret <4 x float> %res
2390}
2391declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone
2392
2393
; Lowering test for llvm.x86.avx.cvt.pd2dq.256: the <4 x double> to <4 x i32>
; conversion is expected to select vcvtpd2dqy (ymm source, xmm destination),
; followed by vzeroupper before the return.
2394define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
2395; CHECK-LABEL: test_x86_avx_cvt_pd2dq_256:
2396; CHECK:       ## BB#0:
2397; CHECK-NEXT:    vcvtpd2dqy %ymm0, %xmm0
2398; CHECK-NEXT:    vzeroupper
2399; CHECK-NEXT:    retl
2400  %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
2401  ret <4 x i32> %res
2402}
2403declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
2404
2405
; Lowering test for llvm.x86.avx.cvt.ps2.pd.256: the <4 x float> to <4 x double>
; widening conversion is expected to select vcvtps2pd (xmm source, ymm result).
2406define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
2407; CHECK-LABEL: test_x86_avx_cvt_ps2_pd_256:
2408; CHECK:       ## BB#0:
2409; CHECK-NEXT:    vcvtps2pd %xmm0, %ymm0
2410; CHECK-NEXT:    retl
2411  %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
2412  ret <4 x double> %res
2413}
2414declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
2415
2416
; Lowering test for llvm.x86.avx.cvt.ps2dq.256: the <8 x float> to <8 x i32>
; conversion is expected to select a single ymm-to-ymm vcvtps2dq.
2417define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
2418; CHECK-LABEL: test_x86_avx_cvt_ps2dq_256:
2419; CHECK:       ## BB#0:
2420; CHECK-NEXT:    vcvtps2dq %ymm0, %ymm0
2421; CHECK-NEXT:    retl
2422  %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
2423  ret <8 x i32> %res
2424}
2425declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
2426
2427
; Lowering test for llvm.x86.avx.cvtdq2.pd.256: the <4 x i32> to <4 x double>
; conversion is expected to select vcvtdq2pd (xmm source, ymm result).
2428define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
2429; CHECK-LABEL: test_x86_avx_cvtdq2_pd_256:
2430; CHECK:       ## BB#0:
2431; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
2432; CHECK-NEXT:    retl
2433  %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
2434  ret <4 x double> %res
2435}
2436declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
2437
2438
; Lowering test for llvm.x86.avx.cvtdq2.ps.256: the <8 x i32> to <8 x float>
; conversion is expected to select a single ymm-to-ymm vcvtdq2ps.
2439define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
2440; CHECK-LABEL: test_x86_avx_cvtdq2_ps_256:
2441; CHECK:       ## BB#0:
2442; CHECK-NEXT:    vcvtdq2ps %ymm0, %ymm0
2443; CHECK-NEXT:    retl
2444  %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
2445  ret <8 x float> %res
2446}
2447declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
2448
2449
; Lowering test for llvm.x86.avx.cvtt.pd2dq.256: the <4 x double> to <4 x i32>
; conversion is expected to select vcvttpd2dqy (ymm source, xmm destination),
; followed by vzeroupper before the return.
2450define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
2451; CHECK-LABEL: test_x86_avx_cvtt_pd2dq_256:
2452; CHECK:       ## BB#0:
2453; CHECK-NEXT:    vcvttpd2dqy %ymm0, %xmm0
2454; CHECK-NEXT:    vzeroupper
2455; CHECK-NEXT:    retl
2456  %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
2457  ret <4 x i32> %res
2458}
2459declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
2460
2461
; Lowering test for llvm.x86.avx.cvtt.ps2dq.256: the <8 x float> to <8 x i32>
; conversion is expected to select a single ymm-to-ymm vcvttps2dq.
2462define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
2463; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256:
2464; CHECK:       ## BB#0:
2465; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
2466; CHECK-NEXT:    retl
2467  %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
2468  ret <8 x i32> %res
2469}
2470declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
2471
2472
; Lowering test for llvm.x86.avx.dp.ps.256: the call with control byte 7 is
; expected to select vdpps with that same value as its $7 immediate operand.
2473define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
2474; CHECK-LABEL: test_x86_avx_dp_ps_256:
2475; CHECK:       ## BB#0:
2476; CHECK-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0
2477; CHECK-NEXT:    retl
2478  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
2479  ret <8 x float> %res
2480}
2481declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
2482
2483
; Lowering test for llvm.x86.avx.hadd.pd.256: expected to select a single
; three-operand vhaddpd on ymm registers.
2484define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
2485; CHECK-LABEL: test_x86_avx_hadd_pd_256:
2486; CHECK:       ## BB#0:
2487; CHECK-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
2488; CHECK-NEXT:    retl
2489  %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2490  ret <4 x double> %res
2491}
2492declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
2493
2494
; Lowering test for llvm.x86.avx.hadd.ps.256: expected to select a single
; three-operand vhaddps on ymm registers.
2495define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
2496; CHECK-LABEL: test_x86_avx_hadd_ps_256:
2497; CHECK:       ## BB#0:
2498; CHECK-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
2499; CHECK-NEXT:    retl
2500  %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2501  ret <8 x float> %res
2502}
2503declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
2504
2505
; Lowering test for llvm.x86.avx.hsub.pd.256: expected to select a single
; three-operand vhsubpd on ymm registers.
2506define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
2507; CHECK-LABEL: test_x86_avx_hsub_pd_256:
2508; CHECK:       ## BB#0:
2509; CHECK-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0
2510; CHECK-NEXT:    retl
2511  %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2512  ret <4 x double> %res
2513}
2514declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
2515
2516
; Lowering test for llvm.x86.avx.hsub.ps.256: expected to select a single
; three-operand vhsubps on ymm registers.
2517define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
2518; CHECK-LABEL: test_x86_avx_hsub_ps_256:
2519; CHECK:       ## BB#0:
2520; CHECK-NEXT:    vhsubps %ymm1, %ymm0, %ymm0
2521; CHECK-NEXT:    retl
2522  %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2523  ret <8 x float> %res
2524}
2525declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
2526
2527
; Lowering test for llvm.x86.avx.ldu.dq.256: the pointer argument is first
; loaded from the stack into %eax, then the intrinsic is expected to select
; vlddqu from (%eax) into a ymm register.
2528define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
2529; CHECK-LABEL: test_x86_avx_ldu_dq_256:
2530; CHECK:       ## BB#0:
2531; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2532; CHECK-NEXT:    vlddqu (%eax), %ymm0
2533; CHECK-NEXT:    retl
2534  %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
2535  ret <32 x i8> %res
2536}
2537declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
2538
2539
; Lowering test for llvm.x86.avx.maskload.pd (128-bit): the pointer is loaded
; from the stack into %eax and the intrinsic is expected to select the load
; form of vmaskmovpd, with the mask in %xmm0 and the result written to %xmm0.
2540define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x i64> %mask) {
2541; CHECK-LABEL: test_x86_avx_maskload_pd:
2542; CHECK:       ## BB#0:
2543; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2544; CHECK-NEXT:    vmaskmovpd (%eax), %xmm0, %xmm0
2545; CHECK-NEXT:    retl
2546  %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1]
2547  ret <2 x double> %res
2548}
2549declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readonly
2550
2551
; Lowering test for llvm.x86.avx.maskload.pd.256: the pointer is loaded from
; the stack into %eax and the intrinsic is expected to select the load form of
; vmaskmovpd, with the mask in %ymm0 and the result written to %ymm0.
2552define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x i64> %mask) {
2553; CHECK-LABEL: test_x86_avx_maskload_pd_256:
2554; CHECK:       ## BB#0:
2555; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2556; CHECK-NEXT:    vmaskmovpd (%eax), %ymm0, %ymm0
2557; CHECK-NEXT:    retl
2558  %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1]
2559  ret <4 x double> %res
2560}
2561declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readonly
2562
2563
; Lowering test for llvm.x86.avx.maskload.ps (128-bit): the pointer is loaded
; from the stack into %eax and the intrinsic is expected to select the load
; form of vmaskmovps, with the mask in %xmm0 and the result written to %xmm0.
2564define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x i32> %mask) {
2565; CHECK-LABEL: test_x86_avx_maskload_ps:
2566; CHECK:       ## BB#0:
2567; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2568; CHECK-NEXT:    vmaskmovps (%eax), %xmm0, %xmm0
2569; CHECK-NEXT:    retl
2570  %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1]
2571  ret <4 x float> %res
2572}
2573declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readonly
2574
2575
; Lowering test for llvm.x86.avx.maskload.ps.256: the pointer is loaded from
; the stack into %eax and the intrinsic is expected to select the load form of
; vmaskmovps, with the mask in %ymm0 and the result written to %ymm0.
2576define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x i32> %mask) {
2577; CHECK-LABEL: test_x86_avx_maskload_ps_256:
2578; CHECK:       ## BB#0:
2579; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2580; CHECK-NEXT:    vmaskmovps (%eax), %ymm0, %ymm0
2581; CHECK-NEXT:    retl
2582  %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1]
2583  ret <8 x float> %res
2584}
2585declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readonly
2586
2587
; Lowering test for llvm.x86.avx.maskstore.pd (128-bit): the pointer is loaded
; from the stack into %eax and the intrinsic is expected to select the store
; form of vmaskmovpd, with the data in %xmm1 and the mask in %xmm0.
2588define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2) {
2589; CHECK-LABEL: test_x86_avx_maskstore_pd:
2590; CHECK:       ## BB#0:
2591; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2592; CHECK-NEXT:    vmaskmovpd %xmm1, %xmm0, (%eax)
2593; CHECK-NEXT:    retl
2594  call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2)
2595  ret void
2596}
2597declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind
2598
2599
; Lowering test for llvm.x86.avx.maskstore.pd.256: the pointer is loaded from
; the stack into %eax and the intrinsic is expected to select the store form of
; vmaskmovpd (data in %ymm1, mask in %ymm0), followed by vzeroupper.
2600define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x i64> %mask, <4 x double> %a2) {
2601; CHECK-LABEL: test_x86_avx_maskstore_pd_256:
2602; CHECK:       ## BB#0:
2603; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2604; CHECK-NEXT:    vmaskmovpd %ymm1, %ymm0, (%eax)
2605; CHECK-NEXT:    vzeroupper
2606; CHECK-NEXT:    retl
2607  call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %mask, <4 x double> %a2)
2608  ret void
2609}
2610declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind
2611
2612
; Lowering test for llvm.x86.avx.maskstore.ps (128-bit): the pointer is loaded
; from the stack into %eax and the intrinsic is expected to select the store
; form of vmaskmovps, with the data in %xmm1 and the mask in %xmm0.
2613define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2) {
2614; CHECK-LABEL: test_x86_avx_maskstore_ps:
2615; CHECK:       ## BB#0:
2616; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2617; CHECK-NEXT:    vmaskmovps %xmm1, %xmm0, (%eax)
2618; CHECK-NEXT:    retl
2619  call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2)
2620  ret void
2621}
2622declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind
2623
2624
; Lowering test for llvm.x86.avx.maskstore.ps.256: the pointer is loaded from
; the stack into %eax and the intrinsic is expected to select the store form of
; vmaskmovps (data in %ymm1, mask in %ymm0), followed by vzeroupper.
2625define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x i32> %mask, <8 x float> %a2) {
2626; CHECK-LABEL: test_x86_avx_maskstore_ps_256:
2627; CHECK:       ## BB#0:
2628; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2629; CHECK-NEXT:    vmaskmovps %ymm1, %ymm0, (%eax)
2630; CHECK-NEXT:    vzeroupper
2631; CHECK-NEXT:    retl
2632  call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %mask, <8 x float> %a2)
2633  ret void
2634}
2635declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind
2636
2637
; Lowering test for llvm.x86.avx.max.pd.256: expected to select a single
; three-operand vmaxpd on ymm registers.
2638define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
2639; CHECK-LABEL: test_x86_avx_max_pd_256:
2640; CHECK:       ## BB#0:
2641; CHECK-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
2642; CHECK-NEXT:    retl
2643  %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2644  ret <4 x double> %res
2645}
2646declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
2647
2648
; Lowering test for llvm.x86.avx.max.ps.256: expected to select a single
; three-operand vmaxps on ymm registers.
2649define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
2650; CHECK-LABEL: test_x86_avx_max_ps_256:
2651; CHECK:       ## BB#0:
2652; CHECK-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
2653; CHECK-NEXT:    retl
2654  %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2655  ret <8 x float> %res
2656}
2657declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
2658
2659
; Lowering test for llvm.x86.avx.min.pd.256: expected to select a single
; three-operand vminpd on ymm registers.
2660define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
2661; CHECK-LABEL: test_x86_avx_min_pd_256:
2662; CHECK:       ## BB#0:
2663; CHECK-NEXT:    vminpd %ymm1, %ymm0, %ymm0
2664; CHECK-NEXT:    retl
2665  %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2666  ret <4 x double> %res
2667}
2668declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
2669
2670
; Lowering test for llvm.x86.avx.min.ps.256: expected to select a single
; three-operand vminps on ymm registers.
2671define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
2672; CHECK-LABEL: test_x86_avx_min_ps_256:
2673; CHECK:       ## BB#0:
2674; CHECK-NEXT:    vminps %ymm1, %ymm0, %ymm0
2675; CHECK-NEXT:    retl
2676  %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2677  ret <8 x float> %res
2678}
2679declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
2680
2681
; Lowering test for llvm.x86.avx.movmsk.pd.256: expected to select vmovmskpd
; from %ymm0 directly into %eax, followed by vzeroupper before the return.
2682define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
2683; CHECK-LABEL: test_x86_avx_movmsk_pd_256:
2684; CHECK:       ## BB#0:
2685; CHECK-NEXT:    vmovmskpd %ymm0, %eax
2686; CHECK-NEXT:    vzeroupper
2687; CHECK-NEXT:    retl
2688  %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
2689  ret i32 %res
2690}
2691declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
2692
2693
; Lowering test for llvm.x86.avx.movmsk.ps.256: expected to select vmovmskps
; from %ymm0 directly into %eax, followed by vzeroupper before the return.
2694define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
2695; CHECK-LABEL: test_x86_avx_movmsk_ps_256:
2696; CHECK:       ## BB#0:
2697; CHECK-NEXT:    vmovmskps %ymm0, %eax
2698; CHECK-NEXT:    vzeroupper
2699; CHECK-NEXT:    retl
2700  %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
2701  ret i32 %res
2702}
2703declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
2704
2705
2706
2707
2708
2709
2710
; Lowering test for llvm.x86.avx.ptestc.256: expected to select vptest on ymm
; registers, with the i32 result formed by sbbl %eax, %eax and andl $1, %eax,
; followed by vzeroupper before the return.
2711define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
2712; CHECK-LABEL: test_x86_avx_ptestc_256:
2713; CHECK:       ## BB#0:
2714; CHECK-NEXT:    vptest %ymm1, %ymm0
2715; CHECK-NEXT:    sbbl %eax, %eax
2716; CHECK-NEXT:    andl $1, %eax
2717; CHECK-NEXT:    vzeroupper
2718; CHECK-NEXT:    retl
2719  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
2720  ret i32 %res
2721}
2722declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone
2723
2724
; Lowering test for llvm.x86.avx.ptestnzc.256: expected to select vptest on ymm
; registers, with the i32 result formed by seta %al and movzbl %al, %eax,
; followed by vzeroupper before the return.
2725define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
2726; CHECK-LABEL: test_x86_avx_ptestnzc_256:
2727; CHECK:       ## BB#0:
2728; CHECK-NEXT:    vptest %ymm1, %ymm0
2729; CHECK-NEXT:    seta %al
2730; CHECK-NEXT:    movzbl %al, %eax
2731; CHECK-NEXT:    vzeroupper
2732; CHECK-NEXT:    retl
2733  %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
2734  ret i32 %res
2735}
2736declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone
2737
2738
; Lowering test for llvm.x86.avx.ptestz.256: expected to select vptest on ymm
; registers, with the i32 result formed by sete %al and movzbl %al, %eax,
; followed by vzeroupper before the return.
2739define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
2740; CHECK-LABEL: test_x86_avx_ptestz_256:
2741; CHECK:       ## BB#0:
2742; CHECK-NEXT:    vptest %ymm1, %ymm0
2743; CHECK-NEXT:    sete %al
2744; CHECK-NEXT:    movzbl %al, %eax
2745; CHECK-NEXT:    vzeroupper
2746; CHECK-NEXT:    retl
2747  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
2748  ret i32 %res
2749}
2750declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
2751
2752
; Lowering test for llvm.x86.avx.rcp.ps.256: expected to select a single
; ymm-to-ymm vrcpps.
2753define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
2754; CHECK-LABEL: test_x86_avx_rcp_ps_256:
2755; CHECK:       ## BB#0:
2756; CHECK-NEXT:    vrcpps %ymm0, %ymm0
2757; CHECK-NEXT:    retl
2758  %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
2759  ret <8 x float> %res
2760}
2761declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
2762
2763
; Lowering test for llvm.x86.avx.round.pd.256: the call with i32 control value 7
; is expected to select vroundpd with that same value as its $7 immediate.
2764define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
2765; CHECK-LABEL: test_x86_avx_round_pd_256:
2766; CHECK:       ## BB#0:
2767; CHECK-NEXT:    vroundpd $7, %ymm0, %ymm0
2768; CHECK-NEXT:    retl
2769  %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
2770  ret <4 x double> %res
2771}
2772declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
2773
2774
; Lowering test for llvm.x86.avx.round.ps.256: the call with i32 control value 7
; is expected to select vroundps with that same value as its $7 immediate.
2775define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
2776; CHECK-LABEL: test_x86_avx_round_ps_256:
2777; CHECK:       ## BB#0:
2778; CHECK-NEXT:    vroundps $7, %ymm0, %ymm0
2779; CHECK-NEXT:    retl
2780  %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
2781  ret <8 x float> %res
2782}
2783declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
2784
2785
; Lowering test for llvm.x86.avx.rsqrt.ps.256: expected to select a single
; ymm-to-ymm vrsqrtps.
2786define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
2787; CHECK-LABEL: test_x86_avx_rsqrt_ps_256:
2788; CHECK:       ## BB#0:
2789; CHECK-NEXT:    vrsqrtps %ymm0, %ymm0
2790; CHECK-NEXT:    retl
2791  %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
2792  ret <8 x float> %res
2793}
2794declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
2795
2796
; Lowering test for llvm.x86.avx.sqrt.pd.256: expected to select a single
; ymm-to-ymm vsqrtpd.
2797define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
2798; CHECK-LABEL: test_x86_avx_sqrt_pd_256:
2799; CHECK:       ## BB#0:
2800; CHECK-NEXT:    vsqrtpd %ymm0, %ymm0
2801; CHECK-NEXT:    retl
2802  %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
2803  ret <4 x double> %res
2804}
2805declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
2806
2807
; Lowering test for llvm.x86.avx.sqrt.ps.256: expected to select a single
; ymm-to-ymm vsqrtps.
2808define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
2809; CHECK-LABEL: test_x86_avx_sqrt_ps_256:
2810; CHECK:       ## BB#0:
2811; CHECK-NEXT:    vsqrtps %ymm0, %ymm0
2812; CHECK-NEXT:    retl
2813  %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
2814  ret <8 x float> %res
2815}
2816declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
2817
2818
; Lowering test for llvm.x86.avx.storeu.dq.256. The stored value is first
; incremented by an all-ones <32 x i8> add, which the expected assembly performs
; as two 128-bit vpaddb operations on the halves split with vextractf128 and
; rejoined with vinsertf128; the store itself is expected to be vmovups.
2819define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
2820  ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and it's hard to force with no 256-bit integer instructions
2821  ; add operation forces the execution domain.
2822; CHECK-LABEL: test_x86_avx_storeu_dq_256:
2823; CHECK:       ## BB#0:
2824; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2825; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
2826; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2827; CHECK-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
2828; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
2829; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2830; CHECK-NEXT:    vmovups %ymm0, (%eax)
2831; CHECK-NEXT:    vzeroupper
2832; CHECK-NEXT:    retl
2833  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
2834  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
2835  ret void
2836}
2837declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
2838
2839
; Lowering test for llvm.x86.avx.storeu.pd.256. The stored value goes through an
; fadd with a zero vector (vxorpd + vaddpd in the expected assembly), and the
; store is then expected to be vmovupd, followed by vzeroupper.
2840define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
2841  ; add operation forces the execution domain.
2842; CHECK-LABEL: test_x86_avx_storeu_pd_256:
2843; CHECK:       ## BB#0:
2844; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2845; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
2846; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
2847; CHECK-NEXT:    vmovupd %ymm0, (%eax)
2848; CHECK-NEXT:    vzeroupper
2849; CHECK-NEXT:    retl
2850  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
2851  call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
2852  ret void
2853}
2854declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
2855
2856
; Lowering test for llvm.x86.avx.storeu.ps.256: the argument is stored
; unmodified, and the expected assembly is a vmovups of %ymm0 to (%eax),
; followed by vzeroupper.
2857define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
2858; CHECK-LABEL: test_x86_avx_storeu_ps_256:
2859; CHECK:       ## BB#0:
2860; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2861; CHECK-NEXT:    vmovups %ymm0, (%eax)
2862; CHECK-NEXT:    vzeroupper
2863; CHECK-NEXT:    retl
2864  call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
2865  ret void
2866}
2867declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
2868
2869
; Lowering test for llvm.x86.avx.vbroadcastf128.pd.256: the pointer is loaded
; from the stack into %eax and the intrinsic is expected to select
; vbroadcastf128 from (%eax) into %ymm0.
2870define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
2871; CHECK-LABEL: test_x86_avx_vbroadcastf128_pd_256:
2872; CHECK:       ## BB#0:
2873; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2874; CHECK-NEXT:    vbroadcastf128 (%eax), %ymm0
2875; CHECK-NEXT:    retl
2876  %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
2877  ret <4 x double> %res
2878}
2879declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly
2880
2881
; Lowering test for llvm.x86.avx.vbroadcastf128.ps.256: the pointer is loaded
; from the stack into %eax and the intrinsic is expected to select
; vbroadcastf128 from (%eax) into %ymm0.
2882define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
2883; CHECK-LABEL: test_x86_avx_vbroadcastf128_ps_256:
2884; CHECK:       ## BB#0:
2885; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2886; CHECK-NEXT:    vbroadcastf128 (%eax), %ymm0
2887; CHECK-NEXT:    retl
2888  %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
2889  ret <8 x float> %res
2890}
2891declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
2892
2893
; Lowering test for llvm.x86.avx.vperm2f128.pd.256 with control byte 7: the
; expected vperm2f128 is printed with the decoded shuffle
; ymm0 = ymm1[2,3],ymm0[0,1].
2894define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
2895; CHECK-LABEL: test_x86_avx_vperm2f128_pd_256:
2896; CHECK:       ## BB#0:
2897; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
2898; CHECK-NEXT:    retl
2899  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
2900  ret <4 x double> %res
2901}
2902declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
2903
2904
; Lowering test for llvm.x86.avx.vperm2f128.ps.256 with control byte 7: the
; expected vperm2f128 is printed with the decoded shuffle
; ymm0 = ymm1[2,3],ymm0[0,1].
2905define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
2906; CHECK-LABEL: test_x86_avx_vperm2f128_ps_256:
2907; CHECK:       ## BB#0:
2908; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
2909; CHECK-NEXT:    retl
2910  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
2911  ret <8 x float> %res
2912}
2913declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
2914
2915
; Lowering test for llvm.x86.avx.vperm2f128.si.256 (integer element type) with
; control byte 7: the expected instruction is still vperm2f128, printed with
; the decoded shuffle ymm0 = ymm1[2,3],ymm0[0,1].
2916define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
2917; CHECK-LABEL: test_x86_avx_vperm2f128_si_256:
2918; CHECK:       ## BB#0:
2919; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
2920; CHECK-NEXT:    retl
2921  %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
2922  ret <8 x i32> %res
2923}
2924declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
2925
2926
; Lowering test for llvm.x86.avx.vpermil.pd (128-bit) with control byte 1: the
; expected vpermilpd is printed with the decoded shuffle xmm0 = xmm0[1,0].
2927define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
2928; CHECK-LABEL: test_x86_avx_vpermil_pd:
2929; CHECK:       ## BB#0:
2930; CHECK-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
2931; CHECK-NEXT:    retl
2932  %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
2933  ret <2 x double> %res
2934}
2935declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
2936
2937
; Lowering test for llvm.x86.avx.vpermil.pd.256 with control byte 7: the
; expected vpermilpd is printed with the decoded shuffle ymm0 = ymm0[1,1,3,2].
2938define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
2939; CHECK-LABEL: test_x86_avx_vpermil_pd_256:
2940; CHECK:       ## BB#0:
2941; CHECK-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
2942; CHECK-NEXT:    retl
2943  %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
2944  ret <4 x double> %res
2945}
2946declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone
2947
2948
; Lowering test for llvm.x86.avx.vpermil.ps (128-bit) with control byte 7: the
; expected vpermilps is printed with the decoded shuffle xmm0 = xmm0[3,1,0,0].
2949define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
2950; CHECK-LABEL: test_x86_avx_vpermil_ps:
2951; CHECK:       ## BB#0:
2952; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,0]
2953; CHECK-NEXT:    retl
2954  %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
2955  ret <4 x float> %res
2956}
2957declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
2958
2959
; Lowering test for llvm.x86.avx.vpermil.ps.256 with control byte 7: the
; expected vpermilps is printed with the decoded shuffle
; ymm0 = ymm0[3,1,0,0,7,5,4,4].
2960define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
2961; CHECK-LABEL: test_x86_avx_vpermil_ps_256:
2962; CHECK:       ## BB#0:
2963; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,1,0,0,7,5,4,4]
2964; CHECK-NEXT:    retl
2965  %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
2966  ret <8 x float> %res
2967}
2968declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone
2969
2970
; Lowering test for llvm.x86.avx.vpermilvar.pd (128-bit): with the control
; vector passed in a register, the expected instruction is the three-operand
; register form of vpermilpd on xmm registers.
2971define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
2972; CHECK-LABEL: test_x86_avx_vpermilvar_pd:
2973; CHECK:       ## BB#0:
2974; CHECK-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
2975; CHECK-NEXT:    retl
2976  %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
2977  ret <2 x double> %res
2978}
2979declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
2980
2981
; Lowering test for llvm.x86.avx.vpermilvar.pd.256: with the control vector
; passed in a register, the expected instruction is the three-operand register
; form of vpermilpd on ymm registers.
2982define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
2983; CHECK-LABEL: test_x86_avx_vpermilvar_pd_256:
2984; CHECK:       ## BB#0:
2985; CHECK-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0
2986; CHECK-NEXT:    retl
2987  %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
2988  ret <4 x double> %res
2989}
2990declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
2991
2992
; Lowering test for llvm.x86.avx.vpermilvar.ps (128-bit): with the control
; vector passed in a register, the expected instruction is the three-operand
; register form of vpermilps on xmm registers.
2993define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
2994; CHECK-LABEL: test_x86_avx_vpermilvar_ps:
2995; CHECK:       ## BB#0:
2996; CHECK-NEXT:    vpermilps %xmm1, %xmm0, %xmm0
2997; CHECK-NEXT:    retl
2998  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
2999  ret <4 x float> %res
3000}
; Variant of the vpermilvar.ps test where the control vector comes from an IR
; load: the expected assembly folds that load into vpermilps as a memory
; operand, (%eax), after the pointer is fetched from the stack.
3001define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
3002; CHECK-LABEL: test_x86_avx_vpermilvar_ps_load:
3003; CHECK:       ## BB#0:
3004; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3005; CHECK-NEXT:    vpermilps (%eax), %xmm0, %xmm0
3006; CHECK-NEXT:    retl
3007  %a2 = load <4 x i32>, <4 x i32>* %a1
3008  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
3009  ret <4 x float> %res
3010}
3011declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
3012
3013
; Lowering test for llvm.x86.avx.vpermilvar.ps.256: with the control vector
; passed in a register, the expected instruction is the three-operand register
; form of vpermilps on ymm registers.
3014define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
3015; CHECK-LABEL: test_x86_avx_vpermilvar_ps_256:
3016; CHECK:       ## BB#0:
3017; CHECK-NEXT:    vpermilps %ymm1, %ymm0, %ymm0
3018; CHECK-NEXT:    retl
3019  %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
3020  ret <8 x float> %res
3021}
3022declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
3023
3024
; Lowering test for llvm.x86.avx.vtestc.pd (128-bit): expected to select vtestpd
; on xmm registers, with the i32 result formed by sbbl %eax, %eax and
; andl $1, %eax.
3025define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
3026; CHECK-LABEL: test_x86_avx_vtestc_pd:
3027; CHECK:       ## BB#0:
3028; CHECK-NEXT:    vtestpd %xmm1, %xmm0
3029; CHECK-NEXT:    sbbl %eax, %eax
3030; CHECK-NEXT:    andl $1, %eax
3031; CHECK-NEXT:    retl
3032  %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
3033  ret i32 %res
3034}
3035declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
3036
3037
; Lowering test for llvm.x86.avx.vtestc.pd.256: expected to select vtestpd on
; ymm registers, with the i32 result formed by sbbl %eax, %eax and
; andl $1, %eax, followed by vzeroupper before the return.
3038define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
3039; CHECK-LABEL: test_x86_avx_vtestc_pd_256:
3040; CHECK:       ## BB#0:
3041; CHECK-NEXT:    vtestpd %ymm1, %ymm0
3042; CHECK-NEXT:    sbbl %eax, %eax
3043; CHECK-NEXT:    andl $1, %eax
3044; CHECK-NEXT:    vzeroupper
3045; CHECK-NEXT:    retl
3046  %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
3047  ret i32 %res
3048}
3049declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
3050
3051
; Lowering test for llvm.x86.avx.vtestc.ps (128-bit): expected to select vtestps
; on xmm registers, with the i32 result formed by sbbl %eax, %eax and
; andl $1, %eax.
3052define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
3053; CHECK-LABEL: test_x86_avx_vtestc_ps:
3054; CHECK:       ## BB#0:
3055; CHECK-NEXT:    vtestps %xmm1, %xmm0
3056; CHECK-NEXT:    sbbl %eax, %eax
3057; CHECK-NEXT:    andl $1, %eax
3058; CHECK-NEXT:    retl
3059  %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
3060  ret i32 %res
3061}
3062declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
3063
3064
; Lowering test for llvm.x86.avx.vtestc.ps.256: expected to select vtestps on
; ymm registers, with the i32 result formed by sbbl %eax, %eax and
; andl $1, %eax, followed by vzeroupper before the return.
3065define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
3066; CHECK-LABEL: test_x86_avx_vtestc_ps_256:
3067; CHECK:       ## BB#0:
3068; CHECK-NEXT:    vtestps %ymm1, %ymm0
3069; CHECK-NEXT:    sbbl %eax, %eax
3070; CHECK-NEXT:    andl $1, %eax
3071; CHECK-NEXT:    vzeroupper
3072; CHECK-NEXT:    retl
3073  %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
3074  ret i32 %res
3075}
3076declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
3077
3078
; Calls llvm.x86.avx.vtestnzc.pd on two <2 x double> arguments and returns its
; i32 result. The expected assembly is a vtestpd on xmm registers followed by
; seta/movzbl, i.e. the result is 1 only when both CF and ZF are clear.
3079define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
3080; CHECK-LABEL: test_x86_avx_vtestnzc_pd:
3081; CHECK:       ## BB#0:
3082; CHECK-NEXT:    vtestpd %xmm1, %xmm0
3083; CHECK-NEXT:    seta %al
3084; CHECK-NEXT:    movzbl %al, %eax
3085; CHECK-NEXT:    retl
3086  %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
3087  ret i32 %res
3088}
3089declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone
3090
3091
; 256-bit form: calls llvm.x86.avx.vtestnzc.pd.256 on two <4 x double>
; arguments. The expected assembly uses ymm operands for vtestpd, materializes
; the result with seta/movzbl, and has a vzeroupper before the return.
3092define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
3093; CHECK-LABEL: test_x86_avx_vtestnzc_pd_256:
3094; CHECK:       ## BB#0:
3095; CHECK-NEXT:    vtestpd %ymm1, %ymm0
3096; CHECK-NEXT:    seta %al
3097; CHECK-NEXT:    movzbl %al, %eax
3098; CHECK-NEXT:    vzeroupper
3099; CHECK-NEXT:    retl
3100  %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
3101  ret i32 %res
3102}
3103declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone
3104
3105
; Calls llvm.x86.avx.vtestnzc.ps on two <4 x float> arguments and returns its
; i32 result. The expected assembly is a vtestps on xmm registers followed by
; seta/movzbl, i.e. the result is 1 only when both CF and ZF are clear.
3106define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
3107; CHECK-LABEL: test_x86_avx_vtestnzc_ps:
3108; CHECK:       ## BB#0:
3109; CHECK-NEXT:    vtestps %xmm1, %xmm0
3110; CHECK-NEXT:    seta %al
3111; CHECK-NEXT:    movzbl %al, %eax
3112; CHECK-NEXT:    retl
3113  %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
3114  ret i32 %res
3115}
3116declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone
3117
3118
; 256-bit form: calls llvm.x86.avx.vtestnzc.ps.256 on two <8 x float>
; arguments. The expected assembly uses ymm operands for vtestps, materializes
; the result with seta/movzbl, and has a vzeroupper before the return.
3119define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
3120; CHECK-LABEL: test_x86_avx_vtestnzc_ps_256:
3121; CHECK:       ## BB#0:
3122; CHECK-NEXT:    vtestps %ymm1, %ymm0
3123; CHECK-NEXT:    seta %al
3124; CHECK-NEXT:    movzbl %al, %eax
3125; CHECK-NEXT:    vzeroupper
3126; CHECK-NEXT:    retl
3127  %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
3128  ret i32 %res
3129}
3130declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone
3131
3132
; Calls llvm.x86.avx.vtestz.pd on two <2 x double> arguments and returns its
; i32 result. The expected assembly is a vtestpd on xmm registers followed by
; sete/movzbl, i.e. the result is 1 when the zero flag is set.
3133define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
3134; CHECK-LABEL: test_x86_avx_vtestz_pd:
3135; CHECK:       ## BB#0:
3136; CHECK-NEXT:    vtestpd %xmm1, %xmm0
3137; CHECK-NEXT:    sete %al
3138; CHECK-NEXT:    movzbl %al, %eax
3139; CHECK-NEXT:    retl
3140  %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
3141  ret i32 %res
3142}
3143declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone
3144
3145
; 256-bit form: calls llvm.x86.avx.vtestz.pd.256 on two <4 x double> arguments.
; The expected assembly uses ymm operands for vtestpd, materializes the zero
; flag with sete/movzbl, and has a vzeroupper before the return.
3146define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
3147; CHECK-LABEL: test_x86_avx_vtestz_pd_256:
3148; CHECK:       ## BB#0:
3149; CHECK-NEXT:    vtestpd %ymm1, %ymm0
3150; CHECK-NEXT:    sete %al
3151; CHECK-NEXT:    movzbl %al, %eax
3152; CHECK-NEXT:    vzeroupper
3153; CHECK-NEXT:    retl
3154  %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
3155  ret i32 %res
3156}
3157declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone
3158
3159
; Calls llvm.x86.avx.vtestz.ps on two <4 x float> arguments and returns its
; i32 result. The expected assembly is a vtestps on xmm registers followed by
; sete/movzbl, i.e. the result is 1 when the zero flag is set.
3160define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
3161; CHECK-LABEL: test_x86_avx_vtestz_ps:
3162; CHECK:       ## BB#0:
3163; CHECK-NEXT:    vtestps %xmm1, %xmm0
3164; CHECK-NEXT:    sete %al
3165; CHECK-NEXT:    movzbl %al, %eax
3166; CHECK-NEXT:    retl
3167  %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
3168  ret i32 %res
3169}
3170declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
3171
3172
; 256-bit form: calls llvm.x86.avx.vtestz.ps.256 on two <8 x float> arguments.
; The expected assembly uses ymm operands for vtestps, materializes the zero
; flag with sete/movzbl, and has a vzeroupper before the return.
3173define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
3174; CHECK-LABEL: test_x86_avx_vtestz_ps_256:
3175; CHECK:       ## BB#0:
3176; CHECK-NEXT:    vtestps %ymm1, %ymm0
3177; CHECK-NEXT:    sete %al
3178; CHECK-NEXT:    movzbl %al, %eax
3179; CHECK-NEXT:    vzeroupper
3180; CHECK-NEXT:    retl
3181  %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
3182  ret i32 %res
3183}
3184declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone
3185
3186
; Calls the argument-less llvm.x86.avx.vzeroall intrinsic. The expected
; assembly is a vzeroall followed by a vzeroupper and the return.
; NOTE(review): the trailing vzeroupper presumably comes from the backend's
; pre-return insertion rather than from the intrinsic itself -- confirm.
3187define void @test_x86_avx_vzeroall() {
3188; CHECK-LABEL: test_x86_avx_vzeroall:
3189; CHECK:       ## BB#0:
3190; CHECK-NEXT:    vzeroall
3191; CHECK-NEXT:    vzeroupper
3192; CHECK-NEXT:    retl
3193  call void @llvm.x86.avx.vzeroall()
3194  ret void
3195}
3196declare void @llvm.x86.avx.vzeroall() nounwind
3197
3198
; Calls the argument-less llvm.x86.avx.vzeroupper intrinsic. The expected
; assembly contains two consecutive vzeroupper instructions before the return.
; NOTE(review): one presumably comes from the intrinsic and the other from the
; backend's pre-return insertion; the duplicate looks redundant -- confirm
; whether this is intended before regenerating the assertions.
3199define void @test_x86_avx_vzeroupper() {
3200; CHECK-LABEL: test_x86_avx_vzeroupper:
3201; CHECK:       ## BB#0:
3202; CHECK-NEXT:    vzeroupper
3203; CHECK-NEXT:    vzeroupper
3204; CHECK-NEXT:    retl
3205  call void @llvm.x86.avx.vzeroupper()
3206  ret void
3207}
3208declare void @llvm.x86.avx.vzeroupper() nounwind
3209
3210; Make sure instructions that have no AVX equivalents, but are associated with SSEX feature flags, still work.
3211
; Tail-calls llvm.x86.sse3.monitor with a pointer and two i32 arguments.
; The expected assembly loads three stack arguments into %edx, %ecx and %eax,
; applies a leal (%eax), %eax, and then issues the plain (non-VEX) monitor.
3212define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
3213; CHECK-LABEL: monitor:
3214; CHECK:       ## BB#0:
3215; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
3216; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
3217; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3218; CHECK-NEXT:    leal (%eax), %eax
3219; CHECK-NEXT:    monitor
3220; CHECK-NEXT:    retl
3221  tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
3222  ret void
3223}
3224declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
3225
; Tail-calls llvm.x86.sse3.mwait with two i32 arguments. The expected assembly
; loads two stack arguments into %ecx and %eax and then issues mwait.
3226define void @mwait(i32 %E, i32 %H) nounwind {
3227; CHECK-LABEL: mwait:
3228; CHECK:       ## BB#0:
3229; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
3230; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3231; CHECK-NEXT:    mwait
3232; CHECK-NEXT:    retl
3233  tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
3234  ret void
3235}
3236declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
3237
; Tail-calls llvm.x86.sse.sfence; the expected assembly is a bare sfence
; followed by the return.
3238define void @sfence() nounwind {
3239; CHECK-LABEL: sfence:
3240; CHECK:       ## BB#0:
3241; CHECK-NEXT:    sfence
3242; CHECK-NEXT:    retl
3243  tail call void @llvm.x86.sse.sfence()
3244  ret void
3245}
3246declare void @llvm.x86.sse.sfence() nounwind
3247
; Tail-calls llvm.x86.sse2.lfence; the expected assembly is a bare lfence
; followed by the return.
3248define void @lfence() nounwind {
3249; CHECK-LABEL: lfence:
3250; CHECK:       ## BB#0:
3251; CHECK-NEXT:    lfence
3252; CHECK-NEXT:    retl
3253  tail call void @llvm.x86.sse2.lfence()
3254  ret void
3255}
3256declare void @llvm.x86.sse2.lfence() nounwind
3257
; Tail-calls llvm.x86.sse2.mfence; the expected assembly is a bare mfence
; followed by the return.
3258define void @mfence() nounwind {
3259; CHECK-LABEL: mfence:
3260; CHECK:       ## BB#0:
3261; CHECK-NEXT:    mfence
3262; CHECK-NEXT:    retl
3263  tail call void @llvm.x86.sse2.mfence()
3264  ret void
3265}
3266declare void @llvm.x86.sse2.mfence() nounwind
3267
; Tail-calls llvm.x86.sse2.clflush on the pointer argument. The expected
; assembly loads the pointer from the stack into %eax and issues
; clflush (%eax).
3268define void @clflush(i8* %p) nounwind {
3269; CHECK-LABEL: clflush:
3270; CHECK:       ## BB#0:
3271; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3272; CHECK-NEXT:    clflush (%eax)
3273; CHECK-NEXT:    retl
3274  tail call void @llvm.x86.sse2.clflush(i8* %p)
3275  ret void
3276}
3277declare void @llvm.x86.sse2.clflush(i8*) nounwind
3278
; Calls llvm.x86.sse42.crc32.32.8 with an i32 and an i8 argument and returns
; the i32 result. The expected assembly loads one stack argument into %eax and
; uses another stack slot directly as the memory operand of crc32b.
3279define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
3280; CHECK-LABEL: crc32_32_8:
3281; CHECK:       ## BB#0:
3282; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3283; CHECK-NEXT:    crc32b {{[0-9]+}}(%esp), %eax
3284; CHECK-NEXT:    retl
3285  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
3286  ret i32 %tmp
3287}
3288declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
3289
; Calls llvm.x86.sse42.crc32.32.16 with an i32 and an i16 argument and returns
; the i32 result. The expected assembly loads one stack argument into %eax and
; uses another stack slot directly as the memory operand of crc32w.
3290define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
3291; CHECK-LABEL: crc32_32_16:
3292; CHECK:       ## BB#0:
3293; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3294; CHECK-NEXT:    crc32w {{[0-9]+}}(%esp), %eax
3295; CHECK-NEXT:    retl
3296  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
3297  ret i32 %tmp
3298}
3299declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
3300
; Calls llvm.x86.sse42.crc32.32.32 with two i32 arguments and returns the i32
; result. The expected assembly loads one stack argument into %eax and uses
; another stack slot directly as the memory operand of crc32l.
3301define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
3302; CHECK-LABEL: crc32_32_32:
3303; CHECK:       ## BB#0:
3304; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3305; CHECK-NEXT:    crc32l {{[0-9]+}}(%esp), %eax
3306; CHECK-NEXT:    retl
3307  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
3308  ret i32 %tmp
3309}
3310declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
3311
; Adds <1, 1> to the <2 x i64> argument, widens the sum to <4 x i64> with
; undef upper lanes via shufflevector, and passes it to
; llvm.x86.avx.movnt.dq.256. The expected assembly is a vpaddq with a
; constant-pool operand followed by vmovntdq of %ymm0 and a vzeroupper.
; NOTE(review): the LCPI276_0 label is hard-coded in the expected output and
; presumably depends on this function's position in the file -- adding or
; removing functions above it likely requires regenerating the assertions.
3312define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
3313; CHECK-LABEL: movnt_dq:
3314; CHECK:       ## BB#0:
3315; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3316; CHECK-NEXT:    vpaddq LCPI276_0, %xmm0, %xmm0
3317; CHECK-NEXT:    vmovntdq %ymm0, (%eax)
3318; CHECK-NEXT:    vzeroupper
3319; CHECK-NEXT:    retl
3320  %a2 = add <2 x i64> %a1, <i64 1, i64 1>
3321  %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
3322  tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind
3323  ret void
3324}
3325declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
3326
; Passes the pointer and the <8 x float> argument unchanged to
; llvm.x86.avx.movnt.ps.256. The expected assembly loads the pointer from the
; stack into %eax, issues vmovntps of %ymm0 to (%eax), then vzeroupper.
3327define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
3328; CHECK-LABEL: movnt_ps:
3329; CHECK:       ## BB#0:
3330; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3331; CHECK-NEXT:    vmovntps %ymm0, (%eax)
3332; CHECK-NEXT:    vzeroupper
3333; CHECK-NEXT:    retl
3334  tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
3335  ret void
3336}
3337declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
3338
; Adds a zero vector to the <4 x double> argument with fadd and passes the sum
; to llvm.x86.avx.movnt.pd.256. The expected assembly zeroes %ymm1 with vxorpd,
; performs vaddpd, and stores with vmovntpd to (%eax), followed by vzeroupper.
3339define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
3340  ; add operation forces the execution domain.
3341; CHECK-LABEL: movnt_pd:
3342; CHECK:       ## BB#0:
3343; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3344; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
3345; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
3346; CHECK-NEXT:    vmovntpd %ymm0, (%eax)
3347; CHECK-NEXT:    vzeroupper
3348; CHECK-NEXT:    retl
3349  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
3350  tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
3351  ret void
3352}
3353declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
3354
3355
3356; Check for pclmulqdq
; Calls llvm.x86.pclmulqdq on two <2 x i64> arguments with an immediate of 0
; and returns the <2 x i64> result. The expected assembly is the VEX-encoded
; vpclmulqdq with $0 as its immediate operand.
3357define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
3358; CHECK-LABEL: test_x86_pclmulqdq:
3359; CHECK:       ## BB#0:
3360; CHECK-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0
3361; CHECK-NEXT:    retl
3362  %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
3363  ret <2 x i64> %res
3364}
3365declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
3366