; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx,aes,pclmul | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx512vl,aes,pclmul | FileCheck %s --check-prefix=AVX512VL

; llvm.x86.aesni.aesdec must lower to a single vaesdec on both run lines.
define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
; AVX-LABEL: test_x86_aesni_aesdec:
; AVX:       ## BB#0:
; AVX-NEXT:    vaesdec %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_aesni_aesdec:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vaesdec %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone


; llvm.x86.aesni.aesdeclast must lower to a single vaesdeclast on both run lines.
define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
; AVX-LABEL: test_x86_aesni_aesdeclast:
; AVX:       ## BB#0:
; AVX-NEXT:    vaesdeclast %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_aesni_aesdeclast:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vaesdeclast %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone


; llvm.x86.aesni.aesenc must lower to a single vaesenc on both run lines.
define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
; AVX-LABEL: test_x86_aesni_aesenc:
; AVX:       ## BB#0:
; AVX-NEXT:    vaesenc %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_aesni_aesenc:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vaesenc %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone


; llvm.x86.aesni.aesenclast must lower to a single vaesenclast on both run lines.
define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
; AVX-LABEL: test_x86_aesni_aesenclast:
; AVX:       ## BB#0:
; AVX-NEXT:    vaesenclast %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_aesni_aesenclast:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vaesenclast %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone


; llvm.x86.aesni.aesimc (one vector operand) must lower to a single vaesimc on both run lines.
define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
; AVX-LABEL: test_x86_aesni_aesimc:
; AVX:       ## BB#0:
; AVX-NEXT:    vaesimc %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_aesni_aesimc:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vaesimc %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone


; llvm.x86.aesni.aeskeygenassist with the constant i8 7 must lower to
; vaeskeygenassist carrying $7 as its immediate on both run lines.
define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
; AVX-LABEL: test_x86_aesni_aeskeygenassist:
; AVX:       ## BB#0:
; AVX-NEXT:    vaeskeygenassist $7, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_aesni_aeskeygenassist:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vaeskeygenassist $7, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone


; llvm.x86.sse2.add.sd must lower to a single vaddsd on both run lines.
define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_sse2_add_sd:
; AVX:       ## BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_add_sd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.cmp.pd with predicate immediate 7 must be printed as the
; vcmpordpd mnemonic on both run lines.
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_sse2_cmp_pd:
; AVX:       ## BB#0:
; AVX-NEXT:    vcmpordpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_cmp_pd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vcmpordpd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone


; llvm.x86.sse2.cmp.sd with predicate immediate 7 must be printed as the
; vcmpordsd mnemonic on both run lines.
define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_sse2_cmp_sd:
; AVX:       ## BB#0:
; AVX-NEXT:    vcmpordsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_cmp_sd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vcmpordsd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone


; llvm.x86.sse2.comieq.sd expects vcomisd followed by setnp and sete whose
; results are combined with andb and zero-extended into %eax.
define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_sse2_comieq_sd:
; AVX:       ## BB#0:
; AVX-NEXT:    vcomisd %xmm1, %xmm0
; AVX-NEXT:    setnp %al
; AVX-NEXT:    sete %cl
; AVX-NEXT:    andb %al, %cl
; AVX-NEXT:    movzbl %cl, %eax
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_comieq_sd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vcomisd %xmm1, %xmm0
; AVX512VL-NEXT:    setnp %al
; AVX512VL-NEXT:    sete %cl
; AVX512VL-NEXT:    andb %al, %cl
; AVX512VL-NEXT:    movzbl %cl, %eax
; AVX512VL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.comige.sd expects %eax to be zeroed first, then vcomisd on
; (%a1, %a0) in AT&T operand order, then setae into %al.
define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_sse2_comige_sd:
; AVX:       ## BB#0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vcomisd %xmm1, %xmm0
; AVX-NEXT:    setae %al
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_comige_sd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    xorl %eax, %eax
; AVX512VL-NEXT:    vcomisd %xmm1, %xmm0
; AVX512VL-NEXT:    setae %al
; AVX512VL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.comigt.sd expects %eax to be zeroed first, then vcomisd,
; then seta into %al.
define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_sse2_comigt_sd:
; AVX:       ## BB#0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vcomisd %xmm1, %xmm0
; AVX-NEXT:    seta %al
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_comigt_sd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    xorl %eax, %eax
; AVX512VL-NEXT:    vcomisd %xmm1, %xmm0
; AVX512VL-NEXT:    seta %al
; AVX512VL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.comile.sd expects the vcomisd operands to be swapped relative
; to the comige test (%xmm0, %xmm1) so the result can still be read with setae.
define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_sse2_comile_sd:
; AVX:       ## BB#0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vcomisd %xmm0, %xmm1
; AVX-NEXT:    setae %al
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_comile_sd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    xorl %eax, %eax
; AVX512VL-NEXT:    vcomisd %xmm0, %xmm1
; AVX512VL-NEXT:    setae %al
; AVX512VL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.comilt.sd expects the vcomisd operands to be swapped relative
; to the comigt test (%xmm0, %xmm1) so the result can still be read with seta.
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_sse2_comilt_sd:
; AVX:       ## BB#0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vcomisd %xmm0, %xmm1
; AVX-NEXT:    seta %al
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_comilt_sd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    xorl %eax, %eax
; AVX512VL-NEXT:    vcomisd %xmm0, %xmm1
; AVX512VL-NEXT:    seta %al
; AVX512VL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.comineq.sd expects vcomisd followed by setp and setne whose
; results are combined with orb and zero-extended into %eax.
define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_sse2_comineq_sd:
; AVX:       ## BB#0:
; AVX-NEXT:    vcomisd %xmm1, %xmm0
; AVX-NEXT:    setp %al
; AVX-NEXT:    setne %cl
; AVX-NEXT:    orb %al, %cl
; AVX-NEXT:    movzbl %cl, %eax
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_comineq_sd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vcomisd %xmm1, %xmm0
; AVX512VL-NEXT:    setp %al
; AVX512VL-NEXT:    setne %cl
; AVX512VL-NEXT:    orb %al, %cl
; AVX512VL-NEXT:    movzbl %cl, %eax
; AVX512VL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.cvtdq2ps must lower to a single vcvtdq2ps on both run lines.
define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; AVX-LABEL: test_x86_sse2_cvtdq2ps:
; AVX:       ## BB#0:
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_cvtdq2ps:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone


; llvm.x86.sse2.cvtpd2dq must lower to a single vcvtpd2dq on both run lines.
define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
; AVX-LABEL: test_x86_sse2_cvtpd2dq:
; AVX:       ## BB#0:
; AVX-NEXT:    vcvtpd2dq %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_cvtpd2dq:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vcvtpd2dq %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone


; llvm.x86.sse2.cvtpd2ps must lower to a single vcvtpd2ps on both run lines.
define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
; AVX-LABEL: test_x86_sse2_cvtpd2ps:
; AVX:       ## BB#0:
; AVX-NEXT:    vcvtpd2ps %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_cvtpd2ps:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vcvtpd2ps %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone


; llvm.x86.sse2.cvtps2dq must lower to a single vcvtps2dq on both run lines.
define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
; AVX-LABEL: test_x86_sse2_cvtps2dq:
; AVX:       ## BB#0:
; AVX-NEXT:    vcvtps2dq %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_cvtps2dq:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vcvtps2dq %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone


; llvm.x86.sse2.cvtsd2si must lower to vcvtsd2si writing the i32 result to %eax.
define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
; AVX-LABEL: test_x86_sse2_cvtsd2si:
; AVX:       ## BB#0:
; AVX-NEXT:    vcvtsd2si %xmm0, %eax
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_cvtsd2si:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vcvtsd2si %xmm0, %eax
; AVX512VL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone


; llvm.x86.sse2.cvtsd2ss must lower to a single three-operand vcvtsd2ss on both run lines.
define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_sse2_cvtsd2ss:
; AVX:       ## BB#0:
; AVX-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_cvtsd2ss:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone


; llvm.x86.sse2.cvtsi2sd must lower to vcvtsi2sdl with the i32 operand read
; directly from its %esp-relative stack slot (offset matched by regex).
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) {
; AVX-LABEL: test_x86_sse2_cvtsi2sd:
; AVX:       ## BB#0:
; AVX-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_cvtsi2sd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone


; llvm.x86.sse2.cvtss2sd must lower to a single three-operand vcvtss2sd on both run lines.
define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; AVX-LABEL: test_x86_sse2_cvtss2sd:
; AVX:       ## BB#0:
; AVX-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_cvtss2sd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone


; llvm.x86.sse2.cvttpd2dq must lower to a single vcvttpd2dq on both run lines.
define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
; AVX-LABEL: test_x86_sse2_cvttpd2dq:
; AVX:       ## BB#0:
; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_cvttpd2dq:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone


; llvm.x86.sse2.cvttps2dq must lower to a single vcvttps2dq on both run lines.
define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
; AVX-LABEL: test_x86_sse2_cvttps2dq:
; AVX:       ## BB#0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_cvttps2dq:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone


; llvm.x86.sse2.cvttsd2si must lower to vcvttsd2si writing the i32 result to %eax.
define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
; AVX-LABEL: test_x86_sse2_cvttsd2si:
; AVX:       ## BB#0:
; AVX-NEXT:    vcvttsd2si %xmm0, %eax
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_cvttsd2si:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vcvttsd2si %xmm0, %eax
; AVX512VL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone


; llvm.x86.sse2.div.sd must lower to a single vdivsd on both run lines.
define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_sse2_div_sd:
; AVX:       ## BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_div_sd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone



; llvm.x86.sse2.max.pd must lower to a single vmaxpd on both run lines.
define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_sse2_max_pd:
; AVX:       ## BB#0:
; AVX-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_max_pd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.max.sd must lower to a single vmaxsd on both run lines.
define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_sse2_max_sd:
; AVX:       ## BB#0:
; AVX-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_max_sd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.min.pd must lower to a single vminpd on both run lines.
define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_sse2_min_pd:
; AVX:       ## BB#0:
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_min_pd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.min.sd must lower to a single vminsd on both run lines.
define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_sse2_min_sd:
; AVX:       ## BB#0:
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_min_sd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.movmsk.pd must lower to vmovmskpd writing the i32 mask to %eax.
define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
; AVX-LABEL: test_x86_sse2_movmsk_pd:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_movmsk_pd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vmovmskpd %xmm0, %eax
; AVX512VL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone




; llvm.x86.sse2.mul.sd must lower to a single vmulsd on both run lines.
define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_sse2_mul_sd:
; AVX:       ## BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_mul_sd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.packssdw.128 must lower to a single vpackssdw on both run lines.
define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; AVX-LABEL: test_x86_sse2_packssdw_128:
; AVX:       ## BB#0:
; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_packssdw_128:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone


; llvm.x86.sse2.packsswb.128 must lower to a single vpacksswb on both run lines.
define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; AVX-LABEL: test_x86_sse2_packsswb_128:
; AVX:       ## BB#0:
; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_packsswb_128:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.packuswb.128 must lower to a single vpackuswb on both run lines.
define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; AVX-LABEL: test_x86_sse2_packuswb_128:
; AVX:       ## BB#0:
; AVX-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_packuswb_128:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.padds.b must lower to a single vpaddsb on both run lines.
define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
; AVX-LABEL: test_x86_sse2_padds_b:
; AVX:       ## BB#0:
; AVX-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_padds_b:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone


; llvm.x86.sse2.padds.w must lower to a single vpaddsw on both run lines.
define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
; AVX-LABEL: test_x86_sse2_padds_w:
; AVX:       ## BB#0:
; AVX-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_padds_w:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.paddus.b must lower to a single vpaddusb on both run lines.
define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
; AVX-LABEL: test_x86_sse2_paddus_b:
; AVX:       ## BB#0:
; AVX-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_paddus_b:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone


; llvm.x86.sse2.paddus.w must lower to a single vpaddusw on both run lines.
define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
; AVX-LABEL: test_x86_sse2_paddus_w:
; AVX:       ## BB#0:
; AVX-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_paddus_w:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.pavg.b must lower to a single vpavgb on both run lines.
define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
; AVX-LABEL: test_x86_sse2_pavg_b:
; AVX:       ## BB#0:
; AVX-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_pavg_b:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone


; llvm.x86.sse2.pavg.w must lower to a single vpavgw on both run lines.
define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
; AVX-LABEL: test_x86_sse2_pavg_w:
; AVX:       ## BB#0:
; AVX-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_pavg_w:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.pmadd.wd must lower to a single vpmaddwd on both run lines.
define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
; AVX-LABEL: test_x86_sse2_pmadd_wd:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_pmadd_wd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.pmaxs.w must lower to a single vpmaxsw on both run lines.
define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
; AVX-LABEL: test_x86_sse2_pmaxs_w:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_pmaxs_w:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.pmaxu.b must lower to a single vpmaxub on both run lines.
define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
; AVX-LABEL: test_x86_sse2_pmaxu_b:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_pmaxu_b:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone


; llvm.x86.sse2.pmins.w must lower to a single vpminsw on both run lines.
define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
; AVX-LABEL: test_x86_sse2_pmins_w:
; AVX:       ## BB#0:
; AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_pmins_w:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.pminu.b must lower to a single vpminub on both run lines.
define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
; AVX-LABEL: test_x86_sse2_pminu_b:
; AVX:       ## BB#0:
; AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_pminu_b:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone


; llvm.x86.sse2.pmovmskb.128 must lower to vpmovmskb writing the i32 mask to %eax.
define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse2_pmovmskb_128:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_pmovmskb_128:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpmovmskb %xmm0, %eax
; AVX512VL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone


; llvm.x86.sse2.pmulh.w must lower to a single vpmulhw on both run lines.
define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
; AVX-LABEL: test_x86_sse2_pmulh_w:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_pmulh_w:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.pmulhu.w must lower to a single vpmulhuw on both run lines.
define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
; AVX-LABEL: test_x86_sse2_pmulhu_w:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_pmulhu_w:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.pmulu.dq must lower to a single vpmuludq on both run lines.
define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
; AVX-LABEL: test_x86_sse2_pmulu_dq:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_pmulu_dq:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone


; llvm.x86.sse2.psad.bw must lower to a single vpsadbw on both run lines.
define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
; AVX-LABEL: test_x86_sse2_psad_bw:
; AVX:       ## BB#0:
; AVX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retl
;
; AVX512VL-LABEL: test_x86_sse2_psad_bw:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone


; Verifies that @llvm.x86.sse2.psll.d with a vector shift-count operand (%a1) is
; emitted as the register form of vpslld in both the AVX and AVX512VL configurations.
872define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
873; AVX-LABEL: test_x86_sse2_psll_d:
874; AVX:       ## BB#0:
875; AVX-NEXT:    vpslld %xmm1, %xmm0, %xmm0
876; AVX-NEXT:    retl
877;
878; AVX512VL-LABEL: test_x86_sse2_psll_d:
879; AVX512VL:       ## BB#0:
880; AVX512VL-NEXT:    vpslld %xmm1, %xmm0, %xmm0
881; AVX512VL-NEXT:    retl
882  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
883  ret <4 x i32> %res
884}
885declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
886
887
; Verifies that @llvm.x86.sse2.psll.q with a vector shift-count operand (%a1) is
; emitted as the register form of vpsllq in both the AVX and AVX512VL configurations.
888define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
889; AVX-LABEL: test_x86_sse2_psll_q:
890; AVX:       ## BB#0:
891; AVX-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
892; AVX-NEXT:    retl
893;
894; AVX512VL-LABEL: test_x86_sse2_psll_q:
895; AVX512VL:       ## BB#0:
896; AVX512VL-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
897; AVX512VL-NEXT:    retl
898  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
899  ret <2 x i64> %res
900}
901declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
902
903
; Verifies that @llvm.x86.sse2.psll.w with a vector shift-count operand (%a1) is
; emitted as the register form of vpsllw in both the AVX and AVX512VL configurations.
904define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
905; AVX-LABEL: test_x86_sse2_psll_w:
906; AVX:       ## BB#0:
907; AVX-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
908; AVX-NEXT:    retl
909;
910; AVX512VL-LABEL: test_x86_sse2_psll_w:
911; AVX512VL:       ## BB#0:
912; AVX512VL-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
913; AVX512VL-NEXT:    retl
914  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
915  ret <8 x i16> %res
916}
917declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
918
919
; Verifies that @llvm.x86.sse2.pslli.d called with the constant count i32 7 is
; emitted as the immediate form "vpslld $7" in both the AVX and AVX512VL configurations.
920define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
921; AVX-LABEL: test_x86_sse2_pslli_d:
922; AVX:       ## BB#0:
923; AVX-NEXT:    vpslld $7, %xmm0, %xmm0
924; AVX-NEXT:    retl
925;
926; AVX512VL-LABEL: test_x86_sse2_pslli_d:
927; AVX512VL:       ## BB#0:
928; AVX512VL-NEXT:    vpslld $7, %xmm0, %xmm0
929; AVX512VL-NEXT:    retl
930  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
931  ret <4 x i32> %res
932}
933declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
934
935
; Verifies that @llvm.x86.sse2.pslli.q called with the constant count i32 7 is
; emitted as the immediate form "vpsllq $7" in both the AVX and AVX512VL configurations.
936define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
937; AVX-LABEL: test_x86_sse2_pslli_q:
938; AVX:       ## BB#0:
939; AVX-NEXT:    vpsllq $7, %xmm0, %xmm0
940; AVX-NEXT:    retl
941;
942; AVX512VL-LABEL: test_x86_sse2_pslli_q:
943; AVX512VL:       ## BB#0:
944; AVX512VL-NEXT:    vpsllq $7, %xmm0, %xmm0
945; AVX512VL-NEXT:    retl
946  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
947  ret <2 x i64> %res
948}
949declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
950
951
; Verifies that @llvm.x86.sse2.pslli.w called with the constant count i32 7 is
; emitted as the immediate form "vpsllw $7" in both the AVX and AVX512VL configurations.
952define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
953; AVX-LABEL: test_x86_sse2_pslli_w:
954; AVX:       ## BB#0:
955; AVX-NEXT:    vpsllw $7, %xmm0, %xmm0
956; AVX-NEXT:    retl
957;
958; AVX512VL-LABEL: test_x86_sse2_pslli_w:
959; AVX512VL:       ## BB#0:
960; AVX512VL-NEXT:    vpsllw $7, %xmm0, %xmm0
961; AVX512VL-NEXT:    retl
962  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
963  ret <8 x i16> %res
964}
965declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
966
967
; Verifies that @llvm.x86.sse2.psra.d with a vector shift-count operand (%a1) is
; emitted as the register form of vpsrad in both the AVX and AVX512VL configurations.
968define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
969; AVX-LABEL: test_x86_sse2_psra_d:
970; AVX:       ## BB#0:
971; AVX-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
972; AVX-NEXT:    retl
973;
974; AVX512VL-LABEL: test_x86_sse2_psra_d:
975; AVX512VL:       ## BB#0:
976; AVX512VL-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
977; AVX512VL-NEXT:    retl
978  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
979  ret <4 x i32> %res
980}
981declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
982
983
; Verifies that @llvm.x86.sse2.psra.w with a vector shift-count operand (%a1) is
; emitted as the register form of vpsraw in both the AVX and AVX512VL configurations.
984define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
985; AVX-LABEL: test_x86_sse2_psra_w:
986; AVX:       ## BB#0:
987; AVX-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
988; AVX-NEXT:    retl
989;
990; AVX512VL-LABEL: test_x86_sse2_psra_w:
991; AVX512VL:       ## BB#0:
992; AVX512VL-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
993; AVX512VL-NEXT:    retl
994  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
995  ret <8 x i16> %res
996}
997declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
998
999
; Verifies that @llvm.x86.sse2.psrai.d called with the constant count i32 7 is
; emitted as the immediate form "vpsrad $7" in both the AVX and AVX512VL configurations.
1000define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
1001; AVX-LABEL: test_x86_sse2_psrai_d:
1002; AVX:       ## BB#0:
1003; AVX-NEXT:    vpsrad $7, %xmm0, %xmm0
1004; AVX-NEXT:    retl
1005;
1006; AVX512VL-LABEL: test_x86_sse2_psrai_d:
1007; AVX512VL:       ## BB#0:
1008; AVX512VL-NEXT:    vpsrad $7, %xmm0, %xmm0
1009; AVX512VL-NEXT:    retl
1010  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
1011  ret <4 x i32> %res
1012}
1013declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
1014
1015
; Verifies that @llvm.x86.sse2.psrai.w called with the constant count i32 7 is
; emitted as the immediate form "vpsraw $7" in both the AVX and AVX512VL configurations.
1016define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
1017; AVX-LABEL: test_x86_sse2_psrai_w:
1018; AVX:       ## BB#0:
1019; AVX-NEXT:    vpsraw $7, %xmm0, %xmm0
1020; AVX-NEXT:    retl
1021;
1022; AVX512VL-LABEL: test_x86_sse2_psrai_w:
1023; AVX512VL:       ## BB#0:
1024; AVX512VL-NEXT:    vpsraw $7, %xmm0, %xmm0
1025; AVX512VL-NEXT:    retl
1026  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
1027  ret <8 x i16> %res
1028}
1029declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
1030
1031
; Verifies that @llvm.x86.sse2.psrl.d with a vector shift-count operand (%a1) is
; emitted as the register form of vpsrld in both the AVX and AVX512VL configurations.
1032define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
1033; AVX-LABEL: test_x86_sse2_psrl_d:
1034; AVX:       ## BB#0:
1035; AVX-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
1036; AVX-NEXT:    retl
1037;
1038; AVX512VL-LABEL: test_x86_sse2_psrl_d:
1039; AVX512VL:       ## BB#0:
1040; AVX512VL-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
1041; AVX512VL-NEXT:    retl
1042  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1043  ret <4 x i32> %res
1044}
1045declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
1046
1047
; Verifies that @llvm.x86.sse2.psrl.q with a vector shift-count operand (%a1) is
; emitted as the register form of vpsrlq in both the AVX and AVX512VL configurations.
1048define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
1049; AVX-LABEL: test_x86_sse2_psrl_q:
1050; AVX:       ## BB#0:
1051; AVX-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
1052; AVX-NEXT:    retl
1053;
1054; AVX512VL-LABEL: test_x86_sse2_psrl_q:
1055; AVX512VL:       ## BB#0:
1056; AVX512VL-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
1057; AVX512VL-NEXT:    retl
1058  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
1059  ret <2 x i64> %res
1060}
1061declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
1062
1063
; Verifies that @llvm.x86.sse2.psrl.w with a vector shift-count operand (%a1) is
; emitted as the register form of vpsrlw in both the AVX and AVX512VL configurations.
1064define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
1065; AVX-LABEL: test_x86_sse2_psrl_w:
1066; AVX:       ## BB#0:
1067; AVX-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
1068; AVX-NEXT:    retl
1069;
1070; AVX512VL-LABEL: test_x86_sse2_psrl_w:
1071; AVX512VL:       ## BB#0:
1072; AVX512VL-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
1073; AVX512VL-NEXT:    retl
1074  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1075  ret <8 x i16> %res
1076}
1077declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
1078
1079
; Verifies that @llvm.x86.sse2.psrli.d called with the constant count i32 7 is
; emitted as the immediate form "vpsrld $7" in both the AVX and AVX512VL configurations.
1080define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
1081; AVX-LABEL: test_x86_sse2_psrli_d:
1082; AVX:       ## BB#0:
1083; AVX-NEXT:    vpsrld $7, %xmm0, %xmm0
1084; AVX-NEXT:    retl
1085;
1086; AVX512VL-LABEL: test_x86_sse2_psrli_d:
1087; AVX512VL:       ## BB#0:
1088; AVX512VL-NEXT:    vpsrld $7, %xmm0, %xmm0
1089; AVX512VL-NEXT:    retl
1090  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
1091  ret <4 x i32> %res
1092}
1093declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
1094
1095
; Verifies that @llvm.x86.sse2.psrli.q called with the constant count i32 7 is
; emitted as the immediate form "vpsrlq $7" in both the AVX and AVX512VL configurations.
1096define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
1097; AVX-LABEL: test_x86_sse2_psrli_q:
1098; AVX:       ## BB#0:
1099; AVX-NEXT:    vpsrlq $7, %xmm0, %xmm0
1100; AVX-NEXT:    retl
1101;
1102; AVX512VL-LABEL: test_x86_sse2_psrli_q:
1103; AVX512VL:       ## BB#0:
1104; AVX512VL-NEXT:    vpsrlq $7, %xmm0, %xmm0
1105; AVX512VL-NEXT:    retl
1106  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
1107  ret <2 x i64> %res
1108}
1109declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
1110
1111
; Verifies that @llvm.x86.sse2.psrli.w called with the constant count i32 7 is
; emitted as the immediate form "vpsrlw $7" in both the AVX and AVX512VL configurations.
1112define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
1113; AVX-LABEL: test_x86_sse2_psrli_w:
1114; AVX:       ## BB#0:
1115; AVX-NEXT:    vpsrlw $7, %xmm0, %xmm0
1116; AVX-NEXT:    retl
1117;
1118; AVX512VL-LABEL: test_x86_sse2_psrli_w:
1119; AVX512VL:       ## BB#0:
1120; AVX512VL-NEXT:    vpsrlw $7, %xmm0, %xmm0
1121; AVX512VL-NEXT:    retl
1122  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
1123  ret <8 x i16> %res
1124}
1125declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
1126
1127
; Verifies that @llvm.x86.sse2.psubs.b on two <16 x i8> operands is emitted as a
; single vpsubsb in both the AVX and AVX512VL configurations.
1128define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
1129; AVX-LABEL: test_x86_sse2_psubs_b:
1130; AVX:       ## BB#0:
1131; AVX-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0
1132; AVX-NEXT:    retl
1133;
1134; AVX512VL-LABEL: test_x86_sse2_psubs_b:
1135; AVX512VL:       ## BB#0:
1136; AVX512VL-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0
1137; AVX512VL-NEXT:    retl
1138  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
1139  ret <16 x i8> %res
1140}
1141declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
1142
1143
; Verifies that @llvm.x86.sse2.psubs.w on two <8 x i16> operands is emitted as a
; single vpsubsw in both the AVX and AVX512VL configurations.
1144define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
1145; AVX-LABEL: test_x86_sse2_psubs_w:
1146; AVX:       ## BB#0:
1147; AVX-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0
1148; AVX-NEXT:    retl
1149;
1150; AVX512VL-LABEL: test_x86_sse2_psubs_w:
1151; AVX512VL:       ## BB#0:
1152; AVX512VL-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0
1153; AVX512VL-NEXT:    retl
1154  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1155  ret <8 x i16> %res
1156}
1157declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
1158
1159
; Verifies that @llvm.x86.sse2.psubus.b on two <16 x i8> operands is emitted as a
; single vpsubusb in both the AVX and AVX512VL configurations.
1160define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
1161; AVX-LABEL: test_x86_sse2_psubus_b:
1162; AVX:       ## BB#0:
1163; AVX-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0
1164; AVX-NEXT:    retl
1165;
1166; AVX512VL-LABEL: test_x86_sse2_psubus_b:
1167; AVX512VL:       ## BB#0:
1168; AVX512VL-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0
1169; AVX512VL-NEXT:    retl
1170  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
1171  ret <16 x i8> %res
1172}
1173declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
1174
1175
; Verifies that @llvm.x86.sse2.psubus.w on two <8 x i16> operands is emitted as a
; single vpsubusw in both the AVX and AVX512VL configurations.
1176define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
1177; AVX-LABEL: test_x86_sse2_psubus_w:
1178; AVX:       ## BB#0:
1179; AVX-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0
1180; AVX-NEXT:    retl
1181;
1182; AVX512VL-LABEL: test_x86_sse2_psubus_w:
1183; AVX512VL:       ## BB#0:
1184; AVX512VL-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0
1185; AVX512VL-NEXT:    retl
1186  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1187  ret <8 x i16> %res
1188}
1189declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
1190
1191
; Verifies that @llvm.x86.sse2.sqrt.pd on a single <2 x double> operand is emitted
; as a two-operand vsqrtpd in both the AVX and AVX512VL configurations.
1192define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
1193; AVX-LABEL: test_x86_sse2_sqrt_pd:
1194; AVX:       ## BB#0:
1195; AVX-NEXT:    vsqrtpd %xmm0, %xmm0
1196; AVX-NEXT:    retl
1197;
1198; AVX512VL-LABEL: test_x86_sse2_sqrt_pd:
1199; AVX512VL:       ## BB#0:
1200; AVX512VL-NEXT:    vsqrtpd %xmm0, %xmm0
1201; AVX512VL-NEXT:    retl
1202  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
1203  ret <2 x double> %res
1204}
1205declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
1206
1207
; Verifies that @llvm.x86.sse2.sqrt.sd on a single <2 x double> operand is emitted
; as the three-operand vsqrtsd with %xmm0 used for every operand, in both the AVX
; and AVX512VL configurations.
1208define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
1209; AVX-LABEL: test_x86_sse2_sqrt_sd:
1210; AVX:       ## BB#0:
1211; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
1212; AVX-NEXT:    retl
1213;
1214; AVX512VL-LABEL: test_x86_sse2_sqrt_sd:
1215; AVX512VL:       ## BB#0:
1216; AVX512VL-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
1217; AVX512VL-NEXT:    retl
1218  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
1219  ret <2 x double> %res
1220}
1221declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
1222
1223
; Verifies that @llvm.x86.sse2.sub.sd on two <2 x double> operands is emitted as a
; single vsubsd in both the AVX and AVX512VL configurations.
1224define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
1225; AVX-LABEL: test_x86_sse2_sub_sd:
1226; AVX:       ## BB#0:
1227; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
1228; AVX-NEXT:    retl
1229;
1230; AVX512VL-LABEL: test_x86_sse2_sub_sd:
1231; AVX512VL:       ## BB#0:
1232; AVX512VL-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
1233; AVX512VL-NEXT:    retl
1234  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
1235  ret <2 x double> %res
1236}
1237declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
1238
1239
; Verifies the code expected for @llvm.x86.sse2.ucomieq.sd (i32 result): a vucomisd
; followed by setnp and sete whose results are combined with andb and then
; zero-extended into %eax. Both the AVX and AVX512VL configurations expect the
; same sequence.
1240define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
1241; AVX-LABEL: test_x86_sse2_ucomieq_sd:
1242; AVX:       ## BB#0:
1243; AVX-NEXT:    vucomisd %xmm1, %xmm0
1244; AVX-NEXT:    setnp %al
1245; AVX-NEXT:    sete %cl
1246; AVX-NEXT:    andb %al, %cl
1247; AVX-NEXT:    movzbl %cl, %eax
1248; AVX-NEXT:    retl
1249;
1250; AVX512VL-LABEL: test_x86_sse2_ucomieq_sd:
1251; AVX512VL:       ## BB#0:
1252; AVX512VL-NEXT:    vucomisd %xmm1, %xmm0
1253; AVX512VL-NEXT:    setnp %al
1254; AVX512VL-NEXT:    sete %cl
1255; AVX512VL-NEXT:    andb %al, %cl
1256; AVX512VL-NEXT:    movzbl %cl, %eax
1257; AVX512VL-NEXT:    retl
1258  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
1259  ret i32 %res
1260}
1261declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
1262
1263
; Verifies the code expected for @llvm.x86.sse2.ucomige.sd (i32 result): %eax is
; cleared with xorl before "vucomisd %xmm1, %xmm0", and setae then writes %al.
; Both the AVX and AVX512VL configurations expect the same sequence.
1264define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
1265; AVX-LABEL: test_x86_sse2_ucomige_sd:
1266; AVX:       ## BB#0:
1267; AVX-NEXT:    xorl %eax, %eax
1268; AVX-NEXT:    vucomisd %xmm1, %xmm0
1269; AVX-NEXT:    setae %al
1270; AVX-NEXT:    retl
1271;
1272; AVX512VL-LABEL: test_x86_sse2_ucomige_sd:
1273; AVX512VL:       ## BB#0:
1274; AVX512VL-NEXT:    xorl %eax, %eax
1275; AVX512VL-NEXT:    vucomisd %xmm1, %xmm0
1276; AVX512VL-NEXT:    setae %al
1277; AVX512VL-NEXT:    retl
1278  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
1279  ret i32 %res
1280}
1281declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
1282
1283
; Verifies the code expected for @llvm.x86.sse2.ucomigt.sd (i32 result): %eax is
; cleared with xorl before "vucomisd %xmm1, %xmm0", and seta then writes %al.
; Both the AVX and AVX512VL configurations expect the same sequence.
1284define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
1285; AVX-LABEL: test_x86_sse2_ucomigt_sd:
1286; AVX:       ## BB#0:
1287; AVX-NEXT:    xorl %eax, %eax
1288; AVX-NEXT:    vucomisd %xmm1, %xmm0
1289; AVX-NEXT:    seta %al
1290; AVX-NEXT:    retl
1291;
1292; AVX512VL-LABEL: test_x86_sse2_ucomigt_sd:
1293; AVX512VL:       ## BB#0:
1294; AVX512VL-NEXT:    xorl %eax, %eax
1295; AVX512VL-NEXT:    vucomisd %xmm1, %xmm0
1296; AVX512VL-NEXT:    seta %al
1297; AVX512VL-NEXT:    retl
1298  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
1299  ret i32 %res
1300}
1301declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
1302
1303
; Verifies the code expected for @llvm.x86.sse2.ucomile.sd (i32 result): %eax is
; cleared with xorl, the comparison is written as "vucomisd %xmm0, %xmm1" (note
; the register order in the expected output), and setae then writes %al.
; Both the AVX and AVX512VL configurations expect the same sequence.
1304define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
1305; AVX-LABEL: test_x86_sse2_ucomile_sd:
1306; AVX:       ## BB#0:
1307; AVX-NEXT:    xorl %eax, %eax
1308; AVX-NEXT:    vucomisd %xmm0, %xmm1
1309; AVX-NEXT:    setae %al
1310; AVX-NEXT:    retl
1311;
1312; AVX512VL-LABEL: test_x86_sse2_ucomile_sd:
1313; AVX512VL:       ## BB#0:
1314; AVX512VL-NEXT:    xorl %eax, %eax
1315; AVX512VL-NEXT:    vucomisd %xmm0, %xmm1
1316; AVX512VL-NEXT:    setae %al
1317; AVX512VL-NEXT:    retl
1318  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
1319  ret i32 %res
1320}
1321declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
1322
1323
; Verifies the code expected for @llvm.x86.sse2.ucomilt.sd (i32 result): %eax is
; cleared with xorl, the comparison is written as "vucomisd %xmm0, %xmm1" (note
; the register order in the expected output), and seta then writes %al.
; Both the AVX and AVX512VL configurations expect the same sequence.
1324define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
1325; AVX-LABEL: test_x86_sse2_ucomilt_sd:
1326; AVX:       ## BB#0:
1327; AVX-NEXT:    xorl %eax, %eax
1328; AVX-NEXT:    vucomisd %xmm0, %xmm1
1329; AVX-NEXT:    seta %al
1330; AVX-NEXT:    retl
1331;
1332; AVX512VL-LABEL: test_x86_sse2_ucomilt_sd:
1333; AVX512VL:       ## BB#0:
1334; AVX512VL-NEXT:    xorl %eax, %eax
1335; AVX512VL-NEXT:    vucomisd %xmm0, %xmm1
1336; AVX512VL-NEXT:    seta %al
1337; AVX512VL-NEXT:    retl
1338  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
1339  ret i32 %res
1340}
1341declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
1342
1343
; Verifies the code expected for @llvm.x86.sse2.ucomineq.sd (i32 result): a vucomisd
; followed by setp and setne whose results are combined with orb and then
; zero-extended into %eax. Both the AVX and AVX512VL configurations expect the
; same sequence.
1344define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
1345; AVX-LABEL: test_x86_sse2_ucomineq_sd:
1346; AVX:       ## BB#0:
1347; AVX-NEXT:    vucomisd %xmm1, %xmm0
1348; AVX-NEXT:    setp %al
1349; AVX-NEXT:    setne %cl
1350; AVX-NEXT:    orb %al, %cl
1351; AVX-NEXT:    movzbl %cl, %eax
1352; AVX-NEXT:    retl
1353;
1354; AVX512VL-LABEL: test_x86_sse2_ucomineq_sd:
1355; AVX512VL:       ## BB#0:
1356; AVX512VL-NEXT:    vucomisd %xmm1, %xmm0
1357; AVX512VL-NEXT:    setp %al
1358; AVX512VL-NEXT:    setne %cl
1359; AVX512VL-NEXT:    orb %al, %cl
1360; AVX512VL-NEXT:    movzbl %cl, %eax
1361; AVX512VL-NEXT:    retl
1362  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
1363  ret i32 %res
1364}
1365declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
1366
1367
; Verifies that @llvm.x86.sse3.addsub.pd on two <2 x double> operands is emitted as
; a single vaddsubpd in both the AVX and AVX512VL configurations.
1368define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
1369; AVX-LABEL: test_x86_sse3_addsub_pd:
1370; AVX:       ## BB#0:
1371; AVX-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0
1372; AVX-NEXT:    retl
1373;
1374; AVX512VL-LABEL: test_x86_sse3_addsub_pd:
1375; AVX512VL:       ## BB#0:
1376; AVX512VL-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0
1377; AVX512VL-NEXT:    retl
1378  %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
1379  ret <2 x double> %res
1380}
1381declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
1382
1383
; Verifies that @llvm.x86.sse3.addsub.ps on two <4 x float> operands is emitted as
; a single vaddsubps in both the AVX and AVX512VL configurations.
1384define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
1385; AVX-LABEL: test_x86_sse3_addsub_ps:
1386; AVX:       ## BB#0:
1387; AVX-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0
1388; AVX-NEXT:    retl
1389;
1390; AVX512VL-LABEL: test_x86_sse3_addsub_ps:
1391; AVX512VL:       ## BB#0:
1392; AVX512VL-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0
1393; AVX512VL-NEXT:    retl
1394  %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1395  ret <4 x float> %res
1396}
1397declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
1398
1399
; Verifies that @llvm.x86.sse3.hadd.pd on two <2 x double> operands is emitted as a
; single vhaddpd in both the AVX and AVX512VL configurations.
1400define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
1401; AVX-LABEL: test_x86_sse3_hadd_pd:
1402; AVX:       ## BB#0:
1403; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
1404; AVX-NEXT:    retl
1405;
1406; AVX512VL-LABEL: test_x86_sse3_hadd_pd:
1407; AVX512VL:       ## BB#0:
1408; AVX512VL-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
1409; AVX512VL-NEXT:    retl
1410  %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
1411  ret <2 x double> %res
1412}
1413declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
1414
1415
; Verifies that @llvm.x86.sse3.hadd.ps on two <4 x float> operands is emitted as a
; single vhaddps in both the AVX and AVX512VL configurations.
1416define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
1417; AVX-LABEL: test_x86_sse3_hadd_ps:
1418; AVX:       ## BB#0:
1419; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
1420; AVX-NEXT:    retl
1421;
1422; AVX512VL-LABEL: test_x86_sse3_hadd_ps:
1423; AVX512VL:       ## BB#0:
1424; AVX512VL-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
1425; AVX512VL-NEXT:    retl
1426  %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1427  ret <4 x float> %res
1428}
1429declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
1430
1431
; Verifies that @llvm.x86.sse3.hsub.pd on two <2 x double> operands is emitted as a
; single vhsubpd in both the AVX and AVX512VL configurations.
1432define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
1433; AVX-LABEL: test_x86_sse3_hsub_pd:
1434; AVX:       ## BB#0:
1435; AVX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
1436; AVX-NEXT:    retl
1437;
1438; AVX512VL-LABEL: test_x86_sse3_hsub_pd:
1439; AVX512VL:       ## BB#0:
1440; AVX512VL-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
1441; AVX512VL-NEXT:    retl
1442  %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
1443  ret <2 x double> %res
1444}
1445declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
1446
1447
; Verifies that @llvm.x86.sse3.hsub.ps on two <4 x float> operands is emitted as a
; single vhsubps in both the AVX and AVX512VL configurations.
1448define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
1449; AVX-LABEL: test_x86_sse3_hsub_ps:
1450; AVX:       ## BB#0:
1451; AVX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
1452; AVX-NEXT:    retl
1453;
1454; AVX512VL-LABEL: test_x86_sse3_hsub_ps:
1455; AVX512VL:       ## BB#0:
1456; AVX512VL-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
1457; AVX512VL-NEXT:    retl
1458  %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1459  ret <4 x float> %res
1460}
1461declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
1462
1463
; Verifies the code expected for @llvm.x86.sse3.ldu.dq, which takes an i8* argument:
; the pointer is first loaded from an %esp-relative slot (the offset is matched by
; a regex, so any decimal value is accepted) into %eax, then "vlddqu (%eax)" reads
; through it. Unlike most intrinsics in this file, this one is declared readonly
; rather than readnone. Both the AVX and AVX512VL configurations expect the same
; sequence.
1464define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
1465; AVX-LABEL: test_x86_sse3_ldu_dq:
1466; AVX:       ## BB#0:
1467; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
1468; AVX-NEXT:    vlddqu (%eax), %xmm0
1469; AVX-NEXT:    retl
1470;
1471; AVX512VL-LABEL: test_x86_sse3_ldu_dq:
1472; AVX512VL:       ## BB#0:
1473; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
1474; AVX512VL-NEXT:    vlddqu (%eax), %xmm0
1475; AVX512VL-NEXT:    retl
1476  %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
1477  ret <16 x i8> %res
1478}
1479declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
1480
1481
; Verifies that @llvm.x86.sse41.blendvpd with three <2 x double> operands is emitted
; as a single four-operand vblendvpd, with the third argument (%xmm2) listed first,
; in both the AVX and AVX512VL configurations.
1482define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
1483; AVX-LABEL: test_x86_sse41_blendvpd:
1484; AVX:       ## BB#0:
1485; AVX-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
1486; AVX-NEXT:    retl
1487;
1488; AVX512VL-LABEL: test_x86_sse41_blendvpd:
1489; AVX512VL:       ## BB#0:
1490; AVX512VL-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
1491; AVX512VL-NEXT:    retl
1492  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
1493  ret <2 x double> %res
1494}
1495declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
1496
1497
; Verifies that @llvm.x86.sse41.blendvps with three <4 x float> operands is emitted
; as a single four-operand vblendvps, with the third argument (%xmm2) listed first,
; in both the AVX and AVX512VL configurations.
1498define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
1499; AVX-LABEL: test_x86_sse41_blendvps:
1500; AVX:       ## BB#0:
1501; AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
1502; AVX-NEXT:    retl
1503;
1504; AVX512VL-LABEL: test_x86_sse41_blendvps:
1505; AVX512VL:       ## BB#0:
1506; AVX512VL-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
1507; AVX512VL-NEXT:    retl
1508  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
1509  ret <4 x float> %res
1510}
1511declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
1512
1513
; Verifies that @llvm.x86.sse41.dppd called with the constant i8 7 is emitted as
; "vdppd $7", carrying the constant through as the instruction immediate, in both
; the AVX and AVX512VL configurations.
1514define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
1515; AVX-LABEL: test_x86_sse41_dppd:
1516; AVX:       ## BB#0:
1517; AVX-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0
1518; AVX-NEXT:    retl
1519;
1520; AVX512VL-LABEL: test_x86_sse41_dppd:
1521; AVX512VL:       ## BB#0:
1522; AVX512VL-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0
1523; AVX512VL-NEXT:    retl
1524  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
1525  ret <2 x double> %res
1526}
1527declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
1528
1529
; Verifies that @llvm.x86.sse41.dpps called with the constant i8 7 is emitted as
; "vdpps $7", carrying the constant through as the instruction immediate, in both
; the AVX and AVX512VL configurations.
1530define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
1531; AVX-LABEL: test_x86_sse41_dpps:
1532; AVX:       ## BB#0:
1533; AVX-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0
1534; AVX-NEXT:    retl
1535;
1536; AVX512VL-LABEL: test_x86_sse41_dpps:
1537; AVX512VL:       ## BB#0:
1538; AVX512VL-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0
1539; AVX512VL-NEXT:    retl
1540  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
1541  ret <4 x float> %res
1542}
1543declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
1544
1545
; Verifies the code expected for @llvm.x86.sse41.insertps called with the constant
; i8 21. The check does not match the raw immediate; it skips up to the assembly
; comment marker and matches the printed shuffle description
; "xmm0 = zero,xmm1[0],zero,xmm0[3]" instead. Both the AVX and AVX512VL
; configurations expect the same output.
1546define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
1547; AVX-LABEL: test_x86_sse41_insertps:
1548; AVX:       ## BB#0:
1549; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],zero,xmm0[3]
1550; AVX-NEXT:    retl
1551;
1552; AVX512VL-LABEL: test_x86_sse41_insertps:
1553; AVX512VL:       ## BB#0:
1554; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],zero,xmm0[3]
1555; AVX512VL-NEXT:    retl
1556  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 21) ; <<4 x float>> [#uses=1]
1557  ret <4 x float> %res
1558}
1559declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
1560
1561
1562
; Verifies that @llvm.x86.sse41.mpsadbw (two <16 x i8> inputs, <8 x i16> result)
; called with the constant i8 7 is emitted as "vmpsadbw $7" in both the AVX and
; AVX512VL configurations.
1563define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
1564; AVX-LABEL: test_x86_sse41_mpsadbw:
1565; AVX:       ## BB#0:
1566; AVX-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0
1567; AVX-NEXT:    retl
1568;
1569; AVX512VL-LABEL: test_x86_sse41_mpsadbw:
1570; AVX512VL:       ## BB#0:
1571; AVX512VL-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0
1572; AVX512VL-NEXT:    retl
1573  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
1574  ret <8 x i16> %res
1575}
1576declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
1577
1578
; Verifies that @llvm.x86.sse41.packusdw (two <4 x i32> inputs, <8 x i16> result) is
; emitted as a single vpackusdw in both the AVX and AVX512VL configurations.
1579define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
1580; AVX-LABEL: test_x86_sse41_packusdw:
1581; AVX:       ## BB#0:
1582; AVX-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
1583; AVX-NEXT:    retl
1584;
1585; AVX512VL-LABEL: test_x86_sse41_packusdw:
1586; AVX512VL:       ## BB#0:
1587; AVX512VL-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
1588; AVX512VL-NEXT:    retl
1589  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
1590  ret <8 x i16> %res
1591}
1592declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
1593
1594
; Verifies that @llvm.x86.sse41.pblendvb with three <16 x i8> operands is emitted as
; a single four-operand vpblendvb, with the third argument (%xmm2) listed first,
; in both the AVX and AVX512VL configurations.
1595define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
1596; AVX-LABEL: test_x86_sse41_pblendvb:
1597; AVX:       ## BB#0:
1598; AVX-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
1599; AVX-NEXT:    retl
1600;
1601; AVX512VL-LABEL: test_x86_sse41_pblendvb:
1602; AVX512VL:       ## BB#0:
1603; AVX512VL-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
1604; AVX512VL-NEXT:    retl
1605  %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
1606  ret <16 x i8> %res
1607}
1608declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
1609
1610
; Verifies that @llvm.x86.sse41.phminposuw on a single <8 x i16> operand is emitted
; as a two-operand vphminposuw in both the AVX and AVX512VL configurations.
1611define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
1612; AVX-LABEL: test_x86_sse41_phminposuw:
1613; AVX:       ## BB#0:
1614; AVX-NEXT:    vphminposuw %xmm0, %xmm0
1615; AVX-NEXT:    retl
1616;
1617; AVX512VL-LABEL: test_x86_sse41_phminposuw:
1618; AVX512VL:       ## BB#0:
1619; AVX512VL-NEXT:    vphminposuw %xmm0, %xmm0
1620; AVX512VL-NEXT:    retl
1621  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
1622  ret <8 x i16> %res
1623}
1624declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
1625
1626
; Verifies that @llvm.x86.sse41.pmaxsb on two <16 x i8> operands is emitted as a
; single vpmaxsb in both the AVX and AVX512VL configurations.
1627define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
1628; AVX-LABEL: test_x86_sse41_pmaxsb:
1629; AVX:       ## BB#0:
1630; AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
1631; AVX-NEXT:    retl
1632;
1633; AVX512VL-LABEL: test_x86_sse41_pmaxsb:
1634; AVX512VL:       ## BB#0:
1635; AVX512VL-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
1636; AVX512VL-NEXT:    retl
1637  %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
1638  ret <16 x i8> %res
1639}
1640declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
1641
1642
; Verifies that @llvm.x86.sse41.pmaxsd on two <4 x i32> operands is emitted as a
; single vpmaxsd in both the AVX and AVX512VL configurations.
1643define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
1644; AVX-LABEL: test_x86_sse41_pmaxsd:
1645; AVX:       ## BB#0:
1646; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
1647; AVX-NEXT:    retl
1648;
1649; AVX512VL-LABEL: test_x86_sse41_pmaxsd:
1650; AVX512VL:       ## BB#0:
1651; AVX512VL-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
1652; AVX512VL-NEXT:    retl
1653  %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1654  ret <4 x i32> %res
1655}
1656declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
1657
1658
; Verifies that @llvm.x86.sse41.pmaxud on two <4 x i32> operands is emitted as a
; single vpmaxud in both the AVX and AVX512VL configurations.
1659define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
1660; AVX-LABEL: test_x86_sse41_pmaxud:
1661; AVX:       ## BB#0:
1662; AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
1663; AVX-NEXT:    retl
1664;
1665; AVX512VL-LABEL: test_x86_sse41_pmaxud:
1666; AVX512VL:       ## BB#0:
1667; AVX512VL-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
1668; AVX512VL-NEXT:    retl
1669  %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1670  ret <4 x i32> %res
1671}
1672declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
1673
1674
; Verifies that @llvm.x86.sse41.pmaxuw on two <8 x i16> operands is emitted as a
; single vpmaxuw in both the AVX and AVX512VL configurations.
1675define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
1676; AVX-LABEL: test_x86_sse41_pmaxuw:
1677; AVX:       ## BB#0:
1678; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
1679; AVX-NEXT:    retl
1680;
1681; AVX512VL-LABEL: test_x86_sse41_pmaxuw:
1682; AVX512VL:       ## BB#0:
1683; AVX512VL-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
1684; AVX512VL-NEXT:    retl
1685  %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1686  ret <8 x i16> %res
1687}
1688declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
1689
1690
; Verifies that @llvm.x86.sse41.pminsb on two <16 x i8> operands is emitted as a
; single vpminsb in both the AVX and AVX512VL configurations.
1691define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
1692; AVX-LABEL: test_x86_sse41_pminsb:
1693; AVX:       ## BB#0:
1694; AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
1695; AVX-NEXT:    retl
1696;
1697; AVX512VL-LABEL: test_x86_sse41_pminsb:
1698; AVX512VL:       ## BB#0:
1699; AVX512VL-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
1700; AVX512VL-NEXT:    retl
1701  %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
1702  ret <16 x i8> %res
1703}
1704declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
1705
1706
; Verifies that @llvm.x86.sse41.pminsd on two <4 x i32> operands is emitted as a
; single vpminsd in both the AVX and AVX512VL configurations.
1707define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
1708; AVX-LABEL: test_x86_sse41_pminsd:
1709; AVX:       ## BB#0:
1710; AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
1711; AVX-NEXT:    retl
1712;
1713; AVX512VL-LABEL: test_x86_sse41_pminsd:
1714; AVX512VL:       ## BB#0:
1715; AVX512VL-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
1716; AVX512VL-NEXT:    retl
1717  %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1718  ret <4 x i32> %res
1719}
1720declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
1721
1722
; Calls @llvm.x86.sse41.pminud on two <4 x i32> arguments and returns the
; result. Both RUN configurations expect a single vpminud followed by retl.
1723define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
1724; AVX-LABEL: test_x86_sse41_pminud:
1725; AVX:       ## BB#0:
1726; AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
1727; AVX-NEXT:    retl
1728;
1729; AVX512VL-LABEL: test_x86_sse41_pminud:
1730; AVX512VL:       ## BB#0:
1731; AVX512VL-NEXT:    vpminud %xmm1, %xmm0, %xmm0
1732; AVX512VL-NEXT:    retl
1733  %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1734  ret <4 x i32> %res
1735}
1736declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
1737
1738
; Calls @llvm.x86.sse41.pminuw on two <8 x i16> arguments and returns the
; result. Both RUN configurations expect a single vpminuw followed by retl.
1739define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
1740; AVX-LABEL: test_x86_sse41_pminuw:
1741; AVX:       ## BB#0:
1742; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
1743; AVX-NEXT:    retl
1744;
1745; AVX512VL-LABEL: test_x86_sse41_pminuw:
1746; AVX512VL:       ## BB#0:
1747; AVX512VL-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
1748; AVX512VL-NEXT:    retl
1749  %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1750  ret <8 x i16> %res
1751}
1752declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
1753
1754
; Calls @llvm.x86.sse41.pmuldq, which takes two <4 x i32> arguments and
; yields a <2 x i64> result. Both RUN configurations expect a single vpmuldq
; followed by retl.
1755define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
1756; AVX-LABEL: test_x86_sse41_pmuldq:
1757; AVX:       ## BB#0:
1758; AVX-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0
1759; AVX-NEXT:    retl
1760;
1761; AVX512VL-LABEL: test_x86_sse41_pmuldq:
1762; AVX512VL:       ## BB#0:
1763; AVX512VL-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0
1764; AVX512VL-NEXT:    retl
1765  %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
1766  ret <2 x i64> %res
1767}
1768declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
1769
1770
; Calls @llvm.x86.sse41.ptestc on two <2 x i64> arguments and returns the i32
; result. Both RUN configurations expect vptest, with the i32 then produced
; by the sbbl %eax, %eax / andl $1, %eax pair.
1771define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
1772; AVX-LABEL: test_x86_sse41_ptestc:
1773; AVX:       ## BB#0:
1774; AVX-NEXT:    vptest %xmm1, %xmm0
1775; AVX-NEXT:    sbbl %eax, %eax
1776; AVX-NEXT:    andl $1, %eax
1777; AVX-NEXT:    retl
1778;
1779; AVX512VL-LABEL: test_x86_sse41_ptestc:
1780; AVX512VL:       ## BB#0:
1781; AVX512VL-NEXT:    vptest %xmm1, %xmm0
1782; AVX512VL-NEXT:    sbbl %eax, %eax
1783; AVX512VL-NEXT:    andl $1, %eax
1784; AVX512VL-NEXT:    retl
1785  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
1786  ret i32 %res
1787}
1788declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
1789
1790
; Calls @llvm.x86.sse41.ptestnzc on two <2 x i64> arguments and returns the
; i32 result. Both RUN configurations expect %eax to be zeroed first, then
; vptest, then seta %al to produce the result.
1791define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
1792; AVX-LABEL: test_x86_sse41_ptestnzc:
1793; AVX:       ## BB#0:
1794; AVX-NEXT:    xorl %eax, %eax
1795; AVX-NEXT:    vptest %xmm1, %xmm0
1796; AVX-NEXT:    seta %al
1797; AVX-NEXT:    retl
1798;
1799; AVX512VL-LABEL: test_x86_sse41_ptestnzc:
1800; AVX512VL:       ## BB#0:
1801; AVX512VL-NEXT:    xorl %eax, %eax
1802; AVX512VL-NEXT:    vptest %xmm1, %xmm0
1803; AVX512VL-NEXT:    seta %al
1804; AVX512VL-NEXT:    retl
1805  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
1806  ret i32 %res
1807}
1808declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
1809
1810
; Calls @llvm.x86.sse41.ptestz on two <2 x i64> arguments and returns the i32
; result. Both RUN configurations expect %eax to be zeroed first, then
; vptest, then sete %al to produce the result.
1811define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
1812; AVX-LABEL: test_x86_sse41_ptestz:
1813; AVX:       ## BB#0:
1814; AVX-NEXT:    xorl %eax, %eax
1815; AVX-NEXT:    vptest %xmm1, %xmm0
1816; AVX-NEXT:    sete %al
1817; AVX-NEXT:    retl
1818;
1819; AVX512VL-LABEL: test_x86_sse41_ptestz:
1820; AVX512VL:       ## BB#0:
1821; AVX512VL-NEXT:    xorl %eax, %eax
1822; AVX512VL-NEXT:    vptest %xmm1, %xmm0
1823; AVX512VL-NEXT:    sete %al
1824; AVX512VL-NEXT:    retl
1825  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
1826  ret i32 %res
1827}
1828declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
1829
1830
; Calls @llvm.x86.sse41.round.pd on a <2 x double> argument with the constant
; i32 7. Both RUN configurations expect the constant to appear as the $7
; immediate of a single vroundpd.
1831define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
1832; AVX-LABEL: test_x86_sse41_round_pd:
1833; AVX:       ## BB#0:
1834; AVX-NEXT:    vroundpd $7, %xmm0, %xmm0
1835; AVX-NEXT:    retl
1836;
1837; AVX512VL-LABEL: test_x86_sse41_round_pd:
1838; AVX512VL:       ## BB#0:
1839; AVX512VL-NEXT:    vroundpd $7, %xmm0, %xmm0
1840; AVX512VL-NEXT:    retl
1841  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
1842  ret <2 x double> %res
1843}
1844declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
1845
1846
; Calls @llvm.x86.sse41.round.ps on a <4 x float> argument with the constant
; i32 7. Both RUN configurations expect the constant to appear as the $7
; immediate of a single vroundps.
1847define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
1848; AVX-LABEL: test_x86_sse41_round_ps:
1849; AVX:       ## BB#0:
1850; AVX-NEXT:    vroundps $7, %xmm0, %xmm0
1851; AVX-NEXT:    retl
1852;
1853; AVX512VL-LABEL: test_x86_sse41_round_ps:
1854; AVX512VL:       ## BB#0:
1855; AVX512VL-NEXT:    vroundps $7, %xmm0, %xmm0
1856; AVX512VL-NEXT:    retl
1857  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
1858  ret <4 x float> %res
1859}
1860declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
1861
1862
; Calls @llvm.x86.sse41.round.sd on two <2 x double> arguments with the
; constant i32 7. Both RUN configurations expect a single three-operand
; vroundsd carrying the $7 immediate.
1863define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
1864; AVX-LABEL: test_x86_sse41_round_sd:
1865; AVX:       ## BB#0:
1866; AVX-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm0
1867; AVX-NEXT:    retl
1868;
1869; AVX512VL-LABEL: test_x86_sse41_round_sd:
1870; AVX512VL:       ## BB#0:
1871; AVX512VL-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm0
1872; AVX512VL-NEXT:    retl
1873  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
1874  ret <2 x double> %res
1875}
1876declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
1877
1878
; Calls @llvm.x86.sse41.round.ss on two <4 x float> arguments with the
; constant i32 7. Both RUN configurations expect a single three-operand
; vroundss carrying the $7 immediate.
1879define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
1880; AVX-LABEL: test_x86_sse41_round_ss:
1881; AVX:       ## BB#0:
1882; AVX-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm0
1883; AVX-NEXT:    retl
1884;
1885; AVX512VL-LABEL: test_x86_sse41_round_ss:
1886; AVX512VL:       ## BB#0:
1887; AVX512VL-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm0
1888; AVX512VL-NEXT:    retl
1889  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
1890  ret <4 x float> %res
1891}
1892declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
1893
1894
; Calls @llvm.x86.sse42.pcmpestri128 with both i32 operands set to 7 and an
; i8 control of 7. Both RUN configurations expect the two i32 constants to be
; placed in %eax and %edx, vpcmpestri $7 on the vector registers, and the
; returned i32 to be copied out of %ecx.
1895define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
1896; AVX-LABEL: test_x86_sse42_pcmpestri128:
1897; AVX:       ## BB#0:
1898; AVX-NEXT:    movl $7, %eax
1899; AVX-NEXT:    movl $7, %edx
1900; AVX-NEXT:    vpcmpestri $7, %xmm1, %xmm0
1901; AVX-NEXT:    movl %ecx, %eax
1902; AVX-NEXT:    retl
1903;
1904; AVX512VL-LABEL: test_x86_sse42_pcmpestri128:
1905; AVX512VL:       ## BB#0:
1906; AVX512VL-NEXT:    movl $7, %eax
1907; AVX512VL-NEXT:    movl $7, %edx
1908; AVX512VL-NEXT:    vpcmpestri $7, %xmm1, %xmm0
1909; AVX512VL-NEXT:    movl %ecx, %eax
1910; AVX512VL-NEXT:    retl
1911  %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1912  ret i32 %res
1913}
1914declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1915
1916
; Variant of the pcmpestri128 test where both vector operands are loaded
; through pointer arguments. The expected code loads the first vector into
; %xmm0 and uses the second directly as the (%ecx) memory operand of
; vpcmpestri. The two RUN configurations differ only in the load mnemonic
; (vmovdqa versus vmovdqa64). The intrinsic is not declared in this block;
; the declaration is expected elsewhere in the file.
1917define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
1918; AVX-LABEL: test_x86_sse42_pcmpestri128_load:
1919; AVX:       ## BB#0:
1920; AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1921; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
1922; AVX-NEXT:    vmovdqa (%eax), %xmm0
1923; AVX-NEXT:    movl $7, %eax
1924; AVX-NEXT:    movl $7, %edx
1925; AVX-NEXT:    vpcmpestri $7, (%ecx), %xmm0
1926; AVX-NEXT:    movl %ecx, %eax
1927; AVX-NEXT:    retl
1928;
1929; AVX512VL-LABEL: test_x86_sse42_pcmpestri128_load:
1930; AVX512VL:       ## BB#0:
1931; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1932; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
1933; AVX512VL-NEXT:    vmovdqa64 (%eax), %xmm0
1934; AVX512VL-NEXT:    movl $7, %eax
1935; AVX512VL-NEXT:    movl $7, %edx
1936; AVX512VL-NEXT:    vpcmpestri $7, (%ecx), %xmm0
1937; AVX512VL-NEXT:    movl %ecx, %eax
1938; AVX512VL-NEXT:    retl
1939  %1 = load <16 x i8>, <16 x i8>* %a0
1940  %2 = load <16 x i8>, <16 x i8>* %a2
1941  %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1]
1942  ret i32 %res
1943}
1944
1945
; Calls @llvm.x86.sse42.pcmpestria128 with both i32 operands set to 7 and an
; i8 control of 7. Both RUN configurations expect vpcmpestri $7 with the
; result produced by seta into %bl. %ebx is zeroed beforehand and is
; saved/restored with pushl/popl around the sequence. The function is marked
; nounwind.
1946define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) nounwind {
1947; AVX-LABEL: test_x86_sse42_pcmpestria128:
1948; AVX:       ## BB#0:
1949; AVX-NEXT:    pushl %ebx
1950; AVX-NEXT:    movl $7, %eax
1951; AVX-NEXT:    movl $7, %edx
1952; AVX-NEXT:    xorl %ebx, %ebx
1953; AVX-NEXT:    vpcmpestri $7, %xmm1, %xmm0
1954; AVX-NEXT:    seta %bl
1955; AVX-NEXT:    movl %ebx, %eax
1956; AVX-NEXT:    popl %ebx
1957; AVX-NEXT:    retl
1958;
1959; AVX512VL-LABEL: test_x86_sse42_pcmpestria128:
1960; AVX512VL:       ## BB#0:
1961; AVX512VL-NEXT:    pushl %ebx
1962; AVX512VL-NEXT:    movl $7, %eax
1963; AVX512VL-NEXT:    movl $7, %edx
1964; AVX512VL-NEXT:    xorl %ebx, %ebx
1965; AVX512VL-NEXT:    vpcmpestri $7, %xmm1, %xmm0
1966; AVX512VL-NEXT:    seta %bl
1967; AVX512VL-NEXT:    movl %ebx, %eax
1968; AVX512VL-NEXT:    popl %ebx
1969; AVX512VL-NEXT:    retl
1970  %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1971  ret i32 %res
1972}
1973declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1974
1975
; Calls @llvm.x86.sse42.pcmpestric128 with both i32 operands set to 7 and an
; i8 control of 7. Both RUN configurations expect vpcmpestri $7 with the i32
; result produced by the sbbl %eax, %eax / andl $1, %eax pair; no extra
; scratch register is saved in this variant.
1976define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
1977; AVX-LABEL: test_x86_sse42_pcmpestric128:
1978; AVX:       ## BB#0:
1979; AVX-NEXT:    movl $7, %eax
1980; AVX-NEXT:    movl $7, %edx
1981; AVX-NEXT:    vpcmpestri $7, %xmm1, %xmm0
1982; AVX-NEXT:    sbbl %eax, %eax
1983; AVX-NEXT:    andl $1, %eax
1984; AVX-NEXT:    retl
1985;
1986; AVX512VL-LABEL: test_x86_sse42_pcmpestric128:
1987; AVX512VL:       ## BB#0:
1988; AVX512VL-NEXT:    movl $7, %eax
1989; AVX512VL-NEXT:    movl $7, %edx
1990; AVX512VL-NEXT:    vpcmpestri $7, %xmm1, %xmm0
1991; AVX512VL-NEXT:    sbbl %eax, %eax
1992; AVX512VL-NEXT:    andl $1, %eax
1993; AVX512VL-NEXT:    retl
1994  %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1995  ret i32 %res
1996}
1997declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1998
1999
; Calls @llvm.x86.sse42.pcmpestrio128 with both i32 operands set to 7 and an
; i8 control of 7. Both RUN configurations expect vpcmpestri $7 with the
; result produced by seto into %bl. %ebx is zeroed beforehand and is
; saved/restored with pushl/popl around the sequence. The function is marked
; nounwind.
2000define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) nounwind {
2001; AVX-LABEL: test_x86_sse42_pcmpestrio128:
2002; AVX:       ## BB#0:
2003; AVX-NEXT:    pushl %ebx
2004; AVX-NEXT:    movl $7, %eax
2005; AVX-NEXT:    movl $7, %edx
2006; AVX-NEXT:    xorl %ebx, %ebx
2007; AVX-NEXT:    vpcmpestri $7, %xmm1, %xmm0
2008; AVX-NEXT:    seto %bl
2009; AVX-NEXT:    movl %ebx, %eax
2010; AVX-NEXT:    popl %ebx
2011; AVX-NEXT:    retl
2012;
2013; AVX512VL-LABEL: test_x86_sse42_pcmpestrio128:
2014; AVX512VL:       ## BB#0:
2015; AVX512VL-NEXT:    pushl %ebx
2016; AVX512VL-NEXT:    movl $7, %eax
2017; AVX512VL-NEXT:    movl $7, %edx
2018; AVX512VL-NEXT:    xorl %ebx, %ebx
2019; AVX512VL-NEXT:    vpcmpestri $7, %xmm1, %xmm0
2020; AVX512VL-NEXT:    seto %bl
2021; AVX512VL-NEXT:    movl %ebx, %eax
2022; AVX512VL-NEXT:    popl %ebx
2023; AVX512VL-NEXT:    retl
2024  %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
2025  ret i32 %res
2026}
2027declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
2028
2029
; Calls @llvm.x86.sse42.pcmpestris128 with both i32 operands set to 7 and an
; i8 control of 7. Both RUN configurations expect vpcmpestri $7 with the
; result produced by sets into %bl. %ebx is zeroed beforehand and is
; saved/restored with pushl/popl around the sequence. The function is marked
; nounwind.
2030define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) nounwind {
2031; AVX-LABEL: test_x86_sse42_pcmpestris128:
2032; AVX:       ## BB#0:
2033; AVX-NEXT:    pushl %ebx
2034; AVX-NEXT:    movl $7, %eax
2035; AVX-NEXT:    movl $7, %edx
2036; AVX-NEXT:    xorl %ebx, %ebx
2037; AVX-NEXT:    vpcmpestri $7, %xmm1, %xmm0
2038; AVX-NEXT:    sets %bl
2039; AVX-NEXT:    movl %ebx, %eax
2040; AVX-NEXT:    popl %ebx
2041; AVX-NEXT:    retl
2042;
2043; AVX512VL-LABEL: test_x86_sse42_pcmpestris128:
2044; AVX512VL:       ## BB#0:
2045; AVX512VL-NEXT:    pushl %ebx
2046; AVX512VL-NEXT:    movl $7, %eax
2047; AVX512VL-NEXT:    movl $7, %edx
2048; AVX512VL-NEXT:    xorl %ebx, %ebx
2049; AVX512VL-NEXT:    vpcmpestri $7, %xmm1, %xmm0
2050; AVX512VL-NEXT:    sets %bl
2051; AVX512VL-NEXT:    movl %ebx, %eax
2052; AVX512VL-NEXT:    popl %ebx
2053; AVX512VL-NEXT:    retl
2054  %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
2055  ret i32 %res
2056}
2057declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
2058
2059
; Calls @llvm.x86.sse42.pcmpestriz128 with both i32 operands set to 7 and an
; i8 control of 7. Both RUN configurations expect vpcmpestri $7 with the
; result produced by sete into %bl. %ebx is zeroed beforehand and is
; saved/restored with pushl/popl around the sequence. The function is marked
; nounwind.
2060define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) nounwind {
2061; AVX-LABEL: test_x86_sse42_pcmpestriz128:
2062; AVX:       ## BB#0:
2063; AVX-NEXT:    pushl %ebx
2064; AVX-NEXT:    movl $7, %eax
2065; AVX-NEXT:    movl $7, %edx
2066; AVX-NEXT:    xorl %ebx, %ebx
2067; AVX-NEXT:    vpcmpestri $7, %xmm1, %xmm0
2068; AVX-NEXT:    sete %bl
2069; AVX-NEXT:    movl %ebx, %eax
2070; AVX-NEXT:    popl %ebx
2071; AVX-NEXT:    retl
2072;
2073; AVX512VL-LABEL: test_x86_sse42_pcmpestriz128:
2074; AVX512VL:       ## BB#0:
2075; AVX512VL-NEXT:    pushl %ebx
2076; AVX512VL-NEXT:    movl $7, %eax
2077; AVX512VL-NEXT:    movl $7, %edx
2078; AVX512VL-NEXT:    xorl %ebx, %ebx
2079; AVX512VL-NEXT:    vpcmpestri $7, %xmm1, %xmm0
2080; AVX512VL-NEXT:    sete %bl
2081; AVX512VL-NEXT:    movl %ebx, %eax
2082; AVX512VL-NEXT:    popl %ebx
2083; AVX512VL-NEXT:    retl
2084  %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
2085  ret i32 %res
2086}
2087declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
2088
2089
; Calls @llvm.x86.sse42.pcmpestrm128 with both i32 operands set to 7 and an
; i8 control of 7, returning a <16 x i8>. Both RUN configurations expect the
; two i32 constants in %eax and %edx followed by vpcmpestrm $7, with no
; further move before retl.
2090define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
2091; AVX-LABEL: test_x86_sse42_pcmpestrm128:
2092; AVX:       ## BB#0:
2093; AVX-NEXT:    movl $7, %eax
2094; AVX-NEXT:    movl $7, %edx
2095; AVX-NEXT:    vpcmpestrm $7, %xmm1, %xmm0
2096; AVX-NEXT:    retl
2097;
2098; AVX512VL-LABEL: test_x86_sse42_pcmpestrm128:
2099; AVX512VL:       ## BB#0:
2100; AVX512VL-NEXT:    movl $7, %eax
2101; AVX512VL-NEXT:    movl $7, %edx
2102; AVX512VL-NEXT:    vpcmpestrm $7, %xmm1, %xmm0
2103; AVX512VL-NEXT:    retl
2104  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
2105  ret <16 x i8> %res
2106}
2107declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
2108
2109
; Variant of the pcmpestrm128 test where the second vector operand is loaded
; through a pointer argument. Both RUN configurations expect the pointer to
; be read from the stack into %ecx and used directly as the (%ecx) memory
; operand of vpcmpestrm, with no separate vector load. The intrinsic is not
; declared in this block; the declaration is expected elsewhere in the file.
2110define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) {
2111; AVX-LABEL: test_x86_sse42_pcmpestrm128_load:
2112; AVX:       ## BB#0:
2113; AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2114; AVX-NEXT:    movl $7, %eax
2115; AVX-NEXT:    movl $7, %edx
2116; AVX-NEXT:    vpcmpestrm $7, (%ecx), %xmm0
2117; AVX-NEXT:    retl
2118;
2119; AVX512VL-LABEL: test_x86_sse42_pcmpestrm128_load:
2120; AVX512VL:       ## BB#0:
2121; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2122; AVX512VL-NEXT:    movl $7, %eax
2123; AVX512VL-NEXT:    movl $7, %edx
2124; AVX512VL-NEXT:    vpcmpestrm $7, (%ecx), %xmm0
2125; AVX512VL-NEXT:    retl
2126  %1 = load <16 x i8>, <16 x i8>* %a2
2127  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
2128  ret <16 x i8> %res
2129}
2130
2131
; Calls @llvm.x86.sse42.pcmpistri128 on two <16 x i8> arguments with an i8
; control of 7. Both RUN configurations expect vpcmpistri $7 and the returned
; i32 to be copied out of %ecx.
2132define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
2133; AVX-LABEL: test_x86_sse42_pcmpistri128:
2134; AVX:       ## BB#0:
2135; AVX-NEXT:    vpcmpistri $7, %xmm1, %xmm0
2136; AVX-NEXT:    movl %ecx, %eax
2137; AVX-NEXT:    retl
2138;
2139; AVX512VL-LABEL: test_x86_sse42_pcmpistri128:
2140; AVX512VL:       ## BB#0:
2141; AVX512VL-NEXT:    vpcmpistri $7, %xmm1, %xmm0
2142; AVX512VL-NEXT:    movl %ecx, %eax
2143; AVX512VL-NEXT:    retl
2144  %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
2145  ret i32 %res
2146}
2147declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
2148
2149
; Variant of the pcmpistri128 test where both vector operands are loaded
; through pointer arguments. The expected code loads the first vector into
; %xmm0 and uses the second directly as the (%eax) memory operand of
; vpcmpistri. The two RUN configurations differ only in the load mnemonic
; (vmovdqa versus vmovdqa64). The intrinsic is not declared in this block;
; the declaration is expected elsewhere in the file.
2150define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
2151; AVX-LABEL: test_x86_sse42_pcmpistri128_load:
2152; AVX:       ## BB#0:
2153; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
2154; AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2155; AVX-NEXT:    vmovdqa (%ecx), %xmm0
2156; AVX-NEXT:    vpcmpistri $7, (%eax), %xmm0
2157; AVX-NEXT:    movl %ecx, %eax
2158; AVX-NEXT:    retl
2159;
2160; AVX512VL-LABEL: test_x86_sse42_pcmpistri128_load:
2161; AVX512VL:       ## BB#0:
2162; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
2163; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2164; AVX512VL-NEXT:    vmovdqa64 (%ecx), %xmm0
2165; AVX512VL-NEXT:    vpcmpistri $7, (%eax), %xmm0
2166; AVX512VL-NEXT:    movl %ecx, %eax
2167; AVX512VL-NEXT:    retl
2168  %1 = load <16 x i8>, <16 x i8>* %a0
2169  %2 = load <16 x i8>, <16 x i8>* %a1
2170  %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1]
2171  ret i32 %res
2172}
2173
2174
; Calls @llvm.x86.sse42.pcmpistria128 on two <16 x i8> arguments with an i8
; control of 7. Both RUN configurations expect %eax to be zeroed, then
; vpcmpistri $7, then seta %al to produce the i32 result.
2175define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
2176; AVX-LABEL: test_x86_sse42_pcmpistria128:
2177; AVX:       ## BB#0:
2178; AVX-NEXT:    xorl %eax, %eax
2179; AVX-NEXT:    vpcmpistri $7, %xmm1, %xmm0
2180; AVX-NEXT:    seta %al
2181; AVX-NEXT:    retl
2182;
2183; AVX512VL-LABEL: test_x86_sse42_pcmpistria128:
2184; AVX512VL:       ## BB#0:
2185; AVX512VL-NEXT:    xorl %eax, %eax
2186; AVX512VL-NEXT:    vpcmpistri $7, %xmm1, %xmm0
2187; AVX512VL-NEXT:    seta %al
2188; AVX512VL-NEXT:    retl
2189  %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
2190  ret i32 %res
2191}
2192declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone
2193
2194
; Calls @llvm.x86.sse42.pcmpistric128 on two <16 x i8> arguments with an i8
; control of 7. Both RUN configurations expect vpcmpistri $7 with the i32
; result produced by the sbbl %eax, %eax / andl $1, %eax pair.
2195define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
2196; AVX-LABEL: test_x86_sse42_pcmpistric128:
2197; AVX:       ## BB#0:
2198; AVX-NEXT:    vpcmpistri $7, %xmm1, %xmm0
2199; AVX-NEXT:    sbbl %eax, %eax
2200; AVX-NEXT:    andl $1, %eax
2201; AVX-NEXT:    retl
2202;
2203; AVX512VL-LABEL: test_x86_sse42_pcmpistric128:
2204; AVX512VL:       ## BB#0:
2205; AVX512VL-NEXT:    vpcmpistri $7, %xmm1, %xmm0
2206; AVX512VL-NEXT:    sbbl %eax, %eax
2207; AVX512VL-NEXT:    andl $1, %eax
2208; AVX512VL-NEXT:    retl
2209  %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
2210  ret i32 %res
2211}
2212declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone
2213
2214
; Calls @llvm.x86.sse42.pcmpistrio128 on two <16 x i8> arguments with an i8
; control of 7. Both RUN configurations expect %eax to be zeroed, then
; vpcmpistri $7, then seto %al to produce the i32 result.
2215define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
2216; AVX-LABEL: test_x86_sse42_pcmpistrio128:
2217; AVX:       ## BB#0:
2218; AVX-NEXT:    xorl %eax, %eax
2219; AVX-NEXT:    vpcmpistri $7, %xmm1, %xmm0
2220; AVX-NEXT:    seto %al
2221; AVX-NEXT:    retl
2222;
2223; AVX512VL-LABEL: test_x86_sse42_pcmpistrio128:
2224; AVX512VL:       ## BB#0:
2225; AVX512VL-NEXT:    xorl %eax, %eax
2226; AVX512VL-NEXT:    vpcmpistri $7, %xmm1, %xmm0
2227; AVX512VL-NEXT:    seto %al
2228; AVX512VL-NEXT:    retl
2229  %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
2230  ret i32 %res
2231}
2232declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone
2233
2234
; Calls @llvm.x86.sse42.pcmpistris128 on two <16 x i8> arguments with an i8
; control of 7. Both RUN configurations expect %eax to be zeroed, then
; vpcmpistri $7, then sets %al to produce the i32 result.
2235define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
2236; AVX-LABEL: test_x86_sse42_pcmpistris128:
2237; AVX:       ## BB#0:
2238; AVX-NEXT:    xorl %eax, %eax
2239; AVX-NEXT:    vpcmpistri $7, %xmm1, %xmm0
2240; AVX-NEXT:    sets %al
2241; AVX-NEXT:    retl
2242;
2243; AVX512VL-LABEL: test_x86_sse42_pcmpistris128:
2244; AVX512VL:       ## BB#0:
2245; AVX512VL-NEXT:    xorl %eax, %eax
2246; AVX512VL-NEXT:    vpcmpistri $7, %xmm1, %xmm0
2247; AVX512VL-NEXT:    sets %al
2248; AVX512VL-NEXT:    retl
2249  %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
2250  ret i32 %res
2251}
2252declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone
2253
2254
; Calls @llvm.x86.sse42.pcmpistriz128 on two <16 x i8> arguments with an i8
; control of 7. Both RUN configurations expect %eax to be zeroed, then
; vpcmpistri $7, then sete %al to produce the i32 result.
2255define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
2256; AVX-LABEL: test_x86_sse42_pcmpistriz128:
2257; AVX:       ## BB#0:
2258; AVX-NEXT:    xorl %eax, %eax
2259; AVX-NEXT:    vpcmpistri $7, %xmm1, %xmm0
2260; AVX-NEXT:    sete %al
2261; AVX-NEXT:    retl
2262;
2263; AVX512VL-LABEL: test_x86_sse42_pcmpistriz128:
2264; AVX512VL:       ## BB#0:
2265; AVX512VL-NEXT:    xorl %eax, %eax
2266; AVX512VL-NEXT:    vpcmpistri $7, %xmm1, %xmm0
2267; AVX512VL-NEXT:    sete %al
2268; AVX512VL-NEXT:    retl
2269  %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
2270  ret i32 %res
2271}
2272declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone
2273
2274
; Calls @llvm.x86.sse42.pcmpistrm128 on two <16 x i8> arguments with an i8
; control of 7, returning a <16 x i8>. Both RUN configurations expect a
; single vpcmpistrm $7 followed by retl.
2275define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
2276; AVX-LABEL: test_x86_sse42_pcmpistrm128:
2277; AVX:       ## BB#0:
2278; AVX-NEXT:    vpcmpistrm $7, %xmm1, %xmm0
2279; AVX-NEXT:    retl
2280;
2281; AVX512VL-LABEL: test_x86_sse42_pcmpistrm128:
2282; AVX512VL:       ## BB#0:
2283; AVX512VL-NEXT:    vpcmpistrm $7, %xmm1, %xmm0
2284; AVX512VL-NEXT:    retl
2285  %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
2286  ret <16 x i8> %res
2287}
2288declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
2289
2290
; Variant of the pcmpistrm128 test where the second vector operand is loaded
; through a pointer argument. Both RUN configurations expect the pointer to
; be read from the stack into %eax and used directly as the (%eax) memory
; operand of vpcmpistrm, with no separate vector load. The intrinsic is not
; declared in this block; the declaration is expected elsewhere in the file.
2291define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) {
2292; AVX-LABEL: test_x86_sse42_pcmpistrm128_load:
2293; AVX:       ## BB#0:
2294; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
2295; AVX-NEXT:    vpcmpistrm $7, (%eax), %xmm0
2296; AVX-NEXT:    retl
2297;
2298; AVX512VL-LABEL: test_x86_sse42_pcmpistrm128_load:
2299; AVX512VL:       ## BB#0:
2300; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
2301; AVX512VL-NEXT:    vpcmpistrm $7, (%eax), %xmm0
2302; AVX512VL-NEXT:    retl
2303  %1 = load <16 x i8>, <16 x i8>* %a1
2304  %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
2305  ret <16 x i8> %res
2306}
2307
2308
; Calls @llvm.x86.sse.add.ss on two <4 x float> arguments and returns the
; result. Both RUN configurations expect a single vaddss followed by retl.
2309define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
2310; AVX-LABEL: test_x86_sse_add_ss:
2311; AVX:       ## BB#0:
2312; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
2313; AVX-NEXT:    retl
2314;
2315; AVX512VL-LABEL: test_x86_sse_add_ss:
2316; AVX512VL:       ## BB#0:
2317; AVX512VL-NEXT:    vaddss %xmm1, %xmm0, %xmm0
2318; AVX512VL-NEXT:    retl
2319  %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
2320  ret <4 x float> %res
2321}
2322declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
2323
2324
; Calls @llvm.x86.sse.cmp.ps on two <4 x float> arguments with an i8
; predicate of 7. Both RUN configurations expect the predicate to be printed
; as part of the mnemonic: a single vcmpordps rather than an explicit
; immediate operand.
2325define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
2326; AVX-LABEL: test_x86_sse_cmp_ps:
2327; AVX:       ## BB#0:
2328; AVX-NEXT:    vcmpordps %xmm1, %xmm0, %xmm0
2329; AVX-NEXT:    retl
2330;
2331; AVX512VL-LABEL: test_x86_sse_cmp_ps:
2332; AVX512VL:       ## BB#0:
2333; AVX512VL-NEXT:    vcmpordps %xmm1, %xmm0, %xmm0
2334; AVX512VL-NEXT:    retl
2335  %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
2336  ret <4 x float> %res
2337}
2338declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
2339
2340
; Calls @llvm.x86.sse.cmp.ss on two <4 x float> arguments with an i8
; predicate of 7. Both RUN configurations expect the predicate to be printed
; as part of the mnemonic: a single vcmpordss rather than an explicit
; immediate operand.
2341define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
2342; AVX-LABEL: test_x86_sse_cmp_ss:
2343; AVX:       ## BB#0:
2344; AVX-NEXT:    vcmpordss %xmm1, %xmm0, %xmm0
2345; AVX-NEXT:    retl
2346;
2347; AVX512VL-LABEL: test_x86_sse_cmp_ss:
2348; AVX512VL:       ## BB#0:
2349; AVX512VL-NEXT:    vcmpordss %xmm1, %xmm0, %xmm0
2350; AVX512VL-NEXT:    retl
2351  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
2352  ret <4 x float> %res
2353}
2354declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
2355
2356
; Calls @llvm.x86.sse.comieq.ss on two <4 x float> arguments and returns the
; i32 result. Both RUN configurations expect vcomiss followed by two flag
; reads (setnp and sete) that are combined with andb and zero-extended into
; %eax.
2357define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
2358; AVX-LABEL: test_x86_sse_comieq_ss:
2359; AVX:       ## BB#0:
2360; AVX-NEXT:    vcomiss %xmm1, %xmm0
2361; AVX-NEXT:    setnp %al
2362; AVX-NEXT:    sete %cl
2363; AVX-NEXT:    andb %al, %cl
2364; AVX-NEXT:    movzbl %cl, %eax
2365; AVX-NEXT:    retl
2366;
2367; AVX512VL-LABEL: test_x86_sse_comieq_ss:
2368; AVX512VL:       ## BB#0:
2369; AVX512VL-NEXT:    vcomiss %xmm1, %xmm0
2370; AVX512VL-NEXT:    setnp %al
2371; AVX512VL-NEXT:    sete %cl
2372; AVX512VL-NEXT:    andb %al, %cl
2373; AVX512VL-NEXT:    movzbl %cl, %eax
2374; AVX512VL-NEXT:    retl
2375  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2376  ret i32 %res
2377}
2378declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
2379
2380
; Calls @llvm.x86.sse.comige.ss on two <4 x float> arguments and returns the
; i32 result. Both RUN configurations expect %eax to be zeroed, then
; vcomiss %xmm1, %xmm0, then setae %al.
2381define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
2382; AVX-LABEL: test_x86_sse_comige_ss:
2383; AVX:       ## BB#0:
2384; AVX-NEXT:    xorl %eax, %eax
2385; AVX-NEXT:    vcomiss %xmm1, %xmm0
2386; AVX-NEXT:    setae %al
2387; AVX-NEXT:    retl
2388;
2389; AVX512VL-LABEL: test_x86_sse_comige_ss:
2390; AVX512VL:       ## BB#0:
2391; AVX512VL-NEXT:    xorl %eax, %eax
2392; AVX512VL-NEXT:    vcomiss %xmm1, %xmm0
2393; AVX512VL-NEXT:    setae %al
2394; AVX512VL-NEXT:    retl
2395  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2396  ret i32 %res
2397}
2398declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone
2399
2400
; Calls @llvm.x86.sse.comigt.ss on two <4 x float> arguments and returns the
; i32 result. Both RUN configurations expect %eax to be zeroed, then
; vcomiss %xmm1, %xmm0, then seta %al.
2401define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
2402; AVX-LABEL: test_x86_sse_comigt_ss:
2403; AVX:       ## BB#0:
2404; AVX-NEXT:    xorl %eax, %eax
2405; AVX-NEXT:    vcomiss %xmm1, %xmm0
2406; AVX-NEXT:    seta %al
2407; AVX-NEXT:    retl
2408;
2409; AVX512VL-LABEL: test_x86_sse_comigt_ss:
2410; AVX512VL:       ## BB#0:
2411; AVX512VL-NEXT:    xorl %eax, %eax
2412; AVX512VL-NEXT:    vcomiss %xmm1, %xmm0
2413; AVX512VL-NEXT:    seta %al
2414; AVX512VL-NEXT:    retl
2415  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2416  ret i32 %res
2417}
2418declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone
2419
2420
; Calls @llvm.x86.sse.comile.ss on two <4 x float> arguments and returns the
; i32 result. Both RUN configurations expect the compare to be emitted with
; its operands swapped (vcomiss %xmm0, %xmm1) and the result read with
; setae, the same condition code the comige test expects.
2421define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
2422; AVX-LABEL: test_x86_sse_comile_ss:
2423; AVX:       ## BB#0:
2424; AVX-NEXT:    xorl %eax, %eax
2425; AVX-NEXT:    vcomiss %xmm0, %xmm1
2426; AVX-NEXT:    setae %al
2427; AVX-NEXT:    retl
2428;
2429; AVX512VL-LABEL: test_x86_sse_comile_ss:
2430; AVX512VL:       ## BB#0:
2431; AVX512VL-NEXT:    xorl %eax, %eax
2432; AVX512VL-NEXT:    vcomiss %xmm0, %xmm1
2433; AVX512VL-NEXT:    setae %al
2434; AVX512VL-NEXT:    retl
2435  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2436  ret i32 %res
2437}
2438declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
2439
2440
; Calls @llvm.x86.sse.comilt.ss on two <4 x float> arguments and returns the
; i32 result. Both RUN configurations expect the compare to be emitted with
; its operands swapped (vcomiss %xmm0, %xmm1) and the result read with seta,
; the same condition code the comigt test expects.
2441define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
2442; AVX-LABEL: test_x86_sse_comilt_ss:
2443; AVX:       ## BB#0:
2444; AVX-NEXT:    xorl %eax, %eax
2445; AVX-NEXT:    vcomiss %xmm0, %xmm1
2446; AVX-NEXT:    seta %al
2447; AVX-NEXT:    retl
2448;
2449; AVX512VL-LABEL: test_x86_sse_comilt_ss:
2450; AVX512VL:       ## BB#0:
2451; AVX512VL-NEXT:    xorl %eax, %eax
2452; AVX512VL-NEXT:    vcomiss %xmm0, %xmm1
2453; AVX512VL-NEXT:    seta %al
2454; AVX512VL-NEXT:    retl
2455  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2456  ret i32 %res
2457}
2458declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
2459
2460
; Calls @llvm.x86.sse.comineq.ss on two <4 x float> arguments and returns the
; i32 result. Both RUN configurations expect vcomiss followed by two flag
; reads (setp and setne) that are combined with orb and zero-extended into
; %eax.
2461define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
2462; AVX-LABEL: test_x86_sse_comineq_ss:
2463; AVX:       ## BB#0:
2464; AVX-NEXT:    vcomiss %xmm1, %xmm0
2465; AVX-NEXT:    setp %al
2466; AVX-NEXT:    setne %cl
2467; AVX-NEXT:    orb %al, %cl
2468; AVX-NEXT:    movzbl %cl, %eax
2469; AVX-NEXT:    retl
2470;
2471; AVX512VL-LABEL: test_x86_sse_comineq_ss:
2472; AVX512VL:       ## BB#0:
2473; AVX512VL-NEXT:    vcomiss %xmm1, %xmm0
2474; AVX512VL-NEXT:    setp %al
2475; AVX512VL-NEXT:    setne %cl
2476; AVX512VL-NEXT:    orb %al, %cl
2477; AVX512VL-NEXT:    movzbl %cl, %eax
2478; AVX512VL-NEXT:    retl
2479  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2480  ret i32 %res
2481}
2482declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone
2483
2484
; Calls @llvm.x86.sse.cvtsi2ss on a <4 x float> argument with the constant
; i32 7. Both RUN configurations expect the constant to be moved into %eax
; first and then consumed by vcvtsi2ssl as a register operand.
2485define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
2486; AVX-LABEL: test_x86_sse_cvtsi2ss:
2487; AVX:       ## BB#0:
2488; AVX-NEXT:    movl $7, %eax
2489; AVX-NEXT:    vcvtsi2ssl %eax, %xmm0, %xmm0
2490; AVX-NEXT:    retl
2491;
2492; AVX512VL-LABEL: test_x86_sse_cvtsi2ss:
2493; AVX512VL:       ## BB#0:
2494; AVX512VL-NEXT:    movl $7, %eax
2495; AVX512VL-NEXT:    vcvtsi2ssl %eax, %xmm0, %xmm0
2496; AVX512VL-NEXT:    retl
2497  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
2498  ret <4 x float> %res
2499}
2500declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
2501
2502
; Calls @llvm.x86.sse.cvtss2si on a <4 x float> argument and returns the i32
; result. Both RUN configurations expect a single vcvtss2si writing directly
; to %eax.
2503define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
2504; AVX-LABEL: test_x86_sse_cvtss2si:
2505; AVX:       ## BB#0:
2506; AVX-NEXT:    vcvtss2si %xmm0, %eax
2507; AVX-NEXT:    retl
2508;
2509; AVX512VL-LABEL: test_x86_sse_cvtss2si:
2510; AVX512VL:       ## BB#0:
2511; AVX512VL-NEXT:    vcvtss2si %xmm0, %eax
2512; AVX512VL-NEXT:    retl
2513  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
2514  ret i32 %res
2515}
2516declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
2517
2518
; Calls @llvm.x86.sse.cvttss2si on a <4 x float> argument and returns the i32
; result. Both RUN configurations expect a single vcvttss2si writing directly
; to %eax.
2519define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
2520; AVX-LABEL: test_x86_sse_cvttss2si:
2521; AVX:       ## BB#0:
2522; AVX-NEXT:    vcvttss2si %xmm0, %eax
2523; AVX-NEXT:    retl
2524;
2525; AVX512VL-LABEL: test_x86_sse_cvttss2si:
2526; AVX512VL:       ## BB#0:
2527; AVX512VL-NEXT:    vcvttss2si %xmm0, %eax
2528; AVX512VL-NEXT:    retl
2529  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
2530  ret i32 %res
2531}
2532declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
2533
2534
; Calls @llvm.x86.sse.div.ss on two <4 x float> arguments and returns the
; result. Both RUN configurations expect a single vdivss followed by retl.
2535define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
2536; AVX-LABEL: test_x86_sse_div_ss:
2537; AVX:       ## BB#0:
2538; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
2539; AVX-NEXT:    retl
2540;
2541; AVX512VL-LABEL: test_x86_sse_div_ss:
2542; AVX512VL:       ## BB#0:
2543; AVX512VL-NEXT:    vdivss %xmm1, %xmm0, %xmm0
2544; AVX512VL-NEXT:    retl
2545  %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
2546  ret <4 x float> %res
2547}
2548declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
2549
2550
; Calls @llvm.x86.sse.ldmxcsr with an i8* argument; the function returns
; void. Both RUN configurations expect the pointer to be read from the stack
; into %eax and used as the memory operand of vldmxcsr. The intrinsic is
; declared nounwind only (no readnone).
2551define void @test_x86_sse_ldmxcsr(i8* %a0) {
2552; AVX-LABEL: test_x86_sse_ldmxcsr:
2553; AVX:       ## BB#0:
2554; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
2555; AVX-NEXT:    vldmxcsr (%eax)
2556; AVX-NEXT:    retl
2557;
2558; AVX512VL-LABEL: test_x86_sse_ldmxcsr:
2559; AVX512VL:       ## BB#0:
2560; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
2561; AVX512VL-NEXT:    vldmxcsr (%eax)
2562; AVX512VL-NEXT:    retl
2563  call void @llvm.x86.sse.ldmxcsr(i8* %a0)
2564  ret void
2565}
2566declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
2567
2568
2569
; Calls @llvm.x86.sse.max.ps on two <4 x float> arguments and returns the
; result. Both RUN configurations expect a single vmaxps followed by retl.
2570define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
2571; AVX-LABEL: test_x86_sse_max_ps:
2572; AVX:       ## BB#0:
2573; AVX-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
2574; AVX-NEXT:    retl
2575;
2576; AVX512VL-LABEL: test_x86_sse_max_ps:
2577; AVX512VL:       ## BB#0:
2578; AVX512VL-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
2579; AVX512VL-NEXT:    retl
2580  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
2581  ret <4 x float> %res
2582}
2583declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
2584
2585
; Calls @llvm.x86.sse.max.ss on (%a0, %a1) and returns the <4 x float> result.
; Both check prefixes expect a single vmaxss %xmm1, %xmm0, %xmm0.
2586define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
2587; AVX-LABEL: test_x86_sse_max_ss:
2588; AVX:       ## BB#0:
2589; AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
2590; AVX-NEXT:    retl
2591;
2592; AVX512VL-LABEL: test_x86_sse_max_ss:
2593; AVX512VL:       ## BB#0:
2594; AVX512VL-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
2595; AVX512VL-NEXT:    retl
2596  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
2597  ret <4 x float> %res
2598}
2599declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
2600
2601
; Calls @llvm.x86.sse.min.ps on (%a0, %a1) and returns the <4 x float> result.
; Both check prefixes expect a single vminps %xmm1, %xmm0, %xmm0.
2602define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
2603; AVX-LABEL: test_x86_sse_min_ps:
2604; AVX:       ## BB#0:
2605; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
2606; AVX-NEXT:    retl
2607;
2608; AVX512VL-LABEL: test_x86_sse_min_ps:
2609; AVX512VL:       ## BB#0:
2610; AVX512VL-NEXT:    vminps %xmm1, %xmm0, %xmm0
2611; AVX512VL-NEXT:    retl
2612  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
2613  ret <4 x float> %res
2614}
2615declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
2616
2617
; Calls @llvm.x86.sse.min.ss on (%a0, %a1) and returns the <4 x float> result.
; Both check prefixes expect a single vminss %xmm1, %xmm0, %xmm0.
2618define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
2619; AVX-LABEL: test_x86_sse_min_ss:
2620; AVX:       ## BB#0:
2621; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
2622; AVX-NEXT:    retl
2623;
2624; AVX512VL-LABEL: test_x86_sse_min_ss:
2625; AVX512VL:       ## BB#0:
2626; AVX512VL-NEXT:    vminss %xmm1, %xmm0, %xmm0
2627; AVX512VL-NEXT:    retl
2628  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
2629  ret <4 x float> %res
2630}
2631declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
2632
2633
; Calls @llvm.x86.sse.movmsk.ps on %a0 and returns its i32 result.
; Both check prefixes expect a single vmovmskps from %xmm0 into %eax.
2634define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
2635; AVX-LABEL: test_x86_sse_movmsk_ps:
2636; AVX:       ## BB#0:
2637; AVX-NEXT:    vmovmskps %xmm0, %eax
2638; AVX-NEXT:    retl
2639;
2640; AVX512VL-LABEL: test_x86_sse_movmsk_ps:
2641; AVX512VL:       ## BB#0:
2642; AVX512VL-NEXT:    vmovmskps %xmm0, %eax
2643; AVX512VL-NEXT:    retl
2644  %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
2645  ret i32 %res
2646}
2647declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
2648
2649
2650
; Calls @llvm.x86.sse.mul.ss on (%a0, %a1) and returns the <4 x float> result.
; Both check prefixes expect a single vmulss %xmm1, %xmm0, %xmm0.
2651define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
2652; AVX-LABEL: test_x86_sse_mul_ss:
2653; AVX:       ## BB#0:
2654; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
2655; AVX-NEXT:    retl
2656;
2657; AVX512VL-LABEL: test_x86_sse_mul_ss:
2658; AVX512VL:       ## BB#0:
2659; AVX512VL-NEXT:    vmulss %xmm1, %xmm0, %xmm0
2660; AVX512VL-NEXT:    retl
2661  %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
2662  ret <4 x float> %res
2663}
2664declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
2665
2666
; Calls @llvm.x86.sse.rcp.ps on %a0 and returns the <4 x float> result.
; The expected instruction differs per configuration: the avx run expects
; vrcpps, while the avx512vl run expects vrcp14ps.
2667define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
2668; AVX-LABEL: test_x86_sse_rcp_ps:
2669; AVX:       ## BB#0:
2670; AVX-NEXT:    vrcpps %xmm0, %xmm0
2671; AVX-NEXT:    retl
2672;
2673; AVX512VL-LABEL: test_x86_sse_rcp_ps:
2674; AVX512VL:       ## BB#0:
2675; AVX512VL-NEXT:    vrcp14ps %xmm0, %xmm0
2676; AVX512VL-NEXT:    retl
2677  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
2678  ret <4 x float> %res
2679}
2680declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
2681
2682
; Calls @llvm.x86.sse.rcp.ss on %a0 and returns the <4 x float> result.
; Both check prefixes expect vrcpss with %xmm0 as both sources and destination.
2683define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
2684; AVX-LABEL: test_x86_sse_rcp_ss:
2685; AVX:       ## BB#0:
2686; AVX-NEXT:    vrcpss %xmm0, %xmm0, %xmm0
2687; AVX-NEXT:    retl
2688;
2689; AVX512VL-LABEL: test_x86_sse_rcp_ss:
2690; AVX512VL:       ## BB#0:
2691; AVX512VL-NEXT:    vrcpss %xmm0, %xmm0, %xmm0
2692; AVX512VL-NEXT:    retl
2693  %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
2694  ret <4 x float> %res
2695}
2696declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
2697
2698
; Calls @llvm.x86.sse.rsqrt.ps on %a0 and returns the <4 x float> result.
; The expected instruction differs per configuration: the avx run expects
; vrsqrtps, while the avx512vl run expects vrsqrt14ps.
2699define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
2700; AVX-LABEL: test_x86_sse_rsqrt_ps:
2701; AVX:       ## BB#0:
2702; AVX-NEXT:    vrsqrtps %xmm0, %xmm0
2703; AVX-NEXT:    retl
2704;
2705; AVX512VL-LABEL: test_x86_sse_rsqrt_ps:
2706; AVX512VL:       ## BB#0:
2707; AVX512VL-NEXT:    vrsqrt14ps %xmm0, %xmm0
2708; AVX512VL-NEXT:    retl
2709  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
2710  ret <4 x float> %res
2711}
2712declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
2713
2714
; Calls @llvm.x86.sse.rsqrt.ss on %a0 and returns the <4 x float> result.
; Both check prefixes expect vrsqrtss with %xmm0 as both sources and destination.
2715define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
2716; AVX-LABEL: test_x86_sse_rsqrt_ss:
2717; AVX:       ## BB#0:
2718; AVX-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0
2719; AVX-NEXT:    retl
2720;
2721; AVX512VL-LABEL: test_x86_sse_rsqrt_ss:
2722; AVX512VL:       ## BB#0:
2723; AVX512VL-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0
2724; AVX512VL-NEXT:    retl
2725  %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
2726  ret <4 x float> %res
2727}
2728declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
2729
2730
; Calls @llvm.x86.sse.sqrt.ps on %a0 and returns the <4 x float> result.
; Both check prefixes expect a single vsqrtps %xmm0, %xmm0.
2731define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
2732; AVX-LABEL: test_x86_sse_sqrt_ps:
2733; AVX:       ## BB#0:
2734; AVX-NEXT:    vsqrtps %xmm0, %xmm0
2735; AVX-NEXT:    retl
2736;
2737; AVX512VL-LABEL: test_x86_sse_sqrt_ps:
2738; AVX512VL:       ## BB#0:
2739; AVX512VL-NEXT:    vsqrtps %xmm0, %xmm0
2740; AVX512VL-NEXT:    retl
2741  %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
2742  ret <4 x float> %res
2743}
2744declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
2745
2746
; Calls @llvm.x86.sse.sqrt.ss on %a0 and returns the <4 x float> result.
; Both check prefixes expect vsqrtss with %xmm0 as both sources and destination.
2747define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
2748; AVX-LABEL: test_x86_sse_sqrt_ss:
2749; AVX:       ## BB#0:
2750; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
2751; AVX-NEXT:    retl
2752;
2753; AVX512VL-LABEL: test_x86_sse_sqrt_ss:
2754; AVX512VL:       ## BB#0:
2755; AVX512VL-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
2756; AVX512VL-NEXT:    retl
2757  %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
2758  ret <4 x float> %res
2759}
2760declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
2761
2762
; Calls @llvm.x86.sse.stmxcsr with the i8* argument %a0; nothing is returned.
; Both check prefixes expect the pointer to be loaded from the stack into %eax
; (the stack offset is matched by a regex, not a fixed number) and then
; vstmxcsr (%eax). The intrinsic is declared nounwind only, without readnone.
2763define void @test_x86_sse_stmxcsr(i8* %a0) {
2764; AVX-LABEL: test_x86_sse_stmxcsr:
2765; AVX:       ## BB#0:
2766; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
2767; AVX-NEXT:    vstmxcsr (%eax)
2768; AVX-NEXT:    retl
2769;
2770; AVX512VL-LABEL: test_x86_sse_stmxcsr:
2771; AVX512VL:       ## BB#0:
2772; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
2773; AVX512VL-NEXT:    vstmxcsr (%eax)
2774; AVX512VL-NEXT:    retl
2775  call void @llvm.x86.sse.stmxcsr(i8* %a0)
2776  ret void
2777}
2778declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
2779
2780
; Calls @llvm.x86.sse.sub.ss on (%a0, %a1) and returns the <4 x float> result.
; Both check prefixes expect a single vsubss %xmm1, %xmm0, %xmm0.
2781define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
2782; AVX-LABEL: test_x86_sse_sub_ss:
2783; AVX:       ## BB#0:
2784; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
2785; AVX-NEXT:    retl
2786;
2787; AVX512VL-LABEL: test_x86_sse_sub_ss:
2788; AVX512VL:       ## BB#0:
2789; AVX512VL-NEXT:    vsubss %xmm1, %xmm0, %xmm0
2790; AVX512VL-NEXT:    retl
2791  %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
2792  ret <4 x float> %res
2793}
2794declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
2795
2796
; Calls @llvm.x86.sse.ucomieq.ss on (%a0, %a1) and returns its i32 result.
; Both check prefixes expect vucomiss %xmm1, %xmm0 followed by setnp and sete,
; combined with andb and widened into %eax with movzbl.
2797define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
2798; AVX-LABEL: test_x86_sse_ucomieq_ss:
2799; AVX:       ## BB#0:
2800; AVX-NEXT:    vucomiss %xmm1, %xmm0
2801; AVX-NEXT:    setnp %al
2802; AVX-NEXT:    sete %cl
2803; AVX-NEXT:    andb %al, %cl
2804; AVX-NEXT:    movzbl %cl, %eax
2805; AVX-NEXT:    retl
2806;
2807; AVX512VL-LABEL: test_x86_sse_ucomieq_ss:
2808; AVX512VL:       ## BB#0:
2809; AVX512VL-NEXT:    vucomiss %xmm1, %xmm0
2810; AVX512VL-NEXT:    setnp %al
2811; AVX512VL-NEXT:    sete %cl
2812; AVX512VL-NEXT:    andb %al, %cl
2813; AVX512VL-NEXT:    movzbl %cl, %eax
2814; AVX512VL-NEXT:    retl
2815  %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2816  ret i32 %res
2817}
2818declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
2819
2820
; Calls @llvm.x86.sse.ucomige.ss on (%a0, %a1) and returns its i32 result.
; Both check prefixes expect %eax to be zeroed with xorl, then
; vucomiss %xmm1, %xmm0 and setae %al.
2821define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
2822; AVX-LABEL: test_x86_sse_ucomige_ss:
2823; AVX:       ## BB#0:
2824; AVX-NEXT:    xorl %eax, %eax
2825; AVX-NEXT:    vucomiss %xmm1, %xmm0
2826; AVX-NEXT:    setae %al
2827; AVX-NEXT:    retl
2828;
2829; AVX512VL-LABEL: test_x86_sse_ucomige_ss:
2830; AVX512VL:       ## BB#0:
2831; AVX512VL-NEXT:    xorl %eax, %eax
2832; AVX512VL-NEXT:    vucomiss %xmm1, %xmm0
2833; AVX512VL-NEXT:    setae %al
2834; AVX512VL-NEXT:    retl
2835  %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2836  ret i32 %res
2837}
2838declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
2839
2840
; Calls @llvm.x86.sse.ucomigt.ss on (%a0, %a1) and returns its i32 result.
; Both check prefixes expect %eax to be zeroed with xorl, then
; vucomiss %xmm1, %xmm0 and seta %al.
2841define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
2842; AVX-LABEL: test_x86_sse_ucomigt_ss:
2843; AVX:       ## BB#0:
2844; AVX-NEXT:    xorl %eax, %eax
2845; AVX-NEXT:    vucomiss %xmm1, %xmm0
2846; AVX-NEXT:    seta %al
2847; AVX-NEXT:    retl
2848;
2849; AVX512VL-LABEL: test_x86_sse_ucomigt_ss:
2850; AVX512VL:       ## BB#0:
2851; AVX512VL-NEXT:    xorl %eax, %eax
2852; AVX512VL-NEXT:    vucomiss %xmm1, %xmm0
2853; AVX512VL-NEXT:    seta %al
2854; AVX512VL-NEXT:    retl
2855  %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2856  ret i32 %res
2857}
2858declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
2859
2860
; Calls @llvm.x86.sse.ucomile.ss on (%a0, %a1) and returns its i32 result.
; Both check prefixes expect %eax to be zeroed with xorl, then a compare with
; the operands in the opposite order from the ucomige test
; (vucomiss %xmm0, %xmm1) and setae %al.
2861define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
2862; AVX-LABEL: test_x86_sse_ucomile_ss:
2863; AVX:       ## BB#0:
2864; AVX-NEXT:    xorl %eax, %eax
2865; AVX-NEXT:    vucomiss %xmm0, %xmm1
2866; AVX-NEXT:    setae %al
2867; AVX-NEXT:    retl
2868;
2869; AVX512VL-LABEL: test_x86_sse_ucomile_ss:
2870; AVX512VL:       ## BB#0:
2871; AVX512VL-NEXT:    xorl %eax, %eax
2872; AVX512VL-NEXT:    vucomiss %xmm0, %xmm1
2873; AVX512VL-NEXT:    setae %al
2874; AVX512VL-NEXT:    retl
2875  %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2876  ret i32 %res
2877}
2878declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
2879
2880
; Calls @llvm.x86.sse.ucomilt.ss on (%a0, %a1) and returns its i32 result.
; Both check prefixes expect %eax to be zeroed with xorl, then a compare with
; the operands in the opposite order from the ucomigt test
; (vucomiss %xmm0, %xmm1) and seta %al.
2881define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
2882; AVX-LABEL: test_x86_sse_ucomilt_ss:
2883; AVX:       ## BB#0:
2884; AVX-NEXT:    xorl %eax, %eax
2885; AVX-NEXT:    vucomiss %xmm0, %xmm1
2886; AVX-NEXT:    seta %al
2887; AVX-NEXT:    retl
2888;
2889; AVX512VL-LABEL: test_x86_sse_ucomilt_ss:
2890; AVX512VL:       ## BB#0:
2891; AVX512VL-NEXT:    xorl %eax, %eax
2892; AVX512VL-NEXT:    vucomiss %xmm0, %xmm1
2893; AVX512VL-NEXT:    seta %al
2894; AVX512VL-NEXT:    retl
2895  %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2896  ret i32 %res
2897}
2898declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
2899
2900
; Calls @llvm.x86.sse.ucomineq.ss on (%a0, %a1) and returns its i32 result.
; Both check prefixes expect vucomiss %xmm1, %xmm0 followed by setp and setne,
; combined with orb and widened into %eax with movzbl.
2901define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
2902; AVX-LABEL: test_x86_sse_ucomineq_ss:
2903; AVX:       ## BB#0:
2904; AVX-NEXT:    vucomiss %xmm1, %xmm0
2905; AVX-NEXT:    setp %al
2906; AVX-NEXT:    setne %cl
2907; AVX-NEXT:    orb %al, %cl
2908; AVX-NEXT:    movzbl %cl, %eax
2909; AVX-NEXT:    retl
2910;
2911; AVX512VL-LABEL: test_x86_sse_ucomineq_ss:
2912; AVX512VL:       ## BB#0:
2913; AVX512VL-NEXT:    vucomiss %xmm1, %xmm0
2914; AVX512VL-NEXT:    setp %al
2915; AVX512VL-NEXT:    setne %cl
2916; AVX512VL-NEXT:    orb %al, %cl
2917; AVX512VL-NEXT:    movzbl %cl, %eax
2918; AVX512VL-NEXT:    retl
2919  %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2920  ret i32 %res
2921}
2922declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
2923
2924
; Calls @llvm.x86.ssse3.pabs.b.128 on %a0 and returns the <16 x i8> result.
; Both check prefixes expect a single vpabsb %xmm0, %xmm0.
2925define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
2926; AVX-LABEL: test_x86_ssse3_pabs_b_128:
2927; AVX:       ## BB#0:
2928; AVX-NEXT:    vpabsb %xmm0, %xmm0
2929; AVX-NEXT:    retl
2930;
2931; AVX512VL-LABEL: test_x86_ssse3_pabs_b_128:
2932; AVX512VL:       ## BB#0:
2933; AVX512VL-NEXT:    vpabsb %xmm0, %xmm0
2934; AVX512VL-NEXT:    retl
2935  %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
2936  ret <16 x i8> %res
2937}
2938declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
2939
2940
; Calls @llvm.x86.ssse3.pabs.d.128 on %a0 and returns the <4 x i32> result.
; Both check prefixes expect a single vpabsd %xmm0, %xmm0.
2941define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
2942; AVX-LABEL: test_x86_ssse3_pabs_d_128:
2943; AVX:       ## BB#0:
2944; AVX-NEXT:    vpabsd %xmm0, %xmm0
2945; AVX-NEXT:    retl
2946;
2947; AVX512VL-LABEL: test_x86_ssse3_pabs_d_128:
2948; AVX512VL:       ## BB#0:
2949; AVX512VL-NEXT:    vpabsd %xmm0, %xmm0
2950; AVX512VL-NEXT:    retl
2951  %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
2952  ret <4 x i32> %res
2953}
2954declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
2955
2956
; Calls @llvm.x86.ssse3.pabs.w.128 on %a0 and returns the <8 x i16> result.
; Both check prefixes expect a single vpabsw %xmm0, %xmm0.
2957define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
2958; AVX-LABEL: test_x86_ssse3_pabs_w_128:
2959; AVX:       ## BB#0:
2960; AVX-NEXT:    vpabsw %xmm0, %xmm0
2961; AVX-NEXT:    retl
2962;
2963; AVX512VL-LABEL: test_x86_ssse3_pabs_w_128:
2964; AVX512VL:       ## BB#0:
2965; AVX512VL-NEXT:    vpabsw %xmm0, %xmm0
2966; AVX512VL-NEXT:    retl
2967  %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
2968  ret <8 x i16> %res
2969}
2970declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
2971
2972
; Calls @llvm.x86.ssse3.phadd.d.128 on (%a0, %a1) and returns the <4 x i32> result.
; Both check prefixes expect a single vphaddd %xmm1, %xmm0, %xmm0.
2973define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
2974; AVX-LABEL: test_x86_ssse3_phadd_d_128:
2975; AVX:       ## BB#0:
2976; AVX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
2977; AVX-NEXT:    retl
2978;
2979; AVX512VL-LABEL: test_x86_ssse3_phadd_d_128:
2980; AVX512VL:       ## BB#0:
2981; AVX512VL-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
2982; AVX512VL-NEXT:    retl
2983  %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
2984  ret <4 x i32> %res
2985}
2986declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
2987
2988
; Calls @llvm.x86.ssse3.phadd.sw.128 on (%a0, %a1) and returns the <8 x i16> result.
; Both check prefixes expect a single vphaddsw %xmm1, %xmm0, %xmm0.
2989define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
2990; AVX-LABEL: test_x86_ssse3_phadd_sw_128:
2991; AVX:       ## BB#0:
2992; AVX-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0
2993; AVX-NEXT:    retl
2994;
2995; AVX512VL-LABEL: test_x86_ssse3_phadd_sw_128:
2996; AVX512VL:       ## BB#0:
2997; AVX512VL-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0
2998; AVX512VL-NEXT:    retl
2999  %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
3000  ret <8 x i16> %res
3001}
3002declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
3003
3004
; Calls @llvm.x86.ssse3.phadd.w.128 on (%a0, %a1) and returns the <8 x i16> result.
; Both check prefixes expect a single vphaddw %xmm1, %xmm0, %xmm0.
3005define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
3006; AVX-LABEL: test_x86_ssse3_phadd_w_128:
3007; AVX:       ## BB#0:
3008; AVX-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
3009; AVX-NEXT:    retl
3010;
3011; AVX512VL-LABEL: test_x86_ssse3_phadd_w_128:
3012; AVX512VL:       ## BB#0:
3013; AVX512VL-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
3014; AVX512VL-NEXT:    retl
3015  %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
3016  ret <8 x i16> %res
3017}
3018declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
3019
3020
; Calls @llvm.x86.ssse3.phsub.d.128 on (%a0, %a1) and returns the <4 x i32> result.
; Both check prefixes expect a single vphsubd %xmm1, %xmm0, %xmm0.
3021define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
3022; AVX-LABEL: test_x86_ssse3_phsub_d_128:
3023; AVX:       ## BB#0:
3024; AVX-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
3025; AVX-NEXT:    retl
3026;
3027; AVX512VL-LABEL: test_x86_ssse3_phsub_d_128:
3028; AVX512VL:       ## BB#0:
3029; AVX512VL-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
3030; AVX512VL-NEXT:    retl
3031  %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
3032  ret <4 x i32> %res
3033}
3034declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
3035
3036
; Calls @llvm.x86.ssse3.phsub.sw.128 on (%a0, %a1) and returns the <8 x i16> result.
; Both check prefixes expect a single vphsubsw %xmm1, %xmm0, %xmm0.
3037define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
3038; AVX-LABEL: test_x86_ssse3_phsub_sw_128:
3039; AVX:       ## BB#0:
3040; AVX-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0
3041; AVX-NEXT:    retl
3042;
3043; AVX512VL-LABEL: test_x86_ssse3_phsub_sw_128:
3044; AVX512VL:       ## BB#0:
3045; AVX512VL-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0
3046; AVX512VL-NEXT:    retl
3047  %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
3048  ret <8 x i16> %res
3049}
3050declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
3051
3052
; Calls @llvm.x86.ssse3.phsub.w.128 on (%a0, %a1) and returns the <8 x i16> result.
; Both check prefixes expect a single vphsubw %xmm1, %xmm0, %xmm0.
3053define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
3054; AVX-LABEL: test_x86_ssse3_phsub_w_128:
3055; AVX:       ## BB#0:
3056; AVX-NEXT:    vphsubw %xmm1, %xmm0, %xmm0
3057; AVX-NEXT:    retl
3058;
3059; AVX512VL-LABEL: test_x86_ssse3_phsub_w_128:
3060; AVX512VL:       ## BB#0:
3061; AVX512VL-NEXT:    vphsubw %xmm1, %xmm0, %xmm0
3062; AVX512VL-NEXT:    retl
3063  %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
3064  ret <8 x i16> %res
3065}
3066declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
3067
3068
; Calls @llvm.x86.ssse3.pmadd.ub.sw.128 on two <16 x i8> arguments and returns
; the <8 x i16> result.
; Both check prefixes expect a single vpmaddubsw %xmm1, %xmm0, %xmm0.
3069define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
3070; AVX-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
3071; AVX:       ## BB#0:
3072; AVX-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0
3073; AVX-NEXT:    retl
3074;
3075; AVX512VL-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
3076; AVX512VL:       ## BB#0:
3077; AVX512VL-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0
3078; AVX512VL-NEXT:    retl
3079  %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
3080  ret <8 x i16> %res
3081}
3082declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
3083
3084
; Calls @llvm.x86.ssse3.pmul.hr.sw.128 on (%a0, %a1) and returns the <8 x i16> result.
; Both check prefixes expect a single vpmulhrsw %xmm1, %xmm0, %xmm0.
3085define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
3086; AVX-LABEL: test_x86_ssse3_pmul_hr_sw_128:
3087; AVX:       ## BB#0:
3088; AVX-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0
3089; AVX-NEXT:    retl
3090;
3091; AVX512VL-LABEL: test_x86_ssse3_pmul_hr_sw_128:
3092; AVX512VL:       ## BB#0:
3093; AVX512VL-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0
3094; AVX512VL-NEXT:    retl
3095  %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
3096  ret <8 x i16> %res
3097}
3098declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
3099
3100
; Calls @llvm.x86.ssse3.pshuf.b.128 on (%a0, %a1) and returns the <16 x i8> result.
; Both check prefixes expect a single vpshufb %xmm1, %xmm0, %xmm0.
3101define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
3102; AVX-LABEL: test_x86_ssse3_pshuf_b_128:
3103; AVX:       ## BB#0:
3104; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
3105; AVX-NEXT:    retl
3106;
3107; AVX512VL-LABEL: test_x86_ssse3_pshuf_b_128:
3108; AVX512VL:       ## BB#0:
3109; AVX512VL-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
3110; AVX512VL-NEXT:    retl
3111  %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
3112  ret <16 x i8> %res
3113}
3114declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
3115
3116
; Calls @llvm.x86.ssse3.psign.b.128 on (%a0, %a1) and returns the <16 x i8> result.
; Both check prefixes expect a single vpsignb %xmm1, %xmm0, %xmm0.
3117define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
3118; AVX-LABEL: test_x86_ssse3_psign_b_128:
3119; AVX:       ## BB#0:
3120; AVX-NEXT:    vpsignb %xmm1, %xmm0, %xmm0
3121; AVX-NEXT:    retl
3122;
3123; AVX512VL-LABEL: test_x86_ssse3_psign_b_128:
3124; AVX512VL:       ## BB#0:
3125; AVX512VL-NEXT:    vpsignb %xmm1, %xmm0, %xmm0
3126; AVX512VL-NEXT:    retl
3127  %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
3128  ret <16 x i8> %res
3129}
3130declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
3131
3132
; Calls @llvm.x86.ssse3.psign.d.128 on (%a0, %a1) and returns the <4 x i32> result.
; Both check prefixes expect a single vpsignd %xmm1, %xmm0, %xmm0.
3133define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
3134; AVX-LABEL: test_x86_ssse3_psign_d_128:
3135; AVX:       ## BB#0:
3136; AVX-NEXT:    vpsignd %xmm1, %xmm0, %xmm0
3137; AVX-NEXT:    retl
3138;
3139; AVX512VL-LABEL: test_x86_ssse3_psign_d_128:
3140; AVX512VL:       ## BB#0:
3141; AVX512VL-NEXT:    vpsignd %xmm1, %xmm0, %xmm0
3142; AVX512VL-NEXT:    retl
3143  %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
3144  ret <4 x i32> %res
3145}
3146declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
3147
3148
; Calls @llvm.x86.ssse3.psign.w.128 on (%a0, %a1) and returns the <8 x i16> result.
; Both check prefixes expect a single vpsignw %xmm1, %xmm0, %xmm0.
3149define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
3150; AVX-LABEL: test_x86_ssse3_psign_w_128:
3151; AVX:       ## BB#0:
3152; AVX-NEXT:    vpsignw %xmm1, %xmm0, %xmm0
3153; AVX-NEXT:    retl
3154;
3155; AVX512VL-LABEL: test_x86_ssse3_psign_w_128:
3156; AVX512VL:       ## BB#0:
3157; AVX512VL-NEXT:    vpsignw %xmm1, %xmm0, %xmm0
3158; AVX512VL-NEXT:    retl
3159  %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
3160  ret <8 x i16> %res
3161}
3162declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
3163
3164
; Calls @llvm.x86.avx.addsub.pd.256 on (%a0, %a1) and returns the <4 x double> result.
; Both check prefixes expect a single vaddsubpd on ymm registers
; (%ymm1, %ymm0, %ymm0).
3165define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
3166; AVX-LABEL: test_x86_avx_addsub_pd_256:
3167; AVX:       ## BB#0:
3168; AVX-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0
3169; AVX-NEXT:    retl
3170;
3171; AVX512VL-LABEL: test_x86_avx_addsub_pd_256:
3172; AVX512VL:       ## BB#0:
3173; AVX512VL-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0
3174; AVX512VL-NEXT:    retl
3175  %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
3176  ret <4 x double> %res
3177}
3178declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
3179
3180
; Calls @llvm.x86.avx.addsub.ps.256 on (%a0, %a1) and returns the <8 x float> result.
; Both check prefixes expect a single vaddsubps on ymm registers
; (%ymm1, %ymm0, %ymm0).
3181define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
3182; AVX-LABEL: test_x86_avx_addsub_ps_256:
3183; AVX:       ## BB#0:
3184; AVX-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0
3185; AVX-NEXT:    retl
3186;
3187; AVX512VL-LABEL: test_x86_avx_addsub_ps_256:
3188; AVX512VL:       ## BB#0:
3189; AVX512VL-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0
3190; AVX512VL-NEXT:    retl
3191  %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
3192  ret <8 x float> %res
3193}
3194declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
3195
3196
; Calls @llvm.x86.avx.blendv.pd.256 on (%a0, %a1, %a2) and returns the
; <4 x double> result.
; Both check prefixes expect a single vblendvpd %ymm2, %ymm1, %ymm0, %ymm0.
3197define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
3198; AVX-LABEL: test_x86_avx_blendv_pd_256:
3199; AVX:       ## BB#0:
3200; AVX-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
3201; AVX-NEXT:    retl
3202;
3203; AVX512VL-LABEL: test_x86_avx_blendv_pd_256:
3204; AVX512VL:       ## BB#0:
3205; AVX512VL-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
3206; AVX512VL-NEXT:    retl
3207  %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
3208  ret <4 x double> %res
3209}
3210declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
3211
3212
; Calls @llvm.x86.avx.blendv.ps.256 on (%a0, %a1, %a2) and returns the
; <8 x float> result.
; Both check prefixes expect a single vblendvps %ymm2, %ymm1, %ymm0, %ymm0.
3213define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
3214; AVX-LABEL: test_x86_avx_blendv_ps_256:
3215; AVX:       ## BB#0:
3216; AVX-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0
3217; AVX-NEXT:    retl
3218;
3219; AVX512VL-LABEL: test_x86_avx_blendv_ps_256:
3220; AVX512VL:       ## BB#0:
3221; AVX512VL-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0
3222; AVX512VL-NEXT:    retl
3223  %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
3224  ret <8 x float> %res
3225}
3226declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
3227
3228
; Calls @llvm.x86.avx.cmp.pd.256 on (%a0, %a1) with the constant i8 7 as the
; third argument and returns the <4 x double> result.
; Both check prefixes expect that call to be emitted as
; vcmpordpd %ymm1, %ymm0, %ymm0.
3229define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
3230; AVX-LABEL: test_x86_avx_cmp_pd_256:
3231; AVX:       ## BB#0:
3232; AVX-NEXT:    vcmpordpd %ymm1, %ymm0, %ymm0
3233; AVX-NEXT:    retl
3234;
3235; AVX512VL-LABEL: test_x86_avx_cmp_pd_256:
3236; AVX512VL:       ## BB#0:
3237; AVX512VL-NEXT:    vcmpordpd %ymm1, %ymm0, %ymm0
3238; AVX512VL-NEXT:    retl
3239  %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
3240  ret <4 x double> %res
3241}
3242declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
3243
3244
; Calls @llvm.x86.avx.cmp.ps.256 on (%a0, %a1) with the constant i8 7 as the
; third argument and returns the <8 x float> result.
; Both check prefixes expect that call to be emitted as
; vcmpordps %ymm1, %ymm0, %ymm0.
; The intrinsic's declare is not next to this function; it follows
; test_x86_avx_cmp_ps_256_pseudo_op further down the file.
3245define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
3246; AVX-LABEL: test_x86_avx_cmp_ps_256:
3247; AVX:       ## BB#0:
3248; AVX-NEXT:    vcmpordps %ymm1, %ymm0, %ymm0
3249; AVX-NEXT:    retl
3250;
3251; AVX512VL-LABEL: test_x86_avx_cmp_ps_256:
3252; AVX512VL:       ## BB#0:
3253; AVX512VL-NEXT:    vcmpordps %ymm1, %ymm0, %ymm0
3254; AVX512VL-NEXT:    retl
3255  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
3256  ret <8 x float> %res
3257}
3258
; Chains 32 calls to @llvm.x86.avx.cmp.ps.256, one for each constant third
; argument from i8 0 through i8 31. Every call uses %a0 as its first operand
; and the previous call's result as its second operand (the first call uses
; %a1), so no call is dead. The last result is returned.
; Both check prefixes expect 32 vcmp*ps instructions in the same order as the
; calls, one distinct mnemonic per constant; the first 31 write %ymm1 and the
; last writes %ymm0.
3259define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
3260; AVX-LABEL: test_x86_avx_cmp_ps_256_pseudo_op:
3261; AVX:       ## BB#0:
3262; AVX-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1
3263; AVX-NEXT:    vcmpltps %ymm1, %ymm0, %ymm1
3264; AVX-NEXT:    vcmpleps %ymm1, %ymm0, %ymm1
3265; AVX-NEXT:    vcmpunordps %ymm1, %ymm0, %ymm1
3266; AVX-NEXT:    vcmpneqps %ymm1, %ymm0, %ymm1
3267; AVX-NEXT:    vcmpnltps %ymm1, %ymm0, %ymm1
3268; AVX-NEXT:    vcmpnleps %ymm1, %ymm0, %ymm1
3269; AVX-NEXT:    vcmpordps %ymm1, %ymm0, %ymm1
3270; AVX-NEXT:    vcmpeq_uqps %ymm1, %ymm0, %ymm1
3271; AVX-NEXT:    vcmpngeps %ymm1, %ymm0, %ymm1
3272; AVX-NEXT:    vcmpngtps %ymm1, %ymm0, %ymm1
3273; AVX-NEXT:    vcmpfalseps %ymm1, %ymm0, %ymm1
3274; AVX-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %ymm1
3275; AVX-NEXT:    vcmpgeps %ymm1, %ymm0, %ymm1
3276; AVX-NEXT:    vcmpgtps %ymm1, %ymm0, %ymm1
3277; AVX-NEXT:    vcmptrueps %ymm1, %ymm0, %ymm1
3278; AVX-NEXT:    vcmpeq_osps %ymm1, %ymm0, %ymm1
3279; AVX-NEXT:    vcmplt_oqps %ymm1, %ymm0, %ymm1
3280; AVX-NEXT:    vcmple_oqps %ymm1, %ymm0, %ymm1
3281; AVX-NEXT:    vcmpunord_sps %ymm1, %ymm0, %ymm1
3282; AVX-NEXT:    vcmpneq_usps %ymm1, %ymm0, %ymm1
3283; AVX-NEXT:    vcmpnlt_uqps %ymm1, %ymm0, %ymm1
3284; AVX-NEXT:    vcmpnle_uqps %ymm1, %ymm0, %ymm1
3285; AVX-NEXT:    vcmpord_sps %ymm1, %ymm0, %ymm1
3286; AVX-NEXT:    vcmpeq_usps %ymm1, %ymm0, %ymm1
3287; AVX-NEXT:    vcmpnge_uqps %ymm1, %ymm0, %ymm1
3288; AVX-NEXT:    vcmpngt_uqps %ymm1, %ymm0, %ymm1
3289; AVX-NEXT:    vcmpfalse_osps %ymm1, %ymm0, %ymm1
3290; AVX-NEXT:    vcmpneq_osps %ymm1, %ymm0, %ymm1
3291; AVX-NEXT:    vcmpge_oqps %ymm1, %ymm0, %ymm1
3292; AVX-NEXT:    vcmpgt_oqps %ymm1, %ymm0, %ymm1
3293; AVX-NEXT:    vcmptrue_usps %ymm1, %ymm0, %ymm0
3294; AVX-NEXT:    retl
3295;
3296; AVX512VL-LABEL: test_x86_avx_cmp_ps_256_pseudo_op:
3297; AVX512VL:       ## BB#0:
3298; AVX512VL-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1
3299; AVX512VL-NEXT:    vcmpltps %ymm1, %ymm0, %ymm1
3300; AVX512VL-NEXT:    vcmpleps %ymm1, %ymm0, %ymm1
3301; AVX512VL-NEXT:    vcmpunordps %ymm1, %ymm0, %ymm1
3302; AVX512VL-NEXT:    vcmpneqps %ymm1, %ymm0, %ymm1
3303; AVX512VL-NEXT:    vcmpnltps %ymm1, %ymm0, %ymm1
3304; AVX512VL-NEXT:    vcmpnleps %ymm1, %ymm0, %ymm1
3305; AVX512VL-NEXT:    vcmpordps %ymm1, %ymm0, %ymm1
3306; AVX512VL-NEXT:    vcmpeq_uqps %ymm1, %ymm0, %ymm1
3307; AVX512VL-NEXT:    vcmpngeps %ymm1, %ymm0, %ymm1
3308; AVX512VL-NEXT:    vcmpngtps %ymm1, %ymm0, %ymm1
3309; AVX512VL-NEXT:    vcmpfalseps %ymm1, %ymm0, %ymm1
3310; AVX512VL-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %ymm1
3311; AVX512VL-NEXT:    vcmpgeps %ymm1, %ymm0, %ymm1
3312; AVX512VL-NEXT:    vcmpgtps %ymm1, %ymm0, %ymm1
3313; AVX512VL-NEXT:    vcmptrueps %ymm1, %ymm0, %ymm1
3314; AVX512VL-NEXT:    vcmpeq_osps %ymm1, %ymm0, %ymm1
3315; AVX512VL-NEXT:    vcmplt_oqps %ymm1, %ymm0, %ymm1
3316; AVX512VL-NEXT:    vcmple_oqps %ymm1, %ymm0, %ymm1
3317; AVX512VL-NEXT:    vcmpunord_sps %ymm1, %ymm0, %ymm1
3318; AVX512VL-NEXT:    vcmpneq_usps %ymm1, %ymm0, %ymm1
3319; AVX512VL-NEXT:    vcmpnlt_uqps %ymm1, %ymm0, %ymm1
3320; AVX512VL-NEXT:    vcmpnle_uqps %ymm1, %ymm0, %ymm1
3321; AVX512VL-NEXT:    vcmpord_sps %ymm1, %ymm0, %ymm1
3322; AVX512VL-NEXT:    vcmpeq_usps %ymm1, %ymm0, %ymm1
3323; AVX512VL-NEXT:    vcmpnge_uqps %ymm1, %ymm0, %ymm1
3324; AVX512VL-NEXT:    vcmpngt_uqps %ymm1, %ymm0, %ymm1
3325; AVX512VL-NEXT:    vcmpfalse_osps %ymm1, %ymm0, %ymm1
3326; AVX512VL-NEXT:    vcmpneq_osps %ymm1, %ymm0, %ymm1
3327; AVX512VL-NEXT:    vcmpge_oqps %ymm1, %ymm0, %ymm1
3328; AVX512VL-NEXT:    vcmpgt_oqps %ymm1, %ymm0, %ymm1
3329; AVX512VL-NEXT:    vcmptrue_usps %ymm1, %ymm0, %ymm0
3330; AVX512VL-NEXT:    retl
3331  %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
3332  %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
3333  %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
3334  %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
3335  %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
3336  %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
3337  %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
3338  %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
3339  %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
3340  %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
3341  %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
3342  %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
3343  %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
3344  %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
3345  %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
3346  %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
3347  %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
3348  %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
3349  %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
3350  %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
3351  %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
3352  %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
3353  %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
3354  %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
3355  %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
3356  %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
3357  %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
3358  %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
3359  %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
3360  %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
3361  %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
3362  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
3363  ret <8 x float> %res
3364}
; This declaration also serves test_x86_avx_cmp_ps_256, defined just before
; this function, which calls the same intrinsic.
3365declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
3366
3367
; Lowering test for @llvm.x86.avx.cvt.pd2.ps.256 (<4 x double> -> <4 x float>).
; Both run configurations expect vcvtpd2psy %ymm0, %xmm0; only the AVX run
; expects a vzeroupper before the return.
3368define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
3369; AVX-LABEL: test_x86_avx_cvt_pd2_ps_256:
3370; AVX:       ## BB#0:
3371; AVX-NEXT:    vcvtpd2psy %ymm0, %xmm0
3372; AVX-NEXT:    vzeroupper
3373; AVX-NEXT:    retl
3374;
3375; AVX512VL-LABEL: test_x86_avx_cvt_pd2_ps_256:
3376; AVX512VL:       ## BB#0:
3377; AVX512VL-NEXT:    vcvtpd2psy %ymm0, %xmm0
3378; AVX512VL-NEXT:    retl
3379  %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
3380  ret <4 x float> %res
3381}
3382declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone
3383
3384
; Lowering test for @llvm.x86.avx.cvt.pd2dq.256 (<4 x double> -> <4 x i32>).
; Both run configurations expect vcvtpd2dqy %ymm0, %xmm0; only the AVX run
; expects a vzeroupper before the return.
3385define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
3386; AVX-LABEL: test_x86_avx_cvt_pd2dq_256:
3387; AVX:       ## BB#0:
3388; AVX-NEXT:    vcvtpd2dqy %ymm0, %xmm0
3389; AVX-NEXT:    vzeroupper
3390; AVX-NEXT:    retl
3391;
3392; AVX512VL-LABEL: test_x86_avx_cvt_pd2dq_256:
3393; AVX512VL:       ## BB#0:
3394; AVX512VL-NEXT:    vcvtpd2dqy %ymm0, %xmm0
3395; AVX512VL-NEXT:    retl
3396  %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
3397  ret <4 x i32> %res
3398}
3399declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
3400
3401
; Lowering test for @llvm.x86.avx.cvt.ps2dq.256 (<8 x float> -> <8 x i32>).
; Both run configurations expect a single vcvtps2dq on %ymm0.
3402define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
3403; AVX-LABEL: test_x86_avx_cvt_ps2dq_256:
3404; AVX:       ## BB#0:
3405; AVX-NEXT:    vcvtps2dq %ymm0, %ymm0
3406; AVX-NEXT:    retl
3407;
3408; AVX512VL-LABEL: test_x86_avx_cvt_ps2dq_256:
3409; AVX512VL:       ## BB#0:
3410; AVX512VL-NEXT:    vcvtps2dq %ymm0, %ymm0
3411; AVX512VL-NEXT:    retl
3412  %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
3413  ret <8 x i32> %res
3414}
3415declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
3416
3417
; Lowering test for @llvm.x86.avx.cvtdq2.ps.256 (<8 x i32> -> <8 x float>).
; Both run configurations expect a single vcvtdq2ps on %ymm0.
3418define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
3419; AVX-LABEL: test_x86_avx_cvtdq2_ps_256:
3420; AVX:       ## BB#0:
3421; AVX-NEXT:    vcvtdq2ps %ymm0, %ymm0
3422; AVX-NEXT:    retl
3423;
3424; AVX512VL-LABEL: test_x86_avx_cvtdq2_ps_256:
3425; AVX512VL:       ## BB#0:
3426; AVX512VL-NEXT:    vcvtdq2ps %ymm0, %ymm0
3427; AVX512VL-NEXT:    retl
3428  %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
3429  ret <8 x float> %res
3430}
3431declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
3432
3433
; Lowering test for @llvm.x86.avx.dp.ps.256 called with the constant i8 7.
; Both run configurations expect vdpps with that value as the $7 immediate.
3434define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
3435; AVX-LABEL: test_x86_avx_dp_ps_256:
3436; AVX:       ## BB#0:
3437; AVX-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0
3438; AVX-NEXT:    retl
3439;
3440; AVX512VL-LABEL: test_x86_avx_dp_ps_256:
3441; AVX512VL:       ## BB#0:
3442; AVX512VL-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0
3443; AVX512VL-NEXT:    retl
3444  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
3445  ret <8 x float> %res
3446}
3447declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
3448
3449
; Lowering test for @llvm.x86.avx.hadd.pd.256.
; Both run configurations expect vhaddpd %ymm1, %ymm0, %ymm0.
3450define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
3451; AVX-LABEL: test_x86_avx_hadd_pd_256:
3452; AVX:       ## BB#0:
3453; AVX-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
3454; AVX-NEXT:    retl
3455;
3456; AVX512VL-LABEL: test_x86_avx_hadd_pd_256:
3457; AVX512VL:       ## BB#0:
3458; AVX512VL-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
3459; AVX512VL-NEXT:    retl
3460  %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
3461  ret <4 x double> %res
3462}
3463declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
3464
3465
; Lowering test for @llvm.x86.avx.hadd.ps.256.
; Both run configurations expect vhaddps %ymm1, %ymm0, %ymm0.
3466define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
3467; AVX-LABEL: test_x86_avx_hadd_ps_256:
3468; AVX:       ## BB#0:
3469; AVX-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
3470; AVX-NEXT:    retl
3471;
3472; AVX512VL-LABEL: test_x86_avx_hadd_ps_256:
3473; AVX512VL:       ## BB#0:
3474; AVX512VL-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
3475; AVX512VL-NEXT:    retl
3476  %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
3477  ret <8 x float> %res
3478}
3479declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
3480
3481
; Lowering test for @llvm.x86.avx.hsub.pd.256.
; Both run configurations expect vhsubpd %ymm1, %ymm0, %ymm0.
3482define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
3483; AVX-LABEL: test_x86_avx_hsub_pd_256:
3484; AVX:       ## BB#0:
3485; AVX-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0
3486; AVX-NEXT:    retl
3487;
3488; AVX512VL-LABEL: test_x86_avx_hsub_pd_256:
3489; AVX512VL:       ## BB#0:
3490; AVX512VL-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0
3491; AVX512VL-NEXT:    retl
3492  %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
3493  ret <4 x double> %res
3494}
3495declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
3496
3497
; Lowering test for @llvm.x86.avx.hsub.ps.256.
; Both run configurations expect vhsubps %ymm1, %ymm0, %ymm0.
3498define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
3499; AVX-LABEL: test_x86_avx_hsub_ps_256:
3500; AVX:       ## BB#0:
3501; AVX-NEXT:    vhsubps %ymm1, %ymm0, %ymm0
3502; AVX-NEXT:    retl
3503;
3504; AVX512VL-LABEL: test_x86_avx_hsub_ps_256:
3505; AVX512VL:       ## BB#0:
3506; AVX512VL-NEXT:    vhsubps %ymm1, %ymm0, %ymm0
3507; AVX512VL-NEXT:    retl
3508  %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
3509  ret <8 x float> %res
3510}
3511declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
3512
3513
; Lowering test for @llvm.x86.avx.ldu.dq.256 (load of <32 x i8> through i8*).
; Both run configurations expect the pointer argument to be read from the
; stack into %eax, followed by vlddqu (%eax), %ymm0.
3514define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
3515; AVX-LABEL: test_x86_avx_ldu_dq_256:
3516; AVX:       ## BB#0:
3517; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
3518; AVX-NEXT:    vlddqu (%eax), %ymm0
3519; AVX-NEXT:    retl
3520;
3521; AVX512VL-LABEL: test_x86_avx_ldu_dq_256:
3522; AVX512VL:       ## BB#0:
3523; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
3524; AVX512VL-NEXT:    vlddqu (%eax), %ymm0
3525; AVX512VL-NEXT:    retl
3526  %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
3527  ret <32 x i8> %res
3528}
3529declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
3530
3531
; Lowering test for the 128-bit @llvm.x86.avx.maskload.pd.
; Both run configurations expect the pointer to be read from the stack into
; %eax and a vmaskmovpd load from (%eax) with the mask in %xmm0.
3532define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x i64> %mask) {
3533; AVX-LABEL: test_x86_avx_maskload_pd:
3534; AVX:       ## BB#0:
3535; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
3536; AVX-NEXT:    vmaskmovpd (%eax), %xmm0, %xmm0
3537; AVX-NEXT:    retl
3538;
3539; AVX512VL-LABEL: test_x86_avx_maskload_pd:
3540; AVX512VL:       ## BB#0:
3541; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
3542; AVX512VL-NEXT:    vmaskmovpd (%eax), %xmm0, %xmm0
3543; AVX512VL-NEXT:    retl
3544  %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1]
3545  ret <2 x double> %res
3546}
3547declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readonly
3548
3549
; Lowering test for the 256-bit @llvm.x86.avx.maskload.pd.256.
; Both run configurations expect the pointer to be read from the stack into
; %eax and a vmaskmovpd load from (%eax) with the mask in %ymm0.
3550define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x i64> %mask) {
3551; AVX-LABEL: test_x86_avx_maskload_pd_256:
3552; AVX:       ## BB#0:
3553; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
3554; AVX-NEXT:    vmaskmovpd (%eax), %ymm0, %ymm0
3555; AVX-NEXT:    retl
3556;
3557; AVX512VL-LABEL: test_x86_avx_maskload_pd_256:
3558; AVX512VL:       ## BB#0:
3559; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
3560; AVX512VL-NEXT:    vmaskmovpd (%eax), %ymm0, %ymm0
3561; AVX512VL-NEXT:    retl
3562  %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1]
3563  ret <4 x double> %res
3564}
3565declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readonly
3566
3567
; Lowering test for the 128-bit @llvm.x86.avx.maskload.ps.
; Both run configurations expect the pointer to be read from the stack into
; %eax and a vmaskmovps load from (%eax) with the mask in %xmm0.
3568define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x i32> %mask) {
3569; AVX-LABEL: test_x86_avx_maskload_ps:
3570; AVX:       ## BB#0:
3571; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
3572; AVX-NEXT:    vmaskmovps (%eax), %xmm0, %xmm0
3573; AVX-NEXT:    retl
3574;
3575; AVX512VL-LABEL: test_x86_avx_maskload_ps:
3576; AVX512VL:       ## BB#0:
3577; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
3578; AVX512VL-NEXT:    vmaskmovps (%eax), %xmm0, %xmm0
3579; AVX512VL-NEXT:    retl
3580  %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1]
3581  ret <4 x float> %res
3582}
3583declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readonly
3584
3585
; Lowering test for the 256-bit @llvm.x86.avx.maskload.ps.256.
; Both run configurations expect the pointer to be read from the stack into
; %eax and a vmaskmovps load from (%eax) with the mask in %ymm0.
3586define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x i32> %mask) {
3587; AVX-LABEL: test_x86_avx_maskload_ps_256:
3588; AVX:       ## BB#0:
3589; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
3590; AVX-NEXT:    vmaskmovps (%eax), %ymm0, %ymm0
3591; AVX-NEXT:    retl
3592;
3593; AVX512VL-LABEL: test_x86_avx_maskload_ps_256:
3594; AVX512VL:       ## BB#0:
3595; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
3596; AVX512VL-NEXT:    vmaskmovps (%eax), %ymm0, %ymm0
3597; AVX512VL-NEXT:    retl
3598  %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1]
3599  ret <8 x float> %res
3600}
3601declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readonly
3602
3603
; Lowering test for the 128-bit @llvm.x86.avx.maskstore.pd.
; Both run configurations expect the pointer to be read from the stack into
; %eax and a vmaskmovpd store of %xmm1 to (%eax) with the mask in %xmm0.
3604define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2) {
3605; AVX-LABEL: test_x86_avx_maskstore_pd:
3606; AVX:       ## BB#0:
3607; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
3608; AVX-NEXT:    vmaskmovpd %xmm1, %xmm0, (%eax)
3609; AVX-NEXT:    retl
3610;
3611; AVX512VL-LABEL: test_x86_avx_maskstore_pd:
3612; AVX512VL:       ## BB#0:
3613; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
3614; AVX512VL-NEXT:    vmaskmovpd %xmm1, %xmm0, (%eax)
3615; AVX512VL-NEXT:    retl
3616  call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2)
3617  ret void
3618}
3619declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind
3620
3621
; Lowering test for the 256-bit @llvm.x86.avx.maskstore.pd.256.
; Both run configurations expect a vmaskmovpd store of %ymm1 to (%eax) with
; the mask in %ymm0; only the AVX run expects a vzeroupper before the return.
3622define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x i64> %mask, <4 x double> %a2) {
3623; AVX-LABEL: test_x86_avx_maskstore_pd_256:
3624; AVX:       ## BB#0:
3625; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
3626; AVX-NEXT:    vmaskmovpd %ymm1, %ymm0, (%eax)
3627; AVX-NEXT:    vzeroupper
3628; AVX-NEXT:    retl
3629;
3630; AVX512VL-LABEL: test_x86_avx_maskstore_pd_256:
3631; AVX512VL:       ## BB#0:
3632; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
3633; AVX512VL-NEXT:    vmaskmovpd %ymm1, %ymm0, (%eax)
3634; AVX512VL-NEXT:    retl
3635  call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %mask, <4 x double> %a2)
3636  ret void
3637}
3638declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind
3639
3640
; Lowering test for the 128-bit @llvm.x86.avx.maskstore.ps.
; Both run configurations expect the pointer to be read from the stack into
; %eax and a vmaskmovps store of %xmm1 to (%eax) with the mask in %xmm0.
3641define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2) {
3642; AVX-LABEL: test_x86_avx_maskstore_ps:
3643; AVX:       ## BB#0:
3644; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
3645; AVX-NEXT:    vmaskmovps %xmm1, %xmm0, (%eax)
3646; AVX-NEXT:    retl
3647;
3648; AVX512VL-LABEL: test_x86_avx_maskstore_ps:
3649; AVX512VL:       ## BB#0:
3650; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
3651; AVX512VL-NEXT:    vmaskmovps %xmm1, %xmm0, (%eax)
3652; AVX512VL-NEXT:    retl
3653  call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2)
3654  ret void
3655}
3656declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind
3657
3658
; Lowering test for the 256-bit @llvm.x86.avx.maskstore.ps.256.
; Both run configurations expect a vmaskmovps store of %ymm1 to (%eax) with
; the mask in %ymm0; only the AVX run expects a vzeroupper before the return.
3659define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x i32> %mask, <8 x float> %a2) {
3660; AVX-LABEL: test_x86_avx_maskstore_ps_256:
3661; AVX:       ## BB#0:
3662; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
3663; AVX-NEXT:    vmaskmovps %ymm1, %ymm0, (%eax)
3664; AVX-NEXT:    vzeroupper
3665; AVX-NEXT:    retl
3666;
3667; AVX512VL-LABEL: test_x86_avx_maskstore_ps_256:
3668; AVX512VL:       ## BB#0:
3669; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
3670; AVX512VL-NEXT:    vmaskmovps %ymm1, %ymm0, (%eax)
3671; AVX512VL-NEXT:    retl
3672  call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %mask, <8 x float> %a2)
3673  ret void
3674}
3675declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind
3676
3677
; Lowering test for @llvm.x86.avx.max.pd.256.
; Both run configurations expect vmaxpd %ymm1, %ymm0, %ymm0.
3678define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
3679; AVX-LABEL: test_x86_avx_max_pd_256:
3680; AVX:       ## BB#0:
3681; AVX-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
3682; AVX-NEXT:    retl
3683;
3684; AVX512VL-LABEL: test_x86_avx_max_pd_256:
3685; AVX512VL:       ## BB#0:
3686; AVX512VL-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
3687; AVX512VL-NEXT:    retl
3688  %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
3689  ret <4 x double> %res
3690}
3691declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
3692
3693
; Lowering test for @llvm.x86.avx.max.ps.256.
; Both run configurations expect vmaxps %ymm1, %ymm0, %ymm0.
3694define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
3695; AVX-LABEL: test_x86_avx_max_ps_256:
3696; AVX:       ## BB#0:
3697; AVX-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
3698; AVX-NEXT:    retl
3699;
3700; AVX512VL-LABEL: test_x86_avx_max_ps_256:
3701; AVX512VL:       ## BB#0:
3702; AVX512VL-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
3703; AVX512VL-NEXT:    retl
3704  %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
3705  ret <8 x float> %res
3706}
3707declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
3708
3709
; Lowering test for @llvm.x86.avx.min.pd.256.
; Both run configurations expect vminpd %ymm1, %ymm0, %ymm0.
3710define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
3711; AVX-LABEL: test_x86_avx_min_pd_256:
3712; AVX:       ## BB#0:
3713; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
3714; AVX-NEXT:    retl
3715;
3716; AVX512VL-LABEL: test_x86_avx_min_pd_256:
3717; AVX512VL:       ## BB#0:
3718; AVX512VL-NEXT:    vminpd %ymm1, %ymm0, %ymm0
3719; AVX512VL-NEXT:    retl
3720  %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
3721  ret <4 x double> %res
3722}
3723declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
3724
3725
; Lowering test for @llvm.x86.avx.min.ps.256.
; Both run configurations expect vminps %ymm1, %ymm0, %ymm0.
3726define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
3727; AVX-LABEL: test_x86_avx_min_ps_256:
3728; AVX:       ## BB#0:
3729; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
3730; AVX-NEXT:    retl
3731;
3732; AVX512VL-LABEL: test_x86_avx_min_ps_256:
3733; AVX512VL:       ## BB#0:
3734; AVX512VL-NEXT:    vminps %ymm1, %ymm0, %ymm0
3735; AVX512VL-NEXT:    retl
3736  %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
3737  ret <8 x float> %res
3738}
3739declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
3740
3741
; Lowering test for @llvm.x86.avx.movmsk.pd.256 (<4 x double> -> i32).
; Both run configurations expect vmovmskpd %ymm0, %eax; only the AVX run
; expects a vzeroupper before the return.
3742define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
3743; AVX-LABEL: test_x86_avx_movmsk_pd_256:
3744; AVX:       ## BB#0:
3745; AVX-NEXT:    vmovmskpd %ymm0, %eax
3746; AVX-NEXT:    vzeroupper
3747; AVX-NEXT:    retl
3748;
3749; AVX512VL-LABEL: test_x86_avx_movmsk_pd_256:
3750; AVX512VL:       ## BB#0:
3751; AVX512VL-NEXT:    vmovmskpd %ymm0, %eax
3752; AVX512VL-NEXT:    retl
3753  %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
3754  ret i32 %res
3755}
3756declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
3757
3758
; Lowering test for @llvm.x86.avx.movmsk.ps.256 (<8 x float> -> i32).
; Both run configurations expect vmovmskps %ymm0, %eax; only the AVX run
; expects a vzeroupper before the return.
3759define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
3760; AVX-LABEL: test_x86_avx_movmsk_ps_256:
3761; AVX:       ## BB#0:
3762; AVX-NEXT:    vmovmskps %ymm0, %eax
3763; AVX-NEXT:    vzeroupper
3764; AVX-NEXT:    retl
3765;
3766; AVX512VL-LABEL: test_x86_avx_movmsk_ps_256:
3767; AVX512VL:       ## BB#0:
3768; AVX512VL-NEXT:    vmovmskps %ymm0, %eax
3769; AVX512VL-NEXT:    retl
3770  %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
3771  ret i32 %res
3772}
3773declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
3774
3775
3776
3777
3778
3779
3780
; Lowering test for @llvm.x86.avx.ptestc.256 (i32 result).
; Both run configurations expect vptest %ymm1, %ymm0 and then the i32 result
; formed in %eax by sbbl %eax, %eax followed by andl $1; only the AVX run
; expects a vzeroupper before the return.
3781define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
3782; AVX-LABEL: test_x86_avx_ptestc_256:
3783; AVX:       ## BB#0:
3784; AVX-NEXT:    vptest %ymm1, %ymm0
3785; AVX-NEXT:    sbbl %eax, %eax
3786; AVX-NEXT:    andl $1, %eax
3787; AVX-NEXT:    vzeroupper
3788; AVX-NEXT:    retl
3789;
3790; AVX512VL-LABEL: test_x86_avx_ptestc_256:
3791; AVX512VL:       ## BB#0:
3792; AVX512VL-NEXT:    vptest %ymm1, %ymm0
3793; AVX512VL-NEXT:    sbbl %eax, %eax
3794; AVX512VL-NEXT:    andl $1, %eax
3795; AVX512VL-NEXT:    retl
3796  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
3797  ret i32 %res
3798}
3799declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone
3800
3801
; Lowering test for @llvm.x86.avx.ptestnzc.256 (i32 result).
; Both run configurations expect %eax to be zeroed with xorl, then
; vptest %ymm1, %ymm0 and seta %al; only the AVX run expects a vzeroupper
; before the return.
3802define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
3803; AVX-LABEL: test_x86_avx_ptestnzc_256:
3804; AVX:       ## BB#0:
3805; AVX-NEXT:    xorl %eax, %eax
3806; AVX-NEXT:    vptest %ymm1, %ymm0
3807; AVX-NEXT:    seta %al
3808; AVX-NEXT:    vzeroupper
3809; AVX-NEXT:    retl
3810;
3811; AVX512VL-LABEL: test_x86_avx_ptestnzc_256:
3812; AVX512VL:       ## BB#0:
3813; AVX512VL-NEXT:    xorl %eax, %eax
3814; AVX512VL-NEXT:    vptest %ymm1, %ymm0
3815; AVX512VL-NEXT:    seta %al
3816; AVX512VL-NEXT:    retl
3817  %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
3818  ret i32 %res
3819}
3820declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone
3821
3822
; Lowering test for @llvm.x86.avx.ptestz.256 (i32 result).
; Both run configurations expect %eax to be zeroed with xorl, then
; vptest %ymm1, %ymm0 and sete %al; only the AVX run expects a vzeroupper
; before the return.
3823define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
3824; AVX-LABEL: test_x86_avx_ptestz_256:
3825; AVX:       ## BB#0:
3826; AVX-NEXT:    xorl %eax, %eax
3827; AVX-NEXT:    vptest %ymm1, %ymm0
3828; AVX-NEXT:    sete %al
3829; AVX-NEXT:    vzeroupper
3830; AVX-NEXT:    retl
3831;
3832; AVX512VL-LABEL: test_x86_avx_ptestz_256:
3833; AVX512VL:       ## BB#0:
3834; AVX512VL-NEXT:    xorl %eax, %eax
3835; AVX512VL-NEXT:    vptest %ymm1, %ymm0
3836; AVX512VL-NEXT:    sete %al
3837; AVX512VL-NEXT:    retl
3838  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
3839  ret i32 %res
3840}
3841declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
3842
3843
; Lowering test for @llvm.x86.avx.rcp.ps.256.
; The two run configurations expect different instructions - vrcpps for the
; AVX run and vrcp14ps for the AVX512VL run.
; NOTE(review) - confirm that selecting vrcp14ps for this legacy intrinsic is
; intended; the check lines only record what llc emitted when generated.
3844define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
3845; AVX-LABEL: test_x86_avx_rcp_ps_256:
3846; AVX:       ## BB#0:
3847; AVX-NEXT:    vrcpps %ymm0, %ymm0
3848; AVX-NEXT:    retl
3849;
3850; AVX512VL-LABEL: test_x86_avx_rcp_ps_256:
3851; AVX512VL:       ## BB#0:
3852; AVX512VL-NEXT:    vrcp14ps %ymm0, %ymm0
3853; AVX512VL-NEXT:    retl
3854  %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
3855  ret <8 x float> %res
3856}
3857declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
3858
3859
; Lowering test for @llvm.x86.avx.round.pd.256 called with the constant i32 7.
; Both run configurations expect vroundpd with that value as the $7 immediate.
3860define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
3861; AVX-LABEL: test_x86_avx_round_pd_256:
3862; AVX:       ## BB#0:
3863; AVX-NEXT:    vroundpd $7, %ymm0, %ymm0
3864; AVX-NEXT:    retl
3865;
3866; AVX512VL-LABEL: test_x86_avx_round_pd_256:
3867; AVX512VL:       ## BB#0:
3868; AVX512VL-NEXT:    vroundpd $7, %ymm0, %ymm0
3869; AVX512VL-NEXT:    retl
3870  %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
3871  ret <4 x double> %res
3872}
3873declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
3874
3875
; Lowering test for @llvm.x86.avx.round.ps.256 called with the constant i32 7.
; Both run configurations expect vroundps with that value as the $7 immediate.
3876define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
3877; AVX-LABEL: test_x86_avx_round_ps_256:
3878; AVX:       ## BB#0:
3879; AVX-NEXT:    vroundps $7, %ymm0, %ymm0
3880; AVX-NEXT:    retl
3881;
3882; AVX512VL-LABEL: test_x86_avx_round_ps_256:
3883; AVX512VL:       ## BB#0:
3884; AVX512VL-NEXT:    vroundps $7, %ymm0, %ymm0
3885; AVX512VL-NEXT:    retl
3886  %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
3887  ret <8 x float> %res
3888}
3889declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
3890
3891
; Lowering test for @llvm.x86.avx.rsqrt.ps.256.
; The two run configurations expect different instructions - vrsqrtps for the
; AVX run and vrsqrt14ps for the AVX512VL run.
; NOTE(review) - confirm that selecting vrsqrt14ps for this legacy intrinsic
; is intended; the check lines only record what llc emitted when generated.
3892define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
3893; AVX-LABEL: test_x86_avx_rsqrt_ps_256:
3894; AVX:       ## BB#0:
3895; AVX-NEXT:    vrsqrtps %ymm0, %ymm0
3896; AVX-NEXT:    retl
3897;
3898; AVX512VL-LABEL: test_x86_avx_rsqrt_ps_256:
3899; AVX512VL:       ## BB#0:
3900; AVX512VL-NEXT:    vrsqrt14ps %ymm0, %ymm0
3901; AVX512VL-NEXT:    retl
3902  %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
3903  ret <8 x float> %res
3904}
3905declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
3906
3907
; Lowering test for @llvm.x86.avx.sqrt.pd.256.
; Both run configurations expect vsqrtpd %ymm0, %ymm0.
3908define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
3909; AVX-LABEL: test_x86_avx_sqrt_pd_256:
3910; AVX:       ## BB#0:
3911; AVX-NEXT:    vsqrtpd %ymm0, %ymm0
3912; AVX-NEXT:    retl
3913;
3914; AVX512VL-LABEL: test_x86_avx_sqrt_pd_256:
3915; AVX512VL:       ## BB#0:
3916; AVX512VL-NEXT:    vsqrtpd %ymm0, %ymm0
3917; AVX512VL-NEXT:    retl
3918  %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
3919  ret <4 x double> %res
3920}
3921declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
3922
3923
; Lowering test for @llvm.x86.avx.sqrt.ps.256.
; Both run configurations expect vsqrtps %ymm0, %ymm0.
3924define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
3925; AVX-LABEL: test_x86_avx_sqrt_ps_256:
3926; AVX:       ## BB#0:
3927; AVX-NEXT:    vsqrtps %ymm0, %ymm0
3928; AVX-NEXT:    retl
3929;
3930; AVX512VL-LABEL: test_x86_avx_sqrt_ps_256:
3931; AVX512VL:       ## BB#0:
3932; AVX512VL-NEXT:    vsqrtps %ymm0, %ymm0
3933; AVX512VL-NEXT:    retl
3934  %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
3935  ret <8 x float> %res
3936}
3937declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
3938
3939
; Lowering test for @llvm.x86.avx.vbroadcastf128.pd.256 (load through i8*).
; Both run configurations expect the pointer to be read from the stack into
; %eax, then a vbroadcastf128 whose asm shuffle comment reads
; "ymm0 = mem[0,1,0,1]"; the operand text itself is matched by a wildcard.
3940define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
3941; AVX-LABEL: test_x86_avx_vbroadcastf128_pd_256:
3942; AVX:       ## BB#0:
3943; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
3944; AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
3945; AVX-NEXT:    retl
3946;
3947; AVX512VL-LABEL: test_x86_avx_vbroadcastf128_pd_256:
3948; AVX512VL:       ## BB#0:
3949; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
3950; AVX512VL-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
3951; AVX512VL-NEXT:    retl
3952  %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
3953  ret <4 x double> %res
3954}
3955declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly
3956
3957
; Lowering test for @llvm.x86.avx.vbroadcastf128.ps.256 (load through i8*).
; Both run configurations expect the pointer to be read from the stack into
; %eax, then a vbroadcastf128 whose asm shuffle comment reads
; "ymm0 = mem[0,1,0,1]"; the operand text itself is matched by a wildcard.
3958define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
3959; AVX-LABEL: test_x86_avx_vbroadcastf128_ps_256:
3960; AVX:       ## BB#0:
3961; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
3962; AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
3963; AVX-NEXT:    retl
3964;
3965; AVX512VL-LABEL: test_x86_avx_vbroadcastf128_ps_256:
3966; AVX512VL:       ## BB#0:
3967; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
3968; AVX512VL-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
3969; AVX512VL-NEXT:    retl
3970  %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
3971  ret <8 x float> %res
3972}
3973declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
3974
3975
; Lowering test for @llvm.x86.avx.vperm2f128.pd.256 called with the constant
; i8 7. Both run configurations expect a vperm2f128 whose asm shuffle comment
; reads "ymm0 = ymm1[2,3],ymm0[0,1]".
3976define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
3977; AVX-LABEL: test_x86_avx_vperm2f128_pd_256:
3978; AVX:       ## BB#0:
3979; AVX-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
3980; AVX-NEXT:    retl
3981;
3982; AVX512VL-LABEL: test_x86_avx_vperm2f128_pd_256:
3983; AVX512VL:       ## BB#0:
3984; AVX512VL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
3985; AVX512VL-NEXT:    retl
3986  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
3987  ret <4 x double> %res
3988}
3989declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
3990
3991
; Lowering test for @llvm.x86.avx.vperm2f128.ps.256 called with the constant
; i8 7. Both run configurations expect a vperm2f128 whose asm shuffle comment
; reads "ymm0 = ymm1[2,3],ymm0[0,1]".
3992define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
3993; AVX-LABEL: test_x86_avx_vperm2f128_ps_256:
3994; AVX:       ## BB#0:
3995; AVX-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
3996; AVX-NEXT:    retl
3997;
3998; AVX512VL-LABEL: test_x86_avx_vperm2f128_ps_256:
3999; AVX512VL:       ## BB#0:
4000; AVX512VL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
4001; AVX512VL-NEXT:    retl
4002  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
4003  ret <8 x float> %res
4004}
4005declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
4006
4007
; Lowering test for @llvm.x86.avx.vperm2f128.si.256 (integer <8 x i32> form)
; called with the constant i8 7. Both run configurations expect a vperm2f128
; whose asm shuffle comment reads "ymm0 = ymm1[2,3],ymm0[0,1]".
4008define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
4009; AVX-LABEL: test_x86_avx_vperm2f128_si_256:
4010; AVX:       ## BB#0:
4011; AVX-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
4012; AVX-NEXT:    retl
4013;
4014; AVX512VL-LABEL: test_x86_avx_vperm2f128_si_256:
4015; AVX512VL:       ## BB#0:
4016; AVX512VL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
4017; AVX512VL-NEXT:    retl
4018  %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
4019  ret <8 x i32> %res
4020}
4021declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
4022
4023
; Lowering test for the 128-bit @llvm.x86.avx.vpermilvar.pd with a variable
; (register) control vector. Both run configurations expect
; vpermilpd %xmm1, %xmm0, %xmm0.
4024define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
4025; AVX-LABEL: test_x86_avx_vpermilvar_pd:
4026; AVX:       ## BB#0:
4027; AVX-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
4028; AVX-NEXT:    retl
4029;
4030; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd:
4031; AVX512VL:       ## BB#0:
4032; AVX512VL-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
4033; AVX512VL-NEXT:    retl
4034  %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
4035  ret <2 x double> %res
4036}
4037declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
4038
4039
; Lowering test for the 256-bit @llvm.x86.avx.vpermilvar.pd.256 with a
; variable (register) control vector. Both run configurations expect
; vpermilpd %ymm1, %ymm0, %ymm0.
4040define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
4041; AVX-LABEL: test_x86_avx_vpermilvar_pd_256:
4042; AVX:       ## BB#0:
4043; AVX-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0
4044; AVX-NEXT:    retl
4045;
4046; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256:
4047; AVX512VL:       ## BB#0:
4048; AVX512VL-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0
4049; AVX512VL-NEXT:    retl
4050  %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
4051  ret <4 x double> %res
4052}
4053declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
4054
; Constant-control variant of the vpermilvar.pd.256 test. The control vector
; is the literal <i64 2, i64 0, i64 0, i64 2>, and both run configurations
; expect a vpermilpd whose asm shuffle comment reads "ymm0 = ymm0[1,0,2,3]"
; rather than a register-controlled permute.
; The expected indices are consistent with VPERMILPD using bit 1 of each
; control element as the in-lane selector (see the instruction reference).
4055define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) {
4056; AVX-LABEL: test_x86_avx_vpermilvar_pd_256_2:
4057; AVX:       ## BB#0:
4058; AVX-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
4059; AVX-NEXT:    retl
4060;
4061; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:
4062; AVX512VL:       ## BB#0:
4063; AVX512VL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
4064; AVX512VL-NEXT:    retl
4065  %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
4066  ret <4 x double> %res
4067}
4068
; Lowering test for the 128-bit @llvm.x86.avx.vpermilvar.ps with a variable
; (register) control vector. Both run configurations expect
; vpermilps %xmm1, %xmm0, %xmm0.
4069define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
4070; AVX-LABEL: test_x86_avx_vpermilvar_ps:
4071; AVX:       ## BB#0:
4072; AVX-NEXT:    vpermilps %xmm1, %xmm0, %xmm0
4073; AVX-NEXT:    retl
4074;
4075; AVX512VL-LABEL: test_x86_avx_vpermilvar_ps:
4076; AVX512VL:       ## BB#0:
4077; AVX512VL-NEXT:    vpermilps %xmm1, %xmm0, %xmm0
4078; AVX512VL-NEXT:    retl
4079  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
4080  ret <4 x float> %res
4081}
; Load-folding variant of the vpermilvar.ps test. The control vector is loaded
; from a <4 x i32>* argument, and both run configurations expect that load to
; appear as the memory operand of the instruction, i.e.
; vpermilps (%eax), %xmm0, %xmm0, after the pointer is read from the stack.
4082define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
4083; AVX-LABEL: test_x86_avx_vpermilvar_ps_load:
4084; AVX:       ## BB#0:
4085; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
4086; AVX-NEXT:    vpermilps (%eax), %xmm0, %xmm0
4087; AVX-NEXT:    retl
4088;
4089; AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_load:
4090; AVX512VL:       ## BB#0:
4091; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
4092; AVX512VL-NEXT:    vpermilps (%eax), %xmm0, %xmm0
4093; AVX512VL-NEXT:    retl
4094  %a2 = load <4 x i32>, <4 x i32>* %a1
4095  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
4096  ret <4 x float> %res
4097}
4098declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
4099
4100
; Lowering test for the 256-bit @llvm.x86.avx.vpermilvar.ps.256 with a
; variable (register) control vector. Both run configurations expect
; vpermilps %ymm1, %ymm0, %ymm0.
4101define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
4102; AVX-LABEL: test_x86_avx_vpermilvar_ps_256:
4103; AVX:       ## BB#0:
4104; AVX-NEXT:    vpermilps %ymm1, %ymm0, %ymm0
4105; AVX-NEXT:    retl
4106;
4107; AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_256:
4108; AVX512VL:       ## BB#0:
4109; AVX512VL-NEXT:    vpermilps %ymm1, %ymm0, %ymm0
4110; AVX512VL-NEXT:    retl
4111  %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
4112  ret <8 x float> %res
4113}
4114declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
4115
4116
; Lowering test for the 128-bit @llvm.x86.avx.vtestc.pd (i32 result).
; Both run configurations expect vtestpd %xmm1, %xmm0 and then the i32 result
; formed in %eax by sbbl %eax, %eax followed by andl $1.
4117define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
4118; AVX-LABEL: test_x86_avx_vtestc_pd:
4119; AVX:       ## BB#0:
4120; AVX-NEXT:    vtestpd %xmm1, %xmm0
4121; AVX-NEXT:    sbbl %eax, %eax
4122; AVX-NEXT:    andl $1, %eax
4123; AVX-NEXT:    retl
4124;
4125; AVX512VL-LABEL: test_x86_avx_vtestc_pd:
4126; AVX512VL:       ## BB#0:
4127; AVX512VL-NEXT:    vtestpd %xmm1, %xmm0
4128; AVX512VL-NEXT:    sbbl %eax, %eax
4129; AVX512VL-NEXT:    andl $1, %eax
4130; AVX512VL-NEXT:    retl
4131  %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
4132  ret i32 %res
4133}
4134declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
4135
4136
; Test: llvm.x86.avx.vtestc.pd.256 on 256-bit operands. Both runs expect
; vtestpd on ymm registers followed by sbbl/andl to turn the carry flag into
; 0/1. Only the plain AVX run expects a vzeroupper before the return; the
; AVX512VL checks contain none.
4137define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
4138; AVX-LABEL: test_x86_avx_vtestc_pd_256:
4139; AVX:       ## BB#0:
4140; AVX-NEXT:    vtestpd %ymm1, %ymm0
4141; AVX-NEXT:    sbbl %eax, %eax
4142; AVX-NEXT:    andl $1, %eax
4143; AVX-NEXT:    vzeroupper
4144; AVX-NEXT:    retl
4145;
4146; AVX512VL-LABEL: test_x86_avx_vtestc_pd_256:
4147; AVX512VL:       ## BB#0:
4148; AVX512VL-NEXT:    vtestpd %ymm1, %ymm0
4149; AVX512VL-NEXT:    sbbl %eax, %eax
4150; AVX512VL-NEXT:    andl $1, %eax
4151; AVX512VL-NEXT:    retl
4152  %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
4153  ret i32 %res
4154}
4155declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
4156
4157
; Test: llvm.x86.avx.vtestc.ps on 128-bit operands. Both runs expect vtestps
; followed by sbbl/andl, which turns the carry flag into a 0/1 value in %eax.
4158define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
4159; AVX-LABEL: test_x86_avx_vtestc_ps:
4160; AVX:       ## BB#0:
4161; AVX-NEXT:    vtestps %xmm1, %xmm0
4162; AVX-NEXT:    sbbl %eax, %eax
4163; AVX-NEXT:    andl $1, %eax
4164; AVX-NEXT:    retl
4165;
4166; AVX512VL-LABEL: test_x86_avx_vtestc_ps:
4167; AVX512VL:       ## BB#0:
4168; AVX512VL-NEXT:    vtestps %xmm1, %xmm0
4169; AVX512VL-NEXT:    sbbl %eax, %eax
4170; AVX512VL-NEXT:    andl $1, %eax
4171; AVX512VL-NEXT:    retl
4172  %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
4173  ret i32 %res
4174}
4175declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
4176
4177
; Test: llvm.x86.avx.vtestc.ps.256 on 256-bit operands. Both runs expect
; vtestps on ymm registers followed by sbbl/andl to turn the carry flag into
; 0/1. Only the plain AVX run expects a vzeroupper before the return; the
; AVX512VL checks contain none.
4178define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
4179; AVX-LABEL: test_x86_avx_vtestc_ps_256:
4180; AVX:       ## BB#0:
4181; AVX-NEXT:    vtestps %ymm1, %ymm0
4182; AVX-NEXT:    sbbl %eax, %eax
4183; AVX-NEXT:    andl $1, %eax
4184; AVX-NEXT:    vzeroupper
4185; AVX-NEXT:    retl
4186;
4187; AVX512VL-LABEL: test_x86_avx_vtestc_ps_256:
4188; AVX512VL:       ## BB#0:
4189; AVX512VL-NEXT:    vtestps %ymm1, %ymm0
4190; AVX512VL-NEXT:    sbbl %eax, %eax
4191; AVX512VL-NEXT:    andl $1, %eax
4192; AVX512VL-NEXT:    retl
4193  %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
4194  ret i32 %res
4195}
4196declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
4197
4198
; Test: llvm.x86.avx.vtestnzc.pd on 128-bit operands. Both runs expect %eax to
; be zeroed first, then vtestpd, then seta to set %al when both the carry and
; zero flags are clear.
4199define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
4200; AVX-LABEL: test_x86_avx_vtestnzc_pd:
4201; AVX:       ## BB#0:
4202; AVX-NEXT:    xorl %eax, %eax
4203; AVX-NEXT:    vtestpd %xmm1, %xmm0
4204; AVX-NEXT:    seta %al
4205; AVX-NEXT:    retl
4206;
4207; AVX512VL-LABEL: test_x86_avx_vtestnzc_pd:
4208; AVX512VL:       ## BB#0:
4209; AVX512VL-NEXT:    xorl %eax, %eax
4210; AVX512VL-NEXT:    vtestpd %xmm1, %xmm0
4211; AVX512VL-NEXT:    seta %al
4212; AVX512VL-NEXT:    retl
4213  %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
4214  ret i32 %res
4215}
4216declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone
4217
4218
; Test: llvm.x86.avx.vtestnzc.pd.256 on 256-bit operands. Both runs expect
; xorl, vtestpd on ymm registers, then seta (carry and zero flags both clear).
; Only the plain AVX run expects a vzeroupper before the return; the AVX512VL
; checks contain none.
4219define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
4220; AVX-LABEL: test_x86_avx_vtestnzc_pd_256:
4221; AVX:       ## BB#0:
4222; AVX-NEXT:    xorl %eax, %eax
4223; AVX-NEXT:    vtestpd %ymm1, %ymm0
4224; AVX-NEXT:    seta %al
4225; AVX-NEXT:    vzeroupper
4226; AVX-NEXT:    retl
4227;
4228; AVX512VL-LABEL: test_x86_avx_vtestnzc_pd_256:
4229; AVX512VL:       ## BB#0:
4230; AVX512VL-NEXT:    xorl %eax, %eax
4231; AVX512VL-NEXT:    vtestpd %ymm1, %ymm0
4232; AVX512VL-NEXT:    seta %al
4233; AVX512VL-NEXT:    retl
4234  %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
4235  ret i32 %res
4236}
4237declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone
4238
4239
; Test: llvm.x86.avx.vtestnzc.ps on 128-bit operands. Both runs expect %eax to
; be zeroed first, then vtestps, then seta to set %al when both the carry and
; zero flags are clear.
4240define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
4241; AVX-LABEL: test_x86_avx_vtestnzc_ps:
4242; AVX:       ## BB#0:
4243; AVX-NEXT:    xorl %eax, %eax
4244; AVX-NEXT:    vtestps %xmm1, %xmm0
4245; AVX-NEXT:    seta %al
4246; AVX-NEXT:    retl
4247;
4248; AVX512VL-LABEL: test_x86_avx_vtestnzc_ps:
4249; AVX512VL:       ## BB#0:
4250; AVX512VL-NEXT:    xorl %eax, %eax
4251; AVX512VL-NEXT:    vtestps %xmm1, %xmm0
4252; AVX512VL-NEXT:    seta %al
4253; AVX512VL-NEXT:    retl
4254  %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
4255  ret i32 %res
4256}
4257declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone
4258
4259
; Test: llvm.x86.avx.vtestnzc.ps.256 on 256-bit operands. Both runs expect
; xorl, vtestps on ymm registers, then seta (carry and zero flags both clear).
; Only the plain AVX run expects a vzeroupper before the return; the AVX512VL
; checks contain none.
4260define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
4261; AVX-LABEL: test_x86_avx_vtestnzc_ps_256:
4262; AVX:       ## BB#0:
4263; AVX-NEXT:    xorl %eax, %eax
4264; AVX-NEXT:    vtestps %ymm1, %ymm0
4265; AVX-NEXT:    seta %al
4266; AVX-NEXT:    vzeroupper
4267; AVX-NEXT:    retl
4268;
4269; AVX512VL-LABEL: test_x86_avx_vtestnzc_ps_256:
4270; AVX512VL:       ## BB#0:
4271; AVX512VL-NEXT:    xorl %eax, %eax
4272; AVX512VL-NEXT:    vtestps %ymm1, %ymm0
4273; AVX512VL-NEXT:    seta %al
4274; AVX512VL-NEXT:    retl
4275  %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
4276  ret i32 %res
4277}
4278declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone
4279
4280
; Test: llvm.x86.avx.vtestz.pd on 128-bit operands. Both runs expect %eax to
; be zeroed first, then vtestpd, then sete to set %al when the zero flag is set.
4281define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
4282; AVX-LABEL: test_x86_avx_vtestz_pd:
4283; AVX:       ## BB#0:
4284; AVX-NEXT:    xorl %eax, %eax
4285; AVX-NEXT:    vtestpd %xmm1, %xmm0
4286; AVX-NEXT:    sete %al
4287; AVX-NEXT:    retl
4288;
4289; AVX512VL-LABEL: test_x86_avx_vtestz_pd:
4290; AVX512VL:       ## BB#0:
4291; AVX512VL-NEXT:    xorl %eax, %eax
4292; AVX512VL-NEXT:    vtestpd %xmm1, %xmm0
4293; AVX512VL-NEXT:    sete %al
4294; AVX512VL-NEXT:    retl
4295  %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
4296  ret i32 %res
4297}
4298declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone
4299
4300
; Test: llvm.x86.avx.vtestz.pd.256 on 256-bit operands. Both runs expect xorl,
; vtestpd on ymm registers, then sete (zero flag set). Only the plain AVX run
; expects a vzeroupper before the return; the AVX512VL checks contain none.
4301define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
4302; AVX-LABEL: test_x86_avx_vtestz_pd_256:
4303; AVX:       ## BB#0:
4304; AVX-NEXT:    xorl %eax, %eax
4305; AVX-NEXT:    vtestpd %ymm1, %ymm0
4306; AVX-NEXT:    sete %al
4307; AVX-NEXT:    vzeroupper
4308; AVX-NEXT:    retl
4309;
4310; AVX512VL-LABEL: test_x86_avx_vtestz_pd_256:
4311; AVX512VL:       ## BB#0:
4312; AVX512VL-NEXT:    xorl %eax, %eax
4313; AVX512VL-NEXT:    vtestpd %ymm1, %ymm0
4314; AVX512VL-NEXT:    sete %al
4315; AVX512VL-NEXT:    retl
4316  %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
4317  ret i32 %res
4318}
4319declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone
4320
4321
; Test: llvm.x86.avx.vtestz.ps on 128-bit operands. Both runs expect %eax to
; be zeroed first, then vtestps, then sete to set %al when the zero flag is set.
4322define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
4323; AVX-LABEL: test_x86_avx_vtestz_ps:
4324; AVX:       ## BB#0:
4325; AVX-NEXT:    xorl %eax, %eax
4326; AVX-NEXT:    vtestps %xmm1, %xmm0
4327; AVX-NEXT:    sete %al
4328; AVX-NEXT:    retl
4329;
4330; AVX512VL-LABEL: test_x86_avx_vtestz_ps:
4331; AVX512VL:       ## BB#0:
4332; AVX512VL-NEXT:    xorl %eax, %eax
4333; AVX512VL-NEXT:    vtestps %xmm1, %xmm0
4334; AVX512VL-NEXT:    sete %al
4335; AVX512VL-NEXT:    retl
4336  %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
4337  ret i32 %res
4338}
4339declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
4340
4341
; Test: llvm.x86.avx.vtestz.ps.256 on 256-bit operands. Both runs expect xorl,
; vtestps on ymm registers, then sete (zero flag set). Only the plain AVX run
; expects a vzeroupper before the return; the AVX512VL checks contain none.
4342define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
4343; AVX-LABEL: test_x86_avx_vtestz_ps_256:
4344; AVX:       ## BB#0:
4345; AVX-NEXT:    xorl %eax, %eax
4346; AVX-NEXT:    vtestps %ymm1, %ymm0
4347; AVX-NEXT:    sete %al
4348; AVX-NEXT:    vzeroupper
4349; AVX-NEXT:    retl
4350;
4351; AVX512VL-LABEL: test_x86_avx_vtestz_ps_256:
4352; AVX512VL:       ## BB#0:
4353; AVX512VL-NEXT:    xorl %eax, %eax
4354; AVX512VL-NEXT:    vtestps %ymm1, %ymm0
4355; AVX512VL-NEXT:    sete %al
4356; AVX512VL-NEXT:    retl
4357  %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
4358  ret i32 %res
4359}
4360declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone
4361
4362
; Test: llvm.x86.avx.vzeroall takes no operands; both runs expect a single
; vzeroall instruction followed by the return.
4363define void @test_x86_avx_vzeroall() {
4364; AVX-LABEL: test_x86_avx_vzeroall:
4365; AVX:       ## BB#0:
4366; AVX-NEXT:    vzeroall
4367; AVX-NEXT:    retl
4368;
4369; AVX512VL-LABEL: test_x86_avx_vzeroall:
4370; AVX512VL:       ## BB#0:
4371; AVX512VL-NEXT:    vzeroall
4372; AVX512VL-NEXT:    retl
4373  call void @llvm.x86.avx.vzeroall()
4374  ret void
4375}
4376declare void @llvm.x86.avx.vzeroall() nounwind
4377
4378
; Test: llvm.x86.avx.vzeroupper takes no operands; both runs expect exactly one
; vzeroupper instruction followed by the return.
4379define void @test_x86_avx_vzeroupper() {
4380; AVX-LABEL: test_x86_avx_vzeroupper:
4381; AVX:       ## BB#0:
4382; AVX-NEXT:    vzeroupper
4383; AVX-NEXT:    retl
4384;
4385; AVX512VL-LABEL: test_x86_avx_vzeroupper:
4386; AVX512VL:       ## BB#0:
4387; AVX512VL-NEXT:    vzeroupper
4388; AVX512VL-NEXT:    retl
4389  call void @llvm.x86.avx.vzeroupper()
4390  ret void
4391}
4392declare void @llvm.x86.avx.vzeroupper() nounwind
4393
4394; Make sure instructions that have no AVX equivalents, but are associated with SSEX feature flags, still work.
4395
; Test: llvm.x86.sse3.monitor still selects the plain monitor instruction when
; AVX / AVX512VL are enabled. The three stack arguments are expected to be
; loaded into %edx, %ecx and %eax, followed by a leal on %eax and then monitor.
4396define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
4397; AVX-LABEL: monitor:
4398; AVX:       ## BB#0:
4399; AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
4400; AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4401; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
4402; AVX-NEXT:    leal (%eax), %eax
4403; AVX-NEXT:    monitor
4404; AVX-NEXT:    retl
4405;
4406; AVX512VL-LABEL: monitor:
4407; AVX512VL:       ## BB#0:
4408; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %edx
4409; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4410; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
4411; AVX512VL-NEXT:    leal (%eax), %eax
4412; AVX512VL-NEXT:    monitor
4413; AVX512VL-NEXT:    retl
4414  tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
4415  ret void
4416}
4417declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
4418
; Test: llvm.x86.sse3.mwait still selects the plain mwait instruction when
; AVX / AVX512VL are enabled. The two stack arguments are expected to be loaded
; into %ecx and %eax before mwait.
4419define void @mwait(i32 %E, i32 %H) nounwind {
4420; AVX-LABEL: mwait:
4421; AVX:       ## BB#0:
4422; AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4423; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
4424; AVX-NEXT:    mwait
4425; AVX-NEXT:    retl
4426;
4427; AVX512VL-LABEL: mwait:
4428; AVX512VL:       ## BB#0:
4429; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4430; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
4431; AVX512VL-NEXT:    mwait
4432; AVX512VL-NEXT:    retl
4433  tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
4434  ret void
4435}
4436declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
4437
; Test: llvm.x86.sse.sfence still selects a plain sfence instruction in both
; the AVX and AVX512VL runs.
4438define void @sfence() nounwind {
4439; AVX-LABEL: sfence:
4440; AVX:       ## BB#0:
4441; AVX-NEXT:    sfence
4442; AVX-NEXT:    retl
4443;
4444; AVX512VL-LABEL: sfence:
4445; AVX512VL:       ## BB#0:
4446; AVX512VL-NEXT:    sfence
4447; AVX512VL-NEXT:    retl
4448  tail call void @llvm.x86.sse.sfence()
4449  ret void
4450}
4451declare void @llvm.x86.sse.sfence() nounwind
4452
; Test: llvm.x86.sse2.lfence still selects a plain lfence instruction in both
; the AVX and AVX512VL runs.
4453define void @lfence() nounwind {
4454; AVX-LABEL: lfence:
4455; AVX:       ## BB#0:
4456; AVX-NEXT:    lfence
4457; AVX-NEXT:    retl
4458;
4459; AVX512VL-LABEL: lfence:
4460; AVX512VL:       ## BB#0:
4461; AVX512VL-NEXT:    lfence
4462; AVX512VL-NEXT:    retl
4463  tail call void @llvm.x86.sse2.lfence()
4464  ret void
4465}
4466declare void @llvm.x86.sse2.lfence() nounwind
4467
; Test: llvm.x86.sse2.mfence still selects a plain mfence instruction in both
; the AVX and AVX512VL runs.
4468define void @mfence() nounwind {
4469; AVX-LABEL: mfence:
4470; AVX:       ## BB#0:
4471; AVX-NEXT:    mfence
4472; AVX-NEXT:    retl
4473;
4474; AVX512VL-LABEL: mfence:
4475; AVX512VL:       ## BB#0:
4476; AVX512VL-NEXT:    mfence
4477; AVX512VL-NEXT:    retl
4478  tail call void @llvm.x86.sse2.mfence()
4479  ret void
4480}
4481declare void @llvm.x86.sse2.mfence() nounwind
4482
; Test: llvm.x86.sse2.clflush still selects a plain clflush in both runs. The
; pointer argument is expected to be loaded from the stack into %eax and used
; as the memory operand.
4483define void @clflush(i8* %p) nounwind {
4484; AVX-LABEL: clflush:
4485; AVX:       ## BB#0:
4486; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
4487; AVX-NEXT:    clflush (%eax)
4488; AVX-NEXT:    retl
4489;
4490; AVX512VL-LABEL: clflush:
4491; AVX512VL:       ## BB#0:
4492; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
4493; AVX512VL-NEXT:    clflush (%eax)
4494; AVX512VL-NEXT:    retl
4495  tail call void @llvm.x86.sse2.clflush(i8* %p)
4496  ret void
4497}
4498declare void @llvm.x86.sse2.clflush(i8*) nounwind
4499
; Test: llvm.x86.sse42.crc32.32.8 (i32 accumulator, i8 data) selects crc32b in
; both runs. The accumulator is expected in %eax and the data operand is
; expected to be read straight from its stack slot.
4500define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
4501; AVX-LABEL: crc32_32_8:
4502; AVX:       ## BB#0:
4503; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
4504; AVX-NEXT:    crc32b {{[0-9]+}}(%esp), %eax
4505; AVX-NEXT:    retl
4506;
4507; AVX512VL-LABEL: crc32_32_8:
4508; AVX512VL:       ## BB#0:
4509; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
4510; AVX512VL-NEXT:    crc32b {{[0-9]+}}(%esp), %eax
4511; AVX512VL-NEXT:    retl
4512  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
4513  ret i32 %tmp
4514}
4515declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
4516
; Test: llvm.x86.sse42.crc32.32.16 (i32 accumulator, i16 data) selects crc32w
; in both runs. The accumulator is expected in %eax and the data operand is
; expected to be read straight from its stack slot.
4517define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
4518; AVX-LABEL: crc32_32_16:
4519; AVX:       ## BB#0:
4520; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
4521; AVX-NEXT:    crc32w {{[0-9]+}}(%esp), %eax
4522; AVX-NEXT:    retl
4523;
4524; AVX512VL-LABEL: crc32_32_16:
4525; AVX512VL:       ## BB#0:
4526; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
4527; AVX512VL-NEXT:    crc32w {{[0-9]+}}(%esp), %eax
4528; AVX512VL-NEXT:    retl
4529  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
4530  ret i32 %tmp
4531}
4532declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
4533
; Test: llvm.x86.sse42.crc32.32.32 (i32 accumulator, i32 data) selects crc32l
; in both runs. The accumulator is expected in %eax and the data operand is
; expected to be read straight from its stack slot.
4534define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
4535; AVX-LABEL: crc32_32_32:
4536; AVX:       ## BB#0:
4537; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
4538; AVX-NEXT:    crc32l {{[0-9]+}}(%esp), %eax
4539; AVX-NEXT:    retl
4540;
4541; AVX512VL-LABEL: crc32_32_32:
4542; AVX512VL:       ## BB#0:
4543; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
4544; AVX512VL-NEXT:    crc32l {{[0-9]+}}(%esp), %eax
4545; AVX512VL-NEXT:    retl
4546  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
4547  ret i32 %tmp
4548}
4549declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
4550
; Test: llvm.x86.avx.movnt.dq.256 selects vmovntdq with a ymm source in both
; runs. The <2 x i64> input is incremented by a constant-pool vector and then
; widened to <4 x i64> (upper lanes undef) before the non-temporal store.
; NOTE(review): the integer add presumably keeps the value in the integer
; execution domain, mirroring the comment in movnt_pd; confirm.
; The constant-pool label is matched with a regex rather than a literal
; LCPI<N>_0, because <N> is the function's index in the module and changes
; whenever a function is added or removed earlier in this file.
; Only the plain AVX run expects a vzeroupper before the return.
4551define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
4552; AVX-LABEL: movnt_dq:
4553; AVX:       ## BB#0:
4554; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
4555; AVX-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
4556; AVX-NEXT:    vmovntdq %ymm0, (%eax)
4557; AVX-NEXT:    vzeroupper
4558; AVX-NEXT:    retl
4559;
4560; AVX512VL-LABEL: movnt_dq:
4561; AVX512VL:       ## BB#0:
4562; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
4563; AVX512VL-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
4564; AVX512VL-NEXT:    vmovntdq %ymm0, (%eax)
4565; AVX512VL-NEXT:    retl
4566  %a2 = add <2 x i64> %a1, <i64 1, i64 1>
4567  %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
4568  tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind
4569  ret void
4570}
4571declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
4572
; Test: llvm.x86.avx.movnt.ps.256 selects vmovntps with a ymm source in both
; runs, storing through the pointer loaded from the stack into %eax. Only the
; plain AVX run expects a vzeroupper before the return.
4573define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
4574; AVX-LABEL: movnt_ps:
4575; AVX:       ## BB#0:
4576; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
4577; AVX-NEXT:    vmovntps %ymm0, (%eax)
4578; AVX-NEXT:    vzeroupper
4579; AVX-NEXT:    retl
4580;
4581; AVX512VL-LABEL: movnt_ps:
4582; AVX512VL:       ## BB#0:
4583; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
4584; AVX512VL-NEXT:    vmovntps %ymm0, (%eax)
4585; AVX512VL-NEXT:    retl
4586  tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
4587  ret void
4588}
4589declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
4590
; Test: llvm.x86.avx.movnt.pd.256 selects vmovntpd with a ymm source in both
; runs. The input is first added to a zero vector; the expected zeroing idiom
; differs per run (vxorpd for plain AVX, vpxord for AVX512VL). Only the plain
; AVX run expects a vzeroupper before the return.
4591define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
4592  ; add operation forces the execution domain.
4593; AVX-LABEL: movnt_pd:
4594; AVX:       ## BB#0:
4595; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
4596; AVX-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
4597; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
4598; AVX-NEXT:    vmovntpd %ymm0, (%eax)
4599; AVX-NEXT:    vzeroupper
4600; AVX-NEXT:    retl
4601;
4602; AVX512VL-LABEL: movnt_pd:
4603; AVX512VL:       ## BB#0:
4604; AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
4605; AVX512VL-NEXT:    vpxord %ymm1, %ymm1, %ymm1
4606; AVX512VL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
4607; AVX512VL-NEXT:    vmovntpd %ymm0, (%eax)
4608; AVX512VL-NEXT:    retl
4609  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
4610  tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
4611  ret void
4612}
4613declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
4614
4615
4616; Check for pclmulqdq
; Test: llvm.x86.pclmulqdq called with immediate 0 on two register operands.
; Both runs expect the VEX-encoded three-operand vpclmulqdq with $0.
4617define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
4618; AVX-LABEL: test_x86_pclmulqdq:
4619; AVX:       ## BB#0:
4620; AVX-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0
4621; AVX-NEXT:    retl
4622;
4623; AVX512VL-LABEL: test_x86_pclmulqdq:
4624; AVX512VL:       ## BB#0:
4625; AVX512VL-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0
4626; AVX512VL-NEXT:    retl
4627  %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
4628  ret <2 x i64> %res
4629}
4630declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
4631