1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 --check-prefix=X86 --check-prefix=X86-AVX2
3; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=X86 --check-prefix=X86-AVX512
4; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 --check-prefix=X64 --check-prefix=X64-AVX2
5; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=X64 --check-prefix=X64-AVX512
6
; llvm.x86.avx2.pblendw with imm 7: expected to lower to a single vpblendw
; taking elements 0-2 (and 8-10) from %a1, the rest from %a0.
define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: test_x86_avx2_pblendw:
; X86:       ## %bb.0:
; X86-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pblendw:
; X64:       ## %bb.0:
; X64-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
21
22
; llvm.x86.avx2.pblendd.128 with imm 7: lowered to the FP-domain vblendps
; (dword blend selects %a1 elements 0-2, %a0 element 3).
define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: test_x86_avx2_pblendd_128:
; X86:       ## %bb.0:
; X86-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pblendd_128:
; X64:       ## %bb.0:
; X64-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; X64-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone
37
38
; llvm.x86.avx2.pblendd.256 with imm 7: lowered to the FP-domain vblendps
; (selects %a1 elements 0-2, %a0 elements 3-7).
define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_pblendd_256:
; X86:       ## %bb.0:
; X86-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pblendd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone
53
54
; llvm.x86.avx2.movntdqa: non-temporal 256-bit load, expected to lower to a
; single vmovntdqa from the pointer argument.
define <4 x i64> @test_x86_avx2_movntdqa(i8* %a0) {
; X86-LABEL: test_x86_avx2_movntdqa:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovntdqa (%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_movntdqa:
; X64:       ## %bb.0:
; X64-NEXT:    vmovntdqa (%rdi), %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly
70
71
; llvm.x86.avx2.mpsadbw with imm 7: expected to lower to vmpsadbw $7.
define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: test_x86_avx2_mpsadbw:
; X86:       ## %bb.0:
; X86-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_mpsadbw:
; X64:       ## %bb.0:
; X64-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
86
87
; Legacy byte-shift intrinsic llvm.x86.avx2.psll.dq.bs (shift left by 7
; bytes): lowered to vpslldq, shifting each 128-bit lane independently.
define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
; X86-LABEL: test_x86_avx2_psll_dq_bs:
; X86:       ## %bb.0:
; X86-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_psll_dq_bs:
; X64:       ## %bb.0:
; X64-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
102
103
; Legacy byte-shift intrinsic llvm.x86.avx2.psrl.dq.bs (shift right by 7
; bytes): lowered to vpsrldq, shifting each 128-bit lane independently.
define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
; X86-LABEL: test_x86_avx2_psrl_dq_bs:
; X86:       ## %bb.0:
; X86-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_psrl_dq_bs:
; X64:       ## %bb.0:
; X64-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
118
119
; llvm.x86.avx2.psll.dq takes a bit count (8 bits = 1 byte): lowered to
; vpslldq by one byte per 128-bit lane.
define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
; X86-LABEL: test_x86_avx2_psll_dq:
; X86:       ## %bb.0:
; X86-NEXT:    vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_psll_dq:
; X64:       ## %bb.0:
; X64-NEXT:    vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
134
135
; llvm.x86.avx2.psrl.dq takes a bit count (8 bits = 1 byte): lowered to
; vpsrldq by one byte per 128-bit lane.
define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
; X86-LABEL: test_x86_avx2_psrl_dq:
; X86:       ## %bb.0:
; X86-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_psrl_dq:
; X64:       ## %bb.0:
; X64-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
150
151
; llvm.x86.avx2.vextracti128: the imm (7) is larger than the valid index
; range; the generated code uses index 1, i.e. only the low bit is honored
; (NOTE(review): matches the $1 in the CHECK lines below). Lowered to the
; FP-domain vextractf128.
define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
; X86-LABEL: test_x86_avx2_vextracti128:
; X86:       ## %bb.0:
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vextracti128:
; X64:       ## %bb.0:
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone
168
169
; llvm.x86.avx2.vinserti128: imm 7 is reduced to index 1 in the generated
; code (see $1 below); lowered to the FP-domain vinsertf128.
define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
; X86-LABEL: test_x86_avx2_vinserti128:
; X86:       ## %bb.0:
; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vinserti128:
; X64:       ## %bb.0:
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
184
185
; llvm.x86.avx2.vbroadcast.sd.pd.256: expected to lower to vbroadcastsd.
define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
; X86-LABEL: test_x86_avx2_vbroadcast_sd_pd_256:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastsd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vbroadcast_sd_pd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly
200
201
; llvm.x86.avx2.vbroadcast.ss.ps: expected to lower to a 128-bit vbroadcastss.
define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
; X86-LABEL: test_x86_avx2_vbroadcast_ss_ps:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vbroadcast_ss_ps:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly
216
217
; llvm.x86.avx2.vbroadcast.ss.ps.256: expected to lower to a 256-bit vbroadcastss.
define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
; X86-LABEL: test_x86_avx2_vbroadcast_ss_ps_256:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vbroadcast_ss_ps_256:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly
232
233
; llvm.x86.avx2.pbroadcastb.128: expected to lower to a 128-bit vpbroadcastb.
define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastb_128:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastb %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastb_128:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastb %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly
248
249
; llvm.x86.avx2.pbroadcastb.256: expected to lower to a 256-bit vpbroadcastb.
define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastb_256:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastb %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastb_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastb %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly
264
265
; llvm.x86.avx2.pbroadcastw.128: expected to lower to a 128-bit vpbroadcastw.
define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastw_128:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastw %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastw_128:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastw %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly
280
281
; llvm.x86.avx2.pbroadcastw.256: expected to lower to a 256-bit vpbroadcastw.
define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastw_256:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastw %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastw_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastw %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly
296
297
; llvm.x86.avx2.pbroadcastd.128: note the integer broadcast is lowered to the
; FP-domain vbroadcastss.
define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastd_128:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastd_128:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly
312
313
; llvm.x86.avx2.pbroadcastd.256: integer broadcast lowered to the FP-domain
; 256-bit vbroadcastss.
define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastd_256:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly
328
329
; llvm.x86.avx2.pbroadcastq.128: expected to lower to a 128-bit vpbroadcastq.
define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastq_128:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastq %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastq_128:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastq %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly
344
345
; llvm.x86.avx2.pbroadcastq.256: integer broadcast lowered to the FP-domain
; vbroadcastsd.
define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastq_256:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastsd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastq_256:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly
360
361
; llvm.x86.avx2.pmovsxbd: sign-extend i8 -> i32, expected single vpmovsxbd.
define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovsxbd:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovsxbd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovsxbd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxbd %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
376
377
; llvm.x86.avx2.pmovsxbq: sign-extend i8 -> i64, expected single vpmovsxbq.
define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovsxbq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovsxbq %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovsxbq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxbq %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
392
393
; llvm.x86.avx2.pmovsxbw: sign-extend i8 -> i16, expected single vpmovsxbw.
define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovsxbw:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovsxbw %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovsxbw:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxbw %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
408
409
; llvm.x86.avx2.pmovsxdq: sign-extend i32 -> i64, expected single vpmovsxdq.
define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
; X86-LABEL: test_x86_avx2_pmovsxdq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovsxdq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
424
425
; llvm.x86.avx2.pmovsxwd: sign-extend i16 -> i32, expected single vpmovsxwd.
define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pmovsxwd:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovsxwd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovsxwd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxwd %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
440
441
; llvm.x86.avx2.pmovsxwq: sign-extend i16 -> i64, expected single vpmovsxwq.
define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pmovsxwq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovsxwq %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovsxwq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxwq %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
456
457
; llvm.x86.avx2.pmovzxbd: zero-extend i8 -> i32, expected single vpmovzxbd.
define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovzxbd:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovzxbd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
472
473
; llvm.x86.avx2.pmovzxbq: zero-extend i8 -> i64, expected single vpmovzxbq.
define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovzxbq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovzxbq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
488
489
; llvm.x86.avx2.pmovzxbw: zero-extend i8 -> i16, expected single vpmovzxbw.
define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovzxbw:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovzxbw:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
504
505
; llvm.x86.avx2.pmovzxdq: zero-extend i32 -> i64, expected single vpmovzxdq.
define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
; X86-LABEL: test_x86_avx2_pmovzxdq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovzxdq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
520
521
; llvm.x86.avx2.pmovzxwd: zero-extend i16 -> i32, expected single vpmovzxwd.
define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pmovzxwd:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovzxwd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
536
537
; llvm.x86.avx2.pmovzxwq: zero-extend i16 -> i64, expected single vpmovzxwq.
define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pmovzxwq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovzxwq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
552
; This is checked here because the execution dependency fix pass makes it hard to test in AVX mode since we don't have 256-bit integer instructions
define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
  ; add operation forces the execution domain.
  ; The add-of-ones is expected to compile to vpsubb of an all-ones register
  ; (vpcmpeqd), followed by an unaligned vmovdqu store.
; X86-LABEL: test_x86_avx_storeu_dq_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; X86-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X86-NEXT:    vmovdqu %ymm0, (%eax)
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx_storeu_dq_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; X64-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X64-NEXT:    vmovdqu %ymm0, (%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
577
; llvm.x86.avx2.pmaxs.b: expected to lower to a single vpmaxsb.
define <32 x i8> @mm256_max_epi8(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: mm256_max_epi8:
; X86:       ## %bb.0:
; X86-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_max_epi8:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone
592
; llvm.x86.avx2.pmaxs.w: expected to lower to a single vpmaxsw.
define <16 x i16> @mm256_max_epi16(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: mm256_max_epi16:
; X86:       ## %bb.0:
; X86-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_max_epi16:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone
607
; llvm.x86.avx2.pmaxs.d: expected to lower to a single vpmaxsd.
define <8 x i32> @mm256_max_epi32(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: mm256_max_epi32:
; X86:       ## %bb.0:
; X86-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_max_epi32:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone
622
; llvm.x86.avx2.pmaxu.b: expected to lower to a single vpmaxub.
define <32 x i8> @mm256_max_epu8(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: mm256_max_epu8:
; X86:       ## %bb.0:
; X86-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_max_epu8:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone
637
; llvm.x86.avx2.pmaxu.w: expected to lower to a single vpmaxuw.
define <16 x i16> @mm256_max_epu16(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: mm256_max_epu16:
; X86:       ## %bb.0:
; X86-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_max_epu16:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone
652
; llvm.x86.avx2.pmaxu.d: expected to lower to a single vpmaxud.
define <8 x i32> @mm256_max_epu32(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: mm256_max_epu32:
; X86:       ## %bb.0:
; X86-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_max_epu32:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone
667
; llvm.x86.avx2.pmins.b: expected to lower to a single vpminsb.
define <32 x i8> @mm256_min_epi8(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: mm256_min_epi8:
; X86:       ## %bb.0:
; X86-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epi8:
; X64:       ## %bb.0:
; X64-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
682
; llvm.x86.avx2.pmins.w: expected to lower to a single vpminsw.
define <16 x i16> @mm256_min_epi16(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: mm256_min_epi16:
; X86:       ## %bb.0:
; X86-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epi16:
; X64:       ## %bb.0:
; X64-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone
697
; llvm.x86.avx2.pmins.d: expected to lower to a single vpminsd.
define <8 x i32> @mm256_min_epi32(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: mm256_min_epi32:
; X86:       ## %bb.0:
; X86-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epi32:
; X64:       ## %bb.0:
; X64-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
712
; llvm.x86.avx2.pminu.b: expected to lower to a single vpminub.
define <32 x i8> @mm256_min_epu8(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: mm256_min_epu8:
; X86:       ## %bb.0:
; X86-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epu8:
; X64:       ## %bb.0:
; X64-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
727
; llvm.x86.avx2.pminu.w: expected to lower to a single vpminuw.
define <16 x i16> @mm256_min_epu16(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: mm256_min_epu16:
; X86:       ## %bb.0:
; X86-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epu16:
; X64:       ## %bb.0:
; X64-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
742
; llvm.x86.avx2.pminu.d: expected to lower to a single vpminud.
define <8 x i32> @mm256_min_epu32(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: mm256_min_epu32:
; X86:       ## %bb.0:
; X86-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epu32:
; X64:       ## %bb.0:
; X64-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
757
; llvm.x86.avx2.pavg.b: expected to lower to a single vpavgb.
define <32 x i8> @mm256_avg_epu8(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: mm256_avg_epu8:
; X86:       ## %bb.0:
; X86-NEXT:    vpavgb %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_avg_epu8:
; X64:       ## %bb.0:
; X64-NEXT:    vpavgb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone
772
; llvm.x86.avx2.pavg.w: expected to lower to a single vpavgw.
define <16 x i16> @mm256_avg_epu16(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: mm256_avg_epu16:
; X86:       ## %bb.0:
; X86-NEXT:    vpavgw %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_avg_epu16:
; X64:       ## %bb.0:
; X64-NEXT:    vpavgw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone
787
; llvm.x86.avx2.pabs.b: expected to lower to a single vpabsb.
define <32 x i8> @test_x86_avx2_pabs_b(<32 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pabs_b:
; X86:       ## %bb.0:
; X86-NEXT:    vpabsb %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pabs_b:
; X64:       ## %bb.0:
; X64-NEXT:    vpabsb %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
802
; llvm.x86.avx2.pabs.d: expected to lower to a single vpabsd.
define <8 x i32> @test_x86_avx2_pabs_d(<8 x i32> %a0) {
; X86-LABEL: test_x86_avx2_pabs_d:
; X86:       ## %bb.0:
; X86-NEXT:    vpabsd %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pabs_d:
; X64:       ## %bb.0:
; X64-NEXT:    vpabsd %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
817
818
; llvm.x86.avx2.pabs.w: expected to lower to a single vpabsw.
define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pabs_w:
; X86:       ## %bb.0:
; X86-NEXT:    vpabsw %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pabs_w:
; X64:       ## %bb.0:
; X64-NEXT:    vpabsw %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
833
834
; llvm.x86.avx2.vperm2i128 with imm 1: only %a0 is referenced in the result,
; so this is folded to a single-source vpermpd that swaps the 128-bit halves.
define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
; X86-LABEL: test_x86_avx2_vperm2i128:
; X86:       ## %bb.0:
; X86-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vperm2i128:
; X64:       ## %bb.0:
; X64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly
849
850
; llvm.x86.avx2.pmulu.dq: unsigned widening multiply of the even dwords,
; expected to lower to a single vpmuludq.
define <4 x i64> @test_x86_avx2_pmulu_dq(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_pmulu_dq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmulu_dq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
865
866
; llvm.x86.avx2.pmul.dq: signed widening multiply of the even dwords,
; expected to lower to a single vpmuldq.
define <4 x i64> @test_x86_avx2_pmul_dq(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_pmul_dq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmul_dq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
881