• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,X86
3; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
4; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,X64
5; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
6
; Calls the llvm.x86.avx2.pblendw intrinsic with immediate 7. The expected
; output is a vpblendw that takes elements 0-2 and 8-10 from ymm1 and all
; remaining elements from ymm0.
7define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
8; CHECK-LABEL: test_x86_avx2_pblendw:
9; CHECK:       ## %bb.0:
10; CHECK-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
11; CHECK-NEXT:    ret{{[l|q]}}
12  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
13  ret <16 x i16> %res
14}
15declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
16
17
; Calls llvm.x86.avx2.pblendd.128 with immediate 7. The expected output is a
; vblendps (not an integer blend mnemonic) taking elements 0-2 from xmm1 and
; element 3 from xmm0.
18define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
19; CHECK-LABEL: test_x86_avx2_pblendd_128:
20; CHECK:       ## %bb.0:
21; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
22; CHECK-NEXT:    ret{{[l|q]}}
23  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
24  ret <4 x i32> %res
25}
26declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone
27
28
; Calls llvm.x86.avx2.pblendd.256 with immediate 7. The expected output is a
; vblendps taking elements 0-2 from ymm1 and elements 3-7 from ymm0.
29define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
30; CHECK-LABEL: test_x86_avx2_pblendd_256:
31; CHECK:       ## %bb.0:
32; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
33; CHECK-NEXT:    ret{{[l|q]}}
34  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
35  ret <8 x i32> %res
36}
37declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone
38
39
; Calls llvm.x86.avx2.movntdqa on the pointer argument. The expected output
; differs per target: the 32-bit run first loads the pointer from the stack
; into eax, while the 64-bit run reads directly through rdi; both then emit a
; single vmovntdqa into ymm0.
40define <4 x i64> @test_x86_avx2_movntdqa(i8* %a0) {
41; X86-LABEL: test_x86_avx2_movntdqa:
42; X86:       ## %bb.0:
43; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
44; X86-NEXT:    vmovntdqa (%eax), %ymm0
45; X86-NEXT:    retl
46;
47; X64-LABEL: test_x86_avx2_movntdqa:
48; X64:       ## %bb.0:
49; X64-NEXT:    vmovntdqa (%rdi), %ymm0
50; X64-NEXT:    retq
51  %res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0) ; <<4 x i64>> [#uses=1]
52  ret <4 x i64> %res
53}
54declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly
55
56
; Calls llvm.x86.avx2.mpsadbw with an i32 immediate of 7. The expected output
; is a single vmpsadbw carrying the same immediate ($7).
57define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
58; CHECK-LABEL: test_x86_avx2_mpsadbw:
59; CHECK:       ## %bb.0:
60; CHECK-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0
61; CHECK-NEXT:    ret{{[l|q]}}
62  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
63  ret <16 x i16> %res
64}
65declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
66
67
; Calls llvm.x86.avx2.psll.dq.bs with immediate 7. The expected output is a
; vpslldq whose decoded shuffle shows 7 zero elements inserted at the start of
; each 16-element half, followed by source elements 0-8 (resp. 16-24).
68define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
69; CHECK-LABEL: test_x86_avx2_psll_dq_bs:
70; CHECK:       ## %bb.0:
71; CHECK-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
72; CHECK-NEXT:    ret{{[l|q]}}
73  %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
74  ret <4 x i64> %res
75}
76declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
77
78
; Calls llvm.x86.avx2.psrl.dq.bs with immediate 7. The expected output is a
; vpsrldq whose decoded shuffle keeps source elements 7-15 (resp. 23-31) and
; fills the top 7 elements of each 16-element half with zero.
79define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
80; CHECK-LABEL: test_x86_avx2_psrl_dq_bs:
81; CHECK:       ## %bb.0:
82; CHECK-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
83; CHECK-NEXT:    ret{{[l|q]}}
84  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
85  ret <4 x i64> %res
86}
87declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
88
89
; Calls llvm.x86.avx2.psll.dq with immediate 8. The expected output is a
; vpslldq that inserts a single zero element at the start of each 16-element
; half.
; NOTE(review): an immediate of 8 producing a one-element shift suggests this
; variant takes a bit count rather than a byte count - confirm against the
; intrinsic's definition.
90define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
91; CHECK-LABEL: test_x86_avx2_psll_dq:
92; CHECK:       ## %bb.0:
93; CHECK-NEXT:    vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
94; CHECK-NEXT:    ret{{[l|q]}}
95  %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
96  ret <4 x i64> %res
97}
98declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
99
100
; Calls llvm.x86.avx2.psrl.dq with immediate 8. The expected output is a
; vpsrldq that drops the first element of each 16-element half and appends a
; single zero element.
; NOTE(review): an immediate of 8 producing a one-element shift suggests this
; variant takes a bit count rather than a byte count - confirm against the
; intrinsic's definition.
101define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
102; CHECK-LABEL: test_x86_avx2_psrl_dq:
103; CHECK:       ## %bb.0:
104; CHECK-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero
105; CHECK-NEXT:    ret{{[l|q]}}
106  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
107  ret <4 x i64> %res
108}
109declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
110
111
; Calls llvm.x86.avx2.vextracti128 with an i8 immediate of 7. The expected
; output uses vextractf128 with immediate $1 (not $7), followed by a
; vzeroupper before returning the xmm result.
112define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
113; CHECK-LABEL: test_x86_avx2_vextracti128:
114; CHECK:       ## %bb.0:
115; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
116; CHECK-NEXT:    vzeroupper
117; CHECK-NEXT:    ret{{[l|q]}}
118  %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7)
119  ret <2 x i64> %res
120}
121declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone
122
123
; Calls llvm.x86.avx2.vinserti128 with an i8 immediate of 7. The expected
; output uses vinsertf128 with immediate $1 (not $7).
124define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
125; CHECK-LABEL: test_x86_avx2_vinserti128:
126; CHECK:       ## %bb.0:
127; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
128; CHECK-NEXT:    ret{{[l|q]}}
129  %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7)
130  ret <4 x i64> %res
131}
132declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
133
134
; Calls llvm.x86.avx2.vbroadcast.sd.pd.256 on a <2 x double> register
; argument. The expected output is a register-to-register vbroadcastsd from
; xmm0 into ymm0.
135define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
136; CHECK-LABEL: test_x86_avx2_vbroadcast_sd_pd_256:
137; CHECK:       ## %bb.0:
138; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
139; CHECK-NEXT:    ret{{[l|q]}}
140  %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0)
141  ret <4 x double> %res
142}
143declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly
144
145
; Calls llvm.x86.avx2.vbroadcast.ss.ps on a <4 x float> register argument.
; The expected output is a vbroadcastss from xmm0 into xmm0.
146define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
147; CHECK-LABEL: test_x86_avx2_vbroadcast_ss_ps:
148; CHECK:       ## %bb.0:
149; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
150; CHECK-NEXT:    ret{{[l|q]}}
151  %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0)
152  ret <4 x float> %res
153}
154declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly
155
156
; Calls llvm.x86.avx2.vbroadcast.ss.ps.256 on a <4 x float> register
; argument. The expected output is a vbroadcastss from xmm0 into ymm0.
157define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
158; CHECK-LABEL: test_x86_avx2_vbroadcast_ss_ps_256:
159; CHECK:       ## %bb.0:
160; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
161; CHECK-NEXT:    ret{{[l|q]}}
162  %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0)
163  ret <8 x float> %res
164}
165declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly
166
167
; Calls llvm.x86.avx2.pbroadcastb.128. The expected output is a
; vpbroadcastb from xmm0 into xmm0.
168define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
169; CHECK-LABEL: test_x86_avx2_pbroadcastb_128:
170; CHECK:       ## %bb.0:
171; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
172; CHECK-NEXT:    ret{{[l|q]}}
173  %res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0)
174  ret <16 x i8> %res
175}
176declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly
177
178
; Calls llvm.x86.avx2.pbroadcastb.256. The expected output is a
; vpbroadcastb from xmm0 into ymm0.
179define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) {
180; CHECK-LABEL: test_x86_avx2_pbroadcastb_256:
181; CHECK:       ## %bb.0:
182; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
183; CHECK-NEXT:    ret{{[l|q]}}
184  %res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0)
185  ret <32 x i8> %res
186}
187declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly
188
189
; Calls llvm.x86.avx2.pbroadcastw.128. The expected output is a
; vpbroadcastw from xmm0 into xmm0.
190define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) {
191; CHECK-LABEL: test_x86_avx2_pbroadcastw_128:
192; CHECK:       ## %bb.0:
193; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
194; CHECK-NEXT:    ret{{[l|q]}}
195  %res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0)
196  ret <8 x i16> %res
197}
198declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly
199
200
; Calls llvm.x86.avx2.pbroadcastw.256. The expected output is a
; vpbroadcastw from xmm0 into ymm0.
201define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) {
202; CHECK-LABEL: test_x86_avx2_pbroadcastw_256:
203; CHECK:       ## %bb.0:
204; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
205; CHECK-NEXT:    ret{{[l|q]}}
206  %res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0)
207  ret <16 x i16> %res
208}
209declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly
210
211
; Calls llvm.x86.avx2.pbroadcastd.128. Note that the expected output is a
; vbroadcastss (xmm0 into xmm0) rather than a vpbroadcastd mnemonic.
212define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) {
213; CHECK-LABEL: test_x86_avx2_pbroadcastd_128:
214; CHECK:       ## %bb.0:
215; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
216; CHECK-NEXT:    ret{{[l|q]}}
217  %res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0)
218  ret <4 x i32> %res
219}
220declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly
221
222
; Calls llvm.x86.avx2.pbroadcastd.256. Note that the expected output is a
; vbroadcastss (xmm0 into ymm0) rather than a vpbroadcastd mnemonic.
223define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) {
224; CHECK-LABEL: test_x86_avx2_pbroadcastd_256:
225; CHECK:       ## %bb.0:
226; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
227; CHECK-NEXT:    ret{{[l|q]}}
228  %res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0)
229  ret <8 x i32> %res
230}
231declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly
232
233
; Calls llvm.x86.avx2.pbroadcastq.128. Note that the expected output is a
; vmovddup duplicating element 0 of xmm0 rather than a vpbroadcastq mnemonic.
234define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) {
235; CHECK-LABEL: test_x86_avx2_pbroadcastq_128:
236; CHECK:       ## %bb.0:
237; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
238; CHECK-NEXT:    ret{{[l|q]}}
239  %res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0)
240  ret <2 x i64> %res
241}
242declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly
243
244
; Calls llvm.x86.avx2.pbroadcastq.256. Note that the expected output is a
; vbroadcastsd (xmm0 into ymm0) rather than a vpbroadcastq mnemonic.
245define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
246; CHECK-LABEL: test_x86_avx2_pbroadcastq_256:
247; CHECK:       ## %bb.0:
248; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
249; CHECK-NEXT:    ret{{[l|q]}}
250  %res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0)
251  ret <4 x i64> %res
252}
253declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly
254
255
; Calls llvm.x86.avx2.pmovsxbd (<16 x i8> in, <8 x i32> out). The expected
; output is a single vpmovsxbd from xmm0 into ymm0.
256define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
257; CHECK-LABEL: test_x86_avx2_pmovsxbd:
258; CHECK:       ## %bb.0:
259; CHECK-NEXT:    vpmovsxbd %xmm0, %ymm0
260; CHECK-NEXT:    ret{{[l|q]}}
261  %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
262  ret <8 x i32> %res
263}
264declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
265
266
; Calls llvm.x86.avx2.pmovsxbq (<16 x i8> in, <4 x i64> out). The expected
; output is a single vpmovsxbq from xmm0 into ymm0.
267define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
268; CHECK-LABEL: test_x86_avx2_pmovsxbq:
269; CHECK:       ## %bb.0:
270; CHECK-NEXT:    vpmovsxbq %xmm0, %ymm0
271; CHECK-NEXT:    ret{{[l|q]}}
272  %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
273  ret <4 x i64> %res
274}
275declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
276
277
; Calls llvm.x86.avx2.pmovsxbw (<16 x i8> in, <16 x i16> out). The expected
; output is a single vpmovsxbw from xmm0 into ymm0.
278define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
279; CHECK-LABEL: test_x86_avx2_pmovsxbw:
280; CHECK:       ## %bb.0:
281; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
282; CHECK-NEXT:    ret{{[l|q]}}
283  %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
284  ret <16 x i16> %res
285}
286declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
287
288
; Calls llvm.x86.avx2.pmovsxdq (<4 x i32> in, <4 x i64> out). The expected
; output is a single vpmovsxdq from xmm0 into ymm0.
289define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
290; CHECK-LABEL: test_x86_avx2_pmovsxdq:
291; CHECK:       ## %bb.0:
292; CHECK-NEXT:    vpmovsxdq %xmm0, %ymm0
293; CHECK-NEXT:    ret{{[l|q]}}
294  %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
295  ret <4 x i64> %res
296}
297declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
298
299
; Calls llvm.x86.avx2.pmovsxwd (<8 x i16> in, <8 x i32> out). The expected
; output is a single vpmovsxwd from xmm0 into ymm0.
300define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
301; CHECK-LABEL: test_x86_avx2_pmovsxwd:
302; CHECK:       ## %bb.0:
303; CHECK-NEXT:    vpmovsxwd %xmm0, %ymm0
304; CHECK-NEXT:    ret{{[l|q]}}
305  %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
306  ret <8 x i32> %res
307}
308declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
309
310
; Calls llvm.x86.avx2.pmovsxwq (<8 x i16> in, <4 x i64> out). The expected
; output is a single vpmovsxwq from xmm0 into ymm0.
311define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
312; CHECK-LABEL: test_x86_avx2_pmovsxwq:
313; CHECK:       ## %bb.0:
314; CHECK-NEXT:    vpmovsxwq %xmm0, %ymm0
315; CHECK-NEXT:    ret{{[l|q]}}
316  %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
317  ret <4 x i64> %res
318}
319declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
320
321
; Calls llvm.x86.avx2.pmovzxbd (<16 x i8> in, <8 x i32> out). The expected
; output is a vpmovzxbd whose decoded shuffle places each of source elements
; 0-7 followed by three zero elements.
322define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
323; CHECK-LABEL: test_x86_avx2_pmovzxbd:
324; CHECK:       ## %bb.0:
325; CHECK-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
326; CHECK-NEXT:    ret{{[l|q]}}
327  %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
328  ret <8 x i32> %res
329}
330declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
331
332
; Calls llvm.x86.avx2.pmovzxbq (<16 x i8> in, <4 x i64> out). The expected
; output is a vpmovzxbq whose decoded shuffle places each of source elements
; 0-3 followed by seven zero elements.
333define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
334; CHECK-LABEL: test_x86_avx2_pmovzxbq:
335; CHECK:       ## %bb.0:
336; CHECK-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
337; CHECK-NEXT:    ret{{[l|q]}}
338  %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
339  ret <4 x i64> %res
340}
341declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
342
343
; Calls llvm.x86.avx2.pmovzxbw (<16 x i8> in, <16 x i16> out). The expected
; output is a vpmovzxbw whose decoded shuffle places each of source elements
; 0-15 followed by one zero element.
344define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
345; CHECK-LABEL: test_x86_avx2_pmovzxbw:
346; CHECK:       ## %bb.0:
347; CHECK-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
348; CHECK-NEXT:    ret{{[l|q]}}
349  %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
350  ret <16 x i16> %res
351}
352declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
353
354
; Calls llvm.x86.avx2.pmovzxdq (<4 x i32> in, <4 x i64> out). The expected
; output is a vpmovzxdq whose decoded shuffle places each of source elements
; 0-3 followed by one zero element.
355define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
356; CHECK-LABEL: test_x86_avx2_pmovzxdq:
357; CHECK:       ## %bb.0:
358; CHECK-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
359; CHECK-NEXT:    ret{{[l|q]}}
360  %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
361  ret <4 x i64> %res
362}
363declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
364
365
; Calls llvm.x86.avx2.pmovzxwd (<8 x i16> in, <8 x i32> out). The expected
; output is a vpmovzxwd whose decoded shuffle places each of source elements
; 0-7 followed by one zero element.
366define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
367; CHECK-LABEL: test_x86_avx2_pmovzxwd:
368; CHECK:       ## %bb.0:
369; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
370; CHECK-NEXT:    ret{{[l|q]}}
371  %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
372  ret <8 x i32> %res
373}
374declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
375
376
; Calls llvm.x86.avx2.pmovzxwq (<8 x i16> in, <4 x i64> out). The expected
; output is a vpmovzxwq whose decoded shuffle places each of source elements
; 0-3 followed by three zero elements.
377define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
378; CHECK-LABEL: test_x86_avx2_pmovzxwq:
379; CHECK:       ## %bb.0:
380; CHECK-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
381; CHECK-NEXT:    ret{{[l|q]}}
382  %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
383  ret <4 x i64> %res
384}
385declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
386
387; This is checked here because the execution dependency fix pass makes it hard to test in AVX mode since we don't have 256-bit integer instructions
; Adds a splat of i8 1 to the <32 x i8> argument and passes the sum to
; llvm.x86.avx.storeu.dq.256. In the expected output the add appears as
; vpcmpeqd (a register compared with itself) followed by vpsubb of that
; register, and the store is an unaligned vmovdqu through the pointer
; (loaded from the stack on the 32-bit run, rdi on the 64-bit run), followed
; by vzeroupper.
388define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
389  ; add operation forces the execution domain.
390; X86-LABEL: test_x86_avx_storeu_dq_256:
391; X86:       ## %bb.0:
392; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
393; X86-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
394; X86-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
395; X86-NEXT:    vmovdqu %ymm0, (%eax)
396; X86-NEXT:    vzeroupper
397; X86-NEXT:    retl
398;
399; X64-LABEL: test_x86_avx_storeu_dq_256:
400; X64:       ## %bb.0:
401; X64-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
402; X64-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
403; X64-NEXT:    vmovdqu %ymm0, (%rdi)
404; X64-NEXT:    vzeroupper
405; X64-NEXT:    retq
406  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
407  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
408  ret void
409}
410declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
411
; Calls llvm.x86.avx2.pmaxs.b on two <32 x i8> operands. The expected output
; is a single vpmaxsb on ymm registers.
412define <32 x i8> @mm256_max_epi8(<32 x i8> %a0, <32 x i8> %a1) {
413; CHECK-LABEL: mm256_max_epi8:
414; CHECK:       ## %bb.0:
415; CHECK-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
416; CHECK-NEXT:    ret{{[l|q]}}
417  %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
418  ret <32 x i8> %res
419}
420declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone
421
; Calls llvm.x86.avx2.pmaxs.w on two <16 x i16> operands. The expected output
; is a single vpmaxsw on ymm registers.
422define <16 x i16> @mm256_max_epi16(<16 x i16> %a0, <16 x i16> %a1) {
423; CHECK-LABEL: mm256_max_epi16:
424; CHECK:       ## %bb.0:
425; CHECK-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
426; CHECK-NEXT:    ret{{[l|q]}}
427  %res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
428  ret <16 x i16> %res
429}
430declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone
431
; Calls llvm.x86.avx2.pmaxs.d on two <8 x i32> operands. The expected output
; is a single vpmaxsd on ymm registers.
432define <8 x i32> @mm256_max_epi32(<8 x i32> %a0, <8 x i32> %a1) {
433; CHECK-LABEL: mm256_max_epi32:
434; CHECK:       ## %bb.0:
435; CHECK-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
436; CHECK-NEXT:    ret{{[l|q]}}
437  %res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
438  ret <8 x i32> %res
439}
440declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone
441
; Calls llvm.x86.avx2.pmaxu.b on two <32 x i8> operands. The expected output
; is a single vpmaxub on ymm registers.
442define <32 x i8> @mm256_max_epu8(<32 x i8> %a0, <32 x i8> %a1) {
443; CHECK-LABEL: mm256_max_epu8:
444; CHECK:       ## %bb.0:
445; CHECK-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
446; CHECK-NEXT:    ret{{[l|q]}}
447  %res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
448  ret <32 x i8> %res
449}
450declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone
451
; Calls llvm.x86.avx2.pmaxu.w on two <16 x i16> operands. The expected output
; is a single vpmaxuw on ymm registers.
452define <16 x i16> @mm256_max_epu16(<16 x i16> %a0, <16 x i16> %a1) {
453; CHECK-LABEL: mm256_max_epu16:
454; CHECK:       ## %bb.0:
455; CHECK-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
456; CHECK-NEXT:    ret{{[l|q]}}
457  %res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
458  ret <16 x i16> %res
459}
460declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone
461
; Calls llvm.x86.avx2.pmaxu.d on two <8 x i32> operands. The expected output
; is a single vpmaxud on ymm registers.
462define <8 x i32> @mm256_max_epu32(<8 x i32> %a0, <8 x i32> %a1) {
463; CHECK-LABEL: mm256_max_epu32:
464; CHECK:       ## %bb.0:
465; CHECK-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
466; CHECK-NEXT:    ret{{[l|q]}}
467  %res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
468  ret <8 x i32> %res
469}
470declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone
471
; Calls llvm.x86.avx2.pmins.b on two <32 x i8> operands. The expected output
; is a single vpminsb on ymm registers.
472define <32 x i8> @mm256_min_epi8(<32 x i8> %a0, <32 x i8> %a1) {
473; CHECK-LABEL: mm256_min_epi8:
474; CHECK:       ## %bb.0:
475; CHECK-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
476; CHECK-NEXT:    ret{{[l|q]}}
477  %res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
478  ret <32 x i8> %res
479}
480declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
481
; Calls llvm.x86.avx2.pmins.w on two <16 x i16> operands. The expected output
; is a single vpminsw on ymm registers.
482define <16 x i16> @mm256_min_epi16(<16 x i16> %a0, <16 x i16> %a1) {
483; CHECK-LABEL: mm256_min_epi16:
484; CHECK:       ## %bb.0:
485; CHECK-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
486; CHECK-NEXT:    ret{{[l|q]}}
487  %res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
488  ret <16 x i16> %res
489}
490declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone
491
; Calls llvm.x86.avx2.pmins.d on two <8 x i32> operands. The expected output
; is a single vpminsd on ymm registers.
492define <8 x i32> @mm256_min_epi32(<8 x i32> %a0, <8 x i32> %a1) {
493; CHECK-LABEL: mm256_min_epi32:
494; CHECK:       ## %bb.0:
495; CHECK-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
496; CHECK-NEXT:    ret{{[l|q]}}
497  %res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
498  ret <8 x i32> %res
499}
500declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
501
; Calls llvm.x86.avx2.pminu.b on two <32 x i8> operands. The expected output
; is a single vpminub on ymm registers.
502define <32 x i8> @mm256_min_epu8(<32 x i8> %a0, <32 x i8> %a1) {
503; CHECK-LABEL: mm256_min_epu8:
504; CHECK:       ## %bb.0:
505; CHECK-NEXT:    vpminub %ymm1, %ymm0, %ymm0
506; CHECK-NEXT:    ret{{[l|q]}}
507  %res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
508  ret <32 x i8> %res
509}
510declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
511
; Calls llvm.x86.avx2.pminu.w on two <16 x i16> operands. The expected output
; is a single vpminuw on ymm registers.
512define <16 x i16> @mm256_min_epu16(<16 x i16> %a0, <16 x i16> %a1) {
513; CHECK-LABEL: mm256_min_epu16:
514; CHECK:       ## %bb.0:
515; CHECK-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
516; CHECK-NEXT:    ret{{[l|q]}}
517  %res = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
518  ret <16 x i16> %res
519}
520declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
521
; Calls llvm.x86.avx2.pminu.d on two <8 x i32> operands. The expected output
; is a single vpminud on ymm registers.
522define <8 x i32> @mm256_min_epu32(<8 x i32> %a0, <8 x i32> %a1) {
523; CHECK-LABEL: mm256_min_epu32:
524; CHECK:       ## %bb.0:
525; CHECK-NEXT:    vpminud %ymm1, %ymm0, %ymm0
526; CHECK-NEXT:    ret{{[l|q]}}
527  %res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
528  ret <8 x i32> %res
529}
530declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
531
; Calls llvm.x86.avx2.pabs.b on a <32 x i8> operand. The expected output is a
; single vpabsb on ymm0.
532define <32 x i8> @test_x86_avx2_pabs_b(<32 x i8> %a0) {
533; CHECK-LABEL: test_x86_avx2_pabs_b:
534; CHECK:       ## %bb.0:
535; CHECK-NEXT:    vpabsb %ymm0, %ymm0
536; CHECK-NEXT:    ret{{[l|q]}}
537  %res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) ; <<32 x i8>> [#uses=1]
538  ret <32 x i8> %res
539}
540declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
541
; Calls llvm.x86.avx2.pabs.d on an <8 x i32> operand. The expected output is
; a single vpabsd on ymm0.
542define <8 x i32> @test_x86_avx2_pabs_d(<8 x i32> %a0) {
543; CHECK-LABEL: test_x86_avx2_pabs_d:
544; CHECK:       ## %bb.0:
545; CHECK-NEXT:    vpabsd %ymm0, %ymm0
546; CHECK-NEXT:    ret{{[l|q]}}
547  %res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) ; <<8 x i32>> [#uses=1]
548  ret <8 x i32> %res
549}
550declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
551
552
; Calls llvm.x86.avx2.pabs.w on a <16 x i16> operand. The expected output is
; a single vpabsw on ymm0.
553define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) {
554; CHECK-LABEL: test_x86_avx2_pabs_w:
555; CHECK:       ## %bb.0:
556; CHECK-NEXT:    vpabsw %ymm0, %ymm0
557; CHECK-NEXT:    ret{{[l|q]}}
558  %res = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) ; <<16 x i16>> [#uses=1]
559  ret <16 x i16> %res
560}
561declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
562
563
; Calls llvm.x86.avx2.vperm2i128 with an i8 immediate of 1. The expected
; output is a single-source vpermpd that swaps the two halves of ymm0
; (elements 2,3,0,1); ymm1 does not appear in the expected instruction.
564define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
565; CHECK-LABEL: test_x86_avx2_vperm2i128:
566; CHECK:       ## %bb.0:
567; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
568; CHECK-NEXT:    ret{{[l|q]}}
569  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 1) ; <<4 x i64>> [#uses=1]
570  ret <4 x i64> %res
571}
572declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly
573
574
; Calls llvm.x86.avx2.pmulu.dq (<8 x i32> operands, <4 x i64> result). The
; expected output is a single vpmuludq on ymm registers.
575define <4 x i64> @test_x86_avx2_pmulu_dq(<8 x i32> %a0, <8 x i32> %a1) {
576; CHECK-LABEL: test_x86_avx2_pmulu_dq:
577; CHECK:       ## %bb.0:
578; CHECK-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
579; CHECK-NEXT:    ret{{[l|q]}}
580  %res = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
581  ret <4 x i64> %res
582}
583declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
584
585
; Calls llvm.x86.avx2.pmul.dq (<8 x i32> operands, <4 x i64> result). The
; expected output is a single vpmuldq on ymm registers.
586define <4 x i64> @test_x86_avx2_pmul_dq(<8 x i32> %a0, <8 x i32> %a1) {
587; CHECK-LABEL: test_x86_avx2_pmul_dq:
588; CHECK:       ## %bb.0:
589; CHECK-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0
590; CHECK-NEXT:    ret{{[l|q]}}
591  %res = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
592  ret <4 x i64> %res
593}
594declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
595
596
; Despite the x86-specific test name, this calls the generic
; llvm.sadd.sat.v32i8 intrinsic. The expected output is a single vpaddsb on
; ymm registers.
597define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) {
598; CHECK-LABEL: test_x86_avx2_padds_b:
599; CHECK:       ## %bb.0:
600; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0
601; CHECK-NEXT:    ret{{[l|q]}}
602  %res = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
603  ret <32 x i8> %res
604}
605declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone
606
607
; Despite the x86-specific test name, this calls the generic
; llvm.sadd.sat.v16i16 intrinsic. The expected output is a single vpaddsw on
; ymm registers.
608define <16 x i16> @test_x86_avx2_padds_w(<16 x i16> %a0, <16 x i16> %a1) {
609; CHECK-LABEL: test_x86_avx2_padds_w:
610; CHECK:       ## %bb.0:
611; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0
612; CHECK-NEXT:    ret{{[l|q]}}
613  %res = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
614  ret <16 x i16> %res
615}
616declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone
617
618
; Calls llvm.x86.avx2.paddus.b on two <32 x i8> operands. The expected output
; is a single vpaddusb on ymm registers.
619define <32 x i8> @test_x86_avx2_paddus_b(<32 x i8> %a0, <32 x i8> %a1) {
620; CHECK-LABEL: test_x86_avx2_paddus_b:
621; CHECK:       ## %bb.0:
622; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0
623; CHECK-NEXT:    ret{{[l|q]}}
624  %res = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
625  ret <32 x i8> %res
626}
627declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone
628
629
; Calls llvm.x86.avx2.paddus.w on two <16 x i16> operands. The expected
; output is a single vpaddusw on ymm registers.
630define <16 x i16> @test_x86_avx2_paddus_w(<16 x i16> %a0, <16 x i16> %a1) {
631; CHECK-LABEL: test_x86_avx2_paddus_w:
632; CHECK:       ## %bb.0:
633; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0
634; CHECK-NEXT:    ret{{[l|q]}}
635  %res = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
636  ret <16 x i16> %res
637}
638declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone
639
640
; Despite the x86-specific test name, this calls the generic
; llvm.ssub.sat.v32i8 intrinsic. The expected output is a single vpsubsb on
; ymm registers.
641define <32 x i8> @test_x86_avx2_psubs_b(<32 x i8> %a0, <32 x i8> %a1) {
642; CHECK-LABEL: test_x86_avx2_psubs_b:
643; CHECK:       ## %bb.0:
644; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0
645; CHECK-NEXT:    ret{{[l|q]}}
646  %res = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
647  ret <32 x i8> %res
648}
649declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone
650
651
; Despite the x86-specific test name, this calls the generic
; llvm.ssub.sat.v16i16 intrinsic. The expected output is a single vpsubsw on
; ymm registers.
652define <16 x i16> @test_x86_avx2_psubs_w(<16 x i16> %a0, <16 x i16> %a1) {
653; CHECK-LABEL: test_x86_avx2_psubs_w:
654; CHECK:       ## %bb.0:
655; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0
656; CHECK-NEXT:    ret{{[l|q]}}
657  %res = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
658  ret <16 x i16> %res
659}
660declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone
661
662
; Calls llvm.x86.avx2.psubus.b on two <32 x i8> operands. The expected output
; is a single vpsubusb on ymm registers.
663define <32 x i8> @test_x86_avx2_psubus_b(<32 x i8> %a0, <32 x i8> %a1) {
664; CHECK-LABEL: test_x86_avx2_psubus_b:
665; CHECK:       ## %bb.0:
666; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0
667; CHECK-NEXT:    ret{{[l|q]}}
668  %res = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
669  ret <32 x i8> %res
670}
671declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone
672
673
; Calls llvm.x86.avx2.psubus.w on two <16 x i16> operands. The expected
; output is a single vpsubusw on ymm registers.
674define <16 x i16> @test_x86_avx2_psubus_w(<16 x i16> %a0, <16 x i16> %a1) {
675; CHECK-LABEL: test_x86_avx2_psubus_w:
676; CHECK:       ## %bb.0:
677; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0
678; CHECK-NEXT:    ret{{[l|q]}}
679  %res = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
680  ret <16 x i16> %res
681}
682declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone
683