; Check AVX2 instructions that are disabled when AVX512VL/AVX512BW are present.

; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2                 -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl                                    -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512vl                  -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512bw                  -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx                                    -o /dev/null
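
; Note: every RUN line discards the assembly (-o /dev/null) and no FileCheck
; patterns are used, so these runs only verify that instruction selection and
; MC encoding (-show-mc-encoding) complete without llc crashing or asserting
; for each CPU/feature combination.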

define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = and <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <2 x i64> @vpand_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = and <2 x i64> %a2, %b
  ret <2 x i64> %x
}
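
; A note on the pattern used throughout these logic tests: the leading add
; pins the value to the integer execution domain, steering the and/andn/or/xor
; toward the integer forms (vpand and friends) rather than the
; floating-point-domain equivalents (vandps and friends).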

define <4 x i64> @vpandn_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %x = and <4 x i64> %a, %y
  ret <4 x i64> %x
}

define <2 x i64> @vpandn_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
  %x = and <2 x i64> %a, %y
  ret <2 x i64> %x
}

define <4 x i64> @vpor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = or <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <4 x i64> @vpxor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = xor <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <2 x i64> @vpor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = or <2 x i64> %a2, %b
  ret <2 x i64> %x
}

define <2 x i64> @vpxor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = xor <2 x i64> %a2, %b
  ret <2 x i64> %x
}
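
; Under -mattr=+avx512vl the 128/256-bit logic ops above would be expected to
; select EVEX-encoded forms (vpandq/vpandnq/vporq/vpxorq) in place of the
; VEX-encoded AVX2 instructions, which is the "disabled AVX2" behavior this
; file exercises.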

define <4 x i64> @test_vpaddq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @test_vpaddd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpaddw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @test_vpaddb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

define <4 x i64> @test_vpsubq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @test_vpsubd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpsubw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @test_vpsubb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

define <16 x i16> @test_vpmullw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <8 x i32> @test_vpcmpgtd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}

define <32 x i8> @test_vpcmpeqb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}

define <16 x i16> @test_vpcmpeqw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}

define <32 x i8> @test_vpcmpgtb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}

define <16 x i16> @test_vpcmpgtw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}

define <8 x i32> @test_vpcmpeqd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}
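
; The icmp+sext idiom used by the compare tests here and in the 128-bit tests
; below normally selects vpcmpgt*/vpcmpeq*. With AVX512VL (plus AVX512BW for
; the byte/word element types) the compare is likely to produce a k-mask
; result instead, with the sign-extension turning into an expansion of that
; mask back into a vector.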

define <2 x i64> @test_vpaddq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = add <2 x i64> %i, %j
  ret <2 x i64> %x
}

define <4 x i32> @test_vpaddd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = add <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <8 x i16> @test_vpaddw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = add <8 x i16> %i, %j
  ret <8 x i16> %x
}

define <16 x i8> @test_vpaddb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = add <16 x i8> %i, %j
  ret <16 x i8> %x
}

define <2 x i64> @test_vpsubq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = sub <2 x i64> %i, %j
  ret <2 x i64> %x
}

define <4 x i32> @test_vpsubd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = sub <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <8 x i16> @test_vpsubw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = sub <8 x i16> %i, %j
  ret <8 x i16> %x
}

define <16 x i8> @test_vpsubb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = sub <16 x i8> %i, %j
  ret <16 x i8> %x
}

define <8 x i16> @test_vpmullw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = mul <8 x i16> %i, %j
  ret <8 x i16> %x
}

define <8 x i16> @test_vpcmpgtw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}

define <16 x i8> @test_vpcmpgtb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}

define <8 x i16> @test_vpcmpeqw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}

define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}

define <8 x i16> @shuffle_v8i16_vpalignr(<8 x i16> %a, <8 x i16> %b) {
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_vpalignr(<16 x i16> %a, <16 x i16> %b) {
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i16> %shuffle
}

define <16 x i8> @shuffle_v16i8_vpalignr(<16 x i8> %a, <16 x i8> %b) {
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i8> %shuffle
}

define <32 x i8> @shuffle_v32i8_vpalignr(<32 x i8> %a, <32 x i8> %b) {
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i8> %shuffle
}

define <2 x i64> @shuffle_v2i64_vpalignr(<2 x i64> %a, <2 x i64> %b) {
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}

define <4 x i32> @shuffle_v4i32_vpalignr(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
  ret <4 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_vpalignr(<8 x i32> %a, <8 x i32> %b) {
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
  ret <8 x i32> %shuffle
}
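
; Each vpalignr shuffle mask above selects a contiguous run of elements from
; the two concatenated sources (per 128-bit lane in the 256-bit cases), i.e.
; exactly the byte-rotation pattern that vpalignr implements.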

define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
  ret <2 x double> %bitcast64
}

define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
  %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
  ret <16 x i16> %shuffle
}

define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
  %r1 = extractelement <2 x i64> %x, i32 0
  %r2 = extractelement <2 x i64> %x, i32 1
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
  %r1 = extractelement <4 x i32> %x, i32 1
  %r2 = extractelement <4 x i32> %x, i32 3
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
  %r1 = extractelement <8 x i16> %x, i32 1
  %r2 = extractelement <8 x i16> %x, i32 3
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
  %r1 = extractelement <16 x i8> %x, i32 1
  %r2 = extractelement <16 x i8> %x, i32 3
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y, i64* %ptr) {
  %val = load i64, i64* %ptr
  %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <2 x i64> %r1, i64 %y, i32 0
  ret <2 x i64> %r2
}

define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
  %val = load i32, i32* %ptr
  %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3
  ret <4 x i32> %r2
}

define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
  %val = load i16, i16* %ptr
  %r1 = insertelement <8 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
  ret <8 x i16> %r2
}

define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
  %val = load i8, i8* %ptr
  %r1 = insertelement <16 x i8> %x, i8 %val, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
  ret <16 x i8> %r2
}
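
; The extract/insert tests move a single lane between the vector and a GPR or
; memory while leaving the remaining lanes intact, the pattern served by the
; vpextr*/vpinsr* instructions.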

define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
  ret <4 x i32> %shuffle
}

define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i32> %shuffle
}

define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i8> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
; vmovshdup 256 test
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
; vmovshdup 128 test
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
; vmovsldup 256 test
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
; vmovsldup 128 test
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %shuffle
}
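
; In the four tests above, masks of the form <1,1,3,3,...> duplicate the odd
; float lanes, which is what vmovshdup does, and masks of the form
; <0,0,2,2,...> duplicate the even lanes (vmovsldup).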

define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuffle
}

define void @store_floats(<4 x float> %x, i64* %p) {
  %a = fadd <4 x float> %x, %x
  %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %c = bitcast <2 x float> %b to i64
  store i64 %c, i64* %p
  ret void
}

define void @store_double(<2 x double> %x, i64* %p) {
  %a = fadd <2 x double> %x, %x
  %b = extractelement <2 x double> %a, i32 0
  %c = bitcast double %b to i64
  store i64 %c, i64* %p
  ret void
}

define void @store_h_double(<2 x double> %x, i64* %p) {
  %a = fadd <2 x double> %x, %x
  %b = extractelement <2 x double> %a, i32 1
  %c = bitcast double %b to i64
  store i64 %c, i64* %p
  ret void
}
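
; The three store tests check that a 64-bit slice of the vector result can be
; stored directly from the XMM register (e.g. vmovq/vmovlps-style stores, or a
; high-half variant for store_h_double) rather than being bounced through a
; general-purpose register.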

define <2 x double> @test39(double* %ptr) nounwind {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @test40(<2 x double>* %ptr) nounwind {
  %v = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}