• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL --check-prefix=CHECK
2; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX --check-prefix=CHECK
3
4; CHECK-LABEL: mask16
5; CHECK: kmovw
6; CHECK-NEXT: knotw
7; CHECK-NEXT: kmovw
; Bitwise NOT of a 16-bit value performed in the mask domain: %x is
; reinterpreted as <16 x i1>, every lane is flipped, and the lanes are
; packed back into the i16 that is returned.
8define i16 @mask16(i16 %x) {
9  %m0 = bitcast i16 %x to <16 x i1>
  ; xor against an all-true vector (i1 -1 in every lane) inverts each lane.
10  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
11  %ret = bitcast <16 x i1> %m1 to i16
12  ret i16 %ret
13}
14
15; CHECK-LABEL: mask8
16; KNL: kmovw
17; KNL-NEXT: knotw
18; KNL-NEXT: kmovw
19; SKX: kmovb
20; SKX-NEXT: knotb
21; SKX-NEXT: kmovb
22
; 8-bit counterpart of the 16-bit mask inversion: %x is viewed as <8 x i1>,
; each lane is flipped, and the result is returned as an i8.
23define i8 @mask8(i8 %x) {
24  %m0 = bitcast i8 %x to <8 x i1>
  ; xor against an all-true vector inverts each of the eight lanes.
25  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
26  %ret = bitcast <8 x i1> %m1 to i8
27  ret i8 %ret
28}
29
30; CHECK-LABEL: mask16_mem
31; CHECK: kmovw ([[ARG1:%rdi|%rcx]]), %k{{[0-7]}}
32; CHECK-NEXT: knotw
33; CHECK-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
34; CHECK: ret
35
; In-place NOT of the i16 stored at %ptr: the value is loaded, inverted as a
; <16 x i1> mask, and stored back through the same pointer. Returns nothing.
36define void @mask16_mem(i16* %ptr) {
  ; Both the load and the store below declare 4-byte alignment.
37  %x = load i16, i16* %ptr, align 4
38  %m0 = bitcast i16 %x to <16 x i1>
  ; xor against an all-true vector inverts each lane.
39  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
40  %ret = bitcast <16 x i1> %m1 to i16
41  store i16 %ret, i16* %ptr, align 4
42  ret void
43}
44
45; CHECK-LABEL: mask8_mem
46; KNL: kmovw ([[ARG1]]), %k{{[0-7]}}
47; KNL-NEXT: knotw
48; KNL-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
49; SKX: kmovb ([[ARG1]]), %k{{[0-7]}}
50; SKX-NEXT: knotb
51; SKX-NEXT: kmovb %k{{[0-7]}}, ([[ARG1]])
52
; In-place NOT of the i8 stored at %ptr: the value is loaded, inverted as an
; <8 x i1> mask, and stored back through the same pointer. Returns nothing.
53define void @mask8_mem(i8* %ptr) {
  ; Both the load and the store below declare 4-byte alignment.
54  %x = load i8, i8* %ptr, align 4
55  %m0 = bitcast i8 %x to <8 x i1>
  ; xor against an all-true vector inverts each lane.
56  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
57  %ret = bitcast <8 x i1> %m1 to i8
58  store i8 %ret, i8* %ptr, align 4
59  ret void
60}
61
62; CHECK-LABEL: mand16
63; CHECK: kandw
64; CHECK: kxorw
65; CHECK: korw
; Combines two 16-bit values in the mask domain and returns
; (x AND y) OR (x XOR y) as an i16.
; The expression is algebraically equal to x OR y, but the FileCheck
; directives preceding this function expect all three mask operations
; (and, xor, or) to appear in the output.
66define i16 @mand16(i16 %x, i16 %y) {
67  %ma = bitcast i16 %x to <16 x i1>
68  %mb = bitcast i16 %y to <16 x i1>
69  %mc = and <16 x i1> %ma, %mb
70  %md = xor <16 x i1> %ma, %mb
71  %me = or <16 x i1> %mc, %md
72  %ret = bitcast <16 x i1> %me to i16
73  ret i16 %ret
74}
75
76; CHECK-LABEL: shuf_test1
77; CHECK: kshiftrw        $8
; Returns lanes 8..15 of the 16-lane mask built from %v, packed into an i8.
78define i8 @shuf_test1(i16 %v) nounwind {
79   %v1 = bitcast i16 %v to <16 x i1>
   ; The shuffle indices 8..15 select only from the first operand; the second
   ; operand is undef and never referenced.
80   %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
81   %mask1 = bitcast <8 x i1> %mask to i8
82   ret i8 %mask1
83}
84
85; CHECK-LABEL: zext_test1
86; CHECK: kshiftlw
87; CHECK: kshiftrw
88; CHECK: kmovw
89
; Compares %a and %b lane-wise (unsigned greater-than), extracts the result
; for lane 5, and returns it zero-extended to i32 (so the result is 0 or 1).
90define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
91  %cmp_res = icmp ugt <16 x i32> %a, %b
92  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
93  %res = zext i1 %cmp_res.i1 to i32
94  ret i32 %res
95}
96
97; CHECK-LABEL: zext_test2
98; CHECK: kshiftlw
99; CHECK: kshiftrw
100; CHECK: kmovw
101
; Compares %a and %b lane-wise (unsigned greater-than), extracts the result
; for lane 5, and returns it zero-extended to i16 (so the result is 0 or 1).
102define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
103  %cmp_res = icmp ugt <16 x i32> %a, %b
104  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
105  %res = zext i1 %cmp_res.i1 to i16
106  ret i16 %res
107}
108
109; CHECK-LABEL: zext_test3
110; CHECK: kshiftlw
111; CHECK: kshiftrw
112; CHECK: kmovw
113
; Compares %a and %b lane-wise (unsigned greater-than), extracts the result
; for lane 5, and returns it zero-extended to i8 (so the result is 0 or 1).
114define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
115  %cmp_res = icmp ugt <16 x i32> %a, %b
116  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
117  %res = zext i1 %cmp_res.i1 to i8
118  ret i8 %res
119}
120
121; CHECK-LABEL: conv1
122; KNL: kmovw   %k0, %eax
123; KNL: movb    %al, (%rdi)
124; SKX: kmovb   %k0, (%rdi)
; Exercises storing and reloading <8 x i1> constants.
; Side effect: writes an all-true <8 x i1> to the caller-supplied pointer %R.
; Return value: a second constant mask (lane 0 false, lanes 1..7 true) that is
; round-tripped through a stack slot and bitcast to i8.
; NOTE(review): with lane 0 in the least significant bit this would be 0xFE;
; confirm the lane-to-bit mapping for the target.
125define i8 @conv1(<8 x i1>* %R) {
126entry:
127  store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R
128
  ; Local stack slot used only to force a store/load of the second constant.
129  %maskPtr = alloca <8 x i1>
130  store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
131  %mask = load <8 x i1>, <8 x i1>* %maskPtr
132  %mask_convert = bitcast <8 x i1> %mask to i8
133  ret i8 %mask_convert
134}
135
136; SKX-LABEL: test4
137; SKX: vpcmpgt
138; SKX: knot
139; SKX: vpcmpgt
140; SKX: vpmovm2d
; Builds two <4 x i1> masks from signed greater-than compares on <4 x i64>
; inputs, then compares those masks against each other with a signed
; greater-than and returns the result sign-extended to <4 x i32>.
141define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
142  %x_gt_y = icmp sgt <4 x i64> %x, %y
143  %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
  ; Under a signed interpretation an i1 true is -1 and false is 0, so sgt on
  ; i1 lanes holds only where the left lane is false and the right is true.
144  %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
145  %resse = sext <4 x i1>%res to <4 x i32>
146  ret <4 x i32> %resse
147}
148
149; SKX-LABEL: test5
150; SKX: vpcmpgt
151; SKX: knot
152; SKX: vpcmpgt
153; SKX: vpmovm2q
; Builds two <2 x i1> masks from compares on <2 x i64> inputs, compares the
; masks against each other with a signed less-than, and returns the result
; sign-extended to <2 x i64>.
154define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
  ; Despite its name, %x_gt_y holds a signed less-than result (x < y).
155  %x_gt_y = icmp slt <2 x i64> %x, %y
156  %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
  ; Under a signed interpretation an i1 true is -1 and false is 0, so slt on
  ; i1 lanes holds only where the left lane is true and the right is false.
157  %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
158  %resse = sext <2 x i1>%res to <2 x i64>
159  ret <2 x i64> %resse
160}
161
162; KNL-LABEL: test6
163; KNL: vpmovsxbd
164; KNL: vpandd
165; KNL: kmovw   %eax, %k1
166; KNL: vptestmd {{.*}}, %k0 {%k1}
167
168; SKX-LABEL: test6
169; SKX: vpmovb2m
170; SKX: kmovw   %eax, %k1
171; SKX: kandw
; Masks the incoming <16 x i1> argument with a constant that is true in the
; even-numbered lanes, packs the result into an i16, and branches on whether
; that i16 is zero. Both successors simply return void, so the function has no
; observable result; it exists to exercise the and / compare / branch sequence.
172define void @test6(<16 x i1> %mask)  {
173allocas:
  ; Keep only the even-numbered lanes of %mask.
174  %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
175  %b = bitcast <16 x i1> %a to i16
  ; %c is true when none of the kept lanes is set.
176  %c = icmp eq i16 %b, 0
177  br i1 %c, label %true, label %false
178
179true:
180  ret void
181
182false:
183  ret void
184}
185
186; KNL-LABEL: test7
187; KNL: vpmovsxwq
188; KNL: vpandq
189; KNL: vptestmq {{.*}}, %k0
190; KNL: korw
191
192; SKX-LABEL: test7
193; SKX: vpmovw2m
194; SKX: kmovb   %eax, %k1
195; SKX: korb
196
; ORs the incoming <8 x i1> argument with a constant that is true in the
; even-numbered lanes, packs the result into an i8, and branches on whether
; that i8 is zero. Both successors simply return void.
197define void @test7(<8 x i1> %mask)  {
198allocas:
  ; Lane 0 (among others) is OR'ed with true, so %a always has at least one
  ; lane set and %c below can never be true.
199  %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
200  %b = bitcast <8 x i1> %a to i8
201  %c = icmp eq i8 %b, 0
202  br i1 %c, label %true, label %false
203
204true:
205  ret void
206
207false:
208  ret void
209}
210
211; KNL-LABEL: test8
212; KNL: vpxord  %zmm2, %zmm2, %zmm2
213; KNL: jg
214; KNL: vpcmpltud       %zmm2, %zmm1, %k1
215; KNL: jmp
216; KNL: vpcmpgtd        %zmm2, %zmm0, %k1
217
218; SKX-LABEL: test8
219; SKX: jg
220; SKX: vpcmpltud {{.*}}, %k0
221; SKX: vpmovm2b
222; SKX: vpcmpgtd {{.*}}, %k0
223; SKX: vpmovm2b
224
; Chooses between two 16-lane compare masks using a scalar condition
; (%a1 > %b1, signed) and returns the chosen mask sign-extended to <16 x i8>.
225define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
226  %cond = icmp sgt i32 %a1, %b1
  ; Mask used when %cond is true: lanes of %a that are greater than zero.
227  %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
  ; Mask used when %cond is false. An unsigned value is never below zero, so
  ; every lane of this compare is false; the directives preceding this
  ; function nevertheless expect the compare instruction to be emitted.
228  %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
229  %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
230  %res = sext <16 x i1> %mix to <16 x i8>
231  ret <16 x i8> %res
232}
233
234; KNL-LABEL: test9
235; KNL: jg
236; KNL: vpmovsxbd       %xmm1, %zmm0
237; KNL: jmp
238; KNL: vpmovsxbd       %xmm0, %zmm0
239
240; SKX-LABEL: test9
241; SKX: vpmovb2m        %xmm1, %k0
242; SKX: vpmovm2b        %k0, %xmm0
243; SKX: retq
244; SKX: vpmovb2m        %xmm0, %k0
245; SKX: vpmovm2b        %k0, %xmm0
246
; Returns mask %a when %a1 > %b1 (signed) and mask %b otherwise; the choice is
; made by a single scalar condition applied to whole <16 x i1> vectors.
247define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
248  %mask = icmp sgt i32 %a1, %b1
249  %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
250  ret <16 x i1>%c
251}
252
253; KNL-LABEL: test10
254; KNL: jg
255; KNL: vpmovsxwq       %xmm1, %zmm0
256; KNL: jmp
257; KNL: vpmovsxwq       %xmm0, %zmm0
258
259; SKX-LABEL: test10
260; SKX: jg
261; SKX: vpmovw2m        %xmm1, %k0
262; SKX: vpmovm2w        %k0, %xmm0
263; SKX: retq
264; SKX: vpmovw2m        %xmm0, %k0
265; SKX: vpmovm2w        %k0, %xmm0
; Returns mask %a when %a1 > %b1 (signed) and mask %b otherwise; the choice is
; made by a single scalar condition applied to whole <8 x i1> vectors.
266define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
267  %mask = icmp sgt i32 %a1, %b1
268  %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
269  ret <8 x i1>%c
270}
271
272; SKX-LABEL: test11
273; SKX: jg
274; SKX: vpmovd2m        %xmm1, %k0
275; SKX: vpmovm2d        %k0, %xmm0
276; SKX: retq
277; SKX: vpmovd2m        %xmm0, %k0
278; SKX: vpmovm2d        %k0, %xmm0
; Returns mask %a when %a1 > %b1 (signed) and mask %b otherwise; the choice is
; made by a single scalar condition applied to whole <4 x i1> vectors.
279define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
280  %mask = icmp sgt i32 %a1, %b1
281  %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
282  ret <4 x i1>%c
283}
284
285; KNL-LABEL: test12
286; KNL: movl    %edi, %eax
; Selects between %x and %y using lane 0 of a constant 16-lane mask.
; 21845 is 0x5555, i.e. every even-numbered bit is set.
; NOTE(review): assuming lane 0 maps to bit 0 (little-endian x86), %b is true
; and the function always returns %x.
287define i32 @test12(i32 %x, i32 %y)  {
288  %a = bitcast i16 21845 to <16 x i1>
289  %b = extractelement <16 x i1> %a, i32 0
290  %c = select i1 %b, i32 %x, i32 %y
291  ret i32 %c
292}
293
294; KNL-LABEL: test13
295; KNL: movl    %esi, %eax
; Selects between %x and %y using lane 3 of a constant 16-lane mask.
; 21845 is 0x5555, i.e. every odd-numbered bit is clear.
; NOTE(review): assuming lane 3 maps to bit 3 (little-endian x86), %b is false
; and the function always returns %y.
296define i32 @test13(i32 %x, i32 %y)  {
297  %a = bitcast i16 21845 to <16 x i1>
298  %b = extractelement <16 x i1> %a, i32 3
299  %c = select i1 %b, i32 %x, i32 %y
300  ret i32 %c
301}
302
303; SKX-LABEL: test14
304; SKX: movb     $11, %al
305; SKX: kmovb    %eax, %k0
306; SKX: vpmovm2d %k0, %xmm0
307
; Returns a constant <4 x i1> produced by inserting lane 2 of the mask 0x5555
; (21845) into lane 1 of <true, false, false, true>.
; NOTE(review): assuming lane 2 maps to bit 2 (set in 0x5555), the result is
; <true, true, false, true>, which matches the immediate 11 expected by the
; directives preceding this function.
308define <4 x i1> @test14()  {
309  %a = bitcast i16 21845 to <16 x i1>
310  %b = extractelement <16 x i1> %a, i32 2
311  %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
312  ret <4 x i1> %c
313}
314
315; KNL-LABEL: test15
316; KNL: cmovgw
; Returns one of two constant 16-lane masks: the mask built from 21845
; (0x5555) when %x > %y (signed), otherwise the mask built from 1.
317define <16 x i1> @test15(i32 %x, i32 %y)  {
318  %a = bitcast i16 21845 to <16 x i1>
319  %b = bitcast i16 1 to <16 x i1>
320  %mask = icmp sgt i32 %x, %y
321  %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
322  ret <16 x i1> %c
323}
324
325; SKX-LABEL: test16
326; SKX: kxnorw  %k1, %k1, %k1
327; SKX: kshiftrw        $15, %k1, %k1
328; SKX: kshiftlq        $5, %k1, %k1
329; SKX: korq    %k1, %k0, %k0
330; SKX: vpmovm2b        %k0, %zmm0
; Views %x as a 64-lane mask, forces lane 5 to true, and returns the mask
; sign-extended to <64 x i8> (each lane becomes 0 or -1).
331define <64 x i8> @test16(i64 %x) {
332  %a = bitcast i64 %x to <64 x i1>
333  %b = insertelement <64 x i1>%a, i1 true, i32 5
334  %c = sext <64 x i1>%b to <64 x i8>
335  ret <64 x i8>%c
336}
337
338; SKX-LABEL: test17
339; SKX: setg    %al
340; SKX: andl    $1, %eax
341; SKX: kmovw   %eax, %k1
342; SKX: kshiftlq        $5, %k1, %k1
343; SKX: korq    %k1, %k0, %k0
344; SKX: vpmovm2b        %k0, %zmm0
; Views %x as a 64-lane mask, replaces lane 5 with the result of the scalar
; compare %y > %z (signed), and returns the mask sign-extended to <64 x i8>.
345define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
346  %a = bitcast i64 %x to <64 x i1>
347  %b = icmp sgt i32 %y, %z
348  %c = insertelement <64 x i1>%a, i1 %b, i32 5
349  %d = sext <64 x i1>%c to <64 x i8>
350  ret <64 x i8>%d
351}
352
353; KNL-LABEL: test18
; Starts from the 8-lane mask built from %a and overwrites its two top lanes
; with lanes taken from the 16-lane mask built from %y:
;   result lane 7 <- %y lane 8
;   result lane 6 <- %y lane 9
; Only a label directive precedes this function, so no specific instructions
; are pinned for it.
354define <8 x i1> @test18(i8 %a, i16 %y) {
355  %b = bitcast i8 %a to <8 x i1>
356  %b1 = bitcast i16 %y to <16 x i1>
357  %el1 = extractelement <16 x i1>%b1, i32 8
358  %el2 = extractelement <16 x i1>%b1, i32 9
359  %c = insertelement <8 x i1>%b, i1 %el1, i32 7
360  %d = insertelement <8 x i1>%c, i1 %el2, i32 6
361  ret <8 x i1>%d
362}
363
364; KNL-LABEL: test21
365; KNL: vpand %ymm
366; KNL: vextracti128    $1, %ymm2
367; KNL: vpand %ymm
368
369; SKX-LABEL: test21
370; SKX: vpmovb2m
371; SKX: vmovdqu16 {{.*}}%k1
372
; Lane-wise select over 32 i16 lanes: keeps %x where %mask is true and
; yields zero where it is false. Declared nounwind and readnone.
373define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
374  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
375  ret <32 x i16> %ret
376}
377
378; SKX-LABEL: test22
379; SKX: kmovb
; Stores a <4 x i1> mask argument to memory at %addr. Returns nothing.
380define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
381  store <4 x i1> %a, <4 x i1>* %addr
382  ret void
383}
384
385; SKX-LABEL: test23
386; SKX: kmovb
; Stores a <2 x i1> mask argument to memory at %addr. Returns nothing.
387define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
388  store <2 x i1> %a, <2 x i1>* %addr
389  ret void
390}
391