• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512dq -mattr=+avx512vl| FileCheck %s
3
; Widen <2 x i1> %a to <8 x i1>: the two source elements land in result
; lanes 2-3 and every other lane is undef. The CHECK lines expect this to
; lower to a single mask shift left by 2 (kshiftlb $2).
4define <8 x i1> @test(<2 x i1> %a) {
5; CHECK-LABEL: test:
6; CHECK:       # %bb.0:
7; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
8; CHECK-NEXT:    vpmovq2m %xmm0, %k0
9; CHECK-NEXT:    kshiftlb $2, %k0, %k0
10; CHECK-NEXT:    vpmovm2w %k0, %xmm0
11; CHECK-NEXT:    retq
12  %res = shufflevector <2 x i1> %a, <2 x i1> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
13  ret <8 x i1> %res
14}
15
; Widen <2 x i1> %a to <8 x i1>: the two source elements land in result
; lanes 4-5 and every other lane is undef. The CHECK lines expect a single
; mask shift left by 4 (kshiftlb $4).
16define <8 x i1> @test1(<2 x i1> %a) {
17; CHECK-LABEL: test1:
18; CHECK:       # %bb.0:
19; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
20; CHECK-NEXT:    vpmovq2m %xmm0, %k0
21; CHECK-NEXT:    kshiftlb $4, %k0, %k0
22; CHECK-NEXT:    vpmovm2w %k0, %xmm0
23; CHECK-NEXT:    retq
24  %res = shufflevector <2 x i1> %a, <2 x i1> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef>
25  ret <8 x i1> %res
26}
27
; Same placement as lanes 4-5 of an <8 x i1> result, but the second shuffle
; operand is zeroinitializer and lanes 0-1 select its element 1 (mask index 3),
; so those lanes are known zero; lanes 2-3 and 6-7 are undef. The CHECK lines
; expect only a mask shift left by 4 (kshiftlb $4).
28define <8 x i1> @test2(<2 x i1> %a) {
29; CHECK-LABEL: test2:
30; CHECK:       # %bb.0:
31; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
32; CHECK-NEXT:    vpmovq2m %xmm0, %k0
33; CHECK-NEXT:    kshiftlb $4, %k0, %k0
34; CHECK-NEXT:    vpmovm2w %k0, %xmm0
35; CHECK-NEXT:    retq
36  %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <8 x i32> <i32 3, i32 3, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef>
37  ret <8 x i1> %res
38}
39
; Concatenate <4 x i1> %a with a zero vector: result lanes 0-3 come from %a
; and lanes 4-7 from zeroinitializer. The CHECK lines expect no mask shift or
; OR between the vpmovd2m and vpmovm2w conversions.
40define <8 x i1> @test3(<4 x i1> %a) {
41; CHECK-LABEL: test3:
42; CHECK:       # %bb.0:
43; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
44; CHECK-NEXT:    vpmovd2m %xmm0, %k0
45; CHECK-NEXT:    vpmovm2w %k0, %xmm0
46; CHECK-NEXT:    retq
47
48  %res = shufflevector <4 x i1> %a, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
49  ret <8 x i1> %res
50}
51
; Concatenate two <4 x i1> vectors into <8 x i1>: lanes 0-3 from %a, lanes 4-7
; from %b. The CHECK lines expect the mask built from %b (xmm1) to be shifted
; left by 4 (kshiftlb $4) and OR-ed with the mask built from %a (korb).
52define <8 x i1> @test4(<4 x i1> %a, <4 x i1>%b) {
53; CHECK-LABEL: test4:
54; CHECK:       # %bb.0:
55; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
56; CHECK-NEXT:    vpmovd2m %xmm1, %k0
57; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
58; CHECK-NEXT:    vpmovd2m %xmm0, %k1
59; CHECK-NEXT:    kshiftlb $4, %k0, %k0
60; CHECK-NEXT:    korb %k0, %k1, %k0
61; CHECK-NEXT:    vpmovm2w %k0, %xmm0
62; CHECK-NEXT:    retq
63
64  %res = shufflevector <4 x i1> %a, <4 x i1> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
65  ret <8 x i1> %res
66}
67
; Concatenate two <2 x i1> vectors into <4 x i1>: lanes 0-1 from %a, lanes 2-3
; from %b. The CHECK lines expect the mask built from %b (xmm1) to be shifted
; left by 2 (kshiftlb $2) and OR-ed with the mask built from %a (korw).
68define <4 x i1> @test5(<2 x i1> %a, <2 x i1>%b) {
69; CHECK-LABEL: test5:
70; CHECK:       # %bb.0:
71; CHECK-NEXT:    vpsllq $63, %xmm1, %xmm1
72; CHECK-NEXT:    vpmovq2m %xmm1, %k0
73; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
74; CHECK-NEXT:    vpmovq2m %xmm0, %k1
75; CHECK-NEXT:    kshiftlb $2, %k0, %k0
76; CHECK-NEXT:    korw %k0, %k1, %k0
77; CHECK-NEXT:    vpmovm2d %k0, %xmm0
78; CHECK-NEXT:    retq
79
80  %res = shufflevector <2 x i1> %a, <2 x i1> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
81  ret <4 x i1> %res
82}
83
; Concatenate two <2 x i1> vectors into the low four lanes of a <16 x i1>
; result (lanes 0-1 from %a, lanes 2-3 from %b); lanes 4-15 are undef. The
; CHECK lines expect the same shift-by-2 plus OR sequence as a plain 2+2
; concatenation, followed by vpmovm2b into xmm0.
84define <16 x i1> @test6(<2 x i1> %a, <2 x i1>%b) {
85; CHECK-LABEL: test6:
86; CHECK:       # %bb.0:
87; CHECK-NEXT:    vpsllq $63, %xmm1, %xmm1
88; CHECK-NEXT:    vpmovq2m %xmm1, %k0
89; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
90; CHECK-NEXT:    vpmovq2m %xmm0, %k1
91; CHECK-NEXT:    kshiftlb $2, %k0, %k0
92; CHECK-NEXT:    korw %k0, %k1, %k0
93; CHECK-NEXT:    vpmovm2b %k0, %xmm0
94; CHECK-NEXT:    retq
95
96  %res = shufflevector <2 x i1> %a, <2 x i1> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
97  ret <16 x i1> %res
98}
99
; Concatenate two <4 x i1> vectors into the low eight lanes of a <32 x i1>
; result (lanes 0-3 from %a, lanes 4-7 from %b); the remaining lanes are
; undef. The CHECK lines expect a shift-by-4 plus korb, followed by vpmovm2b
; into ymm0.
100define <32 x i1> @test7(<4 x i1> %a, <4 x i1>%b) {
101; CHECK-LABEL: test7:
102; CHECK:       # %bb.0:
103; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
104; CHECK-NEXT:    vpmovd2m %xmm1, %k0
105; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
106; CHECK-NEXT:    vpmovd2m %xmm0, %k1
107; CHECK-NEXT:    kshiftlb $4, %k0, %k0
108; CHECK-NEXT:    korb %k0, %k1, %k0
109; CHECK-NEXT:    vpmovm2b %k0, %ymm0
110; CHECK-NEXT:    retq
111
112  %res = shufflevector <4 x i1> %a, <4 x i1> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
113  ret <32 x i1> %res
114}
115
; Build a <64 x i1> result from two <8 x i1> vectors: the eight elements of %a
; open the mask and the eight elements of %b (mask indices 8-15) appear after
; a run of undef lanes; all remaining lanes are undef. The CHECK lines expect
; the two masks to be combined with a single kunpckdq rather than a shift+OR.
116define <64 x i1> @test8(<8 x i1> %a, <8 x i1>%b) {
117; CHECK-LABEL: test8:
118; CHECK:       # %bb.0:
119; CHECK-NEXT:    vpsllw $15, %xmm1, %xmm1
120; CHECK-NEXT:    vpmovw2m %xmm1, %k0
121; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
122; CHECK-NEXT:    vpmovw2m %xmm0, %k1
123; CHECK-NEXT:    kunpckdq %k1, %k0, %k0
124; CHECK-NEXT:    vpmovm2b %k0, %zmm0
125; CHECK-NEXT:    retq
126
127  %res = shufflevector <8 x i1> %a, <8 x i1> %b, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
128  ret <64 x i1> %res
129}
130
; Extract the upper half of <8 x i1> %a (elements 4-7) as a <4 x i1> result;
; the mask never selects from %b. The CHECK lines expect a single mask shift
; right by 4 (kshiftrb $4).
131define <4 x i1> @test9(<8 x i1> %a, <8 x i1> %b) {
132; CHECK-LABEL: test9:
133; CHECK:       # %bb.0:
134; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
135; CHECK-NEXT:    vpmovw2m %xmm0, %k0
136; CHECK-NEXT:    kshiftrb $4, %k0, %k0
137; CHECK-NEXT:    vpmovm2d %k0, %xmm0
138; CHECK-NEXT:    retq
139  %res = shufflevector <8 x i1> %a, <8 x i1> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
140  ret <4 x i1> %res
141}
142
; Extract the upper half of <4 x i1> %a (elements 2-3) as a <2 x i1> result;
; the mask never selects from %b. The CHECK lines expect a single mask shift
; right by 2 (kshiftrb $2).
143define <2 x i1> @test10(<4 x i1> %a, <4 x i1> %b) {
144; CHECK-LABEL: test10:
145; CHECK:       # %bb.0:
146; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
147; CHECK-NEXT:    vpmovd2m %xmm0, %k0
148; CHECK-NEXT:    kshiftrb $2, %k0, %k0
149; CHECK-NEXT:    vpmovm2q %k0, %xmm0
150; CHECK-NEXT:    retq
151  %res = shufflevector <4 x i1> %a, <4 x i1> %b, <2 x i32> <i32 2, i32 3>
152  ret <2 x i1> %res
153}
154
; Place <4 x i1> %a into the upper half (lanes 4-7) of an <8 x i1> result;
; lanes 0-3 are undef. The %b parameter is not referenced by the body. The
; CHECK lines expect a single mask shift left by 4 (kshiftlb $4).
155define <8 x i1> @test11(<4 x i1> %a, <4 x i1>%b) {
156; CHECK-LABEL: test11:
157; CHECK:       # %bb.0:
158; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
159; CHECK-NEXT:    vpmovd2m %xmm0, %k0
160; CHECK-NEXT:    kshiftlb $4, %k0, %k0
161; CHECK-NEXT:    vpmovm2w %k0, %xmm0
162; CHECK-NEXT:    retq
163  %res = shufflevector <4 x i1> %a, <4 x i1> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
164  ret <8 x i1> %res
165}
166
; Insert <2 x i1> %a at lanes 10-11 of a <16 x i1> result. Lanes 0-9 select
; from the zeroinitializer operand (mask indices 2 and 3) and lanes 12-15 are
; undef. The CHECK lines expect a single 16-bit mask shift left by 10
; (kshiftlw $10).
167define <16 x i1> @test12(<2 x i1> %a) {
168; CHECK-LABEL: test12:
169; CHECK:       # %bb.0:
170; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
171; CHECK-NEXT:    vpmovq2m %xmm0, %k0
172; CHECK-NEXT:    kshiftlw $10, %k0, %k0
173; CHECK-NEXT:    vpmovm2b %k0, %xmm0
174; CHECK-NEXT:    retq
175  %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
176  ret <16 x i1> %res
177}
178
; Insert <2 x i1> %a at lanes 10-11 of a <32 x i1> result. Lanes 0-9 select
; from the zeroinitializer operand (mask indices 2 and 3) and all lanes from
; 12 upward are undef. The CHECK lines expect a single 32-bit mask shift left
; by 10 (kshiftld $10).
179define <32 x i1> @test13(<2 x i1> %a) {
180; CHECK-LABEL: test13:
181; CHECK:       # %bb.0:
182; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
183; CHECK-NEXT:    vpmovq2m %xmm0, %k0
184; CHECK-NEXT:    kshiftld $10, %k0, %k0
185; CHECK-NEXT:    vpmovm2b %k0, %ymm0
186; CHECK-NEXT:    retq
187  %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <32 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
188  ret <32 x i1> %res
189}
190
191define <64 x i1> @test14(<2 x i1> %a) {
192; CHECK-LABEL: test14:
193; CHECK:       # %bb.0:
194; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
195; CHECK-NEXT:    vpmovq2m %xmm0, %k0
196; CHECK-NEXT:    kshiftlq $10, %k0, %k0
197; CHECK-NEXT:    vpmovm2b %k0, %zmm0
198; CHECK-NEXT:    retq
199  %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <64 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
200  ret <64 x i1> %res
201}
202
203; Make sure we can recognize this shuffle as an insertion into a zero vector.
; %a is the <2 x i1> result of comparing %x against zero. The shuffle keeps
; %a in lanes 0-1 and fills lanes 2-7 with element 0 of the second operand
; (mask index 2), which is the constant i1 false; the <8 x i1> is then bitcast
; to i8. The CHECK lines expect the compare result to be moved straight to a
; GPR (vptestnmq + kmovd) with no extra mask shift or OR.
204define i8 @test15(<2 x i64> %x) {
205; CHECK-LABEL: test15:
206; CHECK:       # %bb.0:
207; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k0
208; CHECK-NEXT:    kmovd %k0, %eax
209; CHECK-NEXT:    # kill: def $al killed $al killed $eax
210; CHECK-NEXT:    retq
211  %a = icmp eq <2 x i64> %x, zeroinitializer
212  %b = shufflevector <2 x i1> %a, <2 x i1> <i1 false, i1 undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
213  %c = bitcast <8 x i1> %b to i8
214  ret i8 %c
215}
216