; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlvbmi-builtins.c

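; The permutex2var tests below check lowering to VPERMI2B/VPERMT2B. As a
; rough guide, a minimal C sketch (an assumption mirroring the clang builtins
; test referenced above, with a hypothetical wrapper name) for the first
; test would be:
;
;   #include <immintrin.h>
;   // Compile with -mavx512vbmi -mavx512vl.
;   __m128i mask2_permute(__m128i A, __m128i I, __mmask16 U, __m128i B) {
;     return _mm_mask2_permutex2var_epi8(A, I, U, B);
;   }
;
; The mask2_* variants keep the index vector (%__I) as the pass-through
; operand, so they lower to VPERMI2B (the result lands in the index register
; and is then copied to the return register); the unmasked, mask_*, and
; maskz_* variants lower to VPERMT2B instead.
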
define <2 x i64> @test_mm_mask2_permutex2var_epi8(<2 x i64> %__A, <2 x i64> %__I, i16 zeroext %__U, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask2_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermi2b %xmm2, %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask2_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermi2b %xmm2, %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <16 x i8>
  %1 = bitcast <2 x i64> %__I to <16 x i8>
  %2 = bitcast <2 x i64> %__B to <16 x i8>
  %3 = tail call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> %1
  %6 = bitcast <16 x i8> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <4 x i64> @test_mm256_mask2_permutex2var_epi8(<4 x i64> %__A, <4 x i64> %__I, i32 %__U, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask2_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermi2b %ymm2, %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask2_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermi2b %ymm2, %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <32 x i8>
  %1 = bitcast <4 x i64> %__I to <32 x i8>
  %2 = bitcast <4 x i64> %__B to <32 x i8>
  %3 = tail call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %0, <32 x i8> %1, <32 x i8> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> %1
  %6 = bitcast <32 x i8> %5 to <4 x i64>
  ret <4 x i64> %6
}

define <2 x i64> @test_mm_permutex2var_epi8(<2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_permutex2var_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <16 x i8>
  %1 = bitcast <2 x i64> %__I to <16 x i8>
  %2 = bitcast <2 x i64> %__B to <16 x i8>
  %3 = tail call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
  %4 = bitcast <16 x i8> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_mask_permutex2var_epi8(<2 x i64> %__A, i16 zeroext %__U, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <16 x i8>
  %1 = bitcast <2 x i64> %__I to <16 x i8>
  %2 = bitcast <2 x i64> %__B to <16 x i8>
  %3 = tail call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> %0
  %6 = bitcast <16 x i8> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <2 x i64> @test_mm_maskz_permutex2var_epi8(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <16 x i8>
  %1 = bitcast <2 x i64> %__I to <16 x i8>
  %2 = bitcast <2 x i64> %__B to <16 x i8>
  %3 = tail call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> zeroinitializer
  %6 = bitcast <16 x i8> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <4 x i64> @test_mm256_permutex2var_epi8(<4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_permutex2var_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <32 x i8>
  %1 = bitcast <4 x i64> %__I to <32 x i8>
  %2 = bitcast <4 x i64> %__B to <32 x i8>
  %3 = tail call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %0, <32 x i8> %1, <32 x i8> %2)
  %4 = bitcast <32 x i8> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_mask_permutex2var_epi8(<4 x i64> %__A, i32 %__U, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <32 x i8>
  %1 = bitcast <4 x i64> %__I to <32 x i8>
  %2 = bitcast <4 x i64> %__B to <32 x i8>
  %3 = tail call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %0, <32 x i8> %1, <32 x i8> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> %0
  %6 = bitcast <32 x i8> %5 to <4 x i64>
  ret <4 x i64> %6
}

define <4 x i64> @test_mm256_maskz_permutex2var_epi8(i32 %__U, <4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <32 x i8>
  %1 = bitcast <4 x i64> %__I to <32 x i8>
  %2 = bitcast <4 x i64> %__B to <32 x i8>
  %3 = tail call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %0, <32 x i8> %1, <32 x i8> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> zeroinitializer
  %6 = bitcast <32 x i8> %5 to <4 x i64>
  ret <4 x i64> %6
}

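; The multishift tests below check lowering to VPMULTISHIFTQB, which fills
; each byte of the result with an unaligned 8-bit field taken from the
; corresponding 64-bit lane of the data operand at the bit offset named by
; the matching control byte; the mask then selects between that result and
; the pass-through (or zero). A minimal C sketch (an assumption, with a
; hypothetical wrapper name) for the masked 128-bit case:
;
;   #include <immintrin.h>
;   __m128i mask_multishift(__m128i W, __mmask16 M, __m128i X, __m128i Y) {
;     return _mm_mask_multishift_epi64_epi8(W, M, X, Y);
;   }
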
define <2 x i64> @test_mm_mask_multishift_epi64_epi8(<2 x i64> %__W, i16 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) {
; X86-LABEL: test_mm_mask_multishift_epi64_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmultishiftqb %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_multishift_epi64_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmultishiftqb %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__X to <16 x i8>
  %1 = bitcast <2 x i64> %__Y to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.avx512.pmultishift.qb.128(<16 x i8> %0, <16 x i8> %1)
  %3 = bitcast <2 x i64> %__W to <16 x i8>
  %4 = bitcast i16 %__M to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i8> %2, <16 x i8> %3
  %6 = bitcast <16 x i8> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <2 x i64> @test_mm_maskz_multishift_epi64_epi8(i16 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) {
; X86-LABEL: test_mm_maskz_multishift_epi64_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmultishiftqb %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_multishift_epi64_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmultishiftqb %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__X to <16 x i8>
  %1 = bitcast <2 x i64> %__Y to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.avx512.pmultishift.qb.128(<16 x i8> %0, <16 x i8> %1)
  %3 = bitcast i16 %__M to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i8> %2, <16 x i8> zeroinitializer
  %5 = bitcast <16 x i8> %4 to <2 x i64>
  ret <2 x i64> %5
}

define <2 x i64> @test_mm_multishift_epi64_epi8(<2 x i64> %__X, <2 x i64> %__Y) {
; CHECK-LABEL: test_mm_multishift_epi64_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmultishiftqb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__X to <16 x i8>
  %1 = bitcast <2 x i64> %__Y to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.avx512.pmultishift.qb.128(<16 x i8> %0, <16 x i8> %1)
  %3 = bitcast <16 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @test_mm256_mask_multishift_epi64_epi8(<4 x i64> %__W, i32 %__M, <4 x i64> %__X, <4 x i64> %__Y) {
; X86-LABEL: test_mm256_mask_multishift_epi64_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmultishiftqb %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_multishift_epi64_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmultishiftqb %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__X to <32 x i8>
  %1 = bitcast <4 x i64> %__Y to <32 x i8>
  %2 = tail call <32 x i8> @llvm.x86.avx512.pmultishift.qb.256(<32 x i8> %0, <32 x i8> %1)
  %3 = bitcast <4 x i64> %__W to <32 x i8>
  %4 = bitcast i32 %__M to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i8> %2, <32 x i8> %3
  %6 = bitcast <32 x i8> %5 to <4 x i64>
  ret <4 x i64> %6
}

define <4 x i64> @test_mm256_maskz_multishift_epi64_epi8(i32 %__M, <4 x i64> %__X, <4 x i64> %__Y) {
; X86-LABEL: test_mm256_maskz_multishift_epi64_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmultishiftqb %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_multishift_epi64_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmultishiftqb %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__X to <32 x i8>
  %1 = bitcast <4 x i64> %__Y to <32 x i8>
  %2 = tail call <32 x i8> @llvm.x86.avx512.pmultishift.qb.256(<32 x i8> %0, <32 x i8> %1)
  %3 = bitcast i32 %__M to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer
  %5 = bitcast <32 x i8> %4 to <4 x i64>
  ret <4 x i64> %5
}

define <4 x i64> @test_mm256_multishift_epi64_epi8(<4 x i64> %__X, <4 x i64> %__Y) {
; CHECK-LABEL: test_mm256_multishift_epi64_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmultishiftqb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__X to <32 x i8>
  %1 = bitcast <4 x i64> %__Y to <32 x i8>
  %2 = tail call <32 x i8> @llvm.x86.avx512.pmultishift.qb.256(<32 x i8> %0, <32 x i8> %1)
  %3 = bitcast <32 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

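; Declarations for the AVX-512 VBMI intrinsics exercised above.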
declare <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>)
declare <16 x i8> @llvm.x86.avx512.pmultishift.qb.128(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx512.pmultishift.qb.256(<32 x i8>, <32 x i8>)