; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlvbmi-builtins.c
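; The mask2 variants blend the permute result with the index operand (__I),
; the mask variants blend with the first source (__A), and the maskz variants
; blend with zero. As a minimal sketch (assuming the usual intrinsic names
; from <immintrin.h>), the first test below corresponds to C along the lines of:
;
;   __m128i test_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I,
;                                           __mmask16 __U, __m128i __B) {
;     return _mm_mask2_permutex2var_epi8(__A, __I, __U, __B);
;   }
;
; Because inactive lanes must keep __I, the backend picks vpermi2b (which
; leaves the indices in the destination register) and then moves the result
; into xmm0/ymm0.
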
define <2 x i64> @test_mm_mask2_permutex2var_epi8(<2 x i64> %__A, <2 x i64> %__I, i16 zeroext %__U, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask2_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermi2b %xmm2, %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask2_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermi2b %xmm2, %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <16 x i8>
  %1 = bitcast <2 x i64> %__I to <16 x i8>
  %2 = bitcast <2 x i64> %__B to <16 x i8>
  %3 = tail call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> %1
  %6 = bitcast <16 x i8> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <4 x i64> @test_mm256_mask2_permutex2var_epi8(<4 x i64> %__A, <4 x i64> %__I, i32 %__U, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask2_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermi2b %ymm2, %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask2_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermi2b %ymm2, %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <32 x i8>
  %1 = bitcast <4 x i64> %__I to <32 x i8>
  %2 = bitcast <4 x i64> %__B to <32 x i8>
  %3 = tail call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %0, <32 x i8> %1, <32 x i8> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> %1
  %6 = bitcast <32 x i8> %5 to <4 x i64>
  ret <4 x i64> %6
}

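; With no mask, the backend is free to use vpermt2b instead, which overwrites
; the first data operand rather than the index vector, so no extra register
; move is needed.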
define <2 x i64> @test_mm_permutex2var_epi8(<2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_permutex2var_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <16 x i8>
  %1 = bitcast <2 x i64> %__I to <16 x i8>
  %2 = bitcast <2 x i64> %__B to <16 x i8>
  %3 = tail call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
  %4 = bitcast <16 x i8> %3 to <2 x i64>
  ret <2 x i64> %4
}

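; Merge-masking selects against __A, which already sits in the destination
; register, so the masked vpermt2b merges in place with no trailing move.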
define <2 x i64> @test_mm_mask_permutex2var_epi8(<2 x i64> %__A, i16 zeroext %__U, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <16 x i8>
  %1 = bitcast <2 x i64> %__I to <16 x i8>
  %2 = bitcast <2 x i64> %__B to <16 x i8>
  %3 = tail call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> %0
  %6 = bitcast <16 x i8> %5 to <2 x i64>
  ret <2 x i64> %6
}

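; Zero-masking is modeled in IR as a select against zeroinitializer and
; lowers to the {z} form of the instruction, which zeroes inactive lanes.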
define <2 x i64> @test_mm_maskz_permutex2var_epi8(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <16 x i8>
  %1 = bitcast <2 x i64> %__I to <16 x i8>
  %2 = bitcast <2 x i64> %__B to <16 x i8>
  %3 = tail call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> zeroinitializer
  %6 = bitcast <16 x i8> %5 to <2 x i64>
  ret <2 x i64> %6
}

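; The remaining 256-bit tests mirror the 128-bit ones, widening to <32 x i8>
; with an i32 mask.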
define <4 x i64> @test_mm256_permutex2var_epi8(<4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_permutex2var_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <32 x i8>
  %1 = bitcast <4 x i64> %__I to <32 x i8>
  %2 = bitcast <4 x i64> %__B to <32 x i8>
  %3 = tail call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %0, <32 x i8> %1, <32 x i8> %2)
  %4 = bitcast <32 x i8> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_mask_permutex2var_epi8(<4 x i64> %__A, i32 %__U, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <32 x i8>
  %1 = bitcast <4 x i64> %__I to <32 x i8>
  %2 = bitcast <4 x i64> %__B to <32 x i8>
  %3 = tail call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %0, <32 x i8> %1, <32 x i8> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> %0
  %6 = bitcast <32 x i8> %5 to <4 x i64>
  ret <4 x i64> %6
}

define <4 x i64> @test_mm256_maskz_permutex2var_epi8(i32 %__U, <4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <32 x i8>
  %1 = bitcast <4 x i64> %__I to <32 x i8>
  %2 = bitcast <4 x i64> %__B to <32 x i8>
  %3 = tail call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %0, <32 x i8> %1, <32 x i8> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> zeroinitializer
  %6 = bitcast <32 x i8> %5 to <4 x i64>
  ret <4 x i64> %6
}

declare <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>)