; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX256
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,-prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F

; Loads an <8 x i32> from %p, compares it for equality with zero, and
; sign-extends the resulting <8 x i1> mask to <8 x i16>.
; The expected code is identical for both +avx512vl runs (with and without
; prefer-256-bit); the +avx512f runs compare directly in a ymm register.
define <8 x i16> @testv8i1_sext_v8i16(<8 x i32>* %p) {
; AVX256-LABEL: testv8i1_sext_v8i16:
; AVX256:       # %bb.0:
; AVX256-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
; AVX256-NEXT:    vzeroupper
; AVX256-NEXT:    retq
;
; AVX512VL-LABEL: testv8i1_sext_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512F-LABEL: testv8i1_sext_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpcmpeqd (%rdi), %ymm0, %ymm0
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
  %in = load <8 x i32>, <8 x i32>* %p
  %cmp = icmp eq <8 x i32> %in, zeroinitializer
  %ext = sext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %ext
}

; Compares two loaded <8 x i32> vectors (%p and %q) against zero, concatenates
; the two <8 x i1> masks into a <16 x i1> with an identity shufflevector, and
; sign-extends the result to <16 x i8>.
; With +avx512vl,+prefer-256-bit the expected code expands each half in ymm
; registers; the other runs merge the masks with kunpckbw and expand in zmm.
define <16 x i8> @testv16i1_sext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_sext_v16i8:
; AVX256:       # %bb.0:
; AVX256-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT:    vpcmpeqd (%rsi), %ymm0, %k2
; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k2} {z}
; AVX256-NEXT:    vpmovdw %ymm1, %xmm1
; AVX256-NEXT:    vpacksswb %xmm0, %xmm1, %xmm1
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
; AVX256-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX256-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX256-NEXT:    vzeroupper
; AVX256-NEXT:    retq
;
; AVX512VL-LABEL: testv16i1_sext_v16i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
; AVX512VL-NEXT:    vpcmpeqd (%rsi), %ymm0, %k1
; AVX512VL-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512F-LABEL: testv16i1_sext_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
  %in = load <8 x i32>, <8 x i32>* %p
  %cmp = icmp eq <8 x i32> %in, zeroinitializer
  %in2 = load <8 x i32>, <8 x i32>* %q
  %cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
  %concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %ext = sext <16 x i1> %concat to <16 x i8>
  ret <16 x i8> %ext
}

; Compares two loaded <8 x i32> vectors (%p and %q) against zero, concatenates
; the two <8 x i1> masks into a <16 x i1> with an identity shufflevector, and
; sign-extends the result to <16 x i16>.
; With +avx512vl,+prefer-256-bit the expected code expands each half in ymm
; registers and joins them with vinserti128; the other runs merge the masks
; with kunpckbw and expand in zmm.
define <16 x i16> @testv16i1_sext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_sext_v16i16:
; AVX256:       # %bb.0:
; AVX256-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT:    vpcmpeqd (%rsi), %ymm0, %k2
; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1} {z}
; AVX256-NEXT:    vpmovdw %ymm1, %xmm1
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k2} {z}
; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
; AVX256-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX256-NEXT:    retq
;
; AVX512VL-LABEL: testv16i1_sext_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
; AVX512VL-NEXT:    vpcmpeqd (%rsi), %ymm0, %k1
; AVX512VL-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512F-LABEL: testv16i1_sext_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    retq
  %in = load <8 x i32>, <8 x i32>* %p
  %cmp = icmp eq <8 x i32> %in, zeroinitializer
  %in2 = load <8 x i32>, <8 x i32>* %q
  %cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
  %concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %ext = sext <16 x i1> %concat to <16 x i16>
  ret <16 x i16> %ext
}

; Loads an <8 x i32> from %p, compares it for equality with zero, and
; zero-extends the resulting <8 x i1> mask to <8 x i16>.
; In every run the expected code builds the all-ones/zero word vector first
; and then shifts each word right by 15 (vpsrlw $15) to leave 0 or 1.
define <8 x i16> @testv8i1_zext_v8i16(<8 x i32>* %p) {
; AVX256-LABEL: testv8i1_zext_v8i16:
; AVX256:       # %bb.0:
; AVX256-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
; AVX256-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX256-NEXT:    vzeroupper
; AVX256-NEXT:    retq
;
; AVX512VL-LABEL: testv8i1_zext_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512F-LABEL: testv8i1_zext_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpcmpeqd (%rdi), %ymm0, %ymm0
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
  %in = load <8 x i32>, <8 x i32>* %p
  %cmp = icmp eq <8 x i32> %in, zeroinitializer
  %ext = zext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %ext
}

; Compares two loaded <8 x i32> vectors (%p and %q) against zero, concatenates
; the two <8 x i1> masks into a <16 x i1> with an identity shufflevector, and
; zero-extends the result to <16 x i8>.
; With +avx512vl,+prefer-256-bit the expected code expands each half in ymm
; registers and packs them with vpackuswb; the other runs merge the masks with
; kunpckbw and use a masked vpbroadcastd into zmm.
define <16 x i8> @testv16i1_zext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_zext_v16i8:
; AVX256:       # %bb.0:
; AVX256-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT:    vpcmpeqd (%rsi), %ymm0, %k2
; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k2} {z}
; AVX256-NEXT:    vpmovdw %ymm1, %xmm1
; AVX256-NEXT:    vpsrlw $15, %xmm1, %xmm1
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
; AVX256-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX256-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX256-NEXT:    vzeroupper
; AVX256-NEXT:    retq
;
; AVX512VL-LABEL: testv16i1_zext_v16i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
; AVX512VL-NEXT:    vpcmpeqd (%rsi), %ymm0, %k1
; AVX512VL-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512F-LABEL: testv16i1_zext_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512F-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
  %in = load <8 x i32>, <8 x i32>* %p
  %cmp = icmp eq <8 x i32> %in, zeroinitializer
  %in2 = load <8 x i32>, <8 x i32>* %q
  %cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
  %concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %ext = zext <16 x i1> %concat to <16 x i8>
  ret <16 x i8> %ext
}

; Compares two loaded <8 x i32> vectors (%p and %q) against zero, concatenates
; the two <8 x i1> masks into a <16 x i1> with an identity shufflevector, and
; zero-extends the result to <16 x i16>.
; With +avx512vl,+prefer-256-bit the expected code expands each half in ymm
; registers and joins them with vinserti128; the other runs merge the masks
; with kunpckbw and expand in zmm. Every run finishes with vpsrlw $15.
define <16 x i16> @testv16i1_zext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_zext_v16i16:
; AVX256:       # %bb.0:
; AVX256-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT:    vpcmpeqd (%rsi), %ymm0, %k2
; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1} {z}
; AVX256-NEXT:    vpmovdw %ymm1, %xmm1
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k2} {z}
; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
; AVX256-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX256-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX256-NEXT:    retq
;
; AVX512VL-LABEL: testv16i1_zext_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
; AVX512VL-NEXT:    vpcmpeqd (%rsi), %ymm0, %k1
; AVX512VL-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VL-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512F-LABEL: testv16i1_zext_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT:    retq
  %in = load <8 x i32>, <8 x i32>* %p
  %cmp = icmp eq <8 x i32> %in, zeroinitializer
  %in2 = load <8 x i32>, <8 x i32>* %q
  %cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
  %concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %ext = zext <16 x i1> %concat to <16 x i16>
  ret <16 x i16> %ext
}
