; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512CD
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW
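;
; 512-bit vector leading-zero count: checks that the llvm.ctlz.* intrinsics
; lower to the AVX512CD vplzcntq/vplzcntd instructions. The i1 argument of
; llvm.ctlz is the "is zero undef" flag: the plain tests pass i1 0 (the
; result must be defined for a zero input), while the *u variants pass
; i1 -1 (true), telling the backend the input is known to be non-zero.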

define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
; ALL-LABEL: testv8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vplzcntq %zmm0, %zmm0
; ALL-NEXT:    retq
  %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 0)
  ret <8 x i64> %out
}

define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind {
; ALL-LABEL: testv8i64u:
; ALL:       ## BB#0:
; ALL-NEXT:    vplzcntq %zmm0, %zmm0
; ALL-NEXT:    retq
  %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 -1)
  ret <8 x i64> %out
}

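; i32 elements likewise map directly to a single vplzcntd.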
define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
; ALL-LABEL: testv16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vplzcntd %zmm0, %zmm0
; ALL-NEXT:    retq
  %out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 0)
  ret <16 x i32> %out
}

define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
; ALL-LABEL: testv16i32u:
; ALL:       ## BB#0:
; ALL-NEXT:    vplzcntd %zmm0, %zmm0
; ALL-NEXT:    retq
  %out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 -1)
  ret <16 x i32> %out
}

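; There is no native 16-bit lzcnt, so i16 elements are zero-extended to i32,
; counted with vplzcntd, truncated back with vpmovdw, and the extra 16
; leading zeros introduced by the extension are subtracted off. Without
; AVX512BW the v32i16 argument is already split across ymm0/ymm1; with
; AVX512BW it arrives in zmm0 and is processed one 256-bit half at a time.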
define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
; ALL-LABEL: testv32i16:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovzxwd %ymm0, %zmm0
; ALL-NEXT:    vplzcntd %zmm0, %zmm0
; ALL-NEXT:    vpmovdw %zmm0, %ymm0
; ALL-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; ALL-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
; ALL-NEXT:    vpmovzxwd %ymm1, %zmm1
; ALL-NEXT:    vplzcntd %zmm1, %zmm1
; ALL-NEXT:    vpmovdw %zmm1, %ymm1
; ALL-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
; ALL-NEXT:    retq
;
; AVX512BW-LABEL: testv32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT:    vpmovzxwd %ymm1, %zmm1
; AVX512BW-NEXT:    vplzcntd %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512BW-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
; AVX512BW-NEXT:    vpmovzxwd %ymm0, %zmm0
; AVX512BW-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %out = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %in, i1 0)
  ret <32 x i16> %out
}

define <32 x i16> @testv32i16u(<32 x i16> %in) nounwind {
; ALL-LABEL: testv32i16u:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovzxwd %ymm0, %zmm0
; ALL-NEXT:    vplzcntd %zmm0, %zmm0
; ALL-NEXT:    vpmovdw %zmm0, %ymm0
; ALL-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; ALL-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
; ALL-NEXT:    vpmovzxwd %ymm1, %zmm1
; ALL-NEXT:    vplzcntd %zmm1, %zmm1
; ALL-NEXT:    vpmovdw %zmm1, %ymm1
; ALL-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
; ALL-NEXT:    retq
;
; AVX512BW-LABEL: testv32i16u:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT:    vpmovzxwd %ymm1, %zmm1
; AVX512BW-NEXT:    vplzcntd %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512BW-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
; AVX512BW-NEXT:    vpmovzxwd %ymm0, %zmm0
; AVX512BW-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %out = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %in, i1 -1)
  ret <32 x i16> %out
}

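; i8 elements use the same trick one 128-bit chunk at a time: zero-extend
; sixteen bytes to dwords, vplzcntd, truncate with vpmovdb, and subtract 24,
; the extra leading zeros contributed by extending i8 to i32.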
define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
; ALL-LABEL: testv64i8:
; ALL:       ## BB#0:
; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm2
; ALL-NEXT:    vpmovzxbd %xmm2, %zmm2
; ALL-NEXT:    vplzcntd %zmm2, %zmm2
; ALL-NEXT:    vpmovdb %zmm2, %xmm2
; ALL-NEXT:    vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; ALL-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; ALL-NEXT:    vpmovzxbd %xmm0, %zmm0
; ALL-NEXT:    vplzcntd %zmm0, %zmm0
; ALL-NEXT:    vpmovdb %zmm0, %xmm0
; ALL-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; ALL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; ALL-NEXT:    vextractf128 $1, %ymm1, %xmm2
; ALL-NEXT:    vpmovzxbd %xmm2, %zmm2
; ALL-NEXT:    vplzcntd %zmm2, %zmm2
; ALL-NEXT:    vpmovdb %zmm2, %xmm2
; ALL-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; ALL-NEXT:    vpmovzxbd %xmm1, %zmm1
; ALL-NEXT:    vplzcntd %zmm1, %zmm1
; ALL-NEXT:    vpmovdb %zmm1, %xmm1
; ALL-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
; ALL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; ALL-NEXT:    retq
;
; AVX512BW-LABEL: testv64i8:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512BW-NEXT:    vpmovzxbd %xmm2, %zmm2
; AVX512BW-NEXT:    vplzcntd %zmm2, %zmm2
; AVX512BW-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512BW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT:    vpmovzxbd %xmm1, %zmm1
; AVX512BW-NEXT:    vplzcntd %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512BW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX512BW-NEXT:    vpmovzxbd %xmm2, %zmm2
; AVX512BW-NEXT:    vplzcntd %zmm2, %zmm2
; AVX512BW-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512BW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT:    vpmovzxbd %xmm0, %zmm0
; AVX512BW-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %out = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %in, i1 0)
  ret <64 x i8> %out
}

define <64 x i8> @testv64i8u(<64 x i8> %in) nounwind {
; ALL-LABEL: testv64i8u:
; ALL:       ## BB#0:
; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm2
; ALL-NEXT:    vpmovzxbd %xmm2, %zmm2
; ALL-NEXT:    vplzcntd %zmm2, %zmm2
; ALL-NEXT:    vpmovdb %zmm2, %xmm2
; ALL-NEXT:    vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; ALL-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; ALL-NEXT:    vpmovzxbd %xmm0, %zmm0
; ALL-NEXT:    vplzcntd %zmm0, %zmm0
; ALL-NEXT:    vpmovdb %zmm0, %xmm0
; ALL-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; ALL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; ALL-NEXT:    vextractf128 $1, %ymm1, %xmm2
; ALL-NEXT:    vpmovzxbd %xmm2, %zmm2
; ALL-NEXT:    vplzcntd %zmm2, %zmm2
; ALL-NEXT:    vpmovdb %zmm2, %xmm2
; ALL-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; ALL-NEXT:    vpmovzxbd %xmm1, %zmm1
; ALL-NEXT:    vplzcntd %zmm1, %zmm1
; ALL-NEXT:    vpmovdb %zmm1, %xmm1
; ALL-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
; ALL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; ALL-NEXT:    retq
;
; AVX512BW-LABEL: testv64i8u:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512BW-NEXT:    vpmovzxbd %xmm2, %zmm2
; AVX512BW-NEXT:    vplzcntd %zmm2, %zmm2
; AVX512BW-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512BW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT:    vpmovzxbd %xmm1, %zmm1
; AVX512BW-NEXT:    vplzcntd %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512BW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX512BW-NEXT:    vpmovzxbd %xmm2, %zmm2
; AVX512BW-NEXT:    vplzcntd %zmm2, %zmm2
; AVX512BW-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512BW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT:    vpmovzxbd %xmm0, %zmm0
; AVX512BW-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %out = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %in, i1 -1)
  ret <64 x i8> %out
}

declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1)
declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1)
declare <32 x i16> @llvm.ctlz.v32i16(<32 x i16>, i1)
declare <64 x i8> @llvm.ctlz.v64i8(<64 x i8>, i1)