; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=NOBW --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=NOBW --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=NOBW --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefix=X64 --check-prefix=AVX512VLBWDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512cd,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=NOBW --check-prefix=AVX512 --check-prefix=AVX512VLCD
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512cd | FileCheck %s --check-prefix=X64 --check-prefix=NOBW --check-prefix=AVX512 --check-prefix=AVX512CD
;
; Just one 32-bit run to make sure we do reasonable things for i64 lzcnt.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32-AVX

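; The vXi64 tests below: runs without AVX512CD expand the ctlz via a per-nibble
; vpshufb lookup, then merge adjacent byte/word/dword counts into qword counts
; with compare-to-zero masks, shifts and adds; the AVX512CD runs select
; vplzcntq directly (through zmm registers when AVX512VL is unavailable).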
define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
; AVX1-LABEL: testv4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX1-NEXT:    vpshufb %xmm1, %xmm4, %xmm5
; AVX1-NEXT:    vpsrlw $4, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm6
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm6, %xmm7
; AVX1-NEXT:    vpand %xmm7, %xmm5, %xmm5
; AVX1-NEXT:    vpshufb %xmm6, %xmm4, %xmm6
; AVX1-NEXT:    vpaddb %xmm6, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm6
; AVX1-NEXT:    vpsrlw $8, %xmm6, %xmm6
; AVX1-NEXT:    vpand %xmm6, %xmm5, %xmm6
; AVX1-NEXT:    vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT:    vpaddw %xmm6, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm2, %xmm6
; AVX1-NEXT:    vpsrld $16, %xmm6, %xmm6
; AVX1-NEXT:    vpand %xmm6, %xmm5, %xmm6
; AVX1-NEXT:    vpsrld $16, %xmm5, %xmm5
; AVX1-NEXT:    vpaddd %xmm6, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlq $32, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
; AVX1-NEXT:    vpsrlq $32, %xmm5, %xmm5
; AVX1-NEXT:    vpaddq %xmm2, %xmm5, %xmm2
; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm5
; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm6
; AVX1-NEXT:    vpand %xmm3, %xmm6, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm3, %xmm6
; AVX1-NEXT:    vpand %xmm6, %xmm5, %xmm5
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpaddb %xmm3, %xmm5, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm4
; AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm4
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpaddw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm4
; AVX1-NEXT:    vpsrld $16, %xmm4, %xmm4
; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm4
; AVX1-NEXT:    vpsrld $16, %xmm3, %xmm3
; AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $32, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpsrlq $32, %xmm3, %xmm1
; AVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX2-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX2-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm2
; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX2-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm2
; AVX2-NEXT:    vpsrld $16, %ymm2, %ymm2
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm2
; AVX2-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX2-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vpsrlq $32, %ymm1, %ymm1
; AVX2-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: testv4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512VL-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VL-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX512VL-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512VL-NEXT:    vpand %ymm2, %ymm1, %ymm2
; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512VL-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm2
; AVX512VL-NEXT:    vpsrld $16, %ymm2, %ymm2
; AVX512VL-NEXT:    vpand %ymm2, %ymm1, %ymm2
; AVX512VL-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX512VL-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT:    vpsrlq $32, %ymm0, %ymm0
; AVX512VL-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    vpsrlq $32, %ymm1, %ymm1
; AVX512VL-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512VLBWDQ-LABEL: testv4i64:
; AVX512VLBWDQ:       # %bb.0:
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX512VLBWDQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VLBWDQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT:    vpand %ymm2, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrld $16, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT:    vpand %ymm2, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpsrlq $32, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    vpsrlq $32, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    retq
;
; AVX512VLCD-LABEL: testv4i64:
; AVX512VLCD:       # %bb.0:
; AVX512VLCD-NEXT:    vplzcntq %ymm0, %ymm0
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: testv4i64:
; AVX512CD:       # %bb.0:
; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512CD-NEXT:    vplzcntq %zmm0, %zmm0
; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512CD-NEXT:    retq
;
; X32-AVX-LABEL: testv4i64:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm4
; X32-AVX-NEXT:    vpand %ymm1, %ymm4, %ymm1
; X32-AVX-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; X32-AVX-NEXT:    vpand %ymm5, %ymm2, %ymm2
; X32-AVX-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; X32-AVX-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
; X32-AVX-NEXT:    vpsrlw $8, %ymm2, %ymm2
; X32-AVX-NEXT:    vpand %ymm2, %ymm1, %ymm2
; X32-AVX-NEXT:    vpsrlw $8, %ymm1, %ymm1
; X32-AVX-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; X32-AVX-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm2
; X32-AVX-NEXT:    vpsrld $16, %ymm2, %ymm2
; X32-AVX-NEXT:    vpand %ymm2, %ymm1, %ymm2
; X32-AVX-NEXT:    vpsrld $16, %ymm1, %ymm1
; X32-AVX-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
; X32-AVX-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
; X32-AVX-NEXT:    vpsrlq $32, %ymm0, %ymm0
; X32-AVX-NEXT:    vpand %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    vpsrlq $32, %ymm1, %ymm1
; X32-AVX-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    retl

  %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 0)
  ret <4 x i64> %out
}

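; Zero-undef variant (i1 -1 passed to llvm.ctlz): as the checks show, it
; currently lowers to the same code as the defined-at-zero version above.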
define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind {
; AVX1-LABEL: testv4i64u:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX1-NEXT:    vpshufb %xmm1, %xmm4, %xmm5
; AVX1-NEXT:    vpsrlw $4, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm6
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm6, %xmm7
; AVX1-NEXT:    vpand %xmm7, %xmm5, %xmm5
; AVX1-NEXT:    vpshufb %xmm6, %xmm4, %xmm6
; AVX1-NEXT:    vpaddb %xmm6, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm6
; AVX1-NEXT:    vpsrlw $8, %xmm6, %xmm6
; AVX1-NEXT:    vpand %xmm6, %xmm5, %xmm6
; AVX1-NEXT:    vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT:    vpaddw %xmm6, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm2, %xmm6
; AVX1-NEXT:    vpsrld $16, %xmm6, %xmm6
; AVX1-NEXT:    vpand %xmm6, %xmm5, %xmm6
; AVX1-NEXT:    vpsrld $16, %xmm5, %xmm5
; AVX1-NEXT:    vpaddd %xmm6, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlq $32, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
; AVX1-NEXT:    vpsrlq $32, %xmm5, %xmm5
; AVX1-NEXT:    vpaddq %xmm2, %xmm5, %xmm2
; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm5
; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm6
; AVX1-NEXT:    vpand %xmm3, %xmm6, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm3, %xmm6
; AVX1-NEXT:    vpand %xmm6, %xmm5, %xmm5
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpaddb %xmm3, %xmm5, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm4
; AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm4
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpaddw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm4
; AVX1-NEXT:    vpsrld $16, %xmm4, %xmm4
; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm4
; AVX1-NEXT:    vpsrld $16, %xmm3, %xmm3
; AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $32, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpsrlq $32, %xmm3, %xmm1
; AVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv4i64u:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX2-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX2-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm2
; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX2-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm2
; AVX2-NEXT:    vpsrld $16, %ymm2, %ymm2
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm2
; AVX2-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX2-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vpsrlq $32, %ymm1, %ymm1
; AVX2-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: testv4i64u:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512VL-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VL-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX512VL-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512VL-NEXT:    vpand %ymm2, %ymm1, %ymm2
; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512VL-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm2
; AVX512VL-NEXT:    vpsrld $16, %ymm2, %ymm2
; AVX512VL-NEXT:    vpand %ymm2, %ymm1, %ymm2
; AVX512VL-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX512VL-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT:    vpsrlq $32, %ymm0, %ymm0
; AVX512VL-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    vpsrlq $32, %ymm1, %ymm1
; AVX512VL-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512VLBWDQ-LABEL: testv4i64u:
; AVX512VLBWDQ:       # %bb.0:
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX512VLBWDQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VLBWDQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT:    vpand %ymm2, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrld $16, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT:    vpand %ymm2, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpsrlq $32, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    vpsrlq $32, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    retq
;
; AVX512VLCD-LABEL: testv4i64u:
; AVX512VLCD:       # %bb.0:
; AVX512VLCD-NEXT:    vplzcntq %ymm0, %ymm0
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: testv4i64u:
; AVX512CD:       # %bb.0:
; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512CD-NEXT:    vplzcntq %zmm0, %zmm0
; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512CD-NEXT:    retq
;
; X32-AVX-LABEL: testv4i64u:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm4
; X32-AVX-NEXT:    vpand %ymm1, %ymm4, %ymm1
; X32-AVX-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; X32-AVX-NEXT:    vpand %ymm5, %ymm2, %ymm2
; X32-AVX-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; X32-AVX-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
; X32-AVX-NEXT:    vpsrlw $8, %ymm2, %ymm2
; X32-AVX-NEXT:    vpand %ymm2, %ymm1, %ymm2
; X32-AVX-NEXT:    vpsrlw $8, %ymm1, %ymm1
; X32-AVX-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; X32-AVX-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm2
; X32-AVX-NEXT:    vpsrld $16, %ymm2, %ymm2
; X32-AVX-NEXT:    vpand %ymm2, %ymm1, %ymm2
; X32-AVX-NEXT:    vpsrld $16, %ymm1, %ymm1
; X32-AVX-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
; X32-AVX-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
; X32-AVX-NEXT:    vpsrlq $32, %ymm0, %ymm0
; X32-AVX-NEXT:    vpand %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    vpsrlq $32, %ymm1, %ymm1
; X32-AVX-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    retl

  %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 -1)
  ret <4 x i64> %out
}

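; The vXi32 tests use the same nibble-lookup expansion, stopping the merge at
; dword granularity; the AVX512CD runs select vplzcntd.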
define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
; AVX1-LABEL: testv8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm5
; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm6, %xmm6, %xmm6
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm5, %xmm7
; AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT:    vpaddb %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm1, %xmm5
; AVX1-NEXT:    vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm5
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpaddw %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm6, %xmm1, %xmm1
; AVX1-NEXT:    vpsrld $16, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpsrld $16, %xmm3, %xmm3
; AVX1-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm5
; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm2, %xmm5
; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpshufb %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpaddb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm0, %xmm3
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm3
; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT:    vpaddw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm6, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
; AVX1-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX2-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX2-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm2
; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX2-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX2-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: testv8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512VL-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VL-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX512VL-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512VL-NEXT:    vpand %ymm2, %ymm1, %ymm2
; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512VL-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX512VL-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX512VL-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512VLBWDQ-LABEL: testv8i32:
; AVX512VLBWDQ:       # %bb.0:
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX512VLBWDQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VLBWDQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT:    vpand %ymm2, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    retq
;
; AVX512VLCD-LABEL: testv8i32:
; AVX512VLCD:       # %bb.0:
; AVX512VLCD-NEXT:    vplzcntd %ymm0, %ymm0
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: testv8i32:
; AVX512CD:       # %bb.0:
; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512CD-NEXT:    retq
;
; X32-AVX-LABEL: testv8i32:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm4
; X32-AVX-NEXT:    vpand %ymm1, %ymm4, %ymm1
; X32-AVX-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; X32-AVX-NEXT:    vpand %ymm5, %ymm2, %ymm2
; X32-AVX-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; X32-AVX-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
; X32-AVX-NEXT:    vpsrlw $8, %ymm2, %ymm2
; X32-AVX-NEXT:    vpand %ymm2, %ymm1, %ymm2
; X32-AVX-NEXT:    vpsrlw $8, %ymm1, %ymm1
; X32-AVX-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; X32-AVX-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm0
; X32-AVX-NEXT:    vpsrld $16, %ymm0, %ymm0
; X32-AVX-NEXT:    vpand %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    vpsrld $16, %ymm1, %ymm1
; X32-AVX-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    retl

  %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 0)
  ret <8 x i32> %out
}

define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind {
; AVX1-LABEL: testv8i32u:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm5
; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm6, %xmm6, %xmm6
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm5, %xmm7
; AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT:    vpaddb %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm1, %xmm5
; AVX1-NEXT:    vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm5
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpaddw %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm6, %xmm1, %xmm1
; AVX1-NEXT:    vpsrld $16, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpsrld $16, %xmm3, %xmm3
; AVX1-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm5
; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm2, %xmm5
; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpshufb %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpaddb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm0, %xmm3
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm3
; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT:    vpaddw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm6, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
; AVX1-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv8i32u:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX2-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX2-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm2
; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX2-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX2-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: testv8i32u:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512VL-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VL-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX512VL-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512VL-NEXT:    vpand %ymm2, %ymm1, %ymm2
; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512VL-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX512VL-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX512VL-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512VLBWDQ-LABEL: testv8i32u:
; AVX512VLBWDQ:       # %bb.0:
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX512VLBWDQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VLBWDQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT:    vpand %ymm2, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    retq
;
; AVX512VLCD-LABEL: testv8i32u:
; AVX512VLCD:       # %bb.0:
; AVX512VLCD-NEXT:    vplzcntd %ymm0, %ymm0
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: testv8i32u:
; AVX512CD:       # %bb.0:
; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512CD-NEXT:    retq
;
; X32-AVX-LABEL: testv8i32u:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm4
; X32-AVX-NEXT:    vpand %ymm1, %ymm4, %ymm1
; X32-AVX-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; X32-AVX-NEXT:    vpand %ymm5, %ymm2, %ymm2
; X32-AVX-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; X32-AVX-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
; X32-AVX-NEXT:    vpsrlw $8, %ymm2, %ymm2
; X32-AVX-NEXT:    vpand %ymm2, %ymm1, %ymm2
; X32-AVX-NEXT:    vpsrlw $8, %ymm1, %ymm1
; X32-AVX-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; X32-AVX-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm0
; X32-AVX-NEXT:    vpsrld $16, %ymm0, %ymm0
; X32-AVX-NEXT:    vpand %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    vpsrld $16, %ymm1, %ymm1
; X32-AVX-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    retl

  %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 -1)
  ret <8 x i32> %out
}

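; The vXi16 tests: there is no 16-bit vplzcnt, so the AVX512CD runs widen to
; dwords, use vplzcntd, truncate with vpmovdw and subtract the widening bias
; of 16; the remaining runs use the nibble-lookup expansion merged to words.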
define <16 x i16> @testv16i16(<16 x i16> %in) nounwind {
; AVX1-LABEL: testv16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm5
; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm6, %xmm6, %xmm6
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm5, %xmm7
; AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT:    vpaddb %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpaddw %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm5
; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm2, %xmm5
; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpshufb %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpaddb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT:    vpaddw %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX2-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX2-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX2-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: testv16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512VL-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VL-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512VL-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512VL-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512VLBWDQ-LABEL: testv16i16:
; AVX512VLBWDQ:       # %bb.0:
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX512VLBWDQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VLBWDQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    retq
;
; AVX512-LABEL: testv16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; X32-AVX-LABEL: testv16i16:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm4
; X32-AVX-NEXT:    vpand %ymm1, %ymm4, %ymm1
; X32-AVX-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; X32-AVX-NEXT:    vpand %ymm5, %ymm2, %ymm2
; X32-AVX-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; X32-AVX-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
; X32-AVX-NEXT:    vpsrlw $8, %ymm0, %ymm0
; X32-AVX-NEXT:    vpand %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    vpsrlw $8, %ymm1, %ymm1
; X32-AVX-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    retl
  %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 0)
  ret <16 x i16> %out
}

define <16 x i16> @testv16i16u(<16 x i16> %in) nounwind {
; AVX1-LABEL: testv16i16u:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm5
; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm6, %xmm6, %xmm6
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm5, %xmm7
; AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT:    vpaddb %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpaddw %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm5
; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm2, %xmm5
; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpshufb %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpaddb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT:    vpaddw %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv16i16u:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX2-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX2-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX2-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: testv16i16u:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512VL-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VL-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512VL-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512VL-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512VLBWDQ-LABEL: testv16i16u:
; AVX512VLBWDQ:       # %bb.0:
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX512VLBWDQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VLBWDQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    retq
;
; AVX512-LABEL: testv16i16u:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; X32-AVX-LABEL: testv16i16u:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm4
; X32-AVX-NEXT:    vpand %ymm1, %ymm4, %ymm1
; X32-AVX-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; X32-AVX-NEXT:    vpand %ymm5, %ymm2, %ymm2
; X32-AVX-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; X32-AVX-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
; X32-AVX-NEXT:    vpsrlw $8, %ymm0, %ymm0
; X32-AVX-NEXT:    vpand %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    vpsrlw $8, %ymm1, %ymm1
; X32-AVX-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    retl
  %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 -1)
  ret <16 x i16> %out
}

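; The vXi8 tests: the AVX512CD runs widen each 128-bit half to dwords, use
; vplzcntd, truncate back and subtract the widening bias of 24; the remaining
; runs need only the nibble lookup since the counts are already per byte.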
define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
; AVX1-LABEL: testv32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm1, %xmm6
; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT:    vpshufb %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vpaddb %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm0, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vpaddb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: testv32i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
; AVX512VL-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; AVX512VL-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512VLBWDQ-LABEL: testv32i8:
; AVX512VLBWDQ:       # %bb.0:
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX512VLBWDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; AVX512VLBWDQ-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    retq
;
; AVX512-LABEL: testv32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512-NEXT:    vplzcntd %zmm1, %zmm1
; AVX512-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; X32-AVX-LABEL: testv32i8:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm0
; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm0
; X32-AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
; X32-AVX-NEXT:    vpand %ymm1, %ymm2, %ymm1
; X32-AVX-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; X32-AVX-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    retl
  %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 0)
  ret <32 x i8> %out
}

define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
; AVX1-LABEL: testv32i8u:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm1, %xmm6
; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT:    vpshufb %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vpaddb %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm0, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vpaddb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv32i8u:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: testv32i8u:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
; AVX512VL-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; AVX512VL-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512VLBWDQ-LABEL: testv32i8u:
; AVX512VLBWDQ:       # %bb.0:
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX512VLBWDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; AVX512VLBWDQ-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    retq
;
; AVX512-LABEL: testv32i8u:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512-NEXT:    vplzcntd %zmm1, %zmm1
; AVX512-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; X32-AVX-LABEL: testv32i8u:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm0
; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm0
; X32-AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
; X32-AVX-NEXT:    vpand %ymm1, %ymm2, %ymm1
; X32-AVX-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; X32-AVX-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    retl
  %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 -1)
  ret <32 x i8> %out
}

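; The foldv* tests below pass constant vectors to ctlz, so the counts are
; computed at compile time and each function should lower to a single load of
; the folded constant.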
define <4 x i64> @foldv4i64() nounwind {
; X64-LABEL: foldv4i64:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,64,56]
; X64-NEXT:    retq
;
; X32-AVX-LABEL: foldv4i64:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,0,0,64,0,56,0]
; X32-AVX-NEXT:    retl
  %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
  ret <4 x i64> %out
}

define <4 x i64> @foldv4i64u() nounwind {
; X64-LABEL: foldv4i64u:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,64,56]
; X64-NEXT:    retq
;
; X32-AVX-LABEL: foldv4i64u:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,0,0,64,0,56,0]
; X32-AVX-NEXT:    retl
  %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
  ret <4 x i64> %out
}

define <8 x i32> @foldv8i32() nounwind {
; X64-LABEL: foldv8i32:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
; X64-NEXT:    retq
;
; X32-AVX-LABEL: foldv8i32:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
; X32-AVX-NEXT:    retl
  %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
  ret <8 x i32> %out
}

define <8 x i32> @foldv8i32u() nounwind {
; X64-LABEL: foldv8i32u:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
; X64-NEXT:    retq
;
; X32-AVX-LABEL: foldv8i32u:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
; X32-AVX-NEXT:    retl
  %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
  ret <8 x i32> %out
}

define <16 x i16> @foldv16i16() nounwind {
; X64-LABEL: foldv16i16:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
; X64-NEXT:    retq
;
; X32-AVX-LABEL: foldv16i16:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
; X32-AVX-NEXT:    retl
  %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
  ret <16 x i16> %out
}

define <16 x i16> @foldv16i16u() nounwind {
; X64-LABEL: foldv16i16u:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
; X64-NEXT:    retq
;
; X32-AVX-LABEL: foldv16i16u:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
; X32-AVX-NEXT:    retl
  %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
  ret <16 x i16> %out
}

define <32 x i8> @foldv32i8() nounwind {
; X64-LABEL: foldv32i8:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
; X64-NEXT:    retq
;
; X32-AVX-LABEL: foldv32i8:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
; X32-AVX-NEXT:    retl
  %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
  ret <32 x i8> %out
}

define <32 x i8> @foldv32i8u() nounwind {
; X64-LABEL: foldv32i8u:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
; X64-NEXT:    retq
;
; X32-AVX-LABEL: foldv32i8u:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
; X32-AVX-NEXT:    retl
  %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
  ret <32 x i8> %out
}

declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)