; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl| FileCheck %s --check-prefix=AVX512VLCD --check-prefix=ALL --check-prefix=AVX512
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s --check-prefix=AVX512CD --check-prefix=ALL --check-prefix=AVX512

; ctlz of <4 x i64> with zero defined (is_zero_undef = false): bsr leaves its
; destination undefined for a zero input, so pre-AVX512 lowering inserts a
; cmov of 127 (xor 63 -> 64) per element; AVX512 targets use vplzcntq directly.
define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
; AVX1-LABEL: testv4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    bsrq %rax, %rax
; AVX1-NEXT:    movl $127, %ecx
; AVX1-NEXT:    cmoveq %rcx, %rax
; AVX1-NEXT:    xorq $63, %rax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    bsrq %rax, %rax
; AVX1-NEXT:    cmoveq %rcx, %rax
; AVX1-NEXT:    xorq $63, %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    bsrq %rax, %rax
; AVX1-NEXT:    cmoveq %rcx, %rax
; AVX1-NEXT:    xorq $63, %rax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    bsrq %rax, %rax
; AVX1-NEXT:    cmoveq %rcx, %rax
; AVX1-NEXT:    xorq $63, %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpextrq $1, %xmm1, %rax
; AVX2-NEXT:    bsrq %rax, %rax
; AVX2-NEXT:    movl $127, %ecx
; AVX2-NEXT:    cmoveq %rcx, %rax
; AVX2-NEXT:    xorq $63, %rax
; AVX2-NEXT:    vmovq %rax, %xmm2
; AVX2-NEXT:    vmovq %xmm1, %rax
; AVX2-NEXT:    bsrq %rax, %rax
; AVX2-NEXT:    cmoveq %rcx, %rax
; AVX2-NEXT:    xorq $63, %rax
; AVX2-NEXT:    vmovq %rax, %xmm1
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
; AVX2-NEXT:    bsrq %rax, %rax
; AVX2-NEXT:    cmoveq %rcx, %rax
; AVX2-NEXT:    xorq $63, %rax
; AVX2-NEXT:    vmovq %rax, %xmm2
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    bsrq %rax, %rax
; AVX2-NEXT:    cmoveq %rcx, %rax
; AVX2-NEXT:    xorq $63, %rax
; AVX2-NEXT:    vmovq %rax, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VLCD-LABEL: testv4i64:
; AVX512VLCD:       ## BB#0:
; AVX512VLCD-NEXT:    vplzcntq %ymm0, %ymm0
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: testv4i64:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vplzcntq %zmm0, %zmm0
; AVX512CD-NEXT:    retq

  %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 0)
  ret <4 x i64> %out
}

; ctlz of <4 x i64> with is_zero_undef = true: the zero input case is UB, so
; the cmov guard seen in @testv4i64 is legitimately dropped on pre-AVX512
; targets (bare bsr + xor 63 per element).
define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind {
; AVX1-LABEL: testv4i64u:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    bsrq %rax, %rax
; AVX1-NEXT:    xorq $63, %rax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    bsrq %rax, %rax
; AVX1-NEXT:    xorq $63, %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    bsrq %rax, %rax
; AVX1-NEXT:    xorq $63, %rax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    bsrq %rax, %rax
; AVX1-NEXT:    xorq $63, %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv4i64u:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpextrq $1, %xmm1, %rax
; AVX2-NEXT:    bsrq %rax, %rax
; AVX2-NEXT:    xorq $63, %rax
; AVX2-NEXT:    vmovq %rax, %xmm2
; AVX2-NEXT:    vmovq %xmm1, %rax
; AVX2-NEXT:    bsrq %rax, %rax
; AVX2-NEXT:    xorq $63, %rax
; AVX2-NEXT:    vmovq %rax, %xmm1
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
; AVX2-NEXT:    bsrq %rax, %rax
; AVX2-NEXT:    xorq $63, %rax
; AVX2-NEXT:    vmovq %rax, %xmm2
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    bsrq %rax, %rax
; AVX2-NEXT:    xorq $63, %rax
; AVX2-NEXT:    vmovq %rax, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VLCD-LABEL: testv4i64u:
; AVX512VLCD:       ## BB#0:
; AVX512VLCD-NEXT:    vplzcntq %ymm0, %ymm0
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: testv4i64u:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vplzcntq %zmm0, %zmm0
; AVX512CD-NEXT:    retq

  %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 -1)
  ret <4 x i64> %out
}

; ctlz of <8 x i32>, zero defined: per-element bsr with a cmov of 63
; (xor 31 -> 32) for zero inputs on pre-AVX512 targets; vplzcntd otherwise.
define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
; AVX1-LABEL: testv8i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpextrd $1, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %ecx
; AVX1-NEXT:    movl $63, %eax
; AVX1-NEXT:    cmovel %eax, %ecx
; AVX1-NEXT:    xorl $31, %ecx
; AVX1-NEXT:    vmovd %xmm1, %edx
; AVX1-NEXT:    bsrl %edx, %edx
; AVX1-NEXT:    cmovel %eax, %edx
; AVX1-NEXT:    xorl $31, %edx
; AVX1-NEXT:    vmovd %edx, %xmm2
; AVX1-NEXT:    vpinsrd $1, %ecx, %xmm2, %xmm2
; AVX1-NEXT:    vpextrd $2, %xmm1, %ecx
; AVX1-NEXT:    bsrl %ecx, %ecx
; AVX1-NEXT:    cmovel %eax, %ecx
; AVX1-NEXT:    xorl $31, %ecx
; AVX1-NEXT:    vpinsrd $2, %ecx, %xmm2, %xmm2
; AVX1-NEXT:    vpextrd $3, %xmm1, %ecx
; AVX1-NEXT:    bsrl %ecx, %ecx
; AVX1-NEXT:    cmovel %eax, %ecx
; AVX1-NEXT:    xorl $31, %ecx
; AVX1-NEXT:    vpinsrd $3, %ecx, %xmm2, %xmm1
; AVX1-NEXT:    vpextrd $1, %xmm0, %ecx
; AVX1-NEXT:    bsrl %ecx, %ecx
; AVX1-NEXT:    cmovel %eax, %ecx
; AVX1-NEXT:    xorl $31, %ecx
; AVX1-NEXT:    vmovd %xmm0, %edx
; AVX1-NEXT:    bsrl %edx, %edx
; AVX1-NEXT:    cmovel %eax, %edx
; AVX1-NEXT:    xorl $31, %edx
; AVX1-NEXT:    vmovd %edx, %xmm2
; AVX1-NEXT:    vpinsrd $1, %ecx, %xmm2, %xmm2
; AVX1-NEXT:    vpextrd $2, %xmm0, %ecx
; AVX1-NEXT:    bsrl %ecx, %ecx
; AVX1-NEXT:    cmovel %eax, %ecx
; AVX1-NEXT:    xorl $31, %ecx
; AVX1-NEXT:    vpinsrd $2, %ecx, %xmm2, %xmm2
; AVX1-NEXT:    vpextrd $3, %xmm0, %ecx
; AVX1-NEXT:    bsrl %ecx, %ecx
; AVX1-NEXT:    cmovel %eax, %ecx
; AVX1-NEXT:    xorl $31, %ecx
; AVX1-NEXT:    vpinsrd $3, %ecx, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv8i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpextrd $1, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %ecx
; AVX2-NEXT:    movl $63, %eax
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $31, %ecx
; AVX2-NEXT:    vmovd %xmm1, %edx
; AVX2-NEXT:    bsrl %edx, %edx
; AVX2-NEXT:    cmovel %eax, %edx
; AVX2-NEXT:    xorl $31, %edx
; AVX2-NEXT:    vmovd %edx, %xmm2
; AVX2-NEXT:    vpinsrd $1, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrd $2, %xmm1, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $31, %ecx
; AVX2-NEXT:    vpinsrd $2, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrd $3, %xmm1, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $31, %ecx
; AVX2-NEXT:    vpinsrd $3, %ecx, %xmm2, %xmm1
; AVX2-NEXT:    vpextrd $1, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $31, %ecx
; AVX2-NEXT:    vmovd %xmm0, %edx
; AVX2-NEXT:    bsrl %edx, %edx
; AVX2-NEXT:    cmovel %eax, %edx
; AVX2-NEXT:    xorl $31, %edx
; AVX2-NEXT:    vmovd %edx, %xmm2
; AVX2-NEXT:    vpinsrd $1, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrd $2, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $31, %ecx
; AVX2-NEXT:    vpinsrd $2, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrd $3, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $31, %ecx
; AVX2-NEXT:    vpinsrd $3, %ecx, %xmm2, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VLCD-LABEL: testv8i32:
; AVX512VLCD:       ## BB#0:
; AVX512VLCD-NEXT:    vplzcntd %ymm0, %ymm0
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: testv8i32:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT:    retq

  %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 0)
  ret <8 x i32> %out
}

; ctlz of <8 x i32> with is_zero_undef = true: no cmov guard needed, so the
; pre-AVX512 scalarization is just bsr + xor 31 per element.
define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind {
; AVX1-LABEL: testv8i32u:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpextrd $1, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $31, %eax
; AVX1-NEXT:    vmovd %xmm1, %ecx
; AVX1-NEXT:    bsrl %ecx, %ecx
; AVX1-NEXT:    xorl $31, %ecx
; AVX1-NEXT:    vmovd %ecx, %xmm2
; AVX1-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrd $2, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $31, %eax
; AVX1-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrd $3, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $31, %eax
; AVX1-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
; AVX1-NEXT:    vpextrd $1, %xmm0, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $31, %eax
; AVX1-NEXT:    vmovd %xmm0, %ecx
; AVX1-NEXT:    bsrl %ecx, %ecx
; AVX1-NEXT:    xorl $31, %ecx
; AVX1-NEXT:    vmovd %ecx, %xmm2
; AVX1-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrd $2, %xmm0, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $31, %eax
; AVX1-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrd $3, %xmm0, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $31, %eax
; AVX1-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv8i32u:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpextrd $1, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $31, %eax
; AVX2-NEXT:    vmovd %xmm1, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    xorl $31, %ecx
; AVX2-NEXT:    vmovd %ecx, %xmm2
; AVX2-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrd $2, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $31, %eax
; AVX2-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrd $3, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $31, %eax
; AVX2-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
; AVX2-NEXT:    vpextrd $1, %xmm0, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $31, %eax
; AVX2-NEXT:    vmovd %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    xorl $31, %ecx
; AVX2-NEXT:    vmovd %ecx, %xmm2
; AVX2-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrd $2, %xmm0, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $31, %eax
; AVX2-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrd $3, %xmm0, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $31, %eax
; AVX2-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VLCD-LABEL: testv8i32u:
; AVX512VLCD:       ## BB#0:
; AVX512VLCD-NEXT:    vplzcntd %ymm0, %ymm0
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: testv8i32u:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT:    retq

  %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 -1)
  ret <8 x i32> %out
}

; ctlz of <16 x i16>, zero defined: pre-AVX512 targets scalarize each lane
; (bsrw + cmov of 31, then xor 15); AVX512 targets widen to 32-bit lanes,
; use vplzcntd, narrow back, and subtract the 16-bit/32-bit width delta.
define <16 x i16> @testv16i16(<16 x i16> %in) nounwind {
; AVX1-LABEL: testv16i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpextrw $1, %xmm1, %eax
; AVX1-NEXT:    bsrw %ax, %cx
; AVX1-NEXT:    movw $31, %ax
; AVX1-NEXT:    cmovew %ax, %cx
; AVX1-NEXT:    xorl $15, %ecx
; AVX1-NEXT:    vmovd %xmm1, %edx
; AVX1-NEXT:    bsrw %dx, %dx
; AVX1-NEXT:    cmovew %ax, %dx
; AVX1-NEXT:    xorl $15, %edx
; AVX1-NEXT:    vmovd %edx, %xmm2
; AVX1-NEXT:    vpinsrw $1, %ecx, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $2, %xmm1, %ecx
; AVX1-NEXT:    bsrw %cx, %cx
; AVX1-NEXT:    cmovew %ax, %cx
; AVX1-NEXT:    xorl $15, %ecx
; AVX1-NEXT:    vpinsrw $2, %ecx, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $3, %xmm1, %ecx
; AVX1-NEXT:    bsrw %cx, %cx
; AVX1-NEXT:    cmovew %ax, %cx
; AVX1-NEXT:    xorl $15, %ecx
; AVX1-NEXT:    vpinsrw $3, %ecx, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $4, %xmm1, %ecx
; AVX1-NEXT:    bsrw %cx, %cx
; AVX1-NEXT:    cmovew %ax, %cx
; AVX1-NEXT:    xorl $15, %ecx
; AVX1-NEXT:    vpinsrw $4, %ecx, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $5, %xmm1, %ecx
; AVX1-NEXT:    bsrw %cx, %cx
; AVX1-NEXT:    cmovew %ax, %cx
; AVX1-NEXT:    xorl $15, %ecx
; AVX1-NEXT:    vpinsrw $5, %ecx, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $6, %xmm1, %ecx
; AVX1-NEXT:    bsrw %cx, %cx
; AVX1-NEXT:    cmovew %ax, %cx
; AVX1-NEXT:    xorl $15, %ecx
; AVX1-NEXT:    vpinsrw $6, %ecx, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $7, %xmm1, %ecx
; AVX1-NEXT:    bsrw %cx, %cx
; AVX1-NEXT:    cmovew %ax, %cx
; AVX1-NEXT:    xorl $15, %ecx
; AVX1-NEXT:    vpinsrw $7, %ecx, %xmm2, %xmm1
; AVX1-NEXT:    vpextrw $1, %xmm0, %ecx
; AVX1-NEXT:    bsrw %cx, %cx
; AVX1-NEXT:    cmovew %ax, %cx
; AVX1-NEXT:    xorl $15, %ecx
; AVX1-NEXT:    vmovd %xmm0, %edx
; AVX1-NEXT:    bsrw %dx, %dx
; AVX1-NEXT:    cmovew %ax, %dx
; AVX1-NEXT:    xorl $15, %edx
; AVX1-NEXT:    vmovd %edx, %xmm2
; AVX1-NEXT:    vpinsrw $1, %ecx, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $2, %xmm0, %ecx
; AVX1-NEXT:    bsrw %cx, %cx
; AVX1-NEXT:    cmovew %ax, %cx
; AVX1-NEXT:    xorl $15, %ecx
; AVX1-NEXT:    vpinsrw $2, %ecx, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $3, %xmm0, %ecx
; AVX1-NEXT:    bsrw %cx, %cx
; AVX1-NEXT:    cmovew %ax, %cx
; AVX1-NEXT:    xorl $15, %ecx
; AVX1-NEXT:    vpinsrw $3, %ecx, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $4, %xmm0, %ecx
; AVX1-NEXT:    bsrw %cx, %cx
; AVX1-NEXT:    cmovew %ax, %cx
; AVX1-NEXT:    xorl $15, %ecx
; AVX1-NEXT:    vpinsrw $4, %ecx, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $5, %xmm0, %ecx
; AVX1-NEXT:    bsrw %cx, %cx
; AVX1-NEXT:    cmovew %ax, %cx
; AVX1-NEXT:    xorl $15, %ecx
; AVX1-NEXT:    vpinsrw $5, %ecx, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $6, %xmm0, %ecx
; AVX1-NEXT:    bsrw %cx, %cx
; AVX1-NEXT:    cmovew %ax, %cx
; AVX1-NEXT:    xorl $15, %ecx
; AVX1-NEXT:    vpinsrw $6, %ecx, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $7, %xmm0, %ecx
; AVX1-NEXT:    bsrw %cx, %cx
; AVX1-NEXT:    cmovew %ax, %cx
; AVX1-NEXT:    xorl $15, %ecx
; AVX1-NEXT:    vpinsrw $7, %ecx, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv16i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpextrw $1, %xmm1, %eax
; AVX2-NEXT:    bsrw %ax, %cx
; AVX2-NEXT:    movw $31, %ax
; AVX2-NEXT:    cmovew %ax, %cx
; AVX2-NEXT:    xorl $15, %ecx
; AVX2-NEXT:    vmovd %xmm1, %edx
; AVX2-NEXT:    bsrw %dx, %dx
; AVX2-NEXT:    cmovew %ax, %dx
; AVX2-NEXT:    xorl $15, %edx
; AVX2-NEXT:    vmovd %edx, %xmm2
; AVX2-NEXT:    vpinsrw $1, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $2, %xmm1, %ecx
; AVX2-NEXT:    bsrw %cx, %cx
; AVX2-NEXT:    cmovew %ax, %cx
; AVX2-NEXT:    xorl $15, %ecx
; AVX2-NEXT:    vpinsrw $2, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $3, %xmm1, %ecx
; AVX2-NEXT:    bsrw %cx, %cx
; AVX2-NEXT:    cmovew %ax, %cx
; AVX2-NEXT:    xorl $15, %ecx
; AVX2-NEXT:    vpinsrw $3, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $4, %xmm1, %ecx
; AVX2-NEXT:    bsrw %cx, %cx
; AVX2-NEXT:    cmovew %ax, %cx
; AVX2-NEXT:    xorl $15, %ecx
; AVX2-NEXT:    vpinsrw $4, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $5, %xmm1, %ecx
; AVX2-NEXT:    bsrw %cx, %cx
; AVX2-NEXT:    cmovew %ax, %cx
; AVX2-NEXT:    xorl $15, %ecx
; AVX2-NEXT:    vpinsrw $5, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $6, %xmm1, %ecx
; AVX2-NEXT:    bsrw %cx, %cx
; AVX2-NEXT:    cmovew %ax, %cx
; AVX2-NEXT:    xorl $15, %ecx
; AVX2-NEXT:    vpinsrw $6, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $7, %xmm1, %ecx
; AVX2-NEXT:    bsrw %cx, %cx
; AVX2-NEXT:    cmovew %ax, %cx
; AVX2-NEXT:    xorl $15, %ecx
; AVX2-NEXT:    vpinsrw $7, %ecx, %xmm2, %xmm1
; AVX2-NEXT:    vpextrw $1, %xmm0, %ecx
; AVX2-NEXT:    bsrw %cx, %cx
; AVX2-NEXT:    cmovew %ax, %cx
; AVX2-NEXT:    xorl $15, %ecx
; AVX2-NEXT:    vmovd %xmm0, %edx
; AVX2-NEXT:    bsrw %dx, %dx
; AVX2-NEXT:    cmovew %ax, %dx
; AVX2-NEXT:    xorl $15, %edx
; AVX2-NEXT:    vmovd %edx, %xmm2
; AVX2-NEXT:    vpinsrw $1, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $2, %xmm0, %ecx
; AVX2-NEXT:    bsrw %cx, %cx
; AVX2-NEXT:    cmovew %ax, %cx
; AVX2-NEXT:    xorl $15, %ecx
; AVX2-NEXT:    vpinsrw $2, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $3, %xmm0, %ecx
; AVX2-NEXT:    bsrw %cx, %cx
; AVX2-NEXT:    cmovew %ax, %cx
; AVX2-NEXT:    xorl $15, %ecx
; AVX2-NEXT:    vpinsrw $3, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $4, %xmm0, %ecx
; AVX2-NEXT:    bsrw %cx, %cx
; AVX2-NEXT:    cmovew %ax, %cx
; AVX2-NEXT:    xorl $15, %ecx
; AVX2-NEXT:    vpinsrw $4, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $5, %xmm0, %ecx
; AVX2-NEXT:    bsrw %cx, %cx
; AVX2-NEXT:    cmovew %ax, %cx
; AVX2-NEXT:    xorl $15, %ecx
; AVX2-NEXT:    vpinsrw $5, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $6, %xmm0, %ecx
; AVX2-NEXT:    bsrw %cx, %cx
; AVX2-NEXT:    cmovew %ax, %cx
; AVX2-NEXT:    xorl $15, %ecx
; AVX2-NEXT:    vpinsrw $6, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $7, %xmm0, %ecx
; AVX2-NEXT:    bsrw %cx, %cx
; AVX2-NEXT:    cmovew %ax, %cx
; AVX2-NEXT:    xorl $15, %ecx
; AVX2-NEXT:    vpinsrw $7, %ecx, %xmm2, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VLCD-LABEL: testv16i16:
; AVX512VLCD:       ## BB#0:
; AVX512VLCD-NEXT:    vpmovzxwd %ymm0, %zmm0
; AVX512VLCD-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512VLCD-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VLCD-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: testv16i16:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vpmovzxwd %ymm0, %zmm0
; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512CD-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX512CD-NEXT:    retq
  %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 0)
  ret <16 x i16> %out
}

; ctlz of <16 x i16> with is_zero_undef = true: per-lane bsrw + xor 15
; without the cmov guard on pre-AVX512 targets; the AVX512 widen/lzcnt/narrow
; sequence is unchanged from the zero-defined variant.
define <16 x i16> @testv16i16u(<16 x i16> %in) nounwind {
; AVX1-LABEL: testv16i16u:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpextrw $1, %xmm1, %eax
; AVX1-NEXT:    bsrw %ax, %ax
; AVX1-NEXT:    xorl $15, %eax
; AVX1-NEXT:    vmovd %xmm1, %ecx
; AVX1-NEXT:    bsrw %cx, %cx
; AVX1-NEXT:    xorl $15, %ecx
; AVX1-NEXT:    vmovd %ecx, %xmm2
; AVX1-NEXT:    vpinsrw $1, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $2, %xmm1, %eax
; AVX1-NEXT:    bsrw %ax, %ax
; AVX1-NEXT:    xorl $15, %eax
; AVX1-NEXT:    vpinsrw $2, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $3, %xmm1, %eax
; AVX1-NEXT:    bsrw %ax, %ax
; AVX1-NEXT:    xorl $15, %eax
; AVX1-NEXT:    vpinsrw $3, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $4, %xmm1, %eax
; AVX1-NEXT:    bsrw %ax, %ax
; AVX1-NEXT:    xorl $15, %eax
; AVX1-NEXT:    vpinsrw $4, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $5, %xmm1, %eax
; AVX1-NEXT:    bsrw %ax, %ax
; AVX1-NEXT:    xorl $15, %eax
; AVX1-NEXT:    vpinsrw $5, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $6, %xmm1, %eax
; AVX1-NEXT:    bsrw %ax, %ax
; AVX1-NEXT:    xorl $15, %eax
; AVX1-NEXT:    vpinsrw $6, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $7, %xmm1, %eax
; AVX1-NEXT:    bsrw %ax, %ax
; AVX1-NEXT:    xorl $15, %eax
; AVX1-NEXT:    vpinsrw $7, %eax, %xmm2, %xmm1
; AVX1-NEXT:    vpextrw $1, %xmm0, %eax
; AVX1-NEXT:    bsrw %ax, %ax
; AVX1-NEXT:    xorl $15, %eax
; AVX1-NEXT:    vmovd %xmm0, %ecx
; AVX1-NEXT:    bsrw %cx, %cx
; AVX1-NEXT:    xorl $15, %ecx
; AVX1-NEXT:    vmovd %ecx, %xmm2
; AVX1-NEXT:    vpinsrw $1, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $2, %xmm0, %eax
; AVX1-NEXT:    bsrw %ax, %ax
; AVX1-NEXT:    xorl $15, %eax
; AVX1-NEXT:    vpinsrw $2, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $3, %xmm0, %eax
; AVX1-NEXT:    bsrw %ax, %ax
; AVX1-NEXT:    xorl $15, %eax
; AVX1-NEXT:    vpinsrw $3, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $4, %xmm0, %eax
; AVX1-NEXT:    bsrw %ax, %ax
; AVX1-NEXT:    xorl $15, %eax
; AVX1-NEXT:    vpinsrw $4, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $5, %xmm0, %eax
; AVX1-NEXT:    bsrw %ax, %ax
; AVX1-NEXT:    xorl $15, %eax
; AVX1-NEXT:    vpinsrw $5, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $6, %xmm0, %eax
; AVX1-NEXT:    bsrw %ax, %ax
; AVX1-NEXT:    xorl $15, %eax
; AVX1-NEXT:    vpinsrw $6, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrw $7, %xmm0, %eax
; AVX1-NEXT:    bsrw %ax, %ax
; AVX1-NEXT:    xorl $15, %eax
; AVX1-NEXT:    vpinsrw $7, %eax, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv16i16u:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpextrw $1, %xmm1, %eax
; AVX2-NEXT:    bsrw %ax, %ax
; AVX2-NEXT:    xorl $15, %eax
; AVX2-NEXT:    vmovd %xmm1, %ecx
; AVX2-NEXT:    bsrw %cx, %cx
; AVX2-NEXT:    xorl $15, %ecx
; AVX2-NEXT:    vmovd %ecx, %xmm2
; AVX2-NEXT:    vpinsrw $1, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $2, %xmm1, %eax
; AVX2-NEXT:    bsrw %ax, %ax
; AVX2-NEXT:    xorl $15, %eax
; AVX2-NEXT:    vpinsrw $2, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $3, %xmm1, %eax
; AVX2-NEXT:    bsrw %ax, %ax
; AVX2-NEXT:    xorl $15, %eax
; AVX2-NEXT:    vpinsrw $3, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $4, %xmm1, %eax
; AVX2-NEXT:    bsrw %ax, %ax
; AVX2-NEXT:    xorl $15, %eax
; AVX2-NEXT:    vpinsrw $4, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $5, %xmm1, %eax
; AVX2-NEXT:    bsrw %ax, %ax
; AVX2-NEXT:    xorl $15, %eax
; AVX2-NEXT:    vpinsrw $5, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $6, %xmm1, %eax
; AVX2-NEXT:    bsrw %ax, %ax
; AVX2-NEXT:    xorl $15, %eax
; AVX2-NEXT:    vpinsrw $6, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $7, %xmm1, %eax
; AVX2-NEXT:    bsrw %ax, %ax
; AVX2-NEXT:    xorl $15, %eax
; AVX2-NEXT:    vpinsrw $7, %eax, %xmm2, %xmm1
; AVX2-NEXT:    vpextrw $1, %xmm0, %eax
; AVX2-NEXT:    bsrw %ax, %ax
; AVX2-NEXT:    xorl $15, %eax
; AVX2-NEXT:    vmovd %xmm0, %ecx
; AVX2-NEXT:    bsrw %cx, %cx
; AVX2-NEXT:    xorl $15, %ecx
; AVX2-NEXT:    vmovd %ecx, %xmm2
; AVX2-NEXT:    vpinsrw $1, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $2, %xmm0, %eax
; AVX2-NEXT:    bsrw %ax, %ax
; AVX2-NEXT:    xorl $15, %eax
; AVX2-NEXT:    vpinsrw $2, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $3, %xmm0, %eax
; AVX2-NEXT:    bsrw %ax, %ax
; AVX2-NEXT:    xorl $15, %eax
; AVX2-NEXT:    vpinsrw $3, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $4, %xmm0, %eax
; AVX2-NEXT:    bsrw %ax, %ax
; AVX2-NEXT:    xorl $15, %eax
; AVX2-NEXT:    vpinsrw $4, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $5, %xmm0, %eax
; AVX2-NEXT:    bsrw %ax, %ax
; AVX2-NEXT:    xorl $15, %eax
; AVX2-NEXT:    vpinsrw $5, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $6, %xmm0, %eax
; AVX2-NEXT:    bsrw %ax, %ax
; AVX2-NEXT:    xorl $15, %eax
; AVX2-NEXT:    vpinsrw $6, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrw $7, %xmm0, %eax
; AVX2-NEXT:    bsrw %ax, %ax
; AVX2-NEXT:    xorl $15, %eax
; AVX2-NEXT:    vpinsrw $7, %eax, %xmm2, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VLCD-LABEL: testv16i16u:
; AVX512VLCD:       ## BB#0:
; AVX512VLCD-NEXT:    vpmovzxwd %ymm0, %zmm0
; AVX512VLCD-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512VLCD-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VLCD-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: testv16i16u:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vpmovzxwd %ymm0, %zmm0
; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512CD-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX512CD-NEXT:    retq
  %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 -1)
  ret <16 x i16> %out
}

697define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
698; AVX1-LABEL: testv32i8:
699; AVX1:       # BB#0:
700; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
701; AVX1-NEXT:    vpextrb $1, %xmm1, %eax
702; AVX1-NEXT:    bsrl %eax, %ecx
703; AVX1-NEXT:    movl $15, %eax
704; AVX1-NEXT:    cmovel %eax, %ecx
705; AVX1-NEXT:    xorl $7, %ecx
706; AVX1-NEXT:    vpextrb $0, %xmm1, %edx
707; AVX1-NEXT:    bsrl %edx, %edx
708; AVX1-NEXT:    cmovel %eax, %edx
709; AVX1-NEXT:    xorl $7, %edx
710; AVX1-NEXT:    vmovd %edx, %xmm2
711; AVX1-NEXT:    vpinsrb $1, %ecx, %xmm2, %xmm2
712; AVX1-NEXT:    vpextrb $2, %xmm1, %ecx
713; AVX1-NEXT:    bsrl %ecx, %ecx
714; AVX1-NEXT:    cmovel %eax, %ecx
715; AVX1-NEXT:    xorl $7, %ecx
716; AVX1-NEXT:    vpinsrb $2, %ecx, %xmm2, %xmm2
717; AVX1-NEXT:    vpextrb $3, %xmm1, %ecx
718; AVX1-NEXT:    bsrl %ecx, %ecx
719; AVX1-NEXT:    cmovel %eax, %ecx
720; AVX1-NEXT:    xorl $7, %ecx
721; AVX1-NEXT:    vpinsrb $3, %ecx, %xmm2, %xmm2
722; AVX1-NEXT:    vpextrb $4, %xmm1, %ecx
723; AVX1-NEXT:    bsrl %ecx, %ecx
724; AVX1-NEXT:    cmovel %eax, %ecx
725; AVX1-NEXT:    xorl $7, %ecx
726; AVX1-NEXT:    vpinsrb $4, %ecx, %xmm2, %xmm2
727; AVX1-NEXT:    vpextrb $5, %xmm1, %ecx
728; AVX1-NEXT:    bsrl %ecx, %ecx
729; AVX1-NEXT:    cmovel %eax, %ecx
730; AVX1-NEXT:    xorl $7, %ecx
731; AVX1-NEXT:    vpinsrb $5, %ecx, %xmm2, %xmm2
732; AVX1-NEXT:    vpextrb $6, %xmm1, %ecx
733; AVX1-NEXT:    bsrl %ecx, %ecx
734; AVX1-NEXT:    cmovel %eax, %ecx
735; AVX1-NEXT:    xorl $7, %ecx
736; AVX1-NEXT:    vpinsrb $6, %ecx, %xmm2, %xmm2
737; AVX1-NEXT:    vpextrb $7, %xmm1, %ecx
738; AVX1-NEXT:    bsrl %ecx, %ecx
739; AVX1-NEXT:    cmovel %eax, %ecx
740; AVX1-NEXT:    xorl $7, %ecx
741; AVX1-NEXT:    vpinsrb $7, %ecx, %xmm2, %xmm2
742; AVX1-NEXT:    vpextrb $8, %xmm1, %ecx
743; AVX1-NEXT:    bsrl %ecx, %ecx
744; AVX1-NEXT:    cmovel %eax, %ecx
745; AVX1-NEXT:    xorl $7, %ecx
746; AVX1-NEXT:    vpinsrb $8, %ecx, %xmm2, %xmm2
747; AVX1-NEXT:    vpextrb $9, %xmm1, %ecx
748; AVX1-NEXT:    bsrl %ecx, %ecx
749; AVX1-NEXT:    cmovel %eax, %ecx
750; AVX1-NEXT:    xorl $7, %ecx
751; AVX1-NEXT:    vpinsrb $9, %ecx, %xmm2, %xmm2
752; AVX1-NEXT:    vpextrb $10, %xmm1, %ecx
753; AVX1-NEXT:    bsrl %ecx, %ecx
754; AVX1-NEXT:    cmovel %eax, %ecx
755; AVX1-NEXT:    xorl $7, %ecx
756; AVX1-NEXT:    vpinsrb $10, %ecx, %xmm2, %xmm2
757; AVX1-NEXT:    vpextrb $11, %xmm1, %ecx
758; AVX1-NEXT:    bsrl %ecx, %ecx
759; AVX1-NEXT:    cmovel %eax, %ecx
760; AVX1-NEXT:    xorl $7, %ecx
761; AVX1-NEXT:    vpinsrb $11, %ecx, %xmm2, %xmm2
762; AVX1-NEXT:    vpextrb $12, %xmm1, %ecx
763; AVX1-NEXT:    bsrl %ecx, %ecx
764; AVX1-NEXT:    cmovel %eax, %ecx
765; AVX1-NEXT:    xorl $7, %ecx
766; AVX1-NEXT:    vpinsrb $12, %ecx, %xmm2, %xmm2
767; AVX1-NEXT:    vpextrb $13, %xmm1, %ecx
768; AVX1-NEXT:    bsrl %ecx, %ecx
769; AVX1-NEXT:    cmovel %eax, %ecx
770; AVX1-NEXT:    xorl $7, %ecx
771; AVX1-NEXT:    vpinsrb $13, %ecx, %xmm2, %xmm2
772; AVX1-NEXT:    vpextrb $14, %xmm1, %ecx
773; AVX1-NEXT:    bsrl %ecx, %ecx
774; AVX1-NEXT:    cmovel %eax, %ecx
775; AVX1-NEXT:    xorl $7, %ecx
776; AVX1-NEXT:    vpinsrb $14, %ecx, %xmm2, %xmm2
777; AVX1-NEXT:    vpextrb $15, %xmm1, %ecx
778; AVX1-NEXT:    bsrl %ecx, %ecx
779; AVX1-NEXT:    cmovel %eax, %ecx
780; AVX1-NEXT:    xorl $7, %ecx
781; AVX1-NEXT:    vpinsrb $15, %ecx, %xmm2, %xmm1
782; AVX1-NEXT:    vpextrb $1, %xmm0, %ecx
783; AVX1-NEXT:    bsrl %ecx, %ecx
784; AVX1-NEXT:    cmovel %eax, %ecx
785; AVX1-NEXT:    xorl $7, %ecx
786; AVX1-NEXT:    vpextrb $0, %xmm0, %edx
787; AVX1-NEXT:    bsrl %edx, %edx
788; AVX1-NEXT:    cmovel %eax, %edx
789; AVX1-NEXT:    xorl $7, %edx
790; AVX1-NEXT:    vmovd %edx, %xmm2
791; AVX1-NEXT:    vpinsrb $1, %ecx, %xmm2, %xmm2
792; AVX1-NEXT:    vpextrb $2, %xmm0, %ecx
793; AVX1-NEXT:    bsrl %ecx, %ecx
794; AVX1-NEXT:    cmovel %eax, %ecx
795; AVX1-NEXT:    xorl $7, %ecx
796; AVX1-NEXT:    vpinsrb $2, %ecx, %xmm2, %xmm2
797; AVX1-NEXT:    vpextrb $3, %xmm0, %ecx
798; AVX1-NEXT:    bsrl %ecx, %ecx
799; AVX1-NEXT:    cmovel %eax, %ecx
800; AVX1-NEXT:    xorl $7, %ecx
801; AVX1-NEXT:    vpinsrb $3, %ecx, %xmm2, %xmm2
802; AVX1-NEXT:    vpextrb $4, %xmm0, %ecx
803; AVX1-NEXT:    bsrl %ecx, %ecx
804; AVX1-NEXT:    cmovel %eax, %ecx
805; AVX1-NEXT:    xorl $7, %ecx
806; AVX1-NEXT:    vpinsrb $4, %ecx, %xmm2, %xmm2
807; AVX1-NEXT:    vpextrb $5, %xmm0, %ecx
808; AVX1-NEXT:    bsrl %ecx, %ecx
809; AVX1-NEXT:    cmovel %eax, %ecx
810; AVX1-NEXT:    xorl $7, %ecx
811; AVX1-NEXT:    vpinsrb $5, %ecx, %xmm2, %xmm2
812; AVX1-NEXT:    vpextrb $6, %xmm0, %ecx
813; AVX1-NEXT:    bsrl %ecx, %ecx
814; AVX1-NEXT:    cmovel %eax, %ecx
815; AVX1-NEXT:    xorl $7, %ecx
816; AVX1-NEXT:    vpinsrb $6, %ecx, %xmm2, %xmm2
817; AVX1-NEXT:    vpextrb $7, %xmm0, %ecx
818; AVX1-NEXT:    bsrl %ecx, %ecx
819; AVX1-NEXT:    cmovel %eax, %ecx
820; AVX1-NEXT:    xorl $7, %ecx
821; AVX1-NEXT:    vpinsrb $7, %ecx, %xmm2, %xmm2
822; AVX1-NEXT:    vpextrb $8, %xmm0, %ecx
823; AVX1-NEXT:    bsrl %ecx, %ecx
824; AVX1-NEXT:    cmovel %eax, %ecx
825; AVX1-NEXT:    xorl $7, %ecx
826; AVX1-NEXT:    vpinsrb $8, %ecx, %xmm2, %xmm2
827; AVX1-NEXT:    vpextrb $9, %xmm0, %ecx
828; AVX1-NEXT:    bsrl %ecx, %ecx
829; AVX1-NEXT:    cmovel %eax, %ecx
830; AVX1-NEXT:    xorl $7, %ecx
831; AVX1-NEXT:    vpinsrb $9, %ecx, %xmm2, %xmm2
832; AVX1-NEXT:    vpextrb $10, %xmm0, %ecx
833; AVX1-NEXT:    bsrl %ecx, %ecx
834; AVX1-NEXT:    cmovel %eax, %ecx
835; AVX1-NEXT:    xorl $7, %ecx
836; AVX1-NEXT:    vpinsrb $10, %ecx, %xmm2, %xmm2
837; AVX1-NEXT:    vpextrb $11, %xmm0, %ecx
838; AVX1-NEXT:    bsrl %ecx, %ecx
839; AVX1-NEXT:    cmovel %eax, %ecx
840; AVX1-NEXT:    xorl $7, %ecx
841; AVX1-NEXT:    vpinsrb $11, %ecx, %xmm2, %xmm2
842; AVX1-NEXT:    vpextrb $12, %xmm0, %ecx
843; AVX1-NEXT:    bsrl %ecx, %ecx
844; AVX1-NEXT:    cmovel %eax, %ecx
845; AVX1-NEXT:    xorl $7, %ecx
846; AVX1-NEXT:    vpinsrb $12, %ecx, %xmm2, %xmm2
847; AVX1-NEXT:    vpextrb $13, %xmm0, %ecx
848; AVX1-NEXT:    bsrl %ecx, %ecx
849; AVX1-NEXT:    cmovel %eax, %ecx
850; AVX1-NEXT:    xorl $7, %ecx
851; AVX1-NEXT:    vpinsrb $13, %ecx, %xmm2, %xmm2
852; AVX1-NEXT:    vpextrb $14, %xmm0, %ecx
853; AVX1-NEXT:    bsrl %ecx, %ecx
854; AVX1-NEXT:    cmovel %eax, %ecx
855; AVX1-NEXT:    xorl $7, %ecx
856; AVX1-NEXT:    vpinsrb $14, %ecx, %xmm2, %xmm2
857; AVX1-NEXT:    vpextrb $15, %xmm0, %ecx
858; AVX1-NEXT:    bsrl %ecx, %ecx
859; AVX1-NEXT:    cmovel %eax, %ecx
860; AVX1-NEXT:    xorl $7, %ecx
861; AVX1-NEXT:    vpinsrb $15, %ecx, %xmm2, %xmm0
862; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
863; AVX1-NEXT:    retq
864;
865; AVX2-LABEL: testv32i8:
866; AVX2:       # BB#0:
867; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
868; AVX2-NEXT:    vpextrb $1, %xmm1, %eax
869; AVX2-NEXT:    bsrl %eax, %ecx
870; AVX2-NEXT:    movl $15, %eax
871; AVX2-NEXT:    cmovel %eax, %ecx
872; AVX2-NEXT:    xorl $7, %ecx
873; AVX2-NEXT:    vpextrb $0, %xmm1, %edx
874; AVX2-NEXT:    bsrl %edx, %edx
875; AVX2-NEXT:    cmovel %eax, %edx
876; AVX2-NEXT:    xorl $7, %edx
877; AVX2-NEXT:    vmovd %edx, %xmm2
878; AVX2-NEXT:    vpinsrb $1, %ecx, %xmm2, %xmm2
879; AVX2-NEXT:    vpextrb $2, %xmm1, %ecx
880; AVX2-NEXT:    bsrl %ecx, %ecx
881; AVX2-NEXT:    cmovel %eax, %ecx
882; AVX2-NEXT:    xorl $7, %ecx
883; AVX2-NEXT:    vpinsrb $2, %ecx, %xmm2, %xmm2
884; AVX2-NEXT:    vpextrb $3, %xmm1, %ecx
885; AVX2-NEXT:    bsrl %ecx, %ecx
886; AVX2-NEXT:    cmovel %eax, %ecx
887; AVX2-NEXT:    xorl $7, %ecx
888; AVX2-NEXT:    vpinsrb $3, %ecx, %xmm2, %xmm2
889; AVX2-NEXT:    vpextrb $4, %xmm1, %ecx
890; AVX2-NEXT:    bsrl %ecx, %ecx
891; AVX2-NEXT:    cmovel %eax, %ecx
892; AVX2-NEXT:    xorl $7, %ecx
893; AVX2-NEXT:    vpinsrb $4, %ecx, %xmm2, %xmm2
894; AVX2-NEXT:    vpextrb $5, %xmm1, %ecx
895; AVX2-NEXT:    bsrl %ecx, %ecx
896; AVX2-NEXT:    cmovel %eax, %ecx
897; AVX2-NEXT:    xorl $7, %ecx
898; AVX2-NEXT:    vpinsrb $5, %ecx, %xmm2, %xmm2
899; AVX2-NEXT:    vpextrb $6, %xmm1, %ecx
900; AVX2-NEXT:    bsrl %ecx, %ecx
901; AVX2-NEXT:    cmovel %eax, %ecx
902; AVX2-NEXT:    xorl $7, %ecx
903; AVX2-NEXT:    vpinsrb $6, %ecx, %xmm2, %xmm2
904; AVX2-NEXT:    vpextrb $7, %xmm1, %ecx
905; AVX2-NEXT:    bsrl %ecx, %ecx
906; AVX2-NEXT:    cmovel %eax, %ecx
907; AVX2-NEXT:    xorl $7, %ecx
908; AVX2-NEXT:    vpinsrb $7, %ecx, %xmm2, %xmm2
909; AVX2-NEXT:    vpextrb $8, %xmm1, %ecx
910; AVX2-NEXT:    bsrl %ecx, %ecx
911; AVX2-NEXT:    cmovel %eax, %ecx
912; AVX2-NEXT:    xorl $7, %ecx
913; AVX2-NEXT:    vpinsrb $8, %ecx, %xmm2, %xmm2
914; AVX2-NEXT:    vpextrb $9, %xmm1, %ecx
915; AVX2-NEXT:    bsrl %ecx, %ecx
916; AVX2-NEXT:    cmovel %eax, %ecx
917; AVX2-NEXT:    xorl $7, %ecx
918; AVX2-NEXT:    vpinsrb $9, %ecx, %xmm2, %xmm2
919; AVX2-NEXT:    vpextrb $10, %xmm1, %ecx
920; AVX2-NEXT:    bsrl %ecx, %ecx
921; AVX2-NEXT:    cmovel %eax, %ecx
922; AVX2-NEXT:    xorl $7, %ecx
923; AVX2-NEXT:    vpinsrb $10, %ecx, %xmm2, %xmm2
924; AVX2-NEXT:    vpextrb $11, %xmm1, %ecx
925; AVX2-NEXT:    bsrl %ecx, %ecx
926; AVX2-NEXT:    cmovel %eax, %ecx
927; AVX2-NEXT:    xorl $7, %ecx
928; AVX2-NEXT:    vpinsrb $11, %ecx, %xmm2, %xmm2
929; AVX2-NEXT:    vpextrb $12, %xmm1, %ecx
930; AVX2-NEXT:    bsrl %ecx, %ecx
931; AVX2-NEXT:    cmovel %eax, %ecx
932; AVX2-NEXT:    xorl $7, %ecx
933; AVX2-NEXT:    vpinsrb $12, %ecx, %xmm2, %xmm2
934; AVX2-NEXT:    vpextrb $13, %xmm1, %ecx
935; AVX2-NEXT:    bsrl %ecx, %ecx
936; AVX2-NEXT:    cmovel %eax, %ecx
937; AVX2-NEXT:    xorl $7, %ecx
938; AVX2-NEXT:    vpinsrb $13, %ecx, %xmm2, %xmm2
939; AVX2-NEXT:    vpextrb $14, %xmm1, %ecx
940; AVX2-NEXT:    bsrl %ecx, %ecx
941; AVX2-NEXT:    cmovel %eax, %ecx
942; AVX2-NEXT:    xorl $7, %ecx
943; AVX2-NEXT:    vpinsrb $14, %ecx, %xmm2, %xmm2
944; AVX2-NEXT:    vpextrb $15, %xmm1, %ecx
945; AVX2-NEXT:    bsrl %ecx, %ecx
946; AVX2-NEXT:    cmovel %eax, %ecx
947; AVX2-NEXT:    xorl $7, %ecx
948; AVX2-NEXT:    vpinsrb $15, %ecx, %xmm2, %xmm1
949; AVX2-NEXT:    vpextrb $1, %xmm0, %ecx
950; AVX2-NEXT:    bsrl %ecx, %ecx
951; AVX2-NEXT:    cmovel %eax, %ecx
952; AVX2-NEXT:    xorl $7, %ecx
953; AVX2-NEXT:    vpextrb $0, %xmm0, %edx
954; AVX2-NEXT:    bsrl %edx, %edx
955; AVX2-NEXT:    cmovel %eax, %edx
956; AVX2-NEXT:    xorl $7, %edx
957; AVX2-NEXT:    vmovd %edx, %xmm2
958; AVX2-NEXT:    vpinsrb $1, %ecx, %xmm2, %xmm2
959; AVX2-NEXT:    vpextrb $2, %xmm0, %ecx
960; AVX2-NEXT:    bsrl %ecx, %ecx
961; AVX2-NEXT:    cmovel %eax, %ecx
962; AVX2-NEXT:    xorl $7, %ecx
963; AVX2-NEXT:    vpinsrb $2, %ecx, %xmm2, %xmm2
964; AVX2-NEXT:    vpextrb $3, %xmm0, %ecx
965; AVX2-NEXT:    bsrl %ecx, %ecx
966; AVX2-NEXT:    cmovel %eax, %ecx
967; AVX2-NEXT:    xorl $7, %ecx
968; AVX2-NEXT:    vpinsrb $3, %ecx, %xmm2, %xmm2
969; AVX2-NEXT:    vpextrb $4, %xmm0, %ecx
970; AVX2-NEXT:    bsrl %ecx, %ecx
971; AVX2-NEXT:    cmovel %eax, %ecx
972; AVX2-NEXT:    xorl $7, %ecx
973; AVX2-NEXT:    vpinsrb $4, %ecx, %xmm2, %xmm2
974; AVX2-NEXT:    vpextrb $5, %xmm0, %ecx
975; AVX2-NEXT:    bsrl %ecx, %ecx
976; AVX2-NEXT:    cmovel %eax, %ecx
977; AVX2-NEXT:    xorl $7, %ecx
978; AVX2-NEXT:    vpinsrb $5, %ecx, %xmm2, %xmm2
979; AVX2-NEXT:    vpextrb $6, %xmm0, %ecx
980; AVX2-NEXT:    bsrl %ecx, %ecx
981; AVX2-NEXT:    cmovel %eax, %ecx
982; AVX2-NEXT:    xorl $7, %ecx
983; AVX2-NEXT:    vpinsrb $6, %ecx, %xmm2, %xmm2
984; AVX2-NEXT:    vpextrb $7, %xmm0, %ecx
985; AVX2-NEXT:    bsrl %ecx, %ecx
986; AVX2-NEXT:    cmovel %eax, %ecx
987; AVX2-NEXT:    xorl $7, %ecx
988; AVX2-NEXT:    vpinsrb $7, %ecx, %xmm2, %xmm2
989; AVX2-NEXT:    vpextrb $8, %xmm0, %ecx
990; AVX2-NEXT:    bsrl %ecx, %ecx
991; AVX2-NEXT:    cmovel %eax, %ecx
992; AVX2-NEXT:    xorl $7, %ecx
993; AVX2-NEXT:    vpinsrb $8, %ecx, %xmm2, %xmm2
994; AVX2-NEXT:    vpextrb $9, %xmm0, %ecx
995; AVX2-NEXT:    bsrl %ecx, %ecx
996; AVX2-NEXT:    cmovel %eax, %ecx
997; AVX2-NEXT:    xorl $7, %ecx
998; AVX2-NEXT:    vpinsrb $9, %ecx, %xmm2, %xmm2
999; AVX2-NEXT:    vpextrb $10, %xmm0, %ecx
1000; AVX2-NEXT:    bsrl %ecx, %ecx
1001; AVX2-NEXT:    cmovel %eax, %ecx
1002; AVX2-NEXT:    xorl $7, %ecx
1003; AVX2-NEXT:    vpinsrb $10, %ecx, %xmm2, %xmm2
1004; AVX2-NEXT:    vpextrb $11, %xmm0, %ecx
1005; AVX2-NEXT:    bsrl %ecx, %ecx
1006; AVX2-NEXT:    cmovel %eax, %ecx
1007; AVX2-NEXT:    xorl $7, %ecx
1008; AVX2-NEXT:    vpinsrb $11, %ecx, %xmm2, %xmm2
1009; AVX2-NEXT:    vpextrb $12, %xmm0, %ecx
1010; AVX2-NEXT:    bsrl %ecx, %ecx
1011; AVX2-NEXT:    cmovel %eax, %ecx
1012; AVX2-NEXT:    xorl $7, %ecx
1013; AVX2-NEXT:    vpinsrb $12, %ecx, %xmm2, %xmm2
1014; AVX2-NEXT:    vpextrb $13, %xmm0, %ecx
1015; AVX2-NEXT:    bsrl %ecx, %ecx
1016; AVX2-NEXT:    cmovel %eax, %ecx
1017; AVX2-NEXT:    xorl $7, %ecx
1018; AVX2-NEXT:    vpinsrb $13, %ecx, %xmm2, %xmm2
1019; AVX2-NEXT:    vpextrb $14, %xmm0, %ecx
1020; AVX2-NEXT:    bsrl %ecx, %ecx
1021; AVX2-NEXT:    cmovel %eax, %ecx
1022; AVX2-NEXT:    xorl $7, %ecx
1023; AVX2-NEXT:    vpinsrb $14, %ecx, %xmm2, %xmm2
1024; AVX2-NEXT:    vpextrb $15, %xmm0, %ecx
1025; AVX2-NEXT:    bsrl %ecx, %ecx
1026; AVX2-NEXT:    cmovel %eax, %ecx
1027; AVX2-NEXT:    xorl $7, %ecx
1028; AVX2-NEXT:    vpinsrb $15, %ecx, %xmm2, %xmm0
1029; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1030; AVX2-NEXT:    retq
1031;
1032; AVX512VLCD-LABEL: testv32i8:
1033; AVX512VLCD:       ## BB#0:
1034; AVX512VLCD-NEXT:    vextractf128 $1, %ymm0, %xmm1
1035; AVX512VLCD-NEXT:    vpmovzxbd %xmm1, %zmm1
1036; AVX512VLCD-NEXT:    vplzcntd %zmm1, %zmm1
1037; AVX512VLCD-NEXT:    vpmovdb %zmm1, %xmm1
1038; AVX512VLCD-NEXT:    vmovdqa64 {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
1039; AVX512VLCD-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
1040; AVX512VLCD-NEXT:    vpmovzxbd %xmm0, %zmm0
1041; AVX512VLCD-NEXT:    vplzcntd %zmm0, %zmm0
1042; AVX512VLCD-NEXT:    vpmovdb %zmm0, %xmm0
1043; AVX512VLCD-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
1044; AVX512VLCD-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
1045; AVX512VLCD-NEXT:    retq
1046;
1047; AVX512CD-LABEL: testv32i8:
1048; AVX512CD:       ## BB#0:
1049; AVX512CD-NEXT:    vextractf128 $1, %ymm0, %xmm1
1050; AVX512CD-NEXT:    vpmovzxbd %xmm1, %zmm1
1051; AVX512CD-NEXT:    vplzcntd %zmm1, %zmm1
1052; AVX512CD-NEXT:    vpmovdb %zmm1, %xmm1
1053; AVX512CD-NEXT:    vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
1054; AVX512CD-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
1055; AVX512CD-NEXT:    vpmovzxbd %xmm0, %zmm0
1056; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
1057; AVX512CD-NEXT:    vpmovdb %zmm0, %xmm0
1058; AVX512CD-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
1059; AVX512CD-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1060; AVX512CD-NEXT:    retq
1061  %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 0)
1062  ret <32 x i8> %out
1063}
1064
; testv32i8u: per-byte ctlz with is_zero_undef = true (i1 -1). Unlike the
; i1 0 variant above, the scalarized AVX1/AVX2 lowering omits the
; cmov-on-zero guard around each bsrl, since a zero input is undef here;
; AVX512 check lines instead use vplzcntd on zero-extended bytes.
; NOTE: CHECK lines below are autogenerated (update_llc_test_checks.py) —
; do not hand-edit them.
1065define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
1066; AVX1-LABEL: testv32i8u:
1067; AVX1:       # BB#0:
1068; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1069; AVX1-NEXT:    vpextrb $1, %xmm1, %eax
1070; AVX1-NEXT:    bsrl %eax, %eax
1071; AVX1-NEXT:    xorl $7, %eax
1072; AVX1-NEXT:    vpextrb $0, %xmm1, %ecx
1073; AVX1-NEXT:    bsrl %ecx, %ecx
1074; AVX1-NEXT:    xorl $7, %ecx
1075; AVX1-NEXT:    vmovd %ecx, %xmm2
1076; AVX1-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
1077; AVX1-NEXT:    vpextrb $2, %xmm1, %eax
1078; AVX1-NEXT:    bsrl %eax, %eax
1079; AVX1-NEXT:    xorl $7, %eax
1080; AVX1-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
1081; AVX1-NEXT:    vpextrb $3, %xmm1, %eax
1082; AVX1-NEXT:    bsrl %eax, %eax
1083; AVX1-NEXT:    xorl $7, %eax
1084; AVX1-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
1085; AVX1-NEXT:    vpextrb $4, %xmm1, %eax
1086; AVX1-NEXT:    bsrl %eax, %eax
1087; AVX1-NEXT:    xorl $7, %eax
1088; AVX1-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
1089; AVX1-NEXT:    vpextrb $5, %xmm1, %eax
1090; AVX1-NEXT:    bsrl %eax, %eax
1091; AVX1-NEXT:    xorl $7, %eax
1092; AVX1-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
1093; AVX1-NEXT:    vpextrb $6, %xmm1, %eax
1094; AVX1-NEXT:    bsrl %eax, %eax
1095; AVX1-NEXT:    xorl $7, %eax
1096; AVX1-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
1097; AVX1-NEXT:    vpextrb $7, %xmm1, %eax
1098; AVX1-NEXT:    bsrl %eax, %eax
1099; AVX1-NEXT:    xorl $7, %eax
1100; AVX1-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
1101; AVX1-NEXT:    vpextrb $8, %xmm1, %eax
1102; AVX1-NEXT:    bsrl %eax, %eax
1103; AVX1-NEXT:    xorl $7, %eax
1104; AVX1-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
1105; AVX1-NEXT:    vpextrb $9, %xmm1, %eax
1106; AVX1-NEXT:    bsrl %eax, %eax
1107; AVX1-NEXT:    xorl $7, %eax
1108; AVX1-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
1109; AVX1-NEXT:    vpextrb $10, %xmm1, %eax
1110; AVX1-NEXT:    bsrl %eax, %eax
1111; AVX1-NEXT:    xorl $7, %eax
1112; AVX1-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
1113; AVX1-NEXT:    vpextrb $11, %xmm1, %eax
1114; AVX1-NEXT:    bsrl %eax, %eax
1115; AVX1-NEXT:    xorl $7, %eax
1116; AVX1-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
1117; AVX1-NEXT:    vpextrb $12, %xmm1, %eax
1118; AVX1-NEXT:    bsrl %eax, %eax
1119; AVX1-NEXT:    xorl $7, %eax
1120; AVX1-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
1121; AVX1-NEXT:    vpextrb $13, %xmm1, %eax
1122; AVX1-NEXT:    bsrl %eax, %eax
1123; AVX1-NEXT:    xorl $7, %eax
1124; AVX1-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
1125; AVX1-NEXT:    vpextrb $14, %xmm1, %eax
1126; AVX1-NEXT:    bsrl %eax, %eax
1127; AVX1-NEXT:    xorl $7, %eax
1128; AVX1-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
1129; AVX1-NEXT:    vpextrb $15, %xmm1, %eax
1130; AVX1-NEXT:    bsrl %eax, %eax
1131; AVX1-NEXT:    xorl $7, %eax
1132; AVX1-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm1
1133; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
1134; AVX1-NEXT:    bsrl %eax, %eax
1135; AVX1-NEXT:    xorl $7, %eax
1136; AVX1-NEXT:    vpextrb $0, %xmm0, %ecx
1137; AVX1-NEXT:    bsrl %ecx, %ecx
1138; AVX1-NEXT:    xorl $7, %ecx
1139; AVX1-NEXT:    vmovd %ecx, %xmm2
1140; AVX1-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
1141; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
1142; AVX1-NEXT:    bsrl %eax, %eax
1143; AVX1-NEXT:    xorl $7, %eax
1144; AVX1-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
1145; AVX1-NEXT:    vpextrb $3, %xmm0, %eax
1146; AVX1-NEXT:    bsrl %eax, %eax
1147; AVX1-NEXT:    xorl $7, %eax
1148; AVX1-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
1149; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
1150; AVX1-NEXT:    bsrl %eax, %eax
1151; AVX1-NEXT:    xorl $7, %eax
1152; AVX1-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
1153; AVX1-NEXT:    vpextrb $5, %xmm0, %eax
1154; AVX1-NEXT:    bsrl %eax, %eax
1155; AVX1-NEXT:    xorl $7, %eax
1156; AVX1-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
1157; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
1158; AVX1-NEXT:    bsrl %eax, %eax
1159; AVX1-NEXT:    xorl $7, %eax
1160; AVX1-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
1161; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
1162; AVX1-NEXT:    bsrl %eax, %eax
1163; AVX1-NEXT:    xorl $7, %eax
1164; AVX1-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
1165; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
1166; AVX1-NEXT:    bsrl %eax, %eax
1167; AVX1-NEXT:    xorl $7, %eax
1168; AVX1-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
1169; AVX1-NEXT:    vpextrb $9, %xmm0, %eax
1170; AVX1-NEXT:    bsrl %eax, %eax
1171; AVX1-NEXT:    xorl $7, %eax
1172; AVX1-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
1173; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
1174; AVX1-NEXT:    bsrl %eax, %eax
1175; AVX1-NEXT:    xorl $7, %eax
1176; AVX1-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
1177; AVX1-NEXT:    vpextrb $11, %xmm0, %eax
1178; AVX1-NEXT:    bsrl %eax, %eax
1179; AVX1-NEXT:    xorl $7, %eax
1180; AVX1-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
1181; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
1182; AVX1-NEXT:    bsrl %eax, %eax
1183; AVX1-NEXT:    xorl $7, %eax
1184; AVX1-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
1185; AVX1-NEXT:    vpextrb $13, %xmm0, %eax
1186; AVX1-NEXT:    bsrl %eax, %eax
1187; AVX1-NEXT:    xorl $7, %eax
1188; AVX1-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
1189; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
1190; AVX1-NEXT:    bsrl %eax, %eax
1191; AVX1-NEXT:    xorl $7, %eax
1192; AVX1-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
1193; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
1194; AVX1-NEXT:    bsrl %eax, %eax
1195; AVX1-NEXT:    xorl $7, %eax
1196; AVX1-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm0
1197; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1198; AVX1-NEXT:    retq
1199;
1200; AVX2-LABEL: testv32i8u:
1201; AVX2:       # BB#0:
1202; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1203; AVX2-NEXT:    vpextrb $1, %xmm1, %eax
1204; AVX2-NEXT:    bsrl %eax, %eax
1205; AVX2-NEXT:    xorl $7, %eax
1206; AVX2-NEXT:    vpextrb $0, %xmm1, %ecx
1207; AVX2-NEXT:    bsrl %ecx, %ecx
1208; AVX2-NEXT:    xorl $7, %ecx
1209; AVX2-NEXT:    vmovd %ecx, %xmm2
1210; AVX2-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
1211; AVX2-NEXT:    vpextrb $2, %xmm1, %eax
1212; AVX2-NEXT:    bsrl %eax, %eax
1213; AVX2-NEXT:    xorl $7, %eax
1214; AVX2-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
1215; AVX2-NEXT:    vpextrb $3, %xmm1, %eax
1216; AVX2-NEXT:    bsrl %eax, %eax
1217; AVX2-NEXT:    xorl $7, %eax
1218; AVX2-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
1219; AVX2-NEXT:    vpextrb $4, %xmm1, %eax
1220; AVX2-NEXT:    bsrl %eax, %eax
1221; AVX2-NEXT:    xorl $7, %eax
1222; AVX2-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
1223; AVX2-NEXT:    vpextrb $5, %xmm1, %eax
1224; AVX2-NEXT:    bsrl %eax, %eax
1225; AVX2-NEXT:    xorl $7, %eax
1226; AVX2-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
1227; AVX2-NEXT:    vpextrb $6, %xmm1, %eax
1228; AVX2-NEXT:    bsrl %eax, %eax
1229; AVX2-NEXT:    xorl $7, %eax
1230; AVX2-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
1231; AVX2-NEXT:    vpextrb $7, %xmm1, %eax
1232; AVX2-NEXT:    bsrl %eax, %eax
1233; AVX2-NEXT:    xorl $7, %eax
1234; AVX2-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
1235; AVX2-NEXT:    vpextrb $8, %xmm1, %eax
1236; AVX2-NEXT:    bsrl %eax, %eax
1237; AVX2-NEXT:    xorl $7, %eax
1238; AVX2-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
1239; AVX2-NEXT:    vpextrb $9, %xmm1, %eax
1240; AVX2-NEXT:    bsrl %eax, %eax
1241; AVX2-NEXT:    xorl $7, %eax
1242; AVX2-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
1243; AVX2-NEXT:    vpextrb $10, %xmm1, %eax
1244; AVX2-NEXT:    bsrl %eax, %eax
1245; AVX2-NEXT:    xorl $7, %eax
1246; AVX2-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
1247; AVX2-NEXT:    vpextrb $11, %xmm1, %eax
1248; AVX2-NEXT:    bsrl %eax, %eax
1249; AVX2-NEXT:    xorl $7, %eax
1250; AVX2-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
1251; AVX2-NEXT:    vpextrb $12, %xmm1, %eax
1252; AVX2-NEXT:    bsrl %eax, %eax
1253; AVX2-NEXT:    xorl $7, %eax
1254; AVX2-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
1255; AVX2-NEXT:    vpextrb $13, %xmm1, %eax
1256; AVX2-NEXT:    bsrl %eax, %eax
1257; AVX2-NEXT:    xorl $7, %eax
1258; AVX2-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
1259; AVX2-NEXT:    vpextrb $14, %xmm1, %eax
1260; AVX2-NEXT:    bsrl %eax, %eax
1261; AVX2-NEXT:    xorl $7, %eax
1262; AVX2-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
1263; AVX2-NEXT:    vpextrb $15, %xmm1, %eax
1264; AVX2-NEXT:    bsrl %eax, %eax
1265; AVX2-NEXT:    xorl $7, %eax
1266; AVX2-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm1
1267; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
1268; AVX2-NEXT:    bsrl %eax, %eax
1269; AVX2-NEXT:    xorl $7, %eax
1270; AVX2-NEXT:    vpextrb $0, %xmm0, %ecx
1271; AVX2-NEXT:    bsrl %ecx, %ecx
1272; AVX2-NEXT:    xorl $7, %ecx
1273; AVX2-NEXT:    vmovd %ecx, %xmm2
1274; AVX2-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
1275; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
1276; AVX2-NEXT:    bsrl %eax, %eax
1277; AVX2-NEXT:    xorl $7, %eax
1278; AVX2-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
1279; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
1280; AVX2-NEXT:    bsrl %eax, %eax
1281; AVX2-NEXT:    xorl $7, %eax
1282; AVX2-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
1283; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
1284; AVX2-NEXT:    bsrl %eax, %eax
1285; AVX2-NEXT:    xorl $7, %eax
1286; AVX2-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
1287; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
1288; AVX2-NEXT:    bsrl %eax, %eax
1289; AVX2-NEXT:    xorl $7, %eax
1290; AVX2-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
1291; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
1292; AVX2-NEXT:    bsrl %eax, %eax
1293; AVX2-NEXT:    xorl $7, %eax
1294; AVX2-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
1295; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
1296; AVX2-NEXT:    bsrl %eax, %eax
1297; AVX2-NEXT:    xorl $7, %eax
1298; AVX2-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
1299; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
1300; AVX2-NEXT:    bsrl %eax, %eax
1301; AVX2-NEXT:    xorl $7, %eax
1302; AVX2-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
1303; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
1304; AVX2-NEXT:    bsrl %eax, %eax
1305; AVX2-NEXT:    xorl $7, %eax
1306; AVX2-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
1307; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
1308; AVX2-NEXT:    bsrl %eax, %eax
1309; AVX2-NEXT:    xorl $7, %eax
1310; AVX2-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
1311; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
1312; AVX2-NEXT:    bsrl %eax, %eax
1313; AVX2-NEXT:    xorl $7, %eax
1314; AVX2-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
1315; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
1316; AVX2-NEXT:    bsrl %eax, %eax
1317; AVX2-NEXT:    xorl $7, %eax
1318; AVX2-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
1319; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
1320; AVX2-NEXT:    bsrl %eax, %eax
1321; AVX2-NEXT:    xorl $7, %eax
1322; AVX2-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
1323; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
1324; AVX2-NEXT:    bsrl %eax, %eax
1325; AVX2-NEXT:    xorl $7, %eax
1326; AVX2-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
1327; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
1328; AVX2-NEXT:    bsrl %eax, %eax
1329; AVX2-NEXT:    xorl $7, %eax
1330; AVX2-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm0
1331; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1332; AVX2-NEXT:    retq
1333;
1334; AVX512VLCD-LABEL: testv32i8u:
1335; AVX512VLCD:       ## BB#0:
1336; AVX512VLCD-NEXT:    vextractf128 $1, %ymm0, %xmm1
1337; AVX512VLCD-NEXT:    vpmovzxbd %xmm1, %zmm1
1338; AVX512VLCD-NEXT:    vplzcntd %zmm1, %zmm1
1339; AVX512VLCD-NEXT:    vpmovdb %zmm1, %xmm1
1340; AVX512VLCD-NEXT:    vmovdqa64 {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
1341; AVX512VLCD-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
1342; AVX512VLCD-NEXT:    vpmovzxbd %xmm0, %zmm0
1343; AVX512VLCD-NEXT:    vplzcntd %zmm0, %zmm0
1344; AVX512VLCD-NEXT:    vpmovdb %zmm0, %xmm0
1345; AVX512VLCD-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
1346; AVX512VLCD-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
1347; AVX512VLCD-NEXT:    retq
1348;
1349; AVX512CD-LABEL: testv32i8u:
1350; AVX512CD:       ## BB#0:
1351; AVX512CD-NEXT:    vextractf128 $1, %ymm0, %xmm1
1352; AVX512CD-NEXT:    vpmovzxbd %xmm1, %zmm1
1353; AVX512CD-NEXT:    vplzcntd %zmm1, %zmm1
1354; AVX512CD-NEXT:    vpmovdb %zmm1, %xmm1
1355; AVX512CD-NEXT:    vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
1356; AVX512CD-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
1357; AVX512CD-NEXT:    vpmovzxbd %xmm0, %zmm0
1358; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
1359; AVX512CD-NEXT:    vpmovdb %zmm0, %xmm0
1360; AVX512CD-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
1361; AVX512CD-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1362; AVX512CD-NEXT:    retq
1363  %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 -1)
1364  ret <32 x i8> %out
1365}
1366
; foldv4i64: ctlz of an all-constant <4 x i64> (is_zero_undef = false) must
; constant-fold; all prefixes check for a single vector-constant load
; [55,0,64,56] (note ctlz(0) = 64 is defined for the i1 0 variant).
1367define <4 x i64> @foldv4i64() nounwind {
1368; AVX-LABEL: foldv4i64:
1369; AVX:       # BB#0:
1370; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,64,56]
1371; AVX-NEXT:    retq
1372;
1373; AVX512VLCD-LABEL: foldv4i64:
1374; AVX512VLCD:       ## BB#0:
1375; AVX512VLCD-NEXT:    vmovdqa64 {{.*#+}} ymm0 = [55,0,64,56]
1376; AVX512VLCD-NEXT:    retq
1377;
1378; AVX512CD-LABEL: foldv4i64:
1379; AVX512CD:       ## BB#0:
1380; AVX512CD-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,64,56]
1381; AVX512CD-NEXT:    retq
1382  %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
1383  ret <4 x i64> %out
1384}
1385
; foldv4i64u: same constant input as foldv4i64 but with is_zero_undef = true
; (i1 -1); the fold still produces [55,0,64,56], and the checked asm is
; identical to the i1 0 case.
1386define <4 x i64> @foldv4i64u() nounwind {
1387; AVX-LABEL: foldv4i64u:
1388; AVX:       # BB#0:
1389; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,64,56]
1390; AVX-NEXT:    retq
1391;
1392; AVX512VLCD-LABEL: foldv4i64u:
1393; AVX512VLCD:       ## BB#0:
1394; AVX512VLCD-NEXT:    vmovdqa64 {{.*#+}} ymm0 = [55,0,64,56]
1395; AVX512VLCD-NEXT:    retq
1396;
1397; AVX512CD-LABEL: foldv4i64u:
1398; AVX512CD:       ## BB#0:
1399; AVX512CD-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,64,56]
1400; AVX512CD-NEXT:    retq
1401  %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
1402  ret <4 x i64> %out
1403}
1404
; foldv8i32: constant-folded <8 x i32> ctlz (is_zero_undef = false); every
; prefix expects a single constant load [23,0,32,24,0,29,27,25].
1405define <8 x i32> @foldv8i32() nounwind {
1406; AVX-LABEL: foldv8i32:
1407; AVX:       # BB#0:
1408; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
1409; AVX-NEXT:    retq
1410;
1411; AVX512VLCD-LABEL: foldv8i32:
1412; AVX512VLCD:       ## BB#0:
1413; AVX512VLCD-NEXT:    vmovdqa32 {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
1414; AVX512VLCD-NEXT:    retq
1415;
1416; AVX512CD-LABEL: foldv8i32:
1417; AVX512CD:       ## BB#0:
1418; AVX512CD-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
1419; AVX512CD-NEXT:    retq
1420  %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
1421  ret <8 x i32> %out
1422}
1423
; foldv8i32u: is_zero_undef = true variant of foldv8i32; identical constant
; fold and identical checked asm.
1424define <8 x i32> @foldv8i32u() nounwind {
1425; AVX-LABEL: foldv8i32u:
1426; AVX:       # BB#0:
1427; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
1428; AVX-NEXT:    retq
1429;
1430; AVX512VLCD-LABEL: foldv8i32u:
1431; AVX512VLCD:       ## BB#0:
1432; AVX512VLCD-NEXT:    vmovdqa32 {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
1433; AVX512VLCD-NEXT:    retq
1434;
1435; AVX512CD-LABEL: foldv8i32u:
1436; AVX512CD:       ## BB#0:
1437; AVX512CD-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
1438; AVX512CD-NEXT:    retq
1439  %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
1440  ret <8 x i32> %out
1441}
1442
; foldv16i16: constant-folded <16 x i16> ctlz (is_zero_undef = false);
; expects one constant load on all prefixes.
1443define <16 x i16> @foldv16i16() nounwind {
1444; AVX-LABEL: foldv16i16:
1445; AVX:       # BB#0:
1446; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
1447; AVX-NEXT:    retq
1448;
1449; AVX512VLCD-LABEL: foldv16i16:
1450; AVX512VLCD:       ## BB#0:
1451; AVX512VLCD-NEXT:    vmovdqa64 {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
1452; AVX512VLCD-NEXT:    retq
1453;
1454; AVX512CD-LABEL: foldv16i16:
1455; AVX512CD:       ## BB#0:
1456; AVX512CD-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
1457; AVX512CD-NEXT:    retq
1458  %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
1459  ret <16 x i16> %out
1460}
1461
; foldv16i16u: is_zero_undef = true variant of foldv16i16; identical
; constant fold and identical checked asm.
1462define <16 x i16> @foldv16i16u() nounwind {
1463; AVX-LABEL: foldv16i16u:
1464; AVX:       # BB#0:
1465; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
1466; AVX-NEXT:    retq
1467;
1468; AVX512VLCD-LABEL: foldv16i16u:
1469; AVX512VLCD:       ## BB#0:
1470; AVX512VLCD-NEXT:    vmovdqa64 {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
1471; AVX512VLCD-NEXT:    retq
1472;
1473; AVX512CD-LABEL: foldv16i16u:
1474; AVX512CD:       ## BB#0:
1475; AVX512CD-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
1476; AVX512CD-NEXT:    retq
1477  %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
1478  ret <16 x i16> %out
1479}
1480
; foldv32i8: constant-folded <32 x i8> ctlz (is_zero_undef = false);
; expects one 32-byte constant load on all prefixes.
1481define <32 x i8> @foldv32i8() nounwind {
1482; AVX-LABEL: foldv32i8:
1483; AVX:       # BB#0:
1484; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
1485; AVX-NEXT:    retq
1486;
1487; AVX512VLCD-LABEL: foldv32i8:
1488; AVX512VLCD:       ## BB#0:
1489; AVX512VLCD-NEXT:    vmovdqa64 {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
1490; AVX512VLCD-NEXT:    retq
1491;
1492; AVX512CD-LABEL: foldv32i8:
1493; AVX512CD:       ## BB#0:
1494; AVX512CD-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
1495; AVX512CD-NEXT:    retq
1496  %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
1497  ret <32 x i8> %out
1498}
1499
; foldv32i8u: is_zero_undef = true variant of foldv32i8; identical
; constant fold and identical checked asm.
1500define <32 x i8> @foldv32i8u() nounwind {
1501; AVX-LABEL: foldv32i8u:
1502; AVX:       # BB#0:
1503; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
1504; AVX-NEXT:    retq
1505;
1506; AVX512VLCD-LABEL: foldv32i8u:
1507; AVX512VLCD:       ## BB#0:
1508; AVX512VLCD-NEXT:    vmovdqa64 {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
1509; AVX512VLCD-NEXT:    retq
1510;
1511; AVX512CD-LABEL: foldv32i8u:
1512; AVX512CD:       ## BB#0:
1513; AVX512CD-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
1514; AVX512CD-NEXT:    retq
1515  %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
1516  ret <32 x i8> %out
1517}
1518
1519declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
1520declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
1521declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
1522declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)
1523