; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd,-avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512CD
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512CDBW
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=-avx512cd,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW

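; 64-bit elements with cttz(0) defined (is_zero_undef = false). x86 has no
; vector tzcnt, so the expected lowering isolates the lowest set bit (x & -x),
; subtracts 1, and popcounts the result with the pshufb nibble-LUT + vpsadbw
; sequence.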
define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
; AVX512CD-LABEL: testv8i64:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512CD-NEXT:    vpsubq %zmm0, %zmm1, %zmm1
; AVX512CD-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512CD-NEXT:    vpsubq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512CD-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CD-NEXT:    vpand %ymm2, %ymm1, %ymm3
; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CD-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
; AVX512CD-NEXT:    vpsrlw $4, %ymm1, %ymm1
; AVX512CD-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
; AVX512CD-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
; AVX512CD-NEXT:    vpxor %ymm3, %ymm3, %ymm3
; AVX512CD-NEXT:    vpsadbw %ymm3, %ymm1, %ymm1
; AVX512CD-NEXT:    vpand %ymm2, %ymm0, %ymm5
; AVX512CD-NEXT:    vpshufb %ymm5, %ymm4, %ymm5
; AVX512CD-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512CD-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512CD-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
; AVX512CD-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
; AVX512CD-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512CD-NEXT:    retq
;
; AVX512CDBW-LABEL: testv8i64:
; AVX512CDBW:       ## BB#0:
; AVX512CDBW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512CDBW-NEXT:    vpsubq %zmm0, %zmm1, %zmm2
; AVX512CDBW-NEXT:    vpandq %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpsubq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CDBW-NEXT:    vpandq %zmm2, %zmm0, %zmm3
; AVX512CDBW-NEXT:    vmovdqu8 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDBW-NEXT:    vpshufb %zmm3, %zmm4, %zmm3
; AVX512CDBW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpandq %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpshufb %zmm0, %zmm4, %zmm0
; AVX512CDBW-NEXT:    vpaddb %zmm3, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT:    retq
;
; AVX512BW-LABEL: testv8i64:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpsubq %zmm0, %zmm1, %zmm2
; AVX512BW-NEXT:    vpandq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT:    vpandq %zmm2, %zmm0, %zmm3
; AVX512BW-NEXT:    vmovdqu8 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT:    vpshufb %zmm3, %zmm4, %zmm3
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpshufb %zmm0, %zmm4, %zmm0
; AVX512BW-NEXT:    vpaddb %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %out = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %in, i1 0)
  ret <8 x i64> %out
}

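; Zero-undef variant: targets with AVX512CD can instead use vplzcntq and
; compute cttz(x) as (bit width - 1) - lzcnt(x & -x); the BW-only run still
; uses the popcount expansion above.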
define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind {
; AVX512CD-LABEL: testv8i64u:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512CD-NEXT:    vpsubq %zmm0, %zmm1, %zmm1
; AVX512CD-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512CD-NEXT:    vplzcntq %zmm0, %zmm0
; AVX512CD-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm1
; AVX512CD-NEXT:    vpsubq %zmm0, %zmm1, %zmm0
; AVX512CD-NEXT:    retq
;
; AVX512CDBW-LABEL: testv8i64u:
; AVX512CDBW:       ## BB#0:
; AVX512CDBW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512CDBW-NEXT:    vpsubq %zmm0, %zmm1, %zmm1
; AVX512CDBW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vplzcntq %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm1
; AVX512CDBW-NEXT:    vpsubq %zmm0, %zmm1, %zmm0
; AVX512CDBW-NEXT:    retq
;
; AVX512BW-LABEL: testv8i64u:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpsubq %zmm0, %zmm1, %zmm2
; AVX512BW-NEXT:    vpandq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT:    vpandq %zmm2, %zmm0, %zmm3
; AVX512BW-NEXT:    vmovdqu8 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT:    vpshufb %zmm3, %zmm4, %zmm3
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpshufb %zmm0, %zmm4, %zmm0
; AVX512BW-NEXT:    vpaddb %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %out = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %in, i1 -1)
  ret <8 x i64> %out
}

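; 32-bit elements with cttz(0) defined: same popcount expansion, but vpsadbw
; sums bytes per 64-bit lane, so the byte counts are unpacked against zero and
; repacked to produce per-i32 results.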
define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
; AVX512CD-LABEL: testv16i32:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512CD-NEXT:    vpsubd %zmm0, %zmm1, %zmm1
; AVX512CD-NEXT:    vpandd %zmm1, %zmm0, %zmm0
; AVX512CD-NEXT:    vpsubd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512CD-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CD-NEXT:    vpand %ymm2, %ymm1, %ymm3
; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CD-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
; AVX512CD-NEXT:    vpsrlw $4, %ymm1, %ymm1
; AVX512CD-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
; AVX512CD-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
; AVX512CD-NEXT:    vpxor %ymm3, %ymm3, %ymm3
; AVX512CD-NEXT:    vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7]
; AVX512CD-NEXT:    vpsadbw %ymm3, %ymm5, %ymm5
; AVX512CD-NEXT:    vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5]
; AVX512CD-NEXT:    vpsadbw %ymm3, %ymm1, %ymm1
; AVX512CD-NEXT:    vpackuswb %ymm5, %ymm1, %ymm1
; AVX512CD-NEXT:    vpand %ymm2, %ymm0, %ymm5
; AVX512CD-NEXT:    vpshufb %ymm5, %ymm4, %ymm5
; AVX512CD-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512CD-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512CD-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
; AVX512CD-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
; AVX512CD-NEXT:    vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7]
; AVX512CD-NEXT:    vpsadbw %ymm3, %ymm2, %ymm2
; AVX512CD-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5]
; AVX512CD-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
; AVX512CD-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512CD-NEXT:    retq
;
; AVX512CDBW-LABEL: testv16i32:
; AVX512CDBW:       ## BB#0:
; AVX512CDBW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512CDBW-NEXT:    vpsubd %zmm0, %zmm1, %zmm2
; AVX512CDBW-NEXT:    vpandd %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpsubd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CDBW-NEXT:    vpandq %zmm2, %zmm0, %zmm3
; AVX512CDBW-NEXT:    vmovdqu8 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDBW-NEXT:    vpshufb %zmm3, %zmm4, %zmm3
; AVX512CDBW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpandq %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpshufb %zmm0, %zmm4, %zmm0
; AVX512CDBW-NEXT:    vpaddb %zmm3, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; AVX512CDBW-NEXT:    vpsadbw %zmm1, %zmm2, %zmm2
; AVX512CDBW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; AVX512CDBW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT:    retq
;
; AVX512BW-LABEL: testv16i32:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpsubd %zmm0, %zmm1, %zmm2
; AVX512BW-NEXT:    vpandd %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT:    vpandq %zmm2, %zmm0, %zmm3
; AVX512BW-NEXT:    vmovdqu8 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT:    vpshufb %zmm3, %zmm4, %zmm3
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpshufb %zmm0, %zmm4, %zmm0
; AVX512BW-NEXT:    vpaddb %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm2, %zmm2
; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %out = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %in, i1 0)
  ret <16 x i32> %out
}

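; Zero-undef i32 variant: the AVX512CD runs use vplzcntd; the BW-only run has
; no vector lzcnt and keeps the popcount expansion.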
define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
; AVX512CD-LABEL: testv16i32u:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512CD-NEXT:    vpsubd %zmm0, %zmm1, %zmm1
; AVX512CD-NEXT:    vpandd %zmm1, %zmm0, %zmm0
; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm1
; AVX512CD-NEXT:    vpsubd %zmm0, %zmm1, %zmm0
; AVX512CD-NEXT:    retq
;
; AVX512CDBW-LABEL: testv16i32u:
; AVX512CDBW:       ## BB#0:
; AVX512CDBW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512CDBW-NEXT:    vpsubd %zmm0, %zmm1, %zmm1
; AVX512CDBW-NEXT:    vpandd %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm1
; AVX512CDBW-NEXT:    vpsubd %zmm0, %zmm1, %zmm0
; AVX512CDBW-NEXT:    retq
;
; AVX512BW-LABEL: testv16i32u:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpsubd %zmm0, %zmm1, %zmm2
; AVX512BW-NEXT:    vpandd %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT:    vpandq %zmm2, %zmm0, %zmm3
; AVX512BW-NEXT:    vmovdqu8 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT:    vpshufb %zmm3, %zmm4, %zmm3
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpshufb %zmm0, %zmm4, %zmm0
; AVX512BW-NEXT:    vpaddb %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm2, %zmm2
; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %out = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %in, i1 -1)
  ret <16 x i32> %out
}

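; 16-bit elements: the per-byte counts are combined into i16 lanes with a
; shift/add/shift. vpshufb on 512-bit vectors needs AVX512BW, so the CD-only
; run splits the input into two 256-bit halves.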
define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
; AVX512CD-LABEL: testv32i16:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX512CD-NEXT:    vpsubw %ymm0, %ymm2, %ymm3
; AVX512CD-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512CD-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CD-NEXT:    vpand %ymm4, %ymm0, %ymm5
; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CD-NEXT:    vpshufb %ymm5, %ymm6, %ymm5
; AVX512CD-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512CD-NEXT:    vpand %ymm4, %ymm0, %ymm0
; AVX512CD-NEXT:    vpshufb %ymm0, %ymm6, %ymm0
; AVX512CD-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
; AVX512CD-NEXT:    vpsllw $8, %ymm0, %ymm5
; AVX512CD-NEXT:    vpaddb %ymm0, %ymm5, %ymm0
; AVX512CD-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512CD-NEXT:    vpsubw %ymm1, %ymm2, %ymm2
; AVX512CD-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT:    vpsubw %ymm3, %ymm1, %ymm1
; AVX512CD-NEXT:    vpand %ymm4, %ymm1, %ymm2
; AVX512CD-NEXT:    vpshufb %ymm2, %ymm6, %ymm2
; AVX512CD-NEXT:    vpsrlw $4, %ymm1, %ymm1
; AVX512CD-NEXT:    vpand %ymm4, %ymm1, %ymm1
; AVX512CD-NEXT:    vpshufb %ymm1, %ymm6, %ymm1
; AVX512CD-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT:    vpsllw $8, %ymm1, %ymm2
; AVX512CD-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512CD-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512CD-NEXT:    retq
;
; AVX512CDBW-LABEL: testv32i16:
; AVX512CDBW:       ## BB#0:
; AVX512CDBW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512CDBW-NEXT:    vpsubw %zmm0, %zmm1, %zmm1
; AVX512CDBW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpsubw {{.*}}(%rip), %zmm0, %zmm0
; AVX512CDBW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CDBW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
; AVX512CDBW-NEXT:    vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDBW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
; AVX512CDBW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
; AVX512CDBW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpsllw $8, %zmm0, %zmm1
; AVX512CDBW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
; AVX512CDBW-NEXT:    vpsrlw $8, %zmm0, %zmm0
; AVX512CDBW-NEXT:    retq
;
; AVX512BW-LABEL: testv32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpsubw %zmm0, %zmm1, %zmm1
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %out = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %in, i1 0)
  ret <32 x i16> %out
}

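; Zero-undef i16 variant: vplzcnt only exists for 32/64-bit elements, so every
; run still uses the popcount expansion.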
define <32 x i16> @testv32i16u(<32 x i16> %in) nounwind {
; AVX512CD-LABEL: testv32i16u:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX512CD-NEXT:    vpsubw %ymm0, %ymm2, %ymm3
; AVX512CD-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512CD-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CD-NEXT:    vpand %ymm4, %ymm0, %ymm5
; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CD-NEXT:    vpshufb %ymm5, %ymm6, %ymm5
; AVX512CD-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512CD-NEXT:    vpand %ymm4, %ymm0, %ymm0
; AVX512CD-NEXT:    vpshufb %ymm0, %ymm6, %ymm0
; AVX512CD-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
; AVX512CD-NEXT:    vpsllw $8, %ymm0, %ymm5
; AVX512CD-NEXT:    vpaddb %ymm0, %ymm5, %ymm0
; AVX512CD-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512CD-NEXT:    vpsubw %ymm1, %ymm2, %ymm2
; AVX512CD-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT:    vpsubw %ymm3, %ymm1, %ymm1
; AVX512CD-NEXT:    vpand %ymm4, %ymm1, %ymm2
; AVX512CD-NEXT:    vpshufb %ymm2, %ymm6, %ymm2
; AVX512CD-NEXT:    vpsrlw $4, %ymm1, %ymm1
; AVX512CD-NEXT:    vpand %ymm4, %ymm1, %ymm1
; AVX512CD-NEXT:    vpshufb %ymm1, %ymm6, %ymm1
; AVX512CD-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT:    vpsllw $8, %ymm1, %ymm2
; AVX512CD-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512CD-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512CD-NEXT:    retq
;
; AVX512CDBW-LABEL: testv32i16u:
; AVX512CDBW:       ## BB#0:
; AVX512CDBW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512CDBW-NEXT:    vpsubw %zmm0, %zmm1, %zmm1
; AVX512CDBW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpsubw {{.*}}(%rip), %zmm0, %zmm0
; AVX512CDBW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CDBW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
; AVX512CDBW-NEXT:    vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDBW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
; AVX512CDBW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
; AVX512CDBW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpsllw $8, %zmm0, %zmm1
; AVX512CDBW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
; AVX512CDBW-NEXT:    vpsrlw $8, %zmm0, %zmm0
; AVX512CDBW-NEXT:    retq
;
; AVX512BW-LABEL: testv32i16u:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpsubw %zmm0, %zmm1, %zmm1
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %out = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %in, i1 -1)
  ret <32 x i16> %out
}

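; 8-bit elements: the per-byte popcount of (x & -x) - 1 is already the final
; answer, so no widening of the counts is needed.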
define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
; AVX512CD-LABEL: testv64i8:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX512CD-NEXT:    vpsubb %ymm0, %ymm2, %ymm3
; AVX512CD-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512CD-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CD-NEXT:    vpand %ymm4, %ymm0, %ymm5
; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CD-NEXT:    vpshufb %ymm5, %ymm6, %ymm5
; AVX512CD-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512CD-NEXT:    vpand %ymm4, %ymm0, %ymm0
; AVX512CD-NEXT:    vpshufb %ymm0, %ymm6, %ymm0
; AVX512CD-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
; AVX512CD-NEXT:    vpsubb %ymm1, %ymm2, %ymm2
; AVX512CD-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
; AVX512CD-NEXT:    vpand %ymm4, %ymm1, %ymm2
; AVX512CD-NEXT:    vpshufb %ymm2, %ymm6, %ymm2
; AVX512CD-NEXT:    vpsrlw $4, %ymm1, %ymm1
; AVX512CD-NEXT:    vpand %ymm4, %ymm1, %ymm1
; AVX512CD-NEXT:    vpshufb %ymm1, %ymm6, %ymm1
; AVX512CD-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT:    retq
;
; AVX512CDBW-LABEL: testv64i8:
; AVX512CDBW:       ## BB#0:
; AVX512CDBW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512CDBW-NEXT:    vpsubb %zmm0, %zmm1, %zmm1
; AVX512CDBW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpsubb {{.*}}(%rip), %zmm0, %zmm0
; AVX512CDBW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CDBW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
; AVX512CDBW-NEXT:    vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDBW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
; AVX512CDBW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
; AVX512CDBW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT:    retq
;
; AVX512BW-LABEL: testv64i8:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpsubb %zmm0, %zmm1, %zmm1
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubb {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %out = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %in, i1 0)
  ret <64 x i8> %out
}

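; Zero-undef i8 variant: lowered the same way as the defined-at-zero i8 case.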
define <64 x i8> @testv64i8u(<64 x i8> %in) nounwind {
; AVX512CD-LABEL: testv64i8u:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX512CD-NEXT:    vpsubb %ymm0, %ymm2, %ymm3
; AVX512CD-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512CD-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CD-NEXT:    vpand %ymm4, %ymm0, %ymm5
; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CD-NEXT:    vpshufb %ymm5, %ymm6, %ymm5
; AVX512CD-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512CD-NEXT:    vpand %ymm4, %ymm0, %ymm0
; AVX512CD-NEXT:    vpshufb %ymm0, %ymm6, %ymm0
; AVX512CD-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
; AVX512CD-NEXT:    vpsubb %ymm1, %ymm2, %ymm2
; AVX512CD-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
; AVX512CD-NEXT:    vpand %ymm4, %ymm1, %ymm2
; AVX512CD-NEXT:    vpshufb %ymm2, %ymm6, %ymm2
; AVX512CD-NEXT:    vpsrlw $4, %ymm1, %ymm1
; AVX512CD-NEXT:    vpand %ymm4, %ymm1, %ymm1
; AVX512CD-NEXT:    vpshufb %ymm1, %ymm6, %ymm1
; AVX512CD-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT:    retq
;
; AVX512CDBW-LABEL: testv64i8u:
; AVX512CDBW:       ## BB#0:
; AVX512CDBW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512CDBW-NEXT:    vpsubb %zmm0, %zmm1, %zmm1
; AVX512CDBW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpsubb {{.*}}(%rip), %zmm0, %zmm0
; AVX512CDBW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CDBW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
; AVX512CDBW-NEXT:    vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDBW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
; AVX512CDBW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
; AVX512CDBW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT:    retq
;
; AVX512BW-LABEL: testv64i8u:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpsubb %zmm0, %zmm1, %zmm1
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubb {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %out = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %in, i1 -1)
  ret <64 x i8> %out
}

declare <8 x i64> @llvm.cttz.v8i64(<8 x i64>, i1)
declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1)
declare <32 x i16> @llvm.cttz.v32i16(<32 x i16>, i1)
declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>, i1)