1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 4; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl| FileCheck %s --check-prefix=AVX512VLCD --check-prefix=ALL --check-prefix=AVX512 5; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s --check-prefix=AVX512CD --check-prefix=ALL --check-prefix=AVX512 6 7define <4 x i64> @testv4i64(<4 x i64> %in) nounwind { 8; AVX1-LABEL: testv4i64: 9; AVX1: # BB#0: 10; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 11; AVX1-NEXT: vpextrq $1, %xmm1, %rax 12; AVX1-NEXT: bsrq %rax, %rax 13; AVX1-NEXT: movl $127, %ecx 14; AVX1-NEXT: cmoveq %rcx, %rax 15; AVX1-NEXT: xorq $63, %rax 16; AVX1-NEXT: vmovq %rax, %xmm2 17; AVX1-NEXT: vmovq %xmm1, %rax 18; AVX1-NEXT: bsrq %rax, %rax 19; AVX1-NEXT: cmoveq %rcx, %rax 20; AVX1-NEXT: xorq $63, %rax 21; AVX1-NEXT: vmovq %rax, %xmm1 22; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 23; AVX1-NEXT: vpextrq $1, %xmm0, %rax 24; AVX1-NEXT: bsrq %rax, %rax 25; AVX1-NEXT: cmoveq %rcx, %rax 26; AVX1-NEXT: xorq $63, %rax 27; AVX1-NEXT: vmovq %rax, %xmm2 28; AVX1-NEXT: vmovq %xmm0, %rax 29; AVX1-NEXT: bsrq %rax, %rax 30; AVX1-NEXT: cmoveq %rcx, %rax 31; AVX1-NEXT: xorq $63, %rax 32; AVX1-NEXT: vmovq %rax, %xmm0 33; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 34; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 35; AVX1-NEXT: retq 36; 37; AVX2-LABEL: testv4i64: 38; AVX2: # BB#0: 39; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 40; AVX2-NEXT: vpextrq $1, %xmm1, %rax 41; AVX2-NEXT: bsrq %rax, %rax 42; AVX2-NEXT: movl $127, %ecx 43; AVX2-NEXT: cmoveq %rcx, %rax 44; AVX2-NEXT: xorq $63, %rax 45; AVX2-NEXT: vmovq %rax, %xmm2 46; AVX2-NEXT: 
vmovq %xmm1, %rax 47; AVX2-NEXT: bsrq %rax, %rax 48; AVX2-NEXT: cmoveq %rcx, %rax 49; AVX2-NEXT: xorq $63, %rax 50; AVX2-NEXT: vmovq %rax, %xmm1 51; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 52; AVX2-NEXT: vpextrq $1, %xmm0, %rax 53; AVX2-NEXT: bsrq %rax, %rax 54; AVX2-NEXT: cmoveq %rcx, %rax 55; AVX2-NEXT: xorq $63, %rax 56; AVX2-NEXT: vmovq %rax, %xmm2 57; AVX2-NEXT: vmovq %xmm0, %rax 58; AVX2-NEXT: bsrq %rax, %rax 59; AVX2-NEXT: cmoveq %rcx, %rax 60; AVX2-NEXT: xorq $63, %rax 61; AVX2-NEXT: vmovq %rax, %xmm0 62; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 63; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 64; AVX2-NEXT: retq 65; 66; AVX512VLCD-LABEL: testv4i64: 67; AVX512VLCD: ## BB#0: 68; AVX512VLCD-NEXT: vplzcntq %ymm0, %ymm0 69; AVX512VLCD-NEXT: retq 70; 71; AVX512CD-LABEL: testv4i64: 72; AVX512CD: ## BB#0: 73; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0 74; AVX512CD-NEXT: retq 75 76 %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 0) 77 ret <4 x i64> %out 78} 79 80define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind { 81; AVX1-LABEL: testv4i64u: 82; AVX1: # BB#0: 83; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 84; AVX1-NEXT: vpextrq $1, %xmm1, %rax 85; AVX1-NEXT: bsrq %rax, %rax 86; AVX1-NEXT: xorq $63, %rax 87; AVX1-NEXT: vmovq %rax, %xmm2 88; AVX1-NEXT: vmovq %xmm1, %rax 89; AVX1-NEXT: bsrq %rax, %rax 90; AVX1-NEXT: xorq $63, %rax 91; AVX1-NEXT: vmovq %rax, %xmm1 92; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 93; AVX1-NEXT: vpextrq $1, %xmm0, %rax 94; AVX1-NEXT: bsrq %rax, %rax 95; AVX1-NEXT: xorq $63, %rax 96; AVX1-NEXT: vmovq %rax, %xmm2 97; AVX1-NEXT: vmovq %xmm0, %rax 98; AVX1-NEXT: bsrq %rax, %rax 99; AVX1-NEXT: xorq $63, %rax 100; AVX1-NEXT: vmovq %rax, %xmm0 101; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 102; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 103; AVX1-NEXT: retq 104; 105; AVX2-LABEL: testv4i64u: 106; AVX2: # BB#0: 107; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 108; AVX2-NEXT: vpextrq $1, 
%xmm1, %rax 109; AVX2-NEXT: bsrq %rax, %rax 110; AVX2-NEXT: xorq $63, %rax 111; AVX2-NEXT: vmovq %rax, %xmm2 112; AVX2-NEXT: vmovq %xmm1, %rax 113; AVX2-NEXT: bsrq %rax, %rax 114; AVX2-NEXT: xorq $63, %rax 115; AVX2-NEXT: vmovq %rax, %xmm1 116; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 117; AVX2-NEXT: vpextrq $1, %xmm0, %rax 118; AVX2-NEXT: bsrq %rax, %rax 119; AVX2-NEXT: xorq $63, %rax 120; AVX2-NEXT: vmovq %rax, %xmm2 121; AVX2-NEXT: vmovq %xmm0, %rax 122; AVX2-NEXT: bsrq %rax, %rax 123; AVX2-NEXT: xorq $63, %rax 124; AVX2-NEXT: vmovq %rax, %xmm0 125; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 126; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 127; AVX2-NEXT: retq 128; 129; AVX512VLCD-LABEL: testv4i64u: 130; AVX512VLCD: ## BB#0: 131; AVX512VLCD-NEXT: vplzcntq %ymm0, %ymm0 132; AVX512VLCD-NEXT: retq 133; 134; AVX512CD-LABEL: testv4i64u: 135; AVX512CD: ## BB#0: 136; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0 137; AVX512CD-NEXT: retq 138 139 %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 -1) 140 ret <4 x i64> %out 141} 142 143define <8 x i32> @testv8i32(<8 x i32> %in) nounwind { 144; AVX1-LABEL: testv8i32: 145; AVX1: # BB#0: 146; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 147; AVX1-NEXT: vpextrd $1, %xmm1, %eax 148; AVX1-NEXT: bsrl %eax, %ecx 149; AVX1-NEXT: movl $63, %eax 150; AVX1-NEXT: cmovel %eax, %ecx 151; AVX1-NEXT: xorl $31, %ecx 152; AVX1-NEXT: vmovd %xmm1, %edx 153; AVX1-NEXT: bsrl %edx, %edx 154; AVX1-NEXT: cmovel %eax, %edx 155; AVX1-NEXT: xorl $31, %edx 156; AVX1-NEXT: vmovd %edx, %xmm2 157; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 158; AVX1-NEXT: vpextrd $2, %xmm1, %ecx 159; AVX1-NEXT: bsrl %ecx, %ecx 160; AVX1-NEXT: cmovel %eax, %ecx 161; AVX1-NEXT: xorl $31, %ecx 162; AVX1-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2 163; AVX1-NEXT: vpextrd $3, %xmm1, %ecx 164; AVX1-NEXT: bsrl %ecx, %ecx 165; AVX1-NEXT: cmovel %eax, %ecx 166; AVX1-NEXT: xorl $31, %ecx 167; AVX1-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm1 168; AVX1-NEXT: vpextrd $1, 
%xmm0, %ecx 169; AVX1-NEXT: bsrl %ecx, %ecx 170; AVX1-NEXT: cmovel %eax, %ecx 171; AVX1-NEXT: xorl $31, %ecx 172; AVX1-NEXT: vmovd %xmm0, %edx 173; AVX1-NEXT: bsrl %edx, %edx 174; AVX1-NEXT: cmovel %eax, %edx 175; AVX1-NEXT: xorl $31, %edx 176; AVX1-NEXT: vmovd %edx, %xmm2 177; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 178; AVX1-NEXT: vpextrd $2, %xmm0, %ecx 179; AVX1-NEXT: bsrl %ecx, %ecx 180; AVX1-NEXT: cmovel %eax, %ecx 181; AVX1-NEXT: xorl $31, %ecx 182; AVX1-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2 183; AVX1-NEXT: vpextrd $3, %xmm0, %ecx 184; AVX1-NEXT: bsrl %ecx, %ecx 185; AVX1-NEXT: cmovel %eax, %ecx 186; AVX1-NEXT: xorl $31, %ecx 187; AVX1-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm0 188; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 189; AVX1-NEXT: retq 190; 191; AVX2-LABEL: testv8i32: 192; AVX2: # BB#0: 193; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 194; AVX2-NEXT: vpextrd $1, %xmm1, %eax 195; AVX2-NEXT: bsrl %eax, %ecx 196; AVX2-NEXT: movl $63, %eax 197; AVX2-NEXT: cmovel %eax, %ecx 198; AVX2-NEXT: xorl $31, %ecx 199; AVX2-NEXT: vmovd %xmm1, %edx 200; AVX2-NEXT: bsrl %edx, %edx 201; AVX2-NEXT: cmovel %eax, %edx 202; AVX2-NEXT: xorl $31, %edx 203; AVX2-NEXT: vmovd %edx, %xmm2 204; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 205; AVX2-NEXT: vpextrd $2, %xmm1, %ecx 206; AVX2-NEXT: bsrl %ecx, %ecx 207; AVX2-NEXT: cmovel %eax, %ecx 208; AVX2-NEXT: xorl $31, %ecx 209; AVX2-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2 210; AVX2-NEXT: vpextrd $3, %xmm1, %ecx 211; AVX2-NEXT: bsrl %ecx, %ecx 212; AVX2-NEXT: cmovel %eax, %ecx 213; AVX2-NEXT: xorl $31, %ecx 214; AVX2-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm1 215; AVX2-NEXT: vpextrd $1, %xmm0, %ecx 216; AVX2-NEXT: bsrl %ecx, %ecx 217; AVX2-NEXT: cmovel %eax, %ecx 218; AVX2-NEXT: xorl $31, %ecx 219; AVX2-NEXT: vmovd %xmm0, %edx 220; AVX2-NEXT: bsrl %edx, %edx 221; AVX2-NEXT: cmovel %eax, %edx 222; AVX2-NEXT: xorl $31, %edx 223; AVX2-NEXT: vmovd %edx, %xmm2 224; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 225; AVX2-NEXT: vpextrd $2, %xmm0, %ecx 
226; AVX2-NEXT: bsrl %ecx, %ecx 227; AVX2-NEXT: cmovel %eax, %ecx 228; AVX2-NEXT: xorl $31, %ecx 229; AVX2-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2 230; AVX2-NEXT: vpextrd $3, %xmm0, %ecx 231; AVX2-NEXT: bsrl %ecx, %ecx 232; AVX2-NEXT: cmovel %eax, %ecx 233; AVX2-NEXT: xorl $31, %ecx 234; AVX2-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm0 235; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 236; AVX2-NEXT: retq 237; 238; AVX512VLCD-LABEL: testv8i32: 239; AVX512VLCD: ## BB#0: 240; AVX512VLCD-NEXT: vplzcntd %ymm0, %ymm0 241; AVX512VLCD-NEXT: retq 242; 243; AVX512CD-LABEL: testv8i32: 244; AVX512CD: ## BB#0: 245; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0 246; AVX512CD-NEXT: retq 247 248 %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 0) 249 ret <8 x i32> %out 250} 251 252define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind { 253; AVX1-LABEL: testv8i32u: 254; AVX1: # BB#0: 255; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 256; AVX1-NEXT: vpextrd $1, %xmm1, %eax 257; AVX1-NEXT: bsrl %eax, %eax 258; AVX1-NEXT: xorl $31, %eax 259; AVX1-NEXT: vmovd %xmm1, %ecx 260; AVX1-NEXT: bsrl %ecx, %ecx 261; AVX1-NEXT: xorl $31, %ecx 262; AVX1-NEXT: vmovd %ecx, %xmm2 263; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 264; AVX1-NEXT: vpextrd $2, %xmm1, %eax 265; AVX1-NEXT: bsrl %eax, %eax 266; AVX1-NEXT: xorl $31, %eax 267; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 268; AVX1-NEXT: vpextrd $3, %xmm1, %eax 269; AVX1-NEXT: bsrl %eax, %eax 270; AVX1-NEXT: xorl $31, %eax 271; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1 272; AVX1-NEXT: vpextrd $1, %xmm0, %eax 273; AVX1-NEXT: bsrl %eax, %eax 274; AVX1-NEXT: xorl $31, %eax 275; AVX1-NEXT: vmovd %xmm0, %ecx 276; AVX1-NEXT: bsrl %ecx, %ecx 277; AVX1-NEXT: xorl $31, %ecx 278; AVX1-NEXT: vmovd %ecx, %xmm2 279; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 280; AVX1-NEXT: vpextrd $2, %xmm0, %eax 281; AVX1-NEXT: bsrl %eax, %eax 282; AVX1-NEXT: xorl $31, %eax 283; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 284; AVX1-NEXT: vpextrd $3, %xmm0, %eax 285; AVX1-NEXT: bsrl %eax, 
%eax 286; AVX1-NEXT: xorl $31, %eax 287; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 288; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 289; AVX1-NEXT: retq 290; 291; AVX2-LABEL: testv8i32u: 292; AVX2: # BB#0: 293; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 294; AVX2-NEXT: vpextrd $1, %xmm1, %eax 295; AVX2-NEXT: bsrl %eax, %eax 296; AVX2-NEXT: xorl $31, %eax 297; AVX2-NEXT: vmovd %xmm1, %ecx 298; AVX2-NEXT: bsrl %ecx, %ecx 299; AVX2-NEXT: xorl $31, %ecx 300; AVX2-NEXT: vmovd %ecx, %xmm2 301; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 302; AVX2-NEXT: vpextrd $2, %xmm1, %eax 303; AVX2-NEXT: bsrl %eax, %eax 304; AVX2-NEXT: xorl $31, %eax 305; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 306; AVX2-NEXT: vpextrd $3, %xmm1, %eax 307; AVX2-NEXT: bsrl %eax, %eax 308; AVX2-NEXT: xorl $31, %eax 309; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1 310; AVX2-NEXT: vpextrd $1, %xmm0, %eax 311; AVX2-NEXT: bsrl %eax, %eax 312; AVX2-NEXT: xorl $31, %eax 313; AVX2-NEXT: vmovd %xmm0, %ecx 314; AVX2-NEXT: bsrl %ecx, %ecx 315; AVX2-NEXT: xorl $31, %ecx 316; AVX2-NEXT: vmovd %ecx, %xmm2 317; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 318; AVX2-NEXT: vpextrd $2, %xmm0, %eax 319; AVX2-NEXT: bsrl %eax, %eax 320; AVX2-NEXT: xorl $31, %eax 321; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 322; AVX2-NEXT: vpextrd $3, %xmm0, %eax 323; AVX2-NEXT: bsrl %eax, %eax 324; AVX2-NEXT: xorl $31, %eax 325; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 326; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 327; AVX2-NEXT: retq 328; 329; AVX512VLCD-LABEL: testv8i32u: 330; AVX512VLCD: ## BB#0: 331; AVX512VLCD-NEXT: vplzcntd %ymm0, %ymm0 332; AVX512VLCD-NEXT: retq 333; 334; AVX512CD-LABEL: testv8i32u: 335; AVX512CD: ## BB#0: 336; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0 337; AVX512CD-NEXT: retq 338 339 %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 -1) 340 ret <8 x i32> %out 341} 342 343define <16 x i16> @testv16i16(<16 x i16> %in) nounwind { 344; AVX1-LABEL: testv16i16: 345; AVX1: # BB#0: 346; AVX1-NEXT: vextractf128 $1, 
%ymm0, %xmm1 347; AVX1-NEXT: vpextrw $1, %xmm1, %eax 348; AVX1-NEXT: bsrw %ax, %cx 349; AVX1-NEXT: movw $31, %ax 350; AVX1-NEXT: cmovew %ax, %cx 351; AVX1-NEXT: xorl $15, %ecx 352; AVX1-NEXT: vmovd %xmm1, %edx 353; AVX1-NEXT: bsrw %dx, %dx 354; AVX1-NEXT: cmovew %ax, %dx 355; AVX1-NEXT: xorl $15, %edx 356; AVX1-NEXT: vmovd %edx, %xmm2 357; AVX1-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2 358; AVX1-NEXT: vpextrw $2, %xmm1, %ecx 359; AVX1-NEXT: bsrw %cx, %cx 360; AVX1-NEXT: cmovew %ax, %cx 361; AVX1-NEXT: xorl $15, %ecx 362; AVX1-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2 363; AVX1-NEXT: vpextrw $3, %xmm1, %ecx 364; AVX1-NEXT: bsrw %cx, %cx 365; AVX1-NEXT: cmovew %ax, %cx 366; AVX1-NEXT: xorl $15, %ecx 367; AVX1-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 368; AVX1-NEXT: vpextrw $4, %xmm1, %ecx 369; AVX1-NEXT: bsrw %cx, %cx 370; AVX1-NEXT: cmovew %ax, %cx 371; AVX1-NEXT: xorl $15, %ecx 372; AVX1-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2 373; AVX1-NEXT: vpextrw $5, %xmm1, %ecx 374; AVX1-NEXT: bsrw %cx, %cx 375; AVX1-NEXT: cmovew %ax, %cx 376; AVX1-NEXT: xorl $15, %ecx 377; AVX1-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 378; AVX1-NEXT: vpextrw $6, %xmm1, %ecx 379; AVX1-NEXT: bsrw %cx, %cx 380; AVX1-NEXT: cmovew %ax, %cx 381; AVX1-NEXT: xorl $15, %ecx 382; AVX1-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 383; AVX1-NEXT: vpextrw $7, %xmm1, %ecx 384; AVX1-NEXT: bsrw %cx, %cx 385; AVX1-NEXT: cmovew %ax, %cx 386; AVX1-NEXT: xorl $15, %ecx 387; AVX1-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm1 388; AVX1-NEXT: vpextrw $1, %xmm0, %ecx 389; AVX1-NEXT: bsrw %cx, %cx 390; AVX1-NEXT: cmovew %ax, %cx 391; AVX1-NEXT: xorl $15, %ecx 392; AVX1-NEXT: vmovd %xmm0, %edx 393; AVX1-NEXT: bsrw %dx, %dx 394; AVX1-NEXT: cmovew %ax, %dx 395; AVX1-NEXT: xorl $15, %edx 396; AVX1-NEXT: vmovd %edx, %xmm2 397; AVX1-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2 398; AVX1-NEXT: vpextrw $2, %xmm0, %ecx 399; AVX1-NEXT: bsrw %cx, %cx 400; AVX1-NEXT: cmovew %ax, %cx 401; AVX1-NEXT: xorl $15, %ecx 402; AVX1-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2 403; 
AVX1-NEXT: vpextrw $3, %xmm0, %ecx 404; AVX1-NEXT: bsrw %cx, %cx 405; AVX1-NEXT: cmovew %ax, %cx 406; AVX1-NEXT: xorl $15, %ecx 407; AVX1-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 408; AVX1-NEXT: vpextrw $4, %xmm0, %ecx 409; AVX1-NEXT: bsrw %cx, %cx 410; AVX1-NEXT: cmovew %ax, %cx 411; AVX1-NEXT: xorl $15, %ecx 412; AVX1-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2 413; AVX1-NEXT: vpextrw $5, %xmm0, %ecx 414; AVX1-NEXT: bsrw %cx, %cx 415; AVX1-NEXT: cmovew %ax, %cx 416; AVX1-NEXT: xorl $15, %ecx 417; AVX1-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 418; AVX1-NEXT: vpextrw $6, %xmm0, %ecx 419; AVX1-NEXT: bsrw %cx, %cx 420; AVX1-NEXT: cmovew %ax, %cx 421; AVX1-NEXT: xorl $15, %ecx 422; AVX1-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 423; AVX1-NEXT: vpextrw $7, %xmm0, %ecx 424; AVX1-NEXT: bsrw %cx, %cx 425; AVX1-NEXT: cmovew %ax, %cx 426; AVX1-NEXT: xorl $15, %ecx 427; AVX1-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm0 428; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 429; AVX1-NEXT: retq 430; 431; AVX2-LABEL: testv16i16: 432; AVX2: # BB#0: 433; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 434; AVX2-NEXT: vpextrw $1, %xmm1, %eax 435; AVX2-NEXT: bsrw %ax, %cx 436; AVX2-NEXT: movw $31, %ax 437; AVX2-NEXT: cmovew %ax, %cx 438; AVX2-NEXT: xorl $15, %ecx 439; AVX2-NEXT: vmovd %xmm1, %edx 440; AVX2-NEXT: bsrw %dx, %dx 441; AVX2-NEXT: cmovew %ax, %dx 442; AVX2-NEXT: xorl $15, %edx 443; AVX2-NEXT: vmovd %edx, %xmm2 444; AVX2-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2 445; AVX2-NEXT: vpextrw $2, %xmm1, %ecx 446; AVX2-NEXT: bsrw %cx, %cx 447; AVX2-NEXT: cmovew %ax, %cx 448; AVX2-NEXT: xorl $15, %ecx 449; AVX2-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2 450; AVX2-NEXT: vpextrw $3, %xmm1, %ecx 451; AVX2-NEXT: bsrw %cx, %cx 452; AVX2-NEXT: cmovew %ax, %cx 453; AVX2-NEXT: xorl $15, %ecx 454; AVX2-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 455; AVX2-NEXT: vpextrw $4, %xmm1, %ecx 456; AVX2-NEXT: bsrw %cx, %cx 457; AVX2-NEXT: cmovew %ax, %cx 458; AVX2-NEXT: xorl $15, %ecx 459; AVX2-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2 460; AVX2-NEXT: 
vpextrw $5, %xmm1, %ecx 461; AVX2-NEXT: bsrw %cx, %cx 462; AVX2-NEXT: cmovew %ax, %cx 463; AVX2-NEXT: xorl $15, %ecx 464; AVX2-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 465; AVX2-NEXT: vpextrw $6, %xmm1, %ecx 466; AVX2-NEXT: bsrw %cx, %cx 467; AVX2-NEXT: cmovew %ax, %cx 468; AVX2-NEXT: xorl $15, %ecx 469; AVX2-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 470; AVX2-NEXT: vpextrw $7, %xmm1, %ecx 471; AVX2-NEXT: bsrw %cx, %cx 472; AVX2-NEXT: cmovew %ax, %cx 473; AVX2-NEXT: xorl $15, %ecx 474; AVX2-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm1 475; AVX2-NEXT: vpextrw $1, %xmm0, %ecx 476; AVX2-NEXT: bsrw %cx, %cx 477; AVX2-NEXT: cmovew %ax, %cx 478; AVX2-NEXT: xorl $15, %ecx 479; AVX2-NEXT: vmovd %xmm0, %edx 480; AVX2-NEXT: bsrw %dx, %dx 481; AVX2-NEXT: cmovew %ax, %dx 482; AVX2-NEXT: xorl $15, %edx 483; AVX2-NEXT: vmovd %edx, %xmm2 484; AVX2-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2 485; AVX2-NEXT: vpextrw $2, %xmm0, %ecx 486; AVX2-NEXT: bsrw %cx, %cx 487; AVX2-NEXT: cmovew %ax, %cx 488; AVX2-NEXT: xorl $15, %ecx 489; AVX2-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2 490; AVX2-NEXT: vpextrw $3, %xmm0, %ecx 491; AVX2-NEXT: bsrw %cx, %cx 492; AVX2-NEXT: cmovew %ax, %cx 493; AVX2-NEXT: xorl $15, %ecx 494; AVX2-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 495; AVX2-NEXT: vpextrw $4, %xmm0, %ecx 496; AVX2-NEXT: bsrw %cx, %cx 497; AVX2-NEXT: cmovew %ax, %cx 498; AVX2-NEXT: xorl $15, %ecx 499; AVX2-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2 500; AVX2-NEXT: vpextrw $5, %xmm0, %ecx 501; AVX2-NEXT: bsrw %cx, %cx 502; AVX2-NEXT: cmovew %ax, %cx 503; AVX2-NEXT: xorl $15, %ecx 504; AVX2-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 505; AVX2-NEXT: vpextrw $6, %xmm0, %ecx 506; AVX2-NEXT: bsrw %cx, %cx 507; AVX2-NEXT: cmovew %ax, %cx 508; AVX2-NEXT: xorl $15, %ecx 509; AVX2-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 510; AVX2-NEXT: vpextrw $7, %xmm0, %ecx 511; AVX2-NEXT: bsrw %cx, %cx 512; AVX2-NEXT: cmovew %ax, %cx 513; AVX2-NEXT: xorl $15, %ecx 514; AVX2-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm0 515; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 
516; AVX2-NEXT: retq 517; 518; AVX512VLCD-LABEL: testv16i16: 519; AVX512VLCD: ## BB#0: 520; AVX512VLCD-NEXT: vpmovzxwd %ymm0, %zmm0 521; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0 522; AVX512VLCD-NEXT: vpmovdw %zmm0, %ymm0 523; AVX512VLCD-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0 524; AVX512VLCD-NEXT: retq 525; 526; AVX512CD-LABEL: testv16i16: 527; AVX512CD: ## BB#0: 528; AVX512CD-NEXT: vpmovzxwd %ymm0, %zmm0 529; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0 530; AVX512CD-NEXT: vpmovdw %zmm0, %ymm0 531; AVX512CD-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0 532; AVX512CD-NEXT: retq 533 %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 0) 534 ret <16 x i16> %out 535} 536 537define <16 x i16> @testv16i16u(<16 x i16> %in) nounwind { 538; AVX1-LABEL: testv16i16u: 539; AVX1: # BB#0: 540; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 541; AVX1-NEXT: vpextrw $1, %xmm1, %eax 542; AVX1-NEXT: bsrw %ax, %ax 543; AVX1-NEXT: xorl $15, %eax 544; AVX1-NEXT: vmovd %xmm1, %ecx 545; AVX1-NEXT: bsrw %cx, %cx 546; AVX1-NEXT: xorl $15, %ecx 547; AVX1-NEXT: vmovd %ecx, %xmm2 548; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 549; AVX1-NEXT: vpextrw $2, %xmm1, %eax 550; AVX1-NEXT: bsrw %ax, %ax 551; AVX1-NEXT: xorl $15, %eax 552; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 553; AVX1-NEXT: vpextrw $3, %xmm1, %eax 554; AVX1-NEXT: bsrw %ax, %ax 555; AVX1-NEXT: xorl $15, %eax 556; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2 557; AVX1-NEXT: vpextrw $4, %xmm1, %eax 558; AVX1-NEXT: bsrw %ax, %ax 559; AVX1-NEXT: xorl $15, %eax 560; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 561; AVX1-NEXT: vpextrw $5, %xmm1, %eax 562; AVX1-NEXT: bsrw %ax, %ax 563; AVX1-NEXT: xorl $15, %eax 564; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 565; AVX1-NEXT: vpextrw $6, %xmm1, %eax 566; AVX1-NEXT: bsrw %ax, %ax 567; AVX1-NEXT: xorl $15, %eax 568; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2 569; AVX1-NEXT: vpextrw $7, %xmm1, %eax 570; AVX1-NEXT: bsrw %ax, %ax 571; AVX1-NEXT: xorl $15, %eax 572; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1 573; 
AVX1-NEXT: vpextrw $1, %xmm0, %eax 574; AVX1-NEXT: bsrw %ax, %ax 575; AVX1-NEXT: xorl $15, %eax 576; AVX1-NEXT: vmovd %xmm0, %ecx 577; AVX1-NEXT: bsrw %cx, %cx 578; AVX1-NEXT: xorl $15, %ecx 579; AVX1-NEXT: vmovd %ecx, %xmm2 580; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 581; AVX1-NEXT: vpextrw $2, %xmm0, %eax 582; AVX1-NEXT: bsrw %ax, %ax 583; AVX1-NEXT: xorl $15, %eax 584; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 585; AVX1-NEXT: vpextrw $3, %xmm0, %eax 586; AVX1-NEXT: bsrw %ax, %ax 587; AVX1-NEXT: xorl $15, %eax 588; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2 589; AVX1-NEXT: vpextrw $4, %xmm0, %eax 590; AVX1-NEXT: bsrw %ax, %ax 591; AVX1-NEXT: xorl $15, %eax 592; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 593; AVX1-NEXT: vpextrw $5, %xmm0, %eax 594; AVX1-NEXT: bsrw %ax, %ax 595; AVX1-NEXT: xorl $15, %eax 596; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 597; AVX1-NEXT: vpextrw $6, %xmm0, %eax 598; AVX1-NEXT: bsrw %ax, %ax 599; AVX1-NEXT: xorl $15, %eax 600; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2 601; AVX1-NEXT: vpextrw $7, %xmm0, %eax 602; AVX1-NEXT: bsrw %ax, %ax 603; AVX1-NEXT: xorl $15, %eax 604; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0 605; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 606; AVX1-NEXT: retq 607; 608; AVX2-LABEL: testv16i16u: 609; AVX2: # BB#0: 610; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 611; AVX2-NEXT: vpextrw $1, %xmm1, %eax 612; AVX2-NEXT: bsrw %ax, %ax 613; AVX2-NEXT: xorl $15, %eax 614; AVX2-NEXT: vmovd %xmm1, %ecx 615; AVX2-NEXT: bsrw %cx, %cx 616; AVX2-NEXT: xorl $15, %ecx 617; AVX2-NEXT: vmovd %ecx, %xmm2 618; AVX2-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 619; AVX2-NEXT: vpextrw $2, %xmm1, %eax 620; AVX2-NEXT: bsrw %ax, %ax 621; AVX2-NEXT: xorl $15, %eax 622; AVX2-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 623; AVX2-NEXT: vpextrw $3, %xmm1, %eax 624; AVX2-NEXT: bsrw %ax, %ax 625; AVX2-NEXT: xorl $15, %eax 626; AVX2-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2 627; AVX2-NEXT: vpextrw $4, %xmm1, %eax 628; AVX2-NEXT: bsrw %ax, %ax 629; AVX2-NEXT: xorl 
$15, %eax 630; AVX2-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 631; AVX2-NEXT: vpextrw $5, %xmm1, %eax 632; AVX2-NEXT: bsrw %ax, %ax 633; AVX2-NEXT: xorl $15, %eax 634; AVX2-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 635; AVX2-NEXT: vpextrw $6, %xmm1, %eax 636; AVX2-NEXT: bsrw %ax, %ax 637; AVX2-NEXT: xorl $15, %eax 638; AVX2-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2 639; AVX2-NEXT: vpextrw $7, %xmm1, %eax 640; AVX2-NEXT: bsrw %ax, %ax 641; AVX2-NEXT: xorl $15, %eax 642; AVX2-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1 643; AVX2-NEXT: vpextrw $1, %xmm0, %eax 644; AVX2-NEXT: bsrw %ax, %ax 645; AVX2-NEXT: xorl $15, %eax 646; AVX2-NEXT: vmovd %xmm0, %ecx 647; AVX2-NEXT: bsrw %cx, %cx 648; AVX2-NEXT: xorl $15, %ecx 649; AVX2-NEXT: vmovd %ecx, %xmm2 650; AVX2-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 651; AVX2-NEXT: vpextrw $2, %xmm0, %eax 652; AVX2-NEXT: bsrw %ax, %ax 653; AVX2-NEXT: xorl $15, %eax 654; AVX2-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 655; AVX2-NEXT: vpextrw $3, %xmm0, %eax 656; AVX2-NEXT: bsrw %ax, %ax 657; AVX2-NEXT: xorl $15, %eax 658; AVX2-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2 659; AVX2-NEXT: vpextrw $4, %xmm0, %eax 660; AVX2-NEXT: bsrw %ax, %ax 661; AVX2-NEXT: xorl $15, %eax 662; AVX2-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 663; AVX2-NEXT: vpextrw $5, %xmm0, %eax 664; AVX2-NEXT: bsrw %ax, %ax 665; AVX2-NEXT: xorl $15, %eax 666; AVX2-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 667; AVX2-NEXT: vpextrw $6, %xmm0, %eax 668; AVX2-NEXT: bsrw %ax, %ax 669; AVX2-NEXT: xorl $15, %eax 670; AVX2-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2 671; AVX2-NEXT: vpextrw $7, %xmm0, %eax 672; AVX2-NEXT: bsrw %ax, %ax 673; AVX2-NEXT: xorl $15, %eax 674; AVX2-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0 675; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 676; AVX2-NEXT: retq 677; 678; AVX512VLCD-LABEL: testv16i16u: 679; AVX512VLCD: ## BB#0: 680; AVX512VLCD-NEXT: vpmovzxwd %ymm0, %zmm0 681; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0 682; AVX512VLCD-NEXT: vpmovdw %zmm0, %ymm0 683; AVX512VLCD-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0 684; 
AVX512VLCD-NEXT: retq 685; 686; AVX512CD-LABEL: testv16i16u: 687; AVX512CD: ## BB#0: 688; AVX512CD-NEXT: vpmovzxwd %ymm0, %zmm0 689; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0 690; AVX512CD-NEXT: vpmovdw %zmm0, %ymm0 691; AVX512CD-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0 692; AVX512CD-NEXT: retq 693 %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 -1) 694 ret <16 x i16> %out 695} 696 697define <32 x i8> @testv32i8(<32 x i8> %in) nounwind { 698; AVX1-LABEL: testv32i8: 699; AVX1: # BB#0: 700; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 701; AVX1-NEXT: vpextrb $1, %xmm1, %eax 702; AVX1-NEXT: bsrl %eax, %ecx 703; AVX1-NEXT: movl $15, %eax 704; AVX1-NEXT: cmovel %eax, %ecx 705; AVX1-NEXT: xorl $7, %ecx 706; AVX1-NEXT: vpextrb $0, %xmm1, %edx 707; AVX1-NEXT: bsrl %edx, %edx 708; AVX1-NEXT: cmovel %eax, %edx 709; AVX1-NEXT: xorl $7, %edx 710; AVX1-NEXT: vmovd %edx, %xmm2 711; AVX1-NEXT: vpinsrb $1, %ecx, %xmm2, %xmm2 712; AVX1-NEXT: vpextrb $2, %xmm1, %ecx 713; AVX1-NEXT: bsrl %ecx, %ecx 714; AVX1-NEXT: cmovel %eax, %ecx 715; AVX1-NEXT: xorl $7, %ecx 716; AVX1-NEXT: vpinsrb $2, %ecx, %xmm2, %xmm2 717; AVX1-NEXT: vpextrb $3, %xmm1, %ecx 718; AVX1-NEXT: bsrl %ecx, %ecx 719; AVX1-NEXT: cmovel %eax, %ecx 720; AVX1-NEXT: xorl $7, %ecx 721; AVX1-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2 722; AVX1-NEXT: vpextrb $4, %xmm1, %ecx 723; AVX1-NEXT: bsrl %ecx, %ecx 724; AVX1-NEXT: cmovel %eax, %ecx 725; AVX1-NEXT: xorl $7, %ecx 726; AVX1-NEXT: vpinsrb $4, %ecx, %xmm2, %xmm2 727; AVX1-NEXT: vpextrb $5, %xmm1, %ecx 728; AVX1-NEXT: bsrl %ecx, %ecx 729; AVX1-NEXT: cmovel %eax, %ecx 730; AVX1-NEXT: xorl $7, %ecx 731; AVX1-NEXT: vpinsrb $5, %ecx, %xmm2, %xmm2 732; AVX1-NEXT: vpextrb $6, %xmm1, %ecx 733; AVX1-NEXT: bsrl %ecx, %ecx 734; AVX1-NEXT: cmovel %eax, %ecx 735; AVX1-NEXT: xorl $7, %ecx 736; AVX1-NEXT: vpinsrb $6, %ecx, %xmm2, %xmm2 737; AVX1-NEXT: vpextrb $7, %xmm1, %ecx 738; AVX1-NEXT: bsrl %ecx, %ecx 739; AVX1-NEXT: cmovel %eax, %ecx 740; AVX1-NEXT: xorl $7, %ecx 741; AVX1-NEXT: vpinsrb 
$7, %ecx, %xmm2, %xmm2 742; AVX1-NEXT: vpextrb $8, %xmm1, %ecx 743; AVX1-NEXT: bsrl %ecx, %ecx 744; AVX1-NEXT: cmovel %eax, %ecx 745; AVX1-NEXT: xorl $7, %ecx 746; AVX1-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 747; AVX1-NEXT: vpextrb $9, %xmm1, %ecx 748; AVX1-NEXT: bsrl %ecx, %ecx 749; AVX1-NEXT: cmovel %eax, %ecx 750; AVX1-NEXT: xorl $7, %ecx 751; AVX1-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2 752; AVX1-NEXT: vpextrb $10, %xmm1, %ecx 753; AVX1-NEXT: bsrl %ecx, %ecx 754; AVX1-NEXT: cmovel %eax, %ecx 755; AVX1-NEXT: xorl $7, %ecx 756; AVX1-NEXT: vpinsrb $10, %ecx, %xmm2, %xmm2 757; AVX1-NEXT: vpextrb $11, %xmm1, %ecx 758; AVX1-NEXT: bsrl %ecx, %ecx 759; AVX1-NEXT: cmovel %eax, %ecx 760; AVX1-NEXT: xorl $7, %ecx 761; AVX1-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2 762; AVX1-NEXT: vpextrb $12, %xmm1, %ecx 763; AVX1-NEXT: bsrl %ecx, %ecx 764; AVX1-NEXT: cmovel %eax, %ecx 765; AVX1-NEXT: xorl $7, %ecx 766; AVX1-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2 767; AVX1-NEXT: vpextrb $13, %xmm1, %ecx 768; AVX1-NEXT: bsrl %ecx, %ecx 769; AVX1-NEXT: cmovel %eax, %ecx 770; AVX1-NEXT: xorl $7, %ecx 771; AVX1-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2 772; AVX1-NEXT: vpextrb $14, %xmm1, %ecx 773; AVX1-NEXT: bsrl %ecx, %ecx 774; AVX1-NEXT: cmovel %eax, %ecx 775; AVX1-NEXT: xorl $7, %ecx 776; AVX1-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2 777; AVX1-NEXT: vpextrb $15, %xmm1, %ecx 778; AVX1-NEXT: bsrl %ecx, %ecx 779; AVX1-NEXT: cmovel %eax, %ecx 780; AVX1-NEXT: xorl $7, %ecx 781; AVX1-NEXT: vpinsrb $15, %ecx, %xmm2, %xmm1 782; AVX1-NEXT: vpextrb $1, %xmm0, %ecx 783; AVX1-NEXT: bsrl %ecx, %ecx 784; AVX1-NEXT: cmovel %eax, %ecx 785; AVX1-NEXT: xorl $7, %ecx 786; AVX1-NEXT: vpextrb $0, %xmm0, %edx 787; AVX1-NEXT: bsrl %edx, %edx 788; AVX1-NEXT: cmovel %eax, %edx 789; AVX1-NEXT: xorl $7, %edx 790; AVX1-NEXT: vmovd %edx, %xmm2 791; AVX1-NEXT: vpinsrb $1, %ecx, %xmm2, %xmm2 792; AVX1-NEXT: vpextrb $2, %xmm0, %ecx 793; AVX1-NEXT: bsrl %ecx, %ecx 794; AVX1-NEXT: cmovel %eax, %ecx 795; AVX1-NEXT: xorl $7, %ecx 796; 
AVX1-NEXT: vpinsrb $2, %ecx, %xmm2, %xmm2 797; AVX1-NEXT: vpextrb $3, %xmm0, %ecx 798; AVX1-NEXT: bsrl %ecx, %ecx 799; AVX1-NEXT: cmovel %eax, %ecx 800; AVX1-NEXT: xorl $7, %ecx 801; AVX1-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2 802; AVX1-NEXT: vpextrb $4, %xmm0, %ecx 803; AVX1-NEXT: bsrl %ecx, %ecx 804; AVX1-NEXT: cmovel %eax, %ecx 805; AVX1-NEXT: xorl $7, %ecx 806; AVX1-NEXT: vpinsrb $4, %ecx, %xmm2, %xmm2 807; AVX1-NEXT: vpextrb $5, %xmm0, %ecx 808; AVX1-NEXT: bsrl %ecx, %ecx 809; AVX1-NEXT: cmovel %eax, %ecx 810; AVX1-NEXT: xorl $7, %ecx 811; AVX1-NEXT: vpinsrb $5, %ecx, %xmm2, %xmm2 812; AVX1-NEXT: vpextrb $6, %xmm0, %ecx 813; AVX1-NEXT: bsrl %ecx, %ecx 814; AVX1-NEXT: cmovel %eax, %ecx 815; AVX1-NEXT: xorl $7, %ecx 816; AVX1-NEXT: vpinsrb $6, %ecx, %xmm2, %xmm2 817; AVX1-NEXT: vpextrb $7, %xmm0, %ecx 818; AVX1-NEXT: bsrl %ecx, %ecx 819; AVX1-NEXT: cmovel %eax, %ecx 820; AVX1-NEXT: xorl $7, %ecx 821; AVX1-NEXT: vpinsrb $7, %ecx, %xmm2, %xmm2 822; AVX1-NEXT: vpextrb $8, %xmm0, %ecx 823; AVX1-NEXT: bsrl %ecx, %ecx 824; AVX1-NEXT: cmovel %eax, %ecx 825; AVX1-NEXT: xorl $7, %ecx 826; AVX1-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 827; AVX1-NEXT: vpextrb $9, %xmm0, %ecx 828; AVX1-NEXT: bsrl %ecx, %ecx 829; AVX1-NEXT: cmovel %eax, %ecx 830; AVX1-NEXT: xorl $7, %ecx 831; AVX1-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2 832; AVX1-NEXT: vpextrb $10, %xmm0, %ecx 833; AVX1-NEXT: bsrl %ecx, %ecx 834; AVX1-NEXT: cmovel %eax, %ecx 835; AVX1-NEXT: xorl $7, %ecx 836; AVX1-NEXT: vpinsrb $10, %ecx, %xmm2, %xmm2 837; AVX1-NEXT: vpextrb $11, %xmm0, %ecx 838; AVX1-NEXT: bsrl %ecx, %ecx 839; AVX1-NEXT: cmovel %eax, %ecx 840; AVX1-NEXT: xorl $7, %ecx 841; AVX1-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2 842; AVX1-NEXT: vpextrb $12, %xmm0, %ecx 843; AVX1-NEXT: bsrl %ecx, %ecx 844; AVX1-NEXT: cmovel %eax, %ecx 845; AVX1-NEXT: xorl $7, %ecx 846; AVX1-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2 847; AVX1-NEXT: vpextrb $13, %xmm0, %ecx 848; AVX1-NEXT: bsrl %ecx, %ecx 849; AVX1-NEXT: cmovel %eax, %ecx 850; 
; AVX1-NEXT:    xorl $7, %ecx
; AVX1-NEXT:    vpinsrb $13, %ecx, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $14, %xmm0, %ecx
; AVX1-NEXT:    bsrl %ecx, %ecx
; AVX1-NEXT:    cmovel %eax, %ecx
; AVX1-NEXT:    xorl $7, %ecx
; AVX1-NEXT:    vpinsrb $14, %ecx, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $15, %xmm0, %ecx
; AVX1-NEXT:    bsrl %ecx, %ecx
; AVX1-NEXT:    cmovel %eax, %ecx
; AVX1-NEXT:    xorl $7, %ecx
; AVX1-NEXT:    vpinsrb $15, %ecx, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv32i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpextrb $1, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %ecx
; AVX2-NEXT:    movl $15, %eax
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpextrb $0, %xmm1, %edx
; AVX2-NEXT:    bsrl %edx, %edx
; AVX2-NEXT:    cmovel %eax, %edx
; AVX2-NEXT:    xorl $7, %edx
; AVX2-NEXT:    vmovd %edx, %xmm2
; AVX2-NEXT:    vpinsrb $1, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $2, %xmm1, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $2, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $3, %xmm1, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $3, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $4, %xmm1, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $4, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $5, %xmm1, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $5, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $6, %xmm1, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $6, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $7, %xmm1, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $7, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $8, %xmm1, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $8, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $9, %xmm1, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $9, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $10, %xmm1, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $10, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $11, %xmm1, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $11, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $12, %xmm1, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $12, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $13, %xmm1, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $13, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $14, %xmm1, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $14, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $15, %xmm1, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $15, %ecx, %xmm2, %xmm1
; AVX2-NEXT:    vpextrb $1, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpextrb $0, %xmm0, %edx
; AVX2-NEXT:    bsrl %edx, %edx
; AVX2-NEXT:    cmovel %eax, %edx
; AVX2-NEXT:    xorl $7, %edx
; AVX2-NEXT:    vmovd %edx, %xmm2
; AVX2-NEXT:    vpinsrb $1, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $2, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $2, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $3, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $3, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $4, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $4, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $5, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $5, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $6, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $6, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $7, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $7, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $8, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $8, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $9, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $9, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $10, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $10, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $11, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $11, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $12, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $12, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $13, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $13, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $14, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $14, %ecx, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $15, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    cmovel %eax, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vpinsrb $15, %ecx, %xmm2, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VLCD-LABEL: testv32i8:
; AVX512VLCD:       ## BB#0:
; AVX512VLCD-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512VLCD-NEXT:    vpmovzxbd %xmm1, %zmm1
; AVX512VLCD-NEXT:    vplzcntd %zmm1, %zmm1
; AVX512VLCD-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512VLCD-NEXT:    vmovdqa64 {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512VLCD-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
; AVX512VLCD-NEXT:    vpmovzxbd %xmm0, %zmm0
; AVX512VLCD-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512VLCD-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VLCD-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
; AVX512VLCD-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: testv32i8:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512CD-NEXT:    vpmovzxbd %xmm1, %zmm1
; AVX512CD-NEXT:    vplzcntd %zmm1, %zmm1
; AVX512CD-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512CD-NEXT:    vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512CD-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
; AVX512CD-NEXT:    vpmovzxbd %xmm0, %zmm0
; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512CD-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
; AVX512CD-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512CD-NEXT:    retq
  %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 0)
  ret <32 x i8> %out
}

; Same as testv32i8 but with the zero-is-undef flag set (i1 -1): the per-byte
; scalar expansion drops the cmovel-based zero guard that the i1 0 variant
; needs for zero inputs.
define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
; AVX1-LABEL: testv32i8u:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpextrb $1, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX1-NEXT:    bsrl %ecx, %ecx
; AVX1-NEXT:    xorl $7, %ecx
; AVX1-NEXT:    vmovd %ecx, %xmm2
; AVX1-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $2, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $3, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $4, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $5, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $6, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $7, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $8, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $9, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $10, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $11, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $12, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $13, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $14, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $15, %xmm1, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm1
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX1-NEXT:    bsrl %ecx, %ecx
; AVX1-NEXT:    xorl $7, %ecx
; AVX1-NEXT:    vmovd %ecx, %xmm2
; AVX1-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $3, %xmm0, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $5, %xmm0, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $9, %xmm0, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $11, %xmm0, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $13, %xmm0, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
; AVX1-NEXT:    bsrl %eax, %eax
; AVX1-NEXT:    xorl $7, %eax
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv32i8u:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpextrb $1, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpextrb $0, %xmm1, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vmovd %ecx, %xmm2
; AVX2-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $2, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $3, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $4, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $5, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $6, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $7, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $8, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $9, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $10, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $11, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $12, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $13, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $14, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $15, %xmm1, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm1
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpextrb $0, %xmm0, %ecx
; AVX2-NEXT:    bsrl %ecx, %ecx
; AVX2-NEXT:    xorl $7, %ecx
; AVX2-NEXT:    vmovd %ecx, %xmm2
; AVX2-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
; AVX2-NEXT:    bsrl %eax, %eax
; AVX2-NEXT:    xorl $7, %eax
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VLCD-LABEL: testv32i8u:
; AVX512VLCD:       ## BB#0:
; AVX512VLCD-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512VLCD-NEXT:    vpmovzxbd %xmm1, %zmm1
; AVX512VLCD-NEXT:    vplzcntd %zmm1, %zmm1
; AVX512VLCD-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512VLCD-NEXT:    vmovdqa64 {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512VLCD-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
; AVX512VLCD-NEXT:    vpmovzxbd %xmm0, %zmm0
; AVX512VLCD-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512VLCD-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VLCD-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
; AVX512VLCD-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: testv32i8u:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512CD-NEXT:    vpmovzxbd %xmm1, %zmm1
; AVX512CD-NEXT:    vplzcntd %zmm1, %zmm1
; AVX512CD-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512CD-NEXT:    vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512CD-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
; AVX512CD-NEXT:    vpmovzxbd %xmm0, %zmm0
; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512CD-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
; AVX512CD-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512CD-NEXT:    retq
  %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 -1)
  ret <32 x i8> %out
}

; Constant-input tests: ctlz of an all-constant vector is folded at compile
; time to a constant-pool load (vmovaps/vmovdqa of the precomputed results).
define <4 x i64> @foldv4i64() nounwind {
; AVX-LABEL: foldv4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,64,56]
; AVX-NEXT:    retq
;
; AVX512VLCD-LABEL: foldv4i64:
; AVX512VLCD:       ## BB#0:
; AVX512VLCD-NEXT:    vmovdqa64 {{.*#+}} ymm0 = [55,0,64,56]
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: foldv4i64:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,64,56]
; AVX512CD-NEXT:    retq
  %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
  ret <4 x i64> %out
}

define <4 x i64> @foldv4i64u() nounwind {
; AVX-LABEL: foldv4i64u:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,64,56]
; AVX-NEXT:    retq
;
; AVX512VLCD-LABEL: foldv4i64u:
; AVX512VLCD:       ## BB#0:
; AVX512VLCD-NEXT:    vmovdqa64 {{.*#+}} ymm0 = [55,0,64,56]
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: foldv4i64u:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,64,56]
; AVX512CD-NEXT:    retq
  %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
  ret <4 x i64> %out
}

define <8 x i32> @foldv8i32() nounwind {
; AVX-LABEL: foldv8i32:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
; AVX-NEXT:    retq
;
; AVX512VLCD-LABEL: foldv8i32:
; AVX512VLCD:       ## BB#0:
; AVX512VLCD-NEXT:    vmovdqa32 {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: foldv8i32:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
; AVX512CD-NEXT:    retq
  %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
  ret <8 x i32> %out
}

define <8 x i32> @foldv8i32u() nounwind {
; AVX-LABEL: foldv8i32u:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
; AVX-NEXT:    retq
;
; AVX512VLCD-LABEL: foldv8i32u:
; AVX512VLCD:       ## BB#0:
; AVX512VLCD-NEXT:    vmovdqa32 {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: foldv8i32u:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
; AVX512CD-NEXT:    retq
  %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
  ret <8 x i32> %out
}

define <16 x i16> @foldv16i16() nounwind {
; AVX-LABEL: foldv16i16:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
; AVX-NEXT:    retq
;
; AVX512VLCD-LABEL: foldv16i16:
; AVX512VLCD:       ## BB#0:
; AVX512VLCD-NEXT:    vmovdqa64 {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: foldv16i16:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
; AVX512CD-NEXT:    retq
  %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
  ret <16 x i16> %out
}

define <16 x i16> @foldv16i16u() nounwind {
; AVX-LABEL: foldv16i16u:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
; AVX-NEXT:    retq
;
; AVX512VLCD-LABEL: foldv16i16u:
; AVX512VLCD:       ## BB#0:
; AVX512VLCD-NEXT:    vmovdqa64 {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: foldv16i16u:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
; AVX512CD-NEXT:    retq
  %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
  ret <16 x i16> %out
}

define <32 x i8> @foldv32i8() nounwind {
; AVX-LABEL: foldv32i8:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
; AVX-NEXT:    retq
;
; AVX512VLCD-LABEL: foldv32i8:
; AVX512VLCD:       ## BB#0:
; AVX512VLCD-NEXT:    vmovdqa64 {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: foldv32i8:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
; AVX512CD-NEXT:    retq
  %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
  ret <32 x i8> %out
}

define <32 x i8> @foldv32i8u() nounwind {
; AVX-LABEL: foldv32i8u:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
; AVX-NEXT:    retq
;
; AVX512VLCD-LABEL: foldv32i8u:
; AVX512VLCD:       ## BB#0:
; AVX512VLCD-NEXT:    vmovdqa64 {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
; AVX512VLCD-NEXT:    retq
;
; AVX512CD-LABEL: foldv32i8u:
; AVX512CD:       ## BB#0:
; AVX512CD-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
; AVX512CD-NEXT:    retq
  %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
  ret <32 x i8> %out
}

; ctlz intrinsic declarations; the second i1 argument is the is-zero-undef flag.
declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)