; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE(review): machine-generated assertion lines below; regenerate with the
; script above rather than editing them by hand.
; Vector ctlz (count leading zeros) codegen tests for 256-bit integer vectors.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=NOBW --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=NOBW --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=NOBW --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefix=X64 --check-prefix=AVX512VLBWDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512cd,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=NOBW --check-prefix=AVX512 --check-prefix=AVX512VLCD
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512cd | FileCheck %s --check-prefix=X64 --check-prefix=NOBW --check-prefix=AVX512 --check-prefix=AVX512CD
;
; Just one 32-bit run to make sure we do reasonable things for i64 lzcnt.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32-AVX

; ctlz on <4 x i64> with a defined result (64) for zero inputs (i1 0 flag).
; Targets with the CD extension lower this to a single vplzcntq; all others
; expand via a per-nibble vpshufb lookup table whose partial counts are merged
; pairwise from byte elements up through 64-bit elements.
define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
; AVX1-LABEL: testv4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm1
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm6
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm6, %xmm7
; AVX1-NEXT: vpand %xmm7, %xmm5, %xmm5
; AVX1-NEXT: vpshufb %xmm6, %xmm4, %xmm6
; AVX1-NEXT: vpaddb %xmm6, %xmm5, %xmm5
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm6
; AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6
; AVX1-NEXT: vpand %xmm6, %xmm5, %xmm6
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT: vpaddw %xmm6, %xmm5, %xmm5
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm2, %xmm6
; AVX1-NEXT: vpsrld $16, %xmm6, %xmm6
; AVX1-NEXT: vpand %xmm6, %xmm5, %xmm6
; AVX1-NEXT: vpsrld $16, %xmm5, %xmm5
; AVX1-NEXT: vpaddd %xmm6, %xmm5, %xmm5
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm2
; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5
; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm5
; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm6
; AVX1-NEXT: vpand %xmm3, %xmm6, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm3, %xmm6
; AVX1-NEXT: vpand %xmm6, %xmm5, %xmm5
; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpaddb %xmm3, %xmm5, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm4
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm4
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpaddw %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm4
; AVX1-NEXT: vpsrld $16, %xmm4, %xmm4
; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm4
; AVX1-NEXT: vpsrld $16, %xmm3, %xmm3
; AVX1-NEXT: vpaddd %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm1
; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX2-NEXT: vpand %ymm1, %ymm4, %ymm1
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
; AVX2-NEXT: vpsrld $16, %ymm2, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm1
; AVX2-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: testv4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512VL-NEXT: vpand %ymm1, %ymm4, %ymm1
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm2
; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512VL-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
; AVX512VL-NEXT: vpsrld $16, %ymm2, %ymm2
; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm2
; AVX512VL-NEXT: vpsrld $16, %ymm1, %ymm1
; AVX512VL-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: vpsrlq $32, %ymm1, %ymm1
; AVX512VL-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512VLBWDQ-LABEL: testv4i64:
; AVX512VLBWDQ: # %bb.0:
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBWDQ-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512VLBWDQ-NEXT: vpand %ymm1, %ymm4, %ymm1
; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; AVX512VLBWDQ-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT: vpand %ymm2, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT: vpand %ymm2, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT: vpsrlq $32, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT: vpsrlq $32, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT: retq
;
; AVX512VLCD-LABEL: testv4i64:
; AVX512VLCD: # %bb.0:
; AVX512VLCD-NEXT: vplzcntq %ymm0, %ymm0
; AVX512VLCD-NEXT: retq
;
; AVX512CD-LABEL: testv4i64:
; AVX512CD: # %bb.0:
; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512CD-NEXT: retq
;
; X32-AVX-LABEL: testv4i64:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm4
; X32-AVX-NEXT: vpand %ymm1, %ymm4, %ymm1
; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
; X32-AVX-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; X32-AVX-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
; X32-AVX-NEXT: vpsrlw $8, %ymm2, %ymm2
; X32-AVX-NEXT: vpand %ymm2, %ymm1, %ymm2
; X32-AVX-NEXT: vpsrlw $8, %ymm1, %ymm1
; X32-AVX-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; X32-AVX-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
; X32-AVX-NEXT: vpsrld $16, %ymm2, %ymm2
; X32-AVX-NEXT: vpand %ymm2, %ymm1, %ymm2
; X32-AVX-NEXT: vpsrld $16, %ymm1, %ymm1
; X32-AVX-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; X32-AVX-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
; X32-AVX-NEXT: vpsrlq $32, %ymm0, %ymm0
; X32-AVX-NEXT: vpand %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT: vpsrlq $32, %ymm1, %ymm1
; X32-AVX-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT: retl

  %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 0)
  ret <4 x i64> %out
}

; As testv4i64, but the i1 -1 flag makes the result poison for a zero input
; (the expansion itself is unchanged here).
define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind {
; AVX1-LABEL: testv4i64u:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm1
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm6
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm6, %xmm7
; AVX1-NEXT: vpand %xmm7, %xmm5, %xmm5
; AVX1-NEXT: vpshufb %xmm6, %xmm4, %xmm6
; AVX1-NEXT: vpaddb %xmm6, %xmm5, %xmm5
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm6
; AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6
; AVX1-NEXT: vpand %xmm6, %xmm5, %xmm6
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT: vpaddw %xmm6, %xmm5, %xmm5
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm2, %xmm6
; AVX1-NEXT: vpsrld $16, %xmm6, %xmm6
; AVX1-NEXT: vpand %xmm6, %xmm5, %xmm6
; AVX1-NEXT: vpsrld $16, %xmm5, %xmm5
; AVX1-NEXT: vpaddd %xmm6, %xmm5, %xmm5
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm2
; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5
; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm5
; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm6
; AVX1-NEXT: vpand %xmm3, %xmm6, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm3, %xmm6
; AVX1-NEXT: vpand %xmm6, %xmm5, %xmm5
; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpaddb %xmm3, %xmm5, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm4
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm4
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpaddw %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm4
; AVX1-NEXT: vpsrld $16, %xmm4, %xmm4
; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm4
; AVX1-NEXT: vpsrld $16, %xmm3, %xmm3
; AVX1-NEXT: vpaddd %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm1
; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv4i64u:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX2-NEXT: vpand %ymm1, %ymm4, %ymm1
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
; AVX2-NEXT: vpsrld $16, %ymm2, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm1
; AVX2-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: testv4i64u:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512VL-NEXT: vpand %ymm1, %ymm4, %ymm1
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm2
; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512VL-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
; AVX512VL-NEXT: vpsrld $16, %ymm2, %ymm2
; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm2
; AVX512VL-NEXT: vpsrld $16, %ymm1, %ymm1
; AVX512VL-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: vpsrlq $32, %ymm1, %ymm1
; AVX512VL-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512VLBWDQ-LABEL: testv4i64u:
; AVX512VLBWDQ: # %bb.0:
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBWDQ-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512VLBWDQ-NEXT: vpand %ymm1, %ymm4, %ymm1
; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; AVX512VLBWDQ-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT: vpand %ymm2, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT: vpand %ymm2, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT: vpsrlq $32, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT: vpsrlq $32, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT: retq
;
; AVX512VLCD-LABEL: testv4i64u:
; AVX512VLCD: # %bb.0:
; AVX512VLCD-NEXT: vplzcntq %ymm0, %ymm0
; AVX512VLCD-NEXT: retq
;
; AVX512CD-LABEL: testv4i64u:
; AVX512CD: # %bb.0:
; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512CD-NEXT: retq
;
; X32-AVX-LABEL: testv4i64u:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm4
; X32-AVX-NEXT: vpand %ymm1, %ymm4, %ymm1
; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
; X32-AVX-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; X32-AVX-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
; X32-AVX-NEXT: vpsrlw $8, %ymm2, %ymm2
; X32-AVX-NEXT: vpand %ymm2, %ymm1, %ymm2
; X32-AVX-NEXT: vpsrlw $8, %ymm1, %ymm1
; X32-AVX-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; X32-AVX-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
; X32-AVX-NEXT: vpsrld $16, %ymm2, %ymm2
; X32-AVX-NEXT: vpand %ymm2, %ymm1, %ymm2
; X32-AVX-NEXT: vpsrld $16, %ymm1, %ymm1
; X32-AVX-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; X32-AVX-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
; X32-AVX-NEXT: vpsrlq $32, %ymm0, %ymm0
; X32-AVX-NEXT: vpand %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT: vpsrlq $32, %ymm1, %ymm1
; X32-AVX-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT: retl

  %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 -1)
  ret <4 x i64> %out
}

; ctlz on <8 x i32> with a defined result for zero inputs (i1 0 flag); the
; LUT expansion here stops merging at 32-bit elements (vpaddd).
define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
; AVX1-LABEL: testv8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm5
; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm5
; AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX1-NEXT: vpcmpeqb %xmm6, %xmm5, %xmm7
; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpaddb %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm6, %xmm1, %xmm5
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm5
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpaddw %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqw %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vpsrld $16, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vpsrld $16, %xmm3, %xmm3
; AVX1-NEXT: vpaddd %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm5
; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm6, %xmm2, %xmm5
; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpshufb %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm6, %xmm0, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm6, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2
; AVX1-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX2-NEXT: vpand %ymm1, %ymm4, %ymm1
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: testv8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512VL-NEXT: vpand %ymm1, %ymm4, %ymm1
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm2
; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512VL-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: vpsrld $16, %ymm1, %ymm1
; AVX512VL-NEXT: vpaddd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512VLBWDQ-LABEL: testv8i32:
; AVX512VLBWDQ: # %bb.0:
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBWDQ-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512VLBWDQ-NEXT: vpand %ymm1, %ymm4, %ymm1
; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; AVX512VLBWDQ-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT: vpand %ymm2, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT: vpaddd %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT: retq
;
; AVX512VLCD-LABEL: testv8i32:
; AVX512VLCD: # %bb.0:
; AVX512VLCD-NEXT: vplzcntd %ymm0, %ymm0
; AVX512VLCD-NEXT: retq
;
; AVX512CD-LABEL: testv8i32:
; AVX512CD: # %bb.0:
; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512CD-NEXT: retq
;
; X32-AVX-LABEL: testv8i32:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm4
; X32-AVX-NEXT: vpand %ymm1, %ymm4, %ymm1
; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
; X32-AVX-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; X32-AVX-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
; X32-AVX-NEXT: vpsrlw $8, %ymm2, %ymm2
; X32-AVX-NEXT: vpand %ymm2, %ymm1, %ymm2
; X32-AVX-NEXT: vpsrlw $8, %ymm1, %ymm1
; X32-AVX-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; X32-AVX-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
; X32-AVX-NEXT: vpsrld $16, %ymm0, %ymm0
; X32-AVX-NEXT: vpand %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT: vpsrld $16, %ymm1, %ymm1
; X32-AVX-NEXT: vpaddd %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT: retl

  %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 0)
  ret <8 x i32> %out
}

; As testv8i32, but the i1 -1 flag makes the result poison for a zero input
; (the expansion itself is unchanged here).
define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind {
; AVX1-LABEL: testv8i32u:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm5
; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm5
; AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX1-NEXT: vpcmpeqb %xmm6, %xmm5, %xmm7
; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpaddb %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm6, %xmm1, %xmm5
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm5
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpaddw %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqw %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vpsrld $16, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vpsrld $16, %xmm3, %xmm3
; AVX1-NEXT: vpaddd %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm5
; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm6, %xmm2, %xmm5
; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpshufb %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm6, %xmm0, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm6, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2
; AVX1-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv8i32u:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX2-NEXT: vpand %ymm1, %ymm4, %ymm1
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: testv8i32u:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512VL-NEXT: vpand %ymm1, %ymm4, %ymm1
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm2
; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512VL-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: vpsrld $16, %ymm1, %ymm1
; AVX512VL-NEXT: vpaddd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512VLBWDQ-LABEL: testv8i32u:
; AVX512VLBWDQ: # %bb.0:
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBWDQ-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512VLBWDQ-NEXT: vpand %ymm1, %ymm4, %ymm1
; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; AVX512VLBWDQ-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT: vpand %ymm2, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT: vpaddd %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT: retq
;
; AVX512VLCD-LABEL: testv8i32u:
; AVX512VLCD: # %bb.0:
; AVX512VLCD-NEXT: vplzcntd %ymm0, %ymm0
; AVX512VLCD-NEXT: retq
;
; AVX512CD-LABEL: testv8i32u:
; AVX512CD: # %bb.0:
; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512CD-NEXT: retq
;
; X32-AVX-LABEL: testv8i32u:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT: vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; X32-AVX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm4
; X32-AVX-NEXT: vpand %ymm1, %ymm4, %ymm1
; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm5
; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
; X32-AVX-NEXT: vpshufb %ymm1, %ymm3, %ymm1
; X32-AVX-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
; X32-AVX-NEXT: vpsrlw $8, %ymm2, %ymm2
; X32-AVX-NEXT: vpand %ymm2, %ymm1, %ymm2
; X32-AVX-NEXT: vpsrlw $8, %ymm1, %ymm1
; X32-AVX-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; X32-AVX-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
; X32-AVX-NEXT: vpsrld $16, %ymm0, %ymm0
; X32-AVX-NEXT: vpand %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT: vpsrld $16, %ymm1, %ymm1
; X32-AVX-NEXT: vpaddd %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    retl

  %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 -1)
  ret <8 x i32> %out
}

; ctlz of <16 x i16> with the zero-undef flag clear (trailing i1 0), so
; ctlz(0) must yield the bit width (16); on targets without vplzcnt this
; expands to a per-nibble vpshufb LUT plus zero-masked pairwise merges.
define <16 x i16> @testv16i16(<16 x i16> %in) nounwind {
; AVX1-LABEL: testv16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm5
; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm6, %xmm6, %xmm6
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm5, %xmm7
; AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT:    vpaddb %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpaddw %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm5
; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm2, %xmm5
; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpshufb %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpaddb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT:    vpaddw %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX2-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX2-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX2-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: testv16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512VL-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VL-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512VL-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512VL-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512VLBWDQ-LABEL: testv16i16:
; AVX512VLBWDQ:       # %bb.0:
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX512VLBWDQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VLBWDQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    retq
;
; AVX512-LABEL: testv16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; X32-AVX-LABEL: testv16i16:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm4
; X32-AVX-NEXT:    vpand %ymm1, %ymm4, %ymm1
; X32-AVX-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; X32-AVX-NEXT:    vpand %ymm5, %ymm2, %ymm2
; X32-AVX-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; X32-AVX-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
; X32-AVX-NEXT:    vpsrlw $8, %ymm0, %ymm0
; X32-AVX-NEXT:    vpand %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    vpsrlw $8, %ymm1, %ymm1
; X32-AVX-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    retl
  %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 0)
  ret <16 x i16> %out
}

; Same operation as testv16i16 but with the zero-undef flag set (i1 -1),
; letting the backend assume no element is zero; codegen is unchanged here.
define <16 x i16> @testv16i16u(<16 x i16> %in) nounwind {
; AVX1-LABEL: testv16i16u:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm5
; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm6, %xmm6, %xmm6
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm5, %xmm7
; AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT:    vpaddb %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpaddw %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm5
; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm2, %xmm5
; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpshufb %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpaddb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT:    vpaddw %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv16i16u:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX2-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX2-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX2-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX2-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: testv16i16u:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512VL-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VL-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512VL-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512VL-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512VLBWDQ-LABEL: testv16i16u:
; AVX512VLBWDQ:       # %bb.0:
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX512VLBWDQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; AVX512VLBWDQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512VLBWDQ-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512VLBWDQ-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    retq
;
; AVX512-LABEL: testv16i16u:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; X32-AVX-LABEL: testv16i16u:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm4
; X32-AVX-NEXT:    vpand %ymm1, %ymm4, %ymm1
; X32-AVX-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
; X32-AVX-NEXT:    vpand %ymm5, %ymm2, %ymm2
; X32-AVX-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
; X32-AVX-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
; X32-AVX-NEXT:    vpsrlw $8, %ymm0, %ymm0
; X32-AVX-NEXT:    vpand %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    vpsrlw $8, %ymm1, %ymm1
; X32-AVX-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    retl
  %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 -1)
  ret <16 x i16> %out
}

; ctlz of <32 x i8>, zero-input defined (i1 0); the byte element type needs
; only the nibble LUT step, with no word/dword merge stages.
define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
; AVX1-LABEL: testv32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm1, %xmm6
; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT:    vpshufb %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vpaddb %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm0, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vpaddb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: testv32i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
; AVX512VL-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; AVX512VL-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512VLBWDQ-LABEL: testv32i8:
; AVX512VLBWDQ:       # %bb.0:
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX512VLBWDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; AVX512VLBWDQ-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    retq
;
; AVX512-LABEL: testv32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512-NEXT:    vplzcntd %zmm1, %zmm1
; AVX512-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; X32-AVX-LABEL: testv32i8:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm0
; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm0
; X32-AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
; X32-AVX-NEXT:    vpand %ymm1, %ymm2, %ymm1
; X32-AVX-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; X32-AVX-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    retl
  %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 0)
  ret <32 x i8> %out
}

; Same operation as testv32i8 but with the zero-undef flag set (i1 -1);
; codegen is unchanged here.
define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
; AVX1-LABEL: testv32i8u:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm1, %xmm6
; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT:    vpshufb %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vpaddb %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm0, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vpaddb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv32i8u:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: testv32i8u:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
; AVX512VL-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; AVX512VL-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512VLBWDQ-LABEL: testv32i8u:
; AVX512VLBWDQ:       # %bb.0:
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX512VLBWDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; AVX512VLBWDQ-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX512VLBWDQ-NEXT:    retq
;
; AVX512-LABEL: testv32i8u:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512-NEXT:    vplzcntd %zmm1, %zmm1
; AVX512-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; X32-AVX-LABEL: testv32i8u:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm0
; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm0
; X32-AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
; X32-AVX-NEXT:    vpand %ymm1, %ymm2, %ymm1
; X32-AVX-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; X32-AVX-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; X32-AVX-NEXT:    retl
  %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 -1)
  ret <32 x i8> %out
}

; All-constant operand, so ctlz constant-folds; only a constant-pool load
; remains (the 32-bit run prints each i64 as two i32 halves).
define <4 x i64> @foldv4i64() nounwind {
; X64-LABEL: foldv4i64:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,64,56]
; X64-NEXT:    retq
;
; X32-AVX-LABEL: foldv4i64:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,0,0,64,0,56,0]
; X32-AVX-NEXT:    retl
  %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
  ret <4 x i64> %out
}

; Constant-fold variant with the zero-undef flag set (i1 -1).
define <4 x i64> @foldv4i64u() nounwind {
; X64-LABEL: foldv4i64u:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,64,56]
; X64-NEXT:    retq
;
; X32-AVX-LABEL: foldv4i64u:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,0,0,64,0,56,0]
; X32-AVX-NEXT:    retl
  %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
  ret <4 x i64> %out
}

; Constant-folded <8 x i32> ctlz, zero-input defined.
define <8 x i32> @foldv8i32() nounwind {
; X64-LABEL: foldv8i32:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
; X64-NEXT:    retq
;
; X32-AVX-LABEL: foldv8i32:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
; X32-AVX-NEXT:    retl
  %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
  ret <8 x i32> %out
}

; Constant-folded <8 x i32> ctlz with zero-undef set.
define <8 x i32> @foldv8i32u() nounwind {
; X64-LABEL: foldv8i32u:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
; X64-NEXT:    retq
;
; X32-AVX-LABEL: foldv8i32u:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
; X32-AVX-NEXT:    retl
  %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
  ret <8 x i32> %out
}

; Constant-folded <16 x i16> ctlz, zero-input defined.
define <16 x i16> @foldv16i16() nounwind {
; X64-LABEL: foldv16i16:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
; X64-NEXT:    retq
;
; X32-AVX-LABEL: foldv16i16:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
; X32-AVX-NEXT:    retl
  %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
  ret <16 x i16> %out
}

; Constant-folded <16 x i16> ctlz with zero-undef set.
define <16 x i16> @foldv16i16u() nounwind {
; X64-LABEL: foldv16i16u:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
; X64-NEXT:    retq
;
; X32-AVX-LABEL: foldv16i16u:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
; X32-AVX-NEXT:    retl
  %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
  ret <16 x i16> %out
}

; Constant-folded <32 x i8> ctlz, zero-input defined.
define <32 x i8> @foldv32i8() nounwind {
; X64-LABEL: foldv32i8:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
; X64-NEXT:    retq
;
; X32-AVX-LABEL: foldv32i8:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
; X32-AVX-NEXT:    retl
  %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
  ret <32 x i8> %out
}

; Constant-folded <32 x i8> ctlz with zero-undef set.
define <32 x i8> @foldv32i8u() nounwind {
; X64-LABEL: foldv32i8u:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
; X64-NEXT:    retq
;
; X32-AVX-LABEL: foldv32i8u:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
; X32-AVX-NEXT:    retl
  %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
  ret <32 x i8> %out
}

; ctlz intrinsic declarations; the i1 operand is the zero-undef flag.
declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)