; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;
; 256-bit vector CTPOP (population count) lowering tests.
; The AVX1/AVX2 configurations lower ctpop via the 4-bit nibble lookup
; algorithm (mask low nibbles, vpshufb into a per-nibble bit-count table,
; repeat for high nibbles, vpaddb), then horizontally reduce the per-byte
; counts to the element width. The AVX512 VPOPCNTDQ and BITALG
; configurations instead select the native vpopcnt{q,d,w,b} instructions;
; without AVX512VL the ymm operand is widened to zmm (the "kill" notes).
; Do not edit the CHECK lines by hand - rerun update_llc_test_checks.py.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=BITALG_NOVLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG

; <4 x i64> popcount. Byte counts are summed into each 64-bit lane with a
; single vpsadbw against zero. AVX1 splits the ymm into two xmm halves.
define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
; AVX1-LABEL: testv4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: testv4i64:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv4i64:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv4i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; BITALG_NOVLX-NEXT: vpand %ymm1, %ymm0, %ymm2
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; BITALG_NOVLX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; BITALG_NOVLX-NEXT: vpsrlw $4, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: vpand %ymm1, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; BITALG_NOVLX-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv4i64:
; BITALG: # %bb.0:
; BITALG-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; BITALG-NEXT: vpand %ymm1, %ymm0, %ymm2
; BITALG-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; BITALG-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; BITALG-NEXT: vpsrlw $4, %ymm0, %ymm0
; BITALG-NEXT: vpand %ymm1, %ymm0, %ymm0
; BITALG-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; BITALG-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; BITALG-NEXT: retq
  %out = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %in)
  ret <4 x i64> %out
}

; <8 x i32> popcount. vpsadbw only produces qword sums, so the byte counts
; are unpacked against zero into even/odd dword halves, summed separately,
; and repacked with vpackuswb.
define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
; AVX1-LABEL: testv8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: testv8i32:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv8i32:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv8i32:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; BITALG_NOVLX-NEXT: vpand %ymm1, %ymm0, %ymm2
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; BITALG_NOVLX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; BITALG_NOVLX-NEXT: vpsrlw $4, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: vpand %ymm1, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; BITALG_NOVLX-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv8i32:
; BITALG: # %bb.0:
; BITALG-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; BITALG-NEXT: vpand %ymm1, %ymm0, %ymm2
; BITALG-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; BITALG-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; BITALG-NEXT: vpsrlw $4, %ymm0, %ymm0
; BITALG-NEXT: vpand %ymm1, %ymm0, %ymm0
; BITALG-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; BITALG-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; BITALG-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; BITALG-NEXT: retq
  %out = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %in)
  ret <8 x i32> %out
}

; <16 x i16> popcount. AVX1/AVX2 fold per-byte counts into 16-bit lanes with
; a shift-left-by-8 / add / shift-right-by-8 sequence; the VPOPCNTDQ variants
; widen to <16 x i32>, use vpopcntd, and truncate back with vpmovdw; BITALG
; uses vpopcntw directly.
define <16 x i16> @testv16i16(<16 x i16> %in) nounwind {
; AVX1-LABEL: testv16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpsllw $8, %xmm2, %xmm4
; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm4
; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1
; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: testv16i16:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv16i16:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv16i16:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv16i16:
; BITALG: # %bb.0:
; BITALG-NEXT: vpopcntw %ymm0, %ymm0
; BITALG-NEXT: retq
  %out = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %in)
  ret <16 x i16> %out
}

; <32 x i8> popcount. The nibble-lookup result is already a per-byte count,
; so no horizontal reduction is needed; only BITALG has a native byte
; popcount (vpopcntb) - the VPOPCNTDQ variants still use the lookup.
define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
; AVX1-LABEL: testv32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: testv32i8:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512VPOPCNTDQ-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv32i8:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512VPOPCNTDQVL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv32i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv32i8:
; BITALG: # %bb.0:
; BITALG-NEXT: vpopcntb %ymm0, %ymm0
; BITALG-NEXT: retq
  %out = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %in)
  ret <32 x i8> %out
}

; Constant-input tests - all configurations must fold the ctpop at compile
; time to a constant-pool load of the precomputed counts.
define <4 x i64> @foldv4i64() nounwind {
; ALL-LABEL: foldv4i64:
; ALL: # %bb.0:
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [1,64,0,8]
; ALL-NEXT: retq
  %out = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>)
  ret <4 x i64> %out
}

define <8 x i32> @foldv8i32() nounwind {
; ALL-LABEL: foldv8i32:
; ALL: # %bb.0:
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [1,32,0,8,16,3,2,3]
; ALL-NEXT: retq
  %out = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>)
  ret <8 x i32> %out
}

define <16 x i16> @foldv16i16() nounwind {
; ALL-LABEL: foldv16i16:
; ALL: # %bb.0:
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [1,16,0,8,0,3,2,3,15,7,1,1,1,1,1,1]
; ALL-NEXT: retq
  %out = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>)
  ret <16 x i16> %out
}

define <32 x i8> @foldv32i8() nounwind {
; ALL-LABEL: foldv32i8:
; ALL: # %bb.0:
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [0,8,0,8,0,3,2,3,7,7,1,1,1,1,1,1,1,1,0,0,1,2,3,4,5,6,7,8,2,2,3,7]
; ALL-NEXT: retq
  %out = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>)
  ret <32 x i8> %out
}

declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)