; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512VPOPCNTDQ --check-prefix=AVX512VPOPCNTDQ-NOBW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512VPOPCNTDQ --check-prefix=AVX512VPOPCNTDQ-BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=AVX512 --check-prefix=BITALG

; Codegen checks for the llvm.ctpop intrinsic on 512-bit vectors under the five
; feature configurations selected by the RUN lines above. Each function simply
; calls the intrinsic on its argument and returns the result; the interesting
; part is the instruction sequence that llc emits per configuration.

; Population count per 64-bit element. The vpopcntdq configurations are
; expected to emit a single vpopcntq; the others use the nibble lookup table
; (vpshufb) followed by vpsadbw to sum the byte counts.
define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
; AVX512F-LABEL: testv8i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm3
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512F-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
; AVX512F-NEXT:    vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512F-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpsadbw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm5
; AVX512F-NEXT:    vpshufb %ymm5, %ymm4, %ymm5
; AVX512F-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: testv8i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: testv8i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    retq
;
; BITALG-LABEL: testv8i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; BITALG-NEXT:    vpandq %zmm1, %zmm0, %zmm2
; BITALG-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; BITALG-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
; BITALG-NEXT:    vpsrlw $4, %zmm0, %zmm0
; BITALG-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; BITALG-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
; BITALG-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
; BITALG-NEXT:    retq
  %out = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %in)
  ret <8 x i64> %out
}

; Population count per 32-bit element. The vpopcntdq configurations are
; expected to emit a single vpopcntd; the others compute per-byte counts via
; the lookup table and then widen/sum them with unpack + vpsadbw + vpackuswb.
define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
; AVX512F-LABEL: testv16i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm3
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512F-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
; AVX512F-NEXT:    vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512F-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7]
; AVX512F-NEXT:    vpsadbw %ymm3, %ymm5, %ymm5
; AVX512F-NEXT:    vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5]
; AVX512F-NEXT:    vpsadbw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vpackuswb %ymm5, %ymm1, %ymm1
; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm5
; AVX512F-NEXT:    vpshufb %ymm5, %ymm4, %ymm5
; AVX512F-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
; AVX512F-NEXT:    vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7]
; AVX512F-NEXT:    vpsadbw %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5]
; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: testv16i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT:    vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm2, %zmm2
; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: testv16i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    retq
;
; BITALG-LABEL: testv16i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; BITALG-NEXT:    vpandq %zmm1, %zmm0, %zmm2
; BITALG-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; BITALG-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
; BITALG-NEXT:    vpsrlw $4, %zmm0, %zmm0
; BITALG-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; BITALG-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
; BITALG-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; BITALG-NEXT:    vpsadbw %zmm1, %zmm2, %zmm2
; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
; BITALG-NEXT:    retq
  %out = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %in)
  ret <16 x i32> %out
}

; Population count per 16-bit element. The bitalg configuration is expected to
; emit a single vpopcntw. With vpopcntdq but without bw, each 256-bit half is
; zero-extended to dwords, counted with vpopcntd and truncated back with
; vpmovdw. The remaining configurations use the lookup-table sequence.
define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
; AVX512F-LABEL: testv32i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm3
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512F-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
; AVX512F-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
; AVX512F-NEXT:    vpaddb %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpsllw $8, %ymm0, %ymm3
; AVX512F-NEXT:    vpaddb %ymm0, %ymm3, %ymm0
; AVX512F-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm3
; AVX512F-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
; AVX512F-NEXT:    vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512F-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vpsllw $8, %ymm1, %ymm2
; AVX512F-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512F-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: testv32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512VPOPCNTDQ-NOBW-LABEL: testv32i16:
; AVX512VPOPCNTDQ-NOBW:       # %bb.0:
; AVX512VPOPCNTDQ-NOBW-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VPOPCNTDQ-NOBW-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NOBW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NOBW-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VPOPCNTDQ-NOBW-NEXT:    vpopcntd %zmm1, %zmm1
; AVX512VPOPCNTDQ-NOBW-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512VPOPCNTDQ-NOBW-NEXT:    retq
;
; AVX512VPOPCNTDQ-BW-LABEL: testv32i16:
; AVX512VPOPCNTDQ-BW:       # %bb.0:
; AVX512VPOPCNTDQ-BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VPOPCNTDQ-BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
; AVX512VPOPCNTDQ-BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512VPOPCNTDQ-BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512VPOPCNTDQ-BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512VPOPCNTDQ-BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
; AVX512VPOPCNTDQ-BW-NEXT:    retq
;
; BITALG-LABEL: testv32i16:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
; BITALG-NEXT:    retq
  %out = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %in)
  ret <32 x i16> %out
}

; Population count per byte. The bitalg configuration is expected to emit a
; single vpopcntb; every other configuration uses the nibble lookup table,
; on 256-bit halves when bw is unavailable and on the full zmm otherwise.
define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
; AVX512F-LABEL: testv64i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm3
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512F-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
; AVX512F-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
; AVX512F-NEXT:    vpaddb %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm3
; AVX512F-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
; AVX512F-NEXT:    vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512F-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: testv64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512VPOPCNTDQ-NOBW-LABEL: testv64i8:
; AVX512VPOPCNTDQ-NOBW:       # %bb.0:
; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VPOPCNTDQ-NOBW-NEXT:    vpand %ymm2, %ymm0, %ymm3
; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512VPOPCNTDQ-NOBW-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
; AVX512VPOPCNTDQ-NOBW-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NOBW-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NOBW-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
; AVX512VPOPCNTDQ-NOBW-NEXT:    vpaddb %ymm3, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NOBW-NEXT:    vpand %ymm2, %ymm1, %ymm3
; AVX512VPOPCNTDQ-NOBW-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
; AVX512VPOPCNTDQ-NOBW-NEXT:    vpsrlw $4, %ymm1, %ymm1
; AVX512VPOPCNTDQ-NOBW-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512VPOPCNTDQ-NOBW-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
; AVX512VPOPCNTDQ-NOBW-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
; AVX512VPOPCNTDQ-NOBW-NEXT:    retq
;
; AVX512VPOPCNTDQ-BW-LABEL: testv64i8:
; AVX512VPOPCNTDQ-BW:       # %bb.0:
; AVX512VPOPCNTDQ-BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VPOPCNTDQ-BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
; AVX512VPOPCNTDQ-BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512VPOPCNTDQ-BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
; AVX512VPOPCNTDQ-BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512VPOPCNTDQ-BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
; AVX512VPOPCNTDQ-BW-NEXT:    retq
;
; BITALG-LABEL: testv64i8:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG-NEXT:    retq
  %out = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %in)
  ret <64 x i8> %out
}

; Intrinsic declarations for the four element widths exercised above.
declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>)
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>)
declare <32 x i16> @llvm.ctpop.v32i16(<32 x i16>)
declare <64 x i8> @llvm.ctpop.v64i8(<64 x i8>)