; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=BITALG_NOVLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG

define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE2-LABEL: testv2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlq $1, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: psubq %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3689348814741910323,3689348814741910323]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrlq $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: paddq %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlq $4, %xmm1
; SSE2-NEXT: paddq %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: testv2i64:
; SSE3: # %bb.0:
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlq $1, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: psubq %xmm1, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [3689348814741910323,3689348814741910323]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrlq $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: paddq %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlq $4, %xmm1
; SSE3-NEXT: paddq %xmm0, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm0, %xmm1
; SSE3-NEXT: movdqa %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: testv2i64:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: pshufb %xmm2, %xmm4
; SSSE3-NEXT: psrlw $4, %xmm0
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pshufb %xmm0, %xmm3
; SSSE3-NEXT: paddb %xmm4, %xmm3
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: psadbw %xmm3, %xmm0
; SSSE3-NEXT: retq
;
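; EXPLANATORY NOTE (hand-written, not autogenerated): the SSE2/SSE3 checks
; above exercise the bit-parallel (SWAR) popcount: v - ((v >> 1) & 0x55...),
; a masked add of 2-bit fields with 0x33... (3689348814741910323 is
; 0x3333333333333333), then (v + (v >> 4)) & 0x0f..., and finally PSADBW
; against zero to sum the per-byte counts into each 64-bit lane. From SSSE3
; onwards, PSHUFB instead looks up each nibble's popcount in the table
; [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] and adds the low- and high-nibble results.
;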
; SSE41-LABEL: testv2i64:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pshufb %xmm2, %xmm4
; SSE41-NEXT: psrlw $4, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pshufb %xmm0, %xmm3
; SSE41-NEXT: paddb %xmm4, %xmm3
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psadbw %xmm3, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: testv2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: testv2i64:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv2i64:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv2i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm2
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; BITALG_NOVLX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; BITALG_NOVLX-NEXT: vpsrlw $4, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; BITALG_NOVLX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv2i64:
; BITALG: # %bb.0:
; BITALG-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm2
; BITALG-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; BITALG-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; BITALG-NEXT: vpsrlw $4, %xmm0, %xmm0
; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0
; BITALG-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; BITALG-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT: retq
  %out = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %in)
  ret <2 x i64> %out
}
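
; EXPLANATORY NOTE (hand-written, not autogenerated): with AVX512VPOPCNTDQ but
; no AVX512VL, the xmm argument is implicitly widened to zmm (the "kill"
; annotations above) so the 512-bit vpopcntq can be used, and vzeroupper is
; needed before returning; with +avx512vl the instruction takes xmm directly.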

define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; SSE2-LABEL: testv4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $1, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: psubd %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [858993459,858993459,858993459,858993459]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrld $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: paddd %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $4, %xmm1
; SSE2-NEXT: paddd %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT: psadbw %xmm0, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: psadbw %xmm0, %xmm1
; SSE2-NEXT: packuswb %xmm2, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: testv4i32:
; SSE3: # %bb.0:
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrld $1, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: psubd %xmm1, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [858993459,858993459,858993459,858993459]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrld $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: paddd %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrld $4, %xmm1
; SSE3-NEXT: paddd %xmm0, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: movdqa %xmm1, %xmm2
; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT: psadbw %xmm0, %xmm2
; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT: psadbw %xmm0, %xmm1
; SSE3-NEXT: packuswb %xmm2, %xmm1
; SSE3-NEXT: movdqa %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: testv4i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSSE3-NEXT: pand %xmm2, %xmm3
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT: movdqa %xmm1, %xmm4
; SSSE3-NEXT: pshufb %xmm3, %xmm4
; SSSE3-NEXT: psrlw $4, %xmm0
; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: pshufb %xmm0, %xmm1
; SSSE3-NEXT: paddb %xmm4, %xmm1
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT: psadbw %xmm0, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT: psadbw %xmm0, %xmm1
; SSSE3-NEXT: packuswb %xmm2, %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
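; EXPLANATORY NOTE (hand-written, not autogenerated): PSADBW only produces one
; sum per 64-bit half, so for 32-bit elements the byte counts are interleaved
; with zero (punpckldq/punpckhdq) to form 64-bit lanes, summed, and packed
; back together with packuswb. The SSE4.1 checks below use pmovzxdq for the
; low half instead of punpckldq.
;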
; SSE41-LABEL: testv4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pshufb %xmm2, %xmm4
; SSE41-NEXT: psrlw $4, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pshufb %xmm0, %xmm3
; SSE41-NEXT: paddb %xmm4, %xmm3
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE41-NEXT: psadbw %xmm1, %xmm3
; SSE41-NEXT: psadbw %xmm1, %xmm0
; SSE41-NEXT: packuswb %xmm3, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: testv4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: testv4i32:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv4i32:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv4i32:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm2
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; BITALG_NOVLX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; BITALG_NOVLX-NEXT: vpsrlw $4, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; BITALG_NOVLX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv4i32:
; BITALG: # %bb.0:
; BITALG-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm2
; BITALG-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; BITALG-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; BITALG-NEXT: vpsrlw $4, %xmm0, %xmm0
; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0
; BITALG-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; BITALG-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; BITALG-NEXT: retq
  %out = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %in)
  ret <4 x i32> %out
}
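
; EXPLANATORY NOTE (hand-written, not autogenerated): AVX512VPOPCNTDQ has no
; word-granularity count, so in testv8i16 below the elements are zero-extended
; to 32 bits (vpmovzxwd), counted with vpopcntd, and truncated back with
; vpmovdw; only AVX512BITALG provides a native vpopcntw.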

define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; SSE2-LABEL: testv8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: psubw %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [13107,13107,13107,13107,13107,13107,13107,13107]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: paddw %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $4, %xmm1
; SSE2-NEXT: paddw %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psllw $8, %xmm0
; SSE2-NEXT: paddb %xmm1, %xmm0
; SSE2-NEXT: psrlw $8, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: testv8i16:
; SSE3: # %bb.0:
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: psubw %xmm1, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [13107,13107,13107,13107,13107,13107,13107,13107]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: paddw %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $4, %xmm1
; SSE3-NEXT: paddw %xmm0, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: movdqa %xmm1, %xmm0
; SSE3-NEXT: psllw $8, %xmm0
; SSE3-NEXT: paddb %xmm1, %xmm0
; SSE3-NEXT: psrlw $8, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: testv8i16:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: pshufb %xmm2, %xmm4
; SSSE3-NEXT: psrlw $4, %xmm0
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pshufb %xmm0, %xmm3
; SSSE3-NEXT: paddb %xmm4, %xmm3
; SSSE3-NEXT: movdqa %xmm3, %xmm0
; SSSE3-NEXT: psllw $8, %xmm0
; SSSE3-NEXT: paddb %xmm3, %xmm0
; SSSE3-NEXT: psrlw $8, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: testv8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pshufb %xmm2, %xmm4
; SSE41-NEXT: psrlw $4, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pshufb %xmm0, %xmm3
; SSE41-NEXT: paddb %xmm4, %xmm3
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: psllw $8, %xmm0
; SSE41-NEXT: paddb %xmm3, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: retq
;
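; EXPLANATORY NOTE (hand-written, not autogenerated): for 16-bit elements the
; per-byte counts are combined without PSADBW: shifting left by 8 and adding
; (psllw/paddb) puts the sum of both bytes into the high byte of each word,
; and psrlw $8 moves it down to give the final per-word counts.
;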
; AVX1-LABEL: testv8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: testv8i16:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv8i16:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vzeroupper
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv8i16:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv8i16:
; BITALG: # %bb.0:
; BITALG-NEXT: vpopcntw %xmm0, %xmm0
; BITALG-NEXT: retq
  %out = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %in)
  ret <8 x i16> %out
}
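
; EXPLANATORY NOTE (hand-written, not autogenerated): v16i8 below is the base
; case of the lowering; the nibble-LUT result is already a per-byte count, so
; the SSSE3 and AVX checks end right after the paddb with no horizontal
; reduction step at all.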

define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
; SSE2-LABEL: testv16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: psubb %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $4, %xmm1
; SSE2-NEXT: paddb %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: testv16i8:
; SSE3: # %bb.0:
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: psubb %xmm1, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $4, %xmm1
; SSE3-NEXT: paddb %xmm0, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: movdqa %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: testv16i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSSE3-NEXT: pand %xmm2, %xmm3
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT: movdqa %xmm1, %xmm4
; SSSE3-NEXT: pshufb %xmm3, %xmm4
; SSSE3-NEXT: psrlw $4, %xmm0
; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: pshufb %xmm0, %xmm1
; SSSE3-NEXT: paddb %xmm4, %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: testv16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: pand %xmm2, %xmm3
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT: movdqa %xmm1, %xmm4
; SSE41-NEXT: pshufb %xmm3, %xmm4
; SSE41-NEXT: psrlw $4, %xmm0
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: pshufb %xmm0, %xmm1
; SSE41-NEXT: paddb %xmm4, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: testv16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: testv16i8:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
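; EXPLANATORY NOTE (hand-written, not autogenerated): for bytes, even the VL
; path below widens all sixteen elements to dwords in a zmm register, because
; vpopcntd is the narrowest granularity AVX512VPOPCNTDQ offers; the counts are
; then truncated back to bytes with vpmovdb.
;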
; AVX512VPOPCNTDQVL-LABEL: testv16i8:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vzeroupper
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv16i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv16i8:
; BITALG: # %bb.0:
; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: retq
  %out = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %in)
  ret <16 x i8> %out
}

define <2 x i64> @foldv2i64() nounwind {
; SSE-LABEL: foldv2i64:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,64]
; SSE-NEXT: retq
;
; AVX-LABEL: foldv2i64:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,64]
; AVX-NEXT: retq
;
; BITALG_NOVLX-LABEL: foldv2i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [1,64]
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: foldv2i64:
; BITALG: # %bb.0:
; BITALG-NEXT: vmovaps {{.*#+}} xmm0 = [1,64]
; BITALG-NEXT: retq
  %out = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> <i64 256, i64 -1>)
  ret <2 x i64> %out
}

define <4 x i32> @foldv4i32() nounwind {
; SSE-LABEL: foldv4i32:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,32,0,8]
; SSE-NEXT: retq
;
; AVX-LABEL: foldv4i32:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,32,0,8]
; AVX-NEXT: retq
;
; BITALG_NOVLX-LABEL: foldv4i32:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [1,32,0,8]
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: foldv4i32:
; BITALG: # %bb.0:
; BITALG-NEXT: vmovaps {{.*#+}} xmm0 = [1,32,0,8]
; BITALG-NEXT: retq
  %out = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> <i32 256, i32 -1, i32 0, i32 255>)
  ret <4 x i32> %out
}

define <8 x i16> @foldv8i16() nounwind {
; SSE-LABEL: foldv8i16:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,16,0,8,0,3,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: foldv8i16:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,16,0,8,0,3,2,3]
; AVX-NEXT: retq
;
; BITALG_NOVLX-LABEL: foldv8i16:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [1,16,0,8,0,3,2,3]
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: foldv8i16:
; BITALG: # %bb.0:
; BITALG-NEXT: vmovaps {{.*#+}} xmm0 = [1,16,0,8,0,3,2,3]
; BITALG-NEXT: retq
  %out = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88>)
  ret <8 x i16> %out
}
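
; EXPLANATORY NOTE (hand-written, not autogenerated): the foldv* functions
; check that ctpop of constant vectors is folded at compile time: each lowers
; to a single constant-pool load, e.g. the popcounts of <i16 256, i16 -1,
; i16 0, i16 255, ...> above are materialized directly as [1,16,0,8,0,3,2,3].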

define <16 x i8> @foldv16i8() nounwind {
; SSE-LABEL: foldv16i8:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,8,0,8,0,3,2,3,7,7,1,1,1,1,1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: foldv16i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,8,0,8,0,3,2,3,7,7,1,1,1,1,1,1]
; AVX-NEXT: retq
;
; BITALG_NOVLX-LABEL: foldv16i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [0,8,0,8,0,3,2,3,7,7,1,1,1,1,1,1]
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: foldv16i8:
; BITALG: # %bb.0:
; BITALG-NEXT: vmovaps {{.*#+}} xmm0 = [0,8,0,8,0,3,2,3,7,7,1,1,1,1,1,1]
; BITALG-NEXT: retq
  %out = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32>)
  ret <16 x i8> %out
}

declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
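
; EXPLANATORY NOTE (hand-written, not autogenerated): the declarations above
; keep the file self-contained. When the lowering changes, regenerate the
; CHECK lines with utils/update_llc_test_checks.py (see the NOTE at the top of
; the file) rather than editing them by hand.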