; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BWNOVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512BWVL

; Every function below builds a per-byte mask from a 16-lane integer equality
; compare and uses it to pick bytes from %a2 (mask lane all-ones) or from %a3
; (mask lane zero), written as the bitwise form (mask & a2) | (~mask & a3).
; The expected assembly is regenerated by the script named on the first line;
; do not edit the check lines by hand.

;
; General cases - packing of vector comparison to legal vector result types
;

; 16 x i16 compare; the i1 result is sign-extended straight to i8 lanes.
define <16 x i8> @vselect_packss_v16i16(<16 x i16> %a0, <16 x i16> %a1, <16 x i8> %a2, <16 x i8> %a3) {
; SSE2-LABEL: vselect_packss_v16i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqw %xmm3, %xmm1
; SSE2-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-NEXT:    packsswb %xmm1, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm4
; SSE2-NEXT:    pandn %xmm5, %xmm0
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: vselect_packss_v16i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqw %xmm3, %xmm1
; SSE42-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE42-NEXT:    packsswb %xmm1, %xmm0
; SSE42-NEXT:    pblendvb %xmm0, %xmm4, %xmm5
; SSE42-NEXT:    movdqa %xmm5, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: vselect_packss_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: vselect_packss_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: vselect_packss_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: vselect_packss_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT:    vpternlogq $202, %xmm3, %xmm2, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BWNOVL-LABEL: vselect_packss_v16i16:
; AVX512BWNOVL:       # %bb.0:
; AVX512BWNOVL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512BWNOVL-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BWNOVL-NEXT:    vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
; AVX512BWNOVL-NEXT:    vzeroupper
; AVX512BWNOVL-NEXT:    retq
;
; AVX512BWVL-LABEL: vselect_packss_v16i16:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
; AVX512BWVL-NEXT:    vpmovm2b %k0, %xmm0
; AVX512BWVL-NEXT:    vpternlogq $202, %xmm3, %xmm2, %xmm0
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
  ; Lane-wise equality, widened so each byte is 0 or -1.
  %eq = icmp eq <16 x i16> %a0, %a1
  %mask = sext <16 x i1> %eq to <16 x i8>
  ; Bytes of %a2 where the lanes matched.
  %from.a2 = and <16 x i8> %mask, %a2
  ; Bytes of %a3 where they did not (mask inverted via xor with all-ones).
  %not.mask = xor <16 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %from.a3 = and <16 x i8> %not.mask, %a3
  %merged = or <16 x i8> %from.a2, %from.a3
  ret <16 x i8> %merged
}

; 16 x i32 compare; same select pattern with a wider compare type.
define <16 x i8> @vselect_packss_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i8> %a2, <16 x i8> %a3) {
; SSE2-LABEL: vselect_packss_v16i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm7, %xmm3
; SSE2-NEXT:    pcmpeqd %xmm6, %xmm2
; SSE2-NEXT:    packssdw %xmm3, %xmm2
; SSE2-NEXT:    pcmpeqd %xmm5, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    packsswb %xmm2, %xmm0
; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pandn {{[0-9]+}}(%rsp), %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: vselect_packss_v16i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE42-NEXT:    pcmpeqd %xmm7, %xmm3
; SSE42-NEXT:    pcmpeqd %xmm6, %xmm2
; SSE42-NEXT:    packssdw %xmm3, %xmm2
; SSE42-NEXT:    pcmpeqd %xmm5, %xmm1
; SSE42-NEXT:    pcmpeqd %xmm4, %xmm0
; SSE42-NEXT:    packssdw %xmm1, %xmm0
; SSE42-NEXT:    packsswb %xmm2, %xmm0
; SSE42-NEXT:    pblendvb %xmm0, {{[0-9]+}}(%rsp), %xmm8
; SSE42-NEXT:    movdqa %xmm8, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: vselect_packss_v16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm6
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm7
; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm7, %xmm6
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw %xmm6, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm6, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpblendvb %xmm0, %xmm4, %xmm5, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: vselect_packss_v16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vpblendvb %xmm0, %xmm4, %xmm5, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: vselect_packss_v16i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: vselect_packss_v16i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT:    vpternlogq $202, %xmm3, %xmm2, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BWNOVL-LABEL: vselect_packss_v16i32:
; AVX512BWNOVL:       # %bb.0:
; AVX512BWNOVL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; AVX512BWNOVL-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BWNOVL-NEXT:    vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
; AVX512BWNOVL-NEXT:    vzeroupper
; AVX512BWNOVL-NEXT:    retq
;
; AVX512BWVL-LABEL: vselect_packss_v16i32:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; AVX512BWVL-NEXT:    vpmovm2b %k0, %xmm0
; AVX512BWVL-NEXT:    vpternlogq $202, %xmm3, %xmm2, %xmm0
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
  ; Lane-wise equality, widened so each byte is 0 or -1.
  %eq = icmp eq <16 x i32> %a0, %a1
  %mask = sext <16 x i1> %eq to <16 x i8>
  ; Bytes of %a2 where the lanes matched.
  %from.a2 = and <16 x i8> %mask, %a2
  ; Bytes of %a3 where they did not (mask inverted via xor with all-ones).
  %not.mask = xor <16 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %from.a3 = and <16 x i8> %not.mask, %a3
  %merged = or <16 x i8> %from.a2, %from.a3
  ret <16 x i8> %merged
}

; 16 x i64 compare; same select pattern with the widest compare type tested.
define <16 x i8> @vselect_packss_v16i64(<16 x i64> %a0, <16 x i64> %a1, <16 x i8> %a2, <16 x i8> %a3) {
; SSE2-LABEL: vselect_packss_v16i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd {{[0-9]+}}(%rsp), %xmm7
; SSE2-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[1,0,3,2]
; SSE2-NEXT:    pand %xmm7, %xmm8
; SSE2-NEXT:    pcmpeqd {{[0-9]+}}(%rsp), %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[1,0,3,2]
; SSE2-NEXT:    pand %xmm6, %xmm7
; SSE2-NEXT:    packssdw %xmm8, %xmm7
; SSE2-NEXT:    pcmpeqd {{[0-9]+}}(%rsp), %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[1,0,3,2]
; SSE2-NEXT:    pand %xmm5, %xmm6
; SSE2-NEXT:    pcmpeqd {{[0-9]+}}(%rsp), %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[1,0,3,2]
; SSE2-NEXT:    pand %xmm4, %xmm5
; SSE2-NEXT:    packssdw %xmm6, %xmm5
; SSE2-NEXT:    packssdw %xmm7, %xmm5
; SSE2-NEXT:    pcmpeqd {{[0-9]+}}(%rsp), %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,0,3,2]
; SSE2-NEXT:    pand %xmm3, %xmm4
; SSE2-NEXT:    pcmpeqd {{[0-9]+}}(%rsp), %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
; SSE2-NEXT:    pand %xmm2, %xmm3
; SSE2-NEXT:    packssdw %xmm4, %xmm3
; SSE2-NEXT:    pcmpeqd {{[0-9]+}}(%rsp), %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pcmpeqd {{[0-9]+}}(%rsp), %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    packssdw %xmm2, %xmm1
; SSE2-NEXT:    packssdw %xmm3, %xmm1
; SSE2-NEXT:    packsswb %xmm5, %xmm1
; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pandn {{[0-9]+}}(%rsp), %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: vselect_packss_v16i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqq {{[0-9]+}}(%rsp), %xmm7
; SSE42-NEXT:    pcmpeqq {{[0-9]+}}(%rsp), %xmm6
; SSE42-NEXT:    packssdw %xmm7, %xmm6
; SSE42-NEXT:    pcmpeqq {{[0-9]+}}(%rsp), %xmm5
; SSE42-NEXT:    pcmpeqq {{[0-9]+}}(%rsp), %xmm4
; SSE42-NEXT:    packssdw %xmm5, %xmm4
; SSE42-NEXT:    packssdw %xmm6, %xmm4
; SSE42-NEXT:    pcmpeqq {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pcmpeqq {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    packssdw %xmm3, %xmm2
; SSE42-NEXT:    pcmpeqq {{[0-9]+}}(%rsp), %xmm1
; SSE42-NEXT:    pcmpeqq {{[0-9]+}}(%rsp), %xmm0
; SSE42-NEXT:    packssdw %xmm1, %xmm0
; SSE42-NEXT:    packssdw %xmm2, %xmm0
; SSE42-NEXT:    packsswb %xmm4, %xmm0
; SSE42-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm1
; SSE42-NEXT:    pand %xmm0, %xmm1
; SSE42-NEXT:    pandn {{[0-9]+}}(%rsp), %xmm0
; SSE42-NEXT:    por %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: vselect_packss_v16i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm9
; AVX1-NEXT:    vpcmpeqq %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpeqq %xmm7, %xmm3, %xmm3
; AVX1-NEXT:    vpackssdw %xmm8, %xmm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm7
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm7, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm6, %xmm2, %xmm2
; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpackssdw %xmm8, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm6
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm6, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm5, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{[0-9]+}}(%rsp), %xmm0, %xmm1
; AVX1-NEXT:    vpandn {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX1-NEXT:    vpor %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: vselect_packss_v16i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqq %ymm7, %ymm3, %ymm3
; AVX2-NEXT:    vpcmpeqq %ymm6, %ymm2, %ymm2
; AVX2-NEXT:    vpackssdw %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqq %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vpackssdw %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vpand {{[0-9]+}}(%rsp), %xmm0, %xmm1
; AVX2-NEXT:    vpandn {{[0-9]+}}(%rsp), %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: vselect_packss_v16i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0
; AVX512F-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vpblendvb %xmm0, %xmm4, %xmm5, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: vselect_packss_v16i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0
; AVX512VL-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
; AVX512VL-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT:    vpternlogq $202, %xmm5, %xmm4, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BWNOVL-LABEL: vselect_packss_v16i64:
; AVX512BWNOVL:       # %bb.0:
; AVX512BWNOVL-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0
; AVX512BWNOVL-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
; AVX512BWNOVL-NEXT:    kunpckbw %k0, %k1, %k0
; AVX512BWNOVL-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BWNOVL-NEXT:    vpblendvb %xmm0, %xmm4, %xmm5, %xmm0
; AVX512BWNOVL-NEXT:    vzeroupper
; AVX512BWNOVL-NEXT:    retq
;
; AVX512BWVL-LABEL: vselect_packss_v16i64:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0
; AVX512BWVL-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
; AVX512BWVL-NEXT:    kunpckbw %k0, %k1, %k0
; AVX512BWVL-NEXT:    vpmovm2b %k0, %xmm0
; AVX512BWVL-NEXT:    vpternlogq $202, %xmm5, %xmm4, %xmm0
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
  ; Lane-wise equality, widened so each byte is 0 or -1.
  %eq = icmp eq <16 x i64> %a0, %a1
  %mask = sext <16 x i1> %eq to <16 x i8>
  ; Bytes of %a2 where the lanes matched.
  %from.a2 = and <16 x i8> %mask, %a2
  ; Bytes of %a3 where they did not (mask inverted via xor with all-ones).
  %not.mask = xor <16 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %from.a3 = and <16 x i8> %not.mask, %a3
  %merged = or <16 x i8> %from.a2, %from.a3
  ret <16 x i8> %merged
}

;
; PACKSS case
;

; Here the narrowing to bytes is written out explicitly: the compare result is
; sign-extended to i16, split into its low and high 8-lane halves, and the two
; halves are fed to the packsswb intrinsic to form the byte mask.
define <16 x i8> @vselect_packss(<16 x i16> %a0, <16 x i16> %a1, <16 x i8> %a2, <16 x i8> %a3) {
; SSE2-LABEL: vselect_packss:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqw %xmm3, %xmm1
; SSE2-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-NEXT:    packsswb %xmm1, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm4
; SSE2-NEXT:    pandn %xmm5, %xmm0
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: vselect_packss:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqw %xmm3, %xmm1
; SSE42-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE42-NEXT:    packsswb %xmm1, %xmm0
; SSE42-NEXT:    pblendvb %xmm0, %xmm4, %xmm5
; SSE42-NEXT:    movdqa %xmm5, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: vselect_packss:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: vselect_packss:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: vselect_packss:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: vselect_packss:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    vpternlogq $202, %xmm3, %xmm2, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BWNOVL-LABEL: vselect_packss:
; AVX512BWNOVL:       # %bb.0:
; AVX512BWNOVL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512BWNOVL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512BWNOVL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX512BWNOVL-NEXT:    vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
; AVX512BWNOVL-NEXT:    vzeroupper
; AVX512BWNOVL-NEXT:    retq
;
; AVX512BWVL-LABEL: vselect_packss:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512BWVL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT:    vpternlogq $202, %xmm3, %xmm2, %xmm0
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
  ; Lane-wise equality, widened to i16 so each lane is 0 or -1.
  %eq = icmp eq <16 x i16> %a0, %a1
  %wide.mask = sext <16 x i1> %eq to <16 x i16>
  ; Lanes 0-7 and lanes 8-15 of the widened mask.
  %lo.half = shufflevector <16 x i16> %wide.mask, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %hi.half = shufflevector <16 x i16> %wide.mask, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ; Narrow both halves to a single 16 x i8 mask.
  %mask = tail call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %lo.half, <8 x i16> %hi.half)
  ; Bytes of %a2 where the lanes matched.
  %from.a2 = and <16 x i8> %mask, %a2
  ; Bytes of %a3 where they did not (mask inverted via xor with all-ones).
  %not.mask = xor <16 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %from.a3 = and <16 x i8> %not.mask, %a3
  %merged = or <16 x i8> %from.a2, %from.a3
  ret <16 x i8> %merged
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>)