; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512CD
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512cd,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512VLCDBW
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl,+avx512cd,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=X86-AVX512VLCDBW

; Lowering tests for the "broadcast mask to vector" idiom:
;
;   mask   = bitcast (icmp eq a, b) to iN      ; N = 8 or 16
;   scalar = zext mask to i32/i64
;   result = splat scalar across every lane
;
; Three configurations are exercised: x86-64 with only AVX512CD, x86-64 with
; AVX512VL+CD+BW, and i686 with AVX512VL+CD+BW.  The check lines record when
; the splat is selected as a single vpbroadcastmb2q / vpbroadcastmw2d and when
; it is expanded into a mask-to-GPR move followed by inserts or a broadcast.
;
; Every feature in -mattr carries an explicit '+', and only check prefixes
; that are actually used in this file are passed to FileCheck.

; 8 x i16 compare -> i8 mask -> zext to i64 -> splat into <2 x i64>.
; Only the x86-64 VL+CD+BW run is expected to use vpbroadcastmb2q here.
define <2 x i64> @test_mm_epi64(<8 x i16> %a, <8 x i16> %b) {
; AVX512CD-LABEL: test_mm_epi64:
; AVX512CD:       # %bb.0: # %entry
; AVX512CD-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX512CD-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512CD-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512CD-NEXT:    kmovw %k0, %eax
; AVX512CD-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vzeroupper
; AVX512CD-NEXT:    retq
;
; AVX512VLCDBW-LABEL: test_mm_epi64:
; AVX512VLCDBW:       # %bb.0: # %entry
; AVX512VLCDBW-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
; AVX512VLCDBW-NEXT:    vpbroadcastmb2q %k0, %xmm0
; AVX512VLCDBW-NEXT:    retq
;
; X86-AVX512VLCDBW-LABEL: test_mm_epi64:
; X86-AVX512VLCDBW:       # %bb.0: # %entry
; X86-AVX512VLCDBW-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
; X86-AVX512VLCDBW-NEXT:    kmovd %k0, %eax
; X86-AVX512VLCDBW-NEXT:    movzbl %al, %eax
; X86-AVX512VLCDBW-NEXT:    vmovd %eax, %xmm0
; X86-AVX512VLCDBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
; X86-AVX512VLCDBW-NEXT:    retl
entry:
  %0 = icmp eq <8 x i16> %a, %b
  %1 = bitcast <8 x i1> %0 to i8
  %conv.i = zext i8 %1 to i64
  %vecinit.i.i = insertelement <2 x i64> undef, i64 %conv.i, i32 0
  %vecinit1.i.i = shufflevector <2 x i64> %vecinit.i.i, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %vecinit1.i.i
}

; 16 x i8 compare -> i16 mask -> zext to i32 -> splat into <4 x i32>.
; Both VL+CD+BW runs are expected to use vpbroadcastmw2d; the CD-only run
; expands the splat into word inserts.
define <4 x i32> @test_mm_epi32(<16 x i8> %a, <16 x i8> %b) {
; AVX512CD-LABEL: test_mm_epi32:
; AVX512CD:       # %bb.0: # %entry
; AVX512CD-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512CD-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512CD-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512CD-NEXT:    kmovw %k0, %eax
; AVX512CD-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vzeroupper
; AVX512CD-NEXT:    retq
;
; AVX512VLCDBW-LABEL: test_mm_epi32:
; AVX512VLCDBW:       # %bb.0: # %entry
; AVX512VLCDBW-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
; AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %xmm0
; AVX512VLCDBW-NEXT:    retq
;
; X86-AVX512VLCDBW-LABEL: test_mm_epi32:
; X86-AVX512VLCDBW:       # %bb.0: # %entry
; X86-AVX512VLCDBW-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
; X86-AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %xmm0
; X86-AVX512VLCDBW-NEXT:    retl
entry:
  %0 = icmp eq <16 x i8> %a, %b
  %1 = bitcast <16 x i1> %0 to i16
  %conv.i = zext i16 %1 to i32
  %vecinit.i.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
  %vecinit3.i.i = shufflevector <4 x i32> %vecinit.i.i, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %vecinit3.i.i
}

; 16 x i32 compare -> i16 mask -> zext to i32 -> splat into <16 x i32>.
; All three runs are expected to select the 512-bit vpbroadcastmw2d.
define <16 x i32> @test_mm512_epi32(<16 x i32> %a, <16 x i32> %b) {
; AVX512CD-LABEL: test_mm512_epi32:
; AVX512CD:       # %bb.0: # %entry
; AVX512CD-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; AVX512CD-NEXT:    vpbroadcastmw2d %k0, %zmm0
; AVX512CD-NEXT:    retq
;
; AVX512VLCDBW-LABEL: test_mm512_epi32:
; AVX512VLCDBW:       # %bb.0: # %entry
; AVX512VLCDBW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %zmm0
; AVX512VLCDBW-NEXT:    retq
;
; X86-AVX512VLCDBW-LABEL: test_mm512_epi32:
; X86-AVX512VLCDBW:       # %bb.0: # %entry
; X86-AVX512VLCDBW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; X86-AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %zmm0
; X86-AVX512VLCDBW-NEXT:    retl
entry:
  %0 = icmp eq <16 x i32> %a, %b
  %1 = bitcast <16 x i1> %0 to i16
  %conv.i = zext i16 %1 to i32
  %vecinit.i.i = insertelement <16 x i32> undef, i32 %conv.i, i32 0
  %vecinit15.i.i = shufflevector <16 x i32> %vecinit.i.i, <16 x i32> undef, <16 x i32> zeroinitializer
  ret <16 x i32> %vecinit15.i.i
}

; 8 x i32 compare -> i8 mask -> zext to i64 -> splat into <8 x i64>.
; Both x86-64 runs are expected to use the 512-bit vpbroadcastmb2q (the
; CD-only run widens the compare to zmm first); the i686 run goes through a
; GPR and vpbroadcastq instead.
define <8 x i64> @test_mm512_epi64(<8 x i32> %a, <8 x i32> %b) {
; AVX512CD-LABEL: test_mm512_epi64:
; AVX512CD:       # %bb.0: # %entry
; AVX512CD-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512CD-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; AVX512CD-NEXT:    vpbroadcastmb2q %k0, %zmm0
; AVX512CD-NEXT:    retq
;
; AVX512VLCDBW-LABEL: test_mm512_epi64:
; AVX512VLCDBW:       # %bb.0: # %entry
; AVX512VLCDBW-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
; AVX512VLCDBW-NEXT:    vpbroadcastmb2q %k0, %zmm0
; AVX512VLCDBW-NEXT:    retq
;
; X86-AVX512VLCDBW-LABEL: test_mm512_epi64:
; X86-AVX512VLCDBW:       # %bb.0: # %entry
; X86-AVX512VLCDBW-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
; X86-AVX512VLCDBW-NEXT:    kmovd %k0, %eax
; X86-AVX512VLCDBW-NEXT:    movzbl %al, %eax
; X86-AVX512VLCDBW-NEXT:    vmovd %eax, %xmm0
; X86-AVX512VLCDBW-NEXT:    vpbroadcastq %xmm0, %zmm0
; X86-AVX512VLCDBW-NEXT:    retl
entry:
  %0 = icmp eq <8 x i32> %a, %b
  %1 = bitcast <8 x i1> %0 to i8
  %conv.i = zext i8 %1 to i64
  %vecinit.i.i = insertelement <8 x i64> undef, i64 %conv.i, i32 0
  %vecinit7.i.i = shufflevector <8 x i64> %vecinit.i.i, <8 x i64> undef, <8 x i32> zeroinitializer
  ret <8 x i64> %vecinit7.i.i
}

; 8 x i32 compare -> i8 mask -> zext to i64 -> splat into <4 x i64>.
; Only the x86-64 VL+CD+BW run is expected to use the 256-bit
; vpbroadcastmb2q; the other two runs move the mask through a GPR and use
; vpbroadcastq.
define <4 x i64> @test_mm256_epi64(<8 x i32> %a, <8 x i32> %b) {
; AVX512CD-LABEL: test_mm256_epi64:
; AVX512CD:       # %bb.0: # %entry
; AVX512CD-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512CD-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; AVX512CD-NEXT:    kmovw %k0, %eax
; AVX512CD-NEXT:    movzbl %al, %eax
; AVX512CD-NEXT:    vmovq %rax, %xmm0
; AVX512CD-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX512CD-NEXT:    retq
;
; AVX512VLCDBW-LABEL: test_mm256_epi64:
; AVX512VLCDBW:       # %bb.0: # %entry
; AVX512VLCDBW-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
; AVX512VLCDBW-NEXT:    vpbroadcastmb2q %k0, %ymm0
; AVX512VLCDBW-NEXT:    retq
;
; X86-AVX512VLCDBW-LABEL: test_mm256_epi64:
; X86-AVX512VLCDBW:       # %bb.0: # %entry
; X86-AVX512VLCDBW-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
; X86-AVX512VLCDBW-NEXT:    kmovd %k0, %eax
; X86-AVX512VLCDBW-NEXT:    movzbl %al, %eax
; X86-AVX512VLCDBW-NEXT:    vmovd %eax, %xmm0
; X86-AVX512VLCDBW-NEXT:    vpbroadcastq %xmm0, %ymm0
; X86-AVX512VLCDBW-NEXT:    retl
entry:
  %0 = icmp eq <8 x i32> %a, %b
  %1 = bitcast <8 x i1> %0 to i8
  %conv.i = zext i8 %1 to i64
  %vecinit.i.i = insertelement <4 x i64> undef, i64 %conv.i, i32 0
  %vecinit3.i.i = shufflevector <4 x i64> %vecinit.i.i, <4 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %vecinit3.i.i
}

; 16 x i16 compare -> i16 mask -> zext to i32 -> splat into <8 x i32>.
; Both VL+CD+BW runs are expected to use the 256-bit vpbroadcastmw2d; the
; CD-only run expands the splat into word inserts plus vinserti128.
define <8 x i32> @test_mm256_epi32(<16 x i16> %a, <16 x i16> %b) {
; AVX512CD-LABEL: test_mm256_epi32:
; AVX512CD:       # %bb.0: # %entry
; AVX512CD-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512CD-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512CD-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512CD-NEXT:    kmovw %k0, %eax
; AVX512CD-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
; AVX512CD-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512CD-NEXT:    retq
;
; AVX512VLCDBW-LABEL: test_mm256_epi32:
; AVX512VLCDBW:       # %bb.0: # %entry
; AVX512VLCDBW-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
; AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %ymm0
; AVX512VLCDBW-NEXT:    retq
;
; X86-AVX512VLCDBW-LABEL: test_mm256_epi32:
; X86-AVX512VLCDBW:       # %bb.0: # %entry
; X86-AVX512VLCDBW-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
; X86-AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %ymm0
; X86-AVX512VLCDBW-NEXT:    retl
entry:
  %0 = icmp eq <16 x i16> %a, %b
  %1 = bitcast <16 x i1> %0 to i16
  %conv.i = zext i16 %1 to i32
  %vecinit.i.i = insertelement <8 x i32> undef, i32 %conv.i, i32 0
  %vecinit7.i.i = shufflevector <8 x i32> %vecinit.i.i, <8 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %vecinit7.i.i
}