; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+avx512dq,+fast-variable-shuffle | FileCheck %s --check-prefix=VL_BW_DQ

; Code-generation tests for shufflevector on vectors of i1 (mask vectors).
; Each function is compiled by the three llc invocations above (avx512f alone,
; avx512vl, and avx512bw+avx512vl+avx512dq) and the emitted x86-64 assembly is
; matched against the check group that carries the corresponding prefix.
; The check groups are generated output: regenerate them with the script named
; on the first line instead of editing them by hand.

; Swap the two elements of a <2 x i1> vector (shuffle mask <1, 0>).
define <2 x i1> @shuf2i1_1_0(<2 x i1> %a) {
; AVX512F-LABEL: shuf2i1_1_0:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf2i1_1_0:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512VL-NEXT: vptestmq %xmm0, %xmm0, %k1
; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} {z}
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX512VL-NEXT: vptestmq %xmm1, %xmm1, %k1
; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf2i1_1_0:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: vpsllq $63, %xmm0, %xmm0
; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0
; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0
; VL_BW_DQ-NEXT: retq
  %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x i1> %b
}

; Two-input <2 x i1> shuffle (mask <1, 2>): element 1 of %a followed by
; element 0 of the constant vector <i1 1, i1 0>.
define <2 x i1> @shuf2i1_1_2(<2 x i1> %a) {
; AVX512F-LABEL: shuf2i1_1_2:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: movq $-1, %rax
; AVX512F-NEXT: vmovq %rax, %xmm1
; AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf2i1_1_2:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512VL-NEXT: vptestmq %xmm0, %xmm0, %k1
; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} {z}
; AVX512VL-NEXT: movq $-1, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm2
; AVX512VL-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; AVX512VL-NEXT: vptestmq %xmm1, %xmm1, %k1
; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf2i1_1_2:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: vpsllq $63, %xmm0, %xmm0
; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0
; VL_BW_DQ-NEXT: movq $-1, %rax
; VL_BW_DQ-NEXT: vmovq %rax, %xmm0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm1
; VL_BW_DQ-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0
; VL_BW_DQ-NEXT: retq
  %b = shufflevector <2 x i1> %a, <2 x i1> <i1 1, i1 0>, <2 x i32> <i32 1, i32 2>
  ret <2 x i1> %b
}


; Reverse the four elements of a <4 x i1> vector (shuffle mask <3, 2, 1, 0>).
define <4 x i1> @shuf4i1_3_2_10(<4 x i1> %a) {
; AVX512F-LABEL: shuf4i1_3_2_10:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf4i1_3_2_10:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1
; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z}
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
; AVX512VL-NEXT: vptestmd %xmm1, %xmm1, %k1
; AVX512VL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf4i1_3_2_10:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: vpslld $31, %xmm0, %xmm0
; VL_BW_DQ-NEXT: vpmovd2m %xmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %xmm0
; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; VL_BW_DQ-NEXT: vpmovd2m %xmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %xmm0
; VL_BW_DQ-NEXT: retq
  %b = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i1> %b
}

; Shuffle of two <8 x i1> icmp-eq results. Every mask index is below 8, so
; only the first compare (%a against %a1) contributes to the result.
define <8 x i1> @shuf8i1_3_6_1_0_3_7_7_0(<8 x i64> %a, <8 x i64> %b, <8 x i64> %a1, <8 x i64> %b1) {
; AVX512F-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0]
; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,1,0,3,7,7,0]
; AVX512VL-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT: vptestmd %ymm1, %ymm1, %k1
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
; VL_BW_DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [3,6,1,0,3,7,7,0]
; VL_BW_DQ-NEXT: vpermd %ymm0, %ymm1, %ymm0
; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %xmm0
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
  %a2 = icmp eq <8 x i64> %a, %a1
  %b2 = icmp eq <8 x i64> %b, %b1
  %c = shufflevector <8 x i1> %a2, <8 x i1> %b2, <8 x i32> <i32 3, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 0>
  ret <8 x i1> %c
}

; Two-input shuffle of <16 x i1> icmp-eq results. Mask indices 22 and 21 are
; >= 16 and therefore select from the second compare (%b against %b1).
define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<16 x i32> %a, <16 x i32> %b, <16 x i32> %a1, <16 x i32> %b1) {
; AVX512F-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
; AVX512F-NEXT: vpcmpeqd %zmm3, %zmm1, %k2
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512F-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
; AVX512VL-NEXT: vpcmpeqd %zmm3, %zmm1, %k2
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512VL-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; AVX512VL-NEXT: vptestmd %zmm2, %zmm2, %k1
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
; VL_BW_DQ-NEXT: vpcmpeqd %zmm3, %zmm1, %k1
; VL_BW_DQ-NEXT: vpmovm2d %k1, %zmm0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %zmm1
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; VL_BW_DQ-NEXT: vpmovd2m %zmm2, %k0
; VL_BW_DQ-NEXT: vpmovm2b %k0, %xmm0
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
  %a2 = icmp eq <16 x i32> %a, %a1
  %b2 = icmp eq <16 x i32> %b, %b1
  %c = shufflevector <16 x i1> %a2, <16 x i1> %b2, <16 x i32> <i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0>
  ret <16 x i1> %c
}

; Single-input <32 x i1> shuffle (second operand undef). The 32-entry mask is
; the same 16-entry index pattern written twice; all indices are below 32.
define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<32 x i1> %a) {
; AVX512F-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512F-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm1
; AVX512VL-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512VL-NEXT: vptestmd %zmm1, %zmm1, %k1
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512VL-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k2
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512VL-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; AVX512VL-NEXT: vptestmd %zmm2, %zmm2, %k1
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: vpsllw $7, %ymm0, %ymm0
; VL_BW_DQ-NEXT: vpmovb2m %ymm0, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %zmm0
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: vpermw %zmm0, %zmm1, %zmm0
; VL_BW_DQ-NEXT: vpmovw2m %zmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2b %k0, %ymm0
; VL_BW_DQ-NEXT: retq
  %b = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> <i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0, i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0>
  ret <32 x i1> %b
}

; Same 32-entry shuffle applied to the result of comparing a <32 x i16> vector
; with zero; the shuffled mask is then the condition of a select between %c
; and %d.
define <32 x i16> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i16(<32 x i16> %a, <32 x i16> %c, <32 x i16> %d) {
; AVX512F-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512F-NEXT: vpcmpeqw %ymm6, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpcmpeqw %ymm6, %ymm1, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm6 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512F-NEXT: vpermi2d %zmm0, %zmm1, %zmm6
; AVX512F-NEXT: vptestmd %zmm6, %zmm6, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm4, %ymm0
; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm5, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512VL-NEXT: vpcmpeqw %ymm6, %ymm0, %ymm0
; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512VL-NEXT: vpcmpeqw %ymm6, %ymm1, %ymm0
; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k2
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm6 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512VL-NEXT: vpermi2d %zmm0, %zmm1, %zmm6
; AVX512VL-NEXT: vptestmd %zmm6, %zmm6, %k1
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %zmm0, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm4, %ymm0
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm5, %ymm1
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i16:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: vptestnmw %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %zmm0
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: vpermw %zmm0, %zmm3, %zmm0
; VL_BW_DQ-NEXT: vpmovw2m %zmm0, %k1
; VL_BW_DQ-NEXT: vpblendmw %zmm1, %zmm2, %zmm0 {%k1}
; VL_BW_DQ-NEXT: retq
  %cmp = icmp eq <32 x i16> %a, zeroinitializer
  %shuf = shufflevector <32 x i1> %cmp, <32 x i1> undef, <32 x i32> <i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0, i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0>
  %sel = select <32 x i1> %shuf, <32 x i16> %c, <32 x i16> %d
  ret <32 x i16> %sel
}

; As the _icmp_v32i16 test, but the compare-with-zero and the select operate
; on <32 x i8> vectors.
define <32 x i8> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i8(<32 x i8> %a, <32 x i8> %c, <32 x i8> %d) {
; AVX512F-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm3
; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k1
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm4 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512F-NEXT: vpermi2d %zmm0, %zmm3, %zmm4
; AVX512F-NEXT: vptestmd %zmm4, %zmm4, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm3
; AVX512VL-NEXT: vptestmd %zmm3, %zmm3, %k1
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k2
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512VL-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm4 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512VL-NEXT: vpermi2d %zmm0, %zmm3, %zmm4
; AVX512VL-NEXT: vptestmd %zmm4, %zmm4, %k1
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i8:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: vptestnmb %ymm0, %ymm0, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %zmm0
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: vpermw %zmm0, %zmm3, %zmm0
; VL_BW_DQ-NEXT: vpmovw2m %zmm0, %k1
; VL_BW_DQ-NEXT: vpblendmb %ymm1, %ymm2, %ymm0 {%k1}
; VL_BW_DQ-NEXT: retq
  %cmp = icmp eq <32 x i8> %a, zeroinitializer
  %shuf = shufflevector <32 x i1> %cmp, <32 x i1> undef, <32 x i32> <i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0, i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0>
  %sel = select <32 x i1> %shuf, <32 x i8> %c, <32 x i8> %d
  ret <32 x i8> %sel
}

; The <32 x i1> mask is built by concatenating two <16 x i1> compare-with-zero
; results (identity mask 0..31), then shuffled with the same 32-entry pattern
; and used as the condition of a <32 x i16> select.
define <32 x i16> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i16_split(<16 x i32> %a, <16 x i32> %b, <32 x i16> %c, <32 x i16> %d) {
; AVX512F-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i16_split:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vptestnmd %zmm1, %zmm1, %k2
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm6 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512F-NEXT: vpermi2d %zmm0, %zmm1, %zmm6
; AVX512F-NEXT: vptestmd %zmm6, %zmm6, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm4, %ymm0
; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm5, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i16_split:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmd %zmm0, %zmm0, %k1
; AVX512VL-NEXT: vptestnmd %zmm1, %zmm1, %k2
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm6 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512VL-NEXT: vpermi2d %zmm0, %zmm1, %zmm6
; AVX512VL-NEXT: vptestmd %zmm6, %zmm6, %k1
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %zmm0, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm4, %ymm0
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm5, %ymm1
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i16_split:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1
; VL_BW_DQ-NEXT: kunpckwd %k0, %k1, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %zmm0
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: vpermw %zmm0, %zmm1, %zmm0
; VL_BW_DQ-NEXT: vpmovw2m %zmm0, %k1
; VL_BW_DQ-NEXT: vpblendmw %zmm2, %zmm3, %zmm0 {%k1}
; VL_BW_DQ-NEXT: retq
  %cmp1 = icmp eq <16 x i32> %a, zeroinitializer
  %cmp2 = icmp eq <16 x i32> %b, zeroinitializer
  %concat = shufflevector <16 x i1> %cmp1, <16 x i1> %cmp2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %shuf = shufflevector <32 x i1> %concat, <32 x i1> undef, <32 x i32> <i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0, i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0>
  %sel = select <32 x i1> %shuf, <32 x i16> %c, <32 x i16> %d
  ret <32 x i16> %sel
}

; As the _icmp_v32i16_split test, but the final select operates on <32 x i8>
; vectors.
define <32 x i8> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i8_split(<16 x i32> %a, <16 x i32> %b, <32 x i8> %c, <32 x i8> %d) {
; AVX512F-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i8_split:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vptestnmd %zmm1, %zmm1, %k2
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm4 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512F-NEXT: vpermi2d %zmm0, %zmm1, %zmm4
; AVX512F-NEXT: vptestmd %zmm4, %zmm4, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512F-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i8_split:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmd %zmm0, %zmm0, %k1
; AVX512VL-NEXT: vptestnmd %zmm1, %zmm1, %k2
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm4 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; AVX512VL-NEXT: vpermi2d %zmm0, %zmm1, %zmm4
; AVX512VL-NEXT: vptestmd %zmm4, %zmm4, %k1
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm0
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i8_split:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1
; VL_BW_DQ-NEXT: kunpckwd %k0, %k1, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %zmm0
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: vpermw %zmm0, %zmm1, %zmm0
; VL_BW_DQ-NEXT: vpmovw2m %zmm0, %k1
; VL_BW_DQ-NEXT: vpblendmb %ymm2, %ymm3, %ymm0 {%k1}
; VL_BW_DQ-NEXT: retq
  %cmp1 = icmp eq <16 x i32> %a, zeroinitializer
  %cmp2 = icmp eq <16 x i32> %b, zeroinitializer
  %concat = shufflevector <16 x i1> %cmp1, <16 x i1> %cmp2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %shuf = shufflevector <32 x i1> %concat, <32 x i1> undef, <32 x i32> <i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0, i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0>
  %sel = select <32 x i1> %shuf, <32 x i8> %c, <32 x i8> %d
  ret <32 x i8> %sel
}

; The i8 argument is bitcast to <8 x i1> and shuffled with a partly undefined
; mask: lanes 1, 4 and 6 all take element 2, every other lane is undef.
define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u(i8 %a) {
; AVX512F-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: kmovw %edi, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX512VL-NEXT: vpslld $31, %ymm1, %ymm1
; AVX512VL-NEXT: vptestmd %ymm1, %ymm1, %k1
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; VL_BW_DQ-NEXT: vpbroadcastq %xmm0, %ymm0
; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %xmm0
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
  %b = bitcast i8 %a to <8 x i1>
  %c = shufflevector < 8 x i1> %b, <8 x i1>undef, <8 x i32> <i32 undef, i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef>
  ret <8 x i1> %c
}

; Shuffle of the bitcast argument with an all-zero second operand: mask
; indices 10 and 9 select zero lanes, lanes 3, 5 and 7 are undef. The result
; is bitcast back to i8.
define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %a) {
; AVX512F-LABEL: shuf8i1_10_2_9_u_3_u_2_u:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u>
; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf8i1_10_2_9_u_3_u_2_u:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: kmovw %edi, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,2,3]
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [8,1,10,3,0,1,2,3]
; AVX512VL-NEXT: vpermi2d %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT: vpslld $31, %ymm2, %ymm0
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovw %k0, %eax
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_10_2_9_u_3_u_2_u:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,2,3]
; VL_BW_DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VL_BW_DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [8,1,10,3,0,1,2,3]
; VL_BW_DQ-NEXT: vpermi2d %ymm1, %ymm0, %ymm2
; VL_BW_DQ-NEXT: vpmovd2m %ymm2, %k0
; VL_BW_DQ-NEXT: kmovd %k0, %eax
; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
  %b = bitcast i8 %a to <8 x i1>
  %c = shufflevector < 8 x i1> %b, <8 x i1> zeroinitializer, <8 x i32> <i32 10, i32 2, i32 9, i32 undef, i32 3, i32 undef, i32 2, i32 undef>
  %d = bitcast <8 x i1> %c to i8
  ret i8 %d
}

; Elements 0, 1, 4 and 5 of the bitcast argument are moved to the low four
; lanes; the upper four lanes are undef. The result is bitcast back to i8.
define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a) {
; AVX512F-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5,0,1,0,1]
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: kmovw %edi, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovw %k0, %eax
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
; VL_BW_DQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
; VL_BW_DQ-NEXT: kmovd %k0, %eax
; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
  %b = bitcast i8 %a to <8 x i1>
  %c = shufflevector < 8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
  %d = bitcast <8 x i1> %c to i8
  ret i8 %d
}

; Fully defined shuffle of the bitcast argument with an all-zero second
; operand: mask index 9 selects a zero lane, every other lane comes from %b.
define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a) {
; AVX512F-LABEL: shuf8i1_9_6_1_0_3_7_7_0:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0]
; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf8i1_9_6_1_0_3_7_7_0:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: kmovw %edi, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,1,0,3,7,7,0]
; AVX512VL-NEXT: vpermi2d %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT: vptestmd %ymm2, %ymm2, %k0
; AVX512VL-NEXT: kmovw %k0, %eax
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_9_6_1_0_3_7_7_0:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
; VL_BW_DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VL_BW_DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,1,0,3,7,7,0]
; VL_BW_DQ-NEXT: vpermi2d %ymm1, %ymm0, %ymm2
; VL_BW_DQ-NEXT: vpmovd2m %ymm2, %k0
; VL_BW_DQ-NEXT: kmovd %k0, %eax
; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
  %b = bitcast i8 %a to <8 x i1>
  %c = shufflevector <8 x i1> %b, <8 x i1> zeroinitializer, <8 x i32> <i32 9, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 0>
  %d = bitcast <8 x i1>%c to i8
  ret i8 %d
}

; Operand order is swapped relative to the previous test: the all-zero vector
; is the first operand and %b the second, so only mask indices 9 and 10 (lanes
; 0 and 3) read from %b; every other lane selects a zero.
define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %a) {
; AVX512F-LABEL: shuf8i1_9_6_1_10_3_7_7_0:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,10,4,5,6,7]
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf8i1_9_6_1_10_3_7_7_0:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: kmovw %edi, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2]
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3],ymm1[4,5,6,7]
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovw %k0, %eax
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_9_6_1_10_3_7_7_0:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2]
; VL_BW_DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VL_BW_DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3],ymm1[4,5,6,7]
; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
; VL_BW_DQ-NEXT: kmovd %k0, %eax
; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
  %b = bitcast i8 %a to <8 x i1>
  %c = shufflevector <8 x i1> zeroinitializer, <8 x i1> %b, <8 x i32> <i32 9, i32 6, i32 1, i32 10, i32 3, i32 7, i32 7, i32 0>
  %d = bitcast <8 x i1>%c to i8
  ret i8 %d
}

define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) {
; AVX512F-LABEL: shuf8i1__9_6_1_10_3_7_7_1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,6,1,0,3,7,7,1]
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [18446744073709551615,18446744073709551615,0,0,0,0,0,0]
; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf8i1__9_6_1_10_3_7_7_1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: kmovw %edi, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1,2,3,4,5,6,7]
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovw %k0, %eax
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1__9_6_1_10_3_7_7_1:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; VL_BW_DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1,2,3,4,5,6,7]
; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
; VL_BW_DQ-NEXT: kmovd %k0, %eax
; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
  %b = bitcast i8 %a to <8 x i1>
  %c = shufflevector <8 x i1> <i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0>, <8 x i1> %b, <8 x i32> <i32 9, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 1>
  %c1 = bitcast <8 x i1>%c to i8
ret i8 %c1 749} 750 751define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) { 752; AVX512F-LABEL: shuf8i1_9_6_1_10_3_7_7_0_all_ones: 753; AVX512F: # %bb.0: 754; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 755; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 756; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 757; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 758; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,3,4,5,6,7] 759; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 760; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2 761; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k0 762; AVX512F-NEXT: kmovw %k0, %eax 763; AVX512F-NEXT: # kill: def $al killed $al killed $eax 764; AVX512F-NEXT: vzeroupper 765; AVX512F-NEXT: retq 766; 767; AVX512VL-LABEL: shuf8i1_9_6_1_10_3_7_7_0_all_ones: 768; AVX512VL: # %bb.0: 769; AVX512VL-NEXT: vpmovsxwd %xmm0, %ymm0 770; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0 771; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1 772; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 773; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z} 774; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3] 775; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7] 776; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0 777; AVX512VL-NEXT: kmovw %k0, %eax 778; AVX512VL-NEXT: # kill: def $al killed $al killed $eax 779; AVX512VL-NEXT: vzeroupper 780; AVX512VL-NEXT: retq 781; 782; VL_BW_DQ-LABEL: shuf8i1_9_6_1_10_3_7_7_0_all_ones: 783; VL_BW_DQ: # %bb.0: 784; VL_BW_DQ-NEXT: vpsllw $15, %xmm0, %xmm0 785; VL_BW_DQ-NEXT: vpmovw2m %xmm0, %k0 786; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0 787; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 788; VL_BW_DQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 789; VL_BW_DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] 790; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0 791; VL_BW_DQ-NEXT: kmovd %k0, %eax 792; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax 793; VL_BW_DQ-NEXT: vzeroupper 794; VL_BW_DQ-NEXT: retq 795 %c = shufflevector <8 x i1> <i1 1, i1 1, i1 
1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1> %a, <8 x i32> <i32 9, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 0> 796 %c1 = bitcast <8 x i1>%c to i8 797 ret i8 %c1 798} 799 800 801define i16 @shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0(i16 %a) { 802; AVX512F-LABEL: shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0: 803; AVX512F: # %bb.0: 804; AVX512F-NEXT: kmovw %edi, %k1 805; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 806; AVX512F-NEXT: vpbroadcastd %xmm0, %zmm0 807; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 808; AVX512F-NEXT: kmovw %k0, %eax 809; AVX512F-NEXT: # kill: def $ax killed $ax killed $eax 810; AVX512F-NEXT: vzeroupper 811; AVX512F-NEXT: retq 812; 813; AVX512VL-LABEL: shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0: 814; AVX512VL: # %bb.0: 815; AVX512VL-NEXT: kmovw %edi, %k1 816; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 817; AVX512VL-NEXT: vpbroadcastd %xmm0, %zmm0 818; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k0 819; AVX512VL-NEXT: kmovw %k0, %eax 820; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax 821; AVX512VL-NEXT: vzeroupper 822; AVX512VL-NEXT: retq 823; 824; VL_BW_DQ-LABEL: shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0: 825; VL_BW_DQ: # %bb.0: 826; VL_BW_DQ-NEXT: kmovd %edi, %k0 827; VL_BW_DQ-NEXT: vpmovm2d %k0, %zmm0 828; VL_BW_DQ-NEXT: vpbroadcastd %xmm0, %zmm0 829; VL_BW_DQ-NEXT: vpmovd2m %zmm0, %k0 830; VL_BW_DQ-NEXT: kmovd %k0, %eax 831; VL_BW_DQ-NEXT: # kill: def $ax killed $ax killed $eax 832; VL_BW_DQ-NEXT: vzeroupper 833; VL_BW_DQ-NEXT: retq 834 %b = bitcast i16 %a to <16 x i1> 835 %c = shufflevector < 16 x i1> %b, <16 x i1> undef, <16 x i32> zeroinitializer 836 %d = bitcast <16 x i1> %c to i16 837 ret i16 %d 838} 839 840define i64 @shuf64i1_zero(i64 %a) { 841; AVX512F-LABEL: shuf64i1_zero: 842; AVX512F: # %bb.0: 843; AVX512F-NEXT: kmovw %edi, %k1 844; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 845; AVX512F-NEXT: vpbroadcastd %xmm0, %zmm0 846; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 847; AVX512F-NEXT: 
kmovw %k0, %eax 848; AVX512F-NEXT: kmovw %k0, %ecx 849; AVX512F-NEXT: shll $16, %ecx 850; AVX512F-NEXT: orl %eax, %ecx 851; AVX512F-NEXT: movq %rcx, %rax 852; AVX512F-NEXT: shlq $32, %rax 853; AVX512F-NEXT: orq %rcx, %rax 854; AVX512F-NEXT: vzeroupper 855; AVX512F-NEXT: retq 856; 857; AVX512VL-LABEL: shuf64i1_zero: 858; AVX512VL: # %bb.0: 859; AVX512VL-NEXT: kmovw %edi, %k1 860; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 861; AVX512VL-NEXT: vpbroadcastd %xmm0, %zmm0 862; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k0 863; AVX512VL-NEXT: kmovw %k0, %eax 864; AVX512VL-NEXT: kmovw %k0, %ecx 865; AVX512VL-NEXT: shll $16, %ecx 866; AVX512VL-NEXT: orl %eax, %ecx 867; AVX512VL-NEXT: movq %rcx, %rax 868; AVX512VL-NEXT: shlq $32, %rax 869; AVX512VL-NEXT: orq %rcx, %rax 870; AVX512VL-NEXT: vzeroupper 871; AVX512VL-NEXT: retq 872; 873; VL_BW_DQ-LABEL: shuf64i1_zero: 874; VL_BW_DQ: # %bb.0: 875; VL_BW_DQ-NEXT: kmovq %rdi, %k0 876; VL_BW_DQ-NEXT: vpmovm2b %k0, %zmm0 877; VL_BW_DQ-NEXT: vpbroadcastb %xmm0, %zmm0 878; VL_BW_DQ-NEXT: vpmovb2m %zmm0, %k0 879; VL_BW_DQ-NEXT: kmovq %k0, %rax 880; VL_BW_DQ-NEXT: vzeroupper 881; VL_BW_DQ-NEXT: retq 882 %b = bitcast i64 %a to <64 x i1> 883 %c = shufflevector < 64 x i1> %b, <64 x i1> undef, <64 x i32> zeroinitializer 884 %d = bitcast <64 x i1> %c to i64 885 ret i64 %d 886} 887