; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

;
; Variable Shifts
;

define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, %b
  ret <8 x i64> %shift
}

define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, %b
  ret <16 x i32> %shift
}

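; NOTE: Word-granular variable shifts (vpsllvw) require AVX512BW, so the
; AVX512DQ lowering below is expected to zero-extend each 256-bit half to i32
; (vpmovzxwd), shift with vpsllvd, and truncate back with vpmovdw.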
define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT:    vpsllvd %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT:    vpsllvd %zmm2, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, %b
  ret <32 x i16> %shift
}

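; NOTE: x86 has no byte-granular vector shift, so v64i8 is expected to lower
; to a shift-and-blend ladder: vpsllw $5 moves the three useful bits of each
; shift amount up to the byte's sign bit, then three rounds of shift-by-4/2/1
; are selected per byte (vpblendvb under AVX512DQ; vpmovb2m plus masked moves
; under AVX512BW), with vpand clearing the bits that the word-wide shifts
; drag across byte boundaries.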
define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512DQ-NEXT:    vpand %ymm6, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm6, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsllw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %shift = shl <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;

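; NOTE: For a splatted shift amount the count can live in the low quadword of
; an xmm register, so these cases should use the scalar-count forms (vpsllq,
; vpslld, vpsllw) after zero-extending the bottom element; v64i8 has no such
; form and falls back to the same blend ladder as the fully variable case.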
define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = shl <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; ALL-NEXT:    vpslld %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = shl <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpbroadcastb %xmm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm7
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $4, %ymm1, %ymm3
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsllw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = shl <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;

define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvq {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvd {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

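; NOTE: AVX512DQ has no vpsllvw, but a left shift by a constant amount is a
; multiply by a power of two, so the v32i16 case below should lower to one
; vpmullw per 256-bit half against a constant-pool vector of powers of two.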
define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm4, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm7
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsllw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;

define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllq $7, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpslld $5, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}