; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

;
; Variable Shifts
;

define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, %b
  ret <8 x i64> %shift
}

define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, %b
  ret <16 x i32> %shift
}

define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT:    vpsrlvd %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT:    vpsrlvd %zmm2, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, %b
  ret <32 x i16> %shift
}

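; x86 has no variable per-byte shift instruction, so v64i8 is emulated one
; amount bit at a time. The AVX512DQ lowering works on 256-bit halves:
; vpsllw $5 moves the active shift-amount bit into each byte's sign bit,
; vpblendvb selects the pre-shifted (by 4, then 2, then 1) bytes, vpand clears
; the bits shifted in from the neighboring byte, and vpaddb advances to the
; next amount bit. AVX512BW stays 512-bit wide, turning the sign bits into a
; mask register with vpmovb2m and merging each step with vmovdqu8.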
define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm6, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm6, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %shift = lshr <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;

define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrlq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = lshr <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; ALL-NEXT:    vpsrld %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = lshr <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = lshr <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

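; Even a splatted byte amount has no direct instruction: after vpbroadcastb,
; both paths fall back to the same bit-by-bit emulation as var_shift_v64i8,
; though the DQ half-by-half version can now compute the blend selectors once
; (ymm2/ymm6/ymm8) and reuse them for both 256-bit halves.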
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpbroadcastb %xmm2, %ymm2
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm8
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm3
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = lshr <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;

define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrlvq {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512DQ-NEXT:    vpsrlvd %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT:    vpsrlvd %zmm2, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

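; With constant per-byte amounts the selector setup folds away: instead of
; applying vpsllw $5 to a live value, the DQ lowering materializes the
; pre-shifted selectors directly as the [8192,24640,41088,57536,...] word
; constants below, and the BW lowering does the same for its first vpmovb2m.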
define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm4, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm8
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;

define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrlq $7, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrld $5, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

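; A uniform byte shift needs no blending at all: it is lowered as a word shift
; plus a single mask that clears the bits shifted in from the neighboring byte
; (0xff >> 3 == 0x1f, hence the 31s below).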
define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}