; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

;
; Variable Shifts
;

define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, %b
  ret <8 x i64> %shift
}

define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, %b
  ret <16 x i32> %shift
}

define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm6 = ymm4[4],ymm0[4],ymm4[5],ymm0[5],ymm4[6],ymm0[6],ymm4[7],ymm0[7],ymm4[12],ymm0[12],ymm4[13],ymm0[13],ymm4[14],ymm0[14],ymm4[15],ymm0[15]
; AVX512DQ-NEXT:    vpsrlvd %ymm5, %ymm6, %ymm5
; AVX512DQ-NEXT:    vpsrld $16, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11]
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm4[0],ymm0[0],ymm4[1],ymm0[1],ymm4[2],ymm0[2],ymm4[3],ymm0[3],ymm4[8],ymm0[8],ymm4[9],ymm0[9],ymm4[10],ymm0[10],ymm4[11],ymm0[11]
; AVX512DQ-NEXT:    vpsrlvd %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackusdw %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm2 = ymm3[4],ymm4[4],ymm3[5],ymm4[5],ymm3[6],ymm4[6],ymm3[7],ymm4[7],ymm3[12],ymm4[12],ymm3[13],ymm4[13],ymm3[14],ymm4[14],ymm3[15],ymm4[15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm4[4],ymm1[4],ymm4[5],ymm1[5],ymm4[6],ymm1[6],ymm4[7],ymm1[7],ymm4[12],ymm1[12],ymm4[13],ymm1[13],ymm4[14],ymm1[14],ymm4[15],ymm1[15]
; AVX512DQ-NEXT:    vpsrlvd %ymm2, %ymm5, %ymm2
; AVX512DQ-NEXT:    vpsrld $16, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[8],ymm4[8],ymm3[9],ymm4[9],ymm3[10],ymm4[10],ymm3[11],ymm4[11]
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[8],ymm1[8],ymm4[9],ymm1[9],ymm4[10],ymm1[10],ymm4[11],ymm1[11]
; AVX512DQ-NEXT:    vpsrlvd %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpackusdw %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, %b
  ret <32 x i16> %shift
}

define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm6, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm6, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq

  %shift = lshr <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;

define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = lshr <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; ALL-NEXT:    vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; ALL-NEXT:    vpsrld %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = lshr <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vmovd %xmm2, %eax
; AVX512DQ-NEXT:    movzwl %ax, %eax
; AVX512DQ-NEXT:    vmovd %eax, %xmm2
; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vmovd %xmm1, %eax
; AVX512BW-NEXT:    movzwl %ax, %eax
; AVX512BW-NEXT:    vmovd %eax, %xmm1
; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = lshr <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpbroadcastb %xmm2, %ymm2
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm8
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm3
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = lshr <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;

define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlvq {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm3[4],ymm2[4],ymm3[5],ymm2[5],ymm3[6],ymm2[6],ymm3[7],ymm2[7],ymm3[12],ymm2[12],ymm3[13],ymm2[13],ymm3[14],ymm2[14],ymm3[15],ymm2[15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; AVX512DQ-NEXT:    vpsrlvd %ymm4, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpsrld $16, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm2[0],ymm3[1],ymm2[1],ymm3[2],ymm2[2],ymm3[3],ymm2[3],ymm3[8],ymm2[8],ymm3[9],ymm2[9],ymm3[10],ymm2[10],ymm3[11],ymm2[11]
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; AVX512DQ-NEXT:    vpsrlvd %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackusdw %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15]
; AVX512DQ-NEXT:    vpsrlvd %ymm4, %ymm5, %ymm4
; AVX512DQ-NEXT:    vpsrld $16, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11]
; AVX512DQ-NEXT:    vpsrlvd %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpackusdw %ymm4, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; AVX512DQ-NEXT:    vpsllw $5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm4, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm8
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
  %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;

define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlq $7, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrld $5, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}