; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

;
; Variable Shifts
;

define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsravq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = ashr <8 x i64> %a, %b
  ret <8 x i64> %shift
}

define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsravd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = ashr <16 x i32> %a, %b
  ret <16 x i32> %shift
}

define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT:    vpsravd %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
; AVX512DQ-NEXT:    vpmovsxwd %ymm1, %zmm1
; AVX512DQ-NEXT:    vpsravd %zmm2, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = ashr <32 x i16> %a, %b
  ret <32 x i16> %shift
}

define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512DQ-NEXT:    vpsraw $4, %ymm5, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm6, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpsraw $2, %ymm5, %ymm6
; AVX512DQ-NEXT:    vpaddw %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm6, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpsraw $1, %ymm5, %ymm6
; AVX512DQ-NEXT:    vpaddw %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm6, %ymm5, %ymm4
; AVX512DQ-NEXT:    vpsrlw $8, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512DQ-NEXT:    vpsraw $4, %ymm0, %ymm5
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsraw $2, %ymm0, %ymm5
; AVX512DQ-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsraw $1, %ymm0, %ymm5
; AVX512DQ-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackuswb %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $5, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512DQ-NEXT:    vpsraw $4, %ymm4, %ymm5
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsraw $2, %ymm4, %ymm5
; AVX512DQ-NEXT:    vpaddw %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsraw $1, %ymm4, %ymm5
; AVX512DQ-NEXT:    vpaddw %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
; AVX512DQ-NEXT:    vpsrlw $8, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX512DQ-NEXT:    vpsraw $4, %ymm1, %ymm4
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsraw $2, %ymm1, %ymm4
; AVX512DQ-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsraw $1, %ymm1, %ymm4
; AVX512DQ-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpackuswb %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; AVX512BW-NEXT:    vpsraw $4, %zmm2, %zmm3
; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm4 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT:    vpsraw $2, %zmm2, %zmm3
; AVX512BW-NEXT:    vpaddw %zmm4, %zmm4, %zmm4
; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT:    vpsraw $1, %zmm2, %zmm3
; AVX512BW-NEXT:    vpaddw %zmm4, %zmm4, %zmm4
; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT:    vpsrlw $8, %zmm2, %zmm2
; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
; AVX512BW-NEXT:    vpsraw $4, %zmm0, %zmm3
; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm1 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsraw $2, %zmm0, %zmm3
; AVX512BW-NEXT:    vpaddw %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsraw $1, %zmm0, %zmm3
; AVX512BW-NEXT:    vpaddw %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = ashr <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;

define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsraq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = ashr <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; ALL-NEXT:    vpsrad %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = ashr <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512DQ-NEXT:    vpsraw %xmm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsraw %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT:    vpsraw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = ashr <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpbroadcastb %xmm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512DQ-NEXT:    vpsraw $4, %ymm4, %ymm5
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsraw $2, %ymm4, %ymm5
; AVX512DQ-NEXT:    vpaddw %ymm3, %ymm3, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsraw $1, %ymm4, %ymm5
; AVX512DQ-NEXT:    vpaddw %ymm6, %ymm6, %ymm7
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsrlw $8, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512DQ-NEXT:    vpsraw $4, %ymm0, %ymm5
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsraw $2, %ymm0, %ymm5
; AVX512DQ-NEXT:    vpaddw %ymm2, %ymm2, %ymm8
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsraw $1, %ymm0, %ymm5
; AVX512DQ-NEXT:    vpaddw %ymm8, %ymm8, %ymm9
; AVX512DQ-NEXT:    vpblendvb %ymm9, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackuswb %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512DQ-NEXT:    vpsraw $4, %ymm4, %ymm5
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
; AVX512DQ-NEXT:    vpsraw $2, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpsraw $1, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpsrlw $8, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX512DQ-NEXT:    vpsraw $4, %ymm1, %ymm4
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsraw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsraw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm9, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpackuswb %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; AVX512BW-NEXT:    vpsraw $4, %zmm2, %zmm3
; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm4 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT:    vpsraw $2, %zmm2, %zmm3
; AVX512BW-NEXT:    vpaddw %zmm4, %zmm4, %zmm4
; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT:    vpsraw $1, %zmm2, %zmm3
; AVX512BW-NEXT:    vpaddw %zmm4, %zmm4, %zmm4
; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT:    vpsrlw $8, %zmm2, %zmm2
; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
; AVX512BW-NEXT:    vpsraw $4, %zmm0, %zmm3
; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm1 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsraw $2, %zmm0, %zmm3
; AVX512BW-NEXT:    vpaddw %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsraw $1, %zmm0, %zmm3
; AVX512BW-NEXT:    vpaddw %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = ashr <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;

define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsravq {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = ashr <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsravd {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = ashr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512DQ-NEXT:    vpsravd %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovsxwd %ymm1, %zmm1
; AVX512DQ-NEXT:    vpsravd %zmm2, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsravw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512DQ-NEXT:    vpsraw $4, %ymm4, %ymm5
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsraw $2, %ymm4, %ymm5
; AVX512DQ-NEXT:    vpaddw %ymm3, %ymm3, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsraw $1, %ymm4, %ymm5
; AVX512DQ-NEXT:    vpaddw %ymm6, %ymm6, %ymm7
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsrlw $8, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512DQ-NEXT:    vpsraw $4, %ymm0, %ymm5
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsraw $2, %ymm0, %ymm5
; AVX512DQ-NEXT:    vpaddw %ymm2, %ymm2, %ymm8
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsraw $1, %ymm0, %ymm5
; AVX512DQ-NEXT:    vpaddw %ymm8, %ymm8, %ymm9
; AVX512DQ-NEXT:    vpblendvb %ymm9, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackuswb %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512DQ-NEXT:    vpsraw $4, %ymm4, %ymm5
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
; AVX512DQ-NEXT:    vpsraw $2, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpsraw $1, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpsrlw $8, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX512DQ-NEXT:    vpsraw $4, %ymm1, %ymm4
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsraw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsraw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm9, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpackuswb %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; AVX512BW-NEXT:    vpsraw $4, %zmm1, %zmm2
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm4 = zmm0[8],zmm3[8],zmm0[9],zmm3[9],zmm0[10],zmm3[10],zmm0[11],zmm3[11],zmm0[12],zmm3[12],zmm0[13],zmm3[13],zmm0[14],zmm3[14],zmm0[15],zmm3[15],zmm0[24],zmm3[24],zmm0[25],zmm3[25],zmm0[26],zmm3[26],zmm0[27],zmm3[27],zmm0[28],zmm3[28],zmm0[29],zmm3[29],zmm0[30],zmm3[30],zmm0[31],zmm3[31],zmm0[40],zmm3[40],zmm0[41],zmm3[41],zmm0[42],zmm3[42],zmm0[43],zmm3[43],zmm0[44],zmm3[44],zmm0[45],zmm3[45],zmm0[46],zmm3[46],zmm0[47],zmm3[47],zmm0[56],zmm3[56],zmm0[57],zmm3[57],zmm0[58],zmm3[58],zmm0[59],zmm3[59],zmm0[60],zmm3[60],zmm0[61],zmm3[61],zmm0[62],zmm3[62],zmm0[63],zmm3[63]
; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm1 {%k1}
; AVX512BW-NEXT:    vpsraw $2, %zmm1, %zmm2
; AVX512BW-NEXT:    vpaddw %zmm4, %zmm4, %zmm4
; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm1 {%k1}
; AVX512BW-NEXT:    vpsraw $1, %zmm1, %zmm2
; AVX512BW-NEXT:    vpaddw %zmm4, %zmm4, %zmm4
; AVX512BW-NEXT:    vpmovb2m %zmm4, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm1 {%k1}
; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
; AVX512BW-NEXT:    vpsraw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm3 = zmm0[0],zmm3[0],zmm0[1],zmm3[1],zmm0[2],zmm3[2],zmm0[3],zmm3[3],zmm0[4],zmm3[4],zmm0[5],zmm3[5],zmm0[6],zmm3[6],zmm0[7],zmm3[7],zmm0[16],zmm3[16],zmm0[17],zmm3[17],zmm0[18],zmm3[18],zmm0[19],zmm3[19],zmm0[20],zmm3[20],zmm0[21],zmm3[21],zmm0[22],zmm3[22],zmm0[23],zmm3[23],zmm0[32],zmm3[32],zmm0[33],zmm3[33],zmm0[34],zmm3[34],zmm0[35],zmm3[35],zmm0[36],zmm3[36],zmm0[37],zmm3[37],zmm0[38],zmm3[38],zmm0[39],zmm3[39],zmm0[48],zmm3[48],zmm0[49],zmm3[49],zmm0[50],zmm3[50],zmm0[51],zmm3[51],zmm0[52],zmm3[52],zmm0[53],zmm3[53],zmm0[54],zmm3[54],zmm0[55],zmm3[55]
; AVX512BW-NEXT:    vpmovb2m %zmm3, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsraw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpaddw %zmm3, %zmm3, %zmm3
; AVX512BW-NEXT:    vpmovb2m %zmm3, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsraw $1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpaddw %zmm3, %zmm3, %zmm3
; AVX512BW-NEXT:    vpmovb2m %zmm3, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;

define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsraq $7, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrad $5, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsraw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsraw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsraw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512DQ-NEXT:    vpxor %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpxor %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512BW-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}

define <64 x i8> @ashr_const7_v64i8(<64 x i8> %a) {
; AVX512DQ-LABEL: ashr_const7_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX512DQ-NEXT:    vpcmpgtb %ymm1, %ymm2, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: ashr_const7_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BW-NEXT:    retq
  %res = ashr <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <64 x i8> %res
}