; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

;
; Variable Shifts
;

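; AVX-512F already provides per-element variable logical right shifts for
; 64-bit and 32-bit lanes (vpsrlvq/vpsrlvd), so the i64/i32 cases below
; lower to a single instruction under both check prefixes.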
define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, %b
  ret <8 x i64> %shift
}

define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, %b
  ret <16 x i32> %shift
}

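; Native per-element 16-bit shifts (vpsrlvw) require AVX512BW. Without it,
; each 256-bit half is zero-extended to 32-bit lanes (vpmovzxwd), shifted
; with vpsrlvd, and truncated back with vpmovdw.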
define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT:    vpsrlvd %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT:    vpsrlvd %zmm2, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, %b
  ret <32 x i16> %shift
}

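; x86 has no variable per-byte shift, so the lowering shifts whole words by
; 4, 2, and 1, masks off the bits pulled in from the neighboring byte
; (15/63/127), and selects per byte on successive bits of the shift amount:
; vpsllw $5 moves the amount into each byte's sign bit, which is consumed by
; vpblendvb on AVX512DQ or by vpmovb2m plus a masked vmovdqu8 on AVX512BW.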
define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm6, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm6, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %shift = lshr <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;

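; A splatted shift amount permits the uniform-count shift forms, which read
; the count from the low 64 bits of an XMM register; narrower element counts
; are first zero-extended (vpmovzxdq/vpmovzxwq) so the upper count bits are
; known zero.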
define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrlq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = lshr <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; ALL-NEXT:    vpsrld %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = lshr <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = lshr <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

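; Bytes still have no hardware shift even with a uniform amount, so the
; splat is broadcast and the same shift-and-blend ladder runs; because both
; 256-bit halves share one amount on AVX512DQ, the blend selectors
; (ymm2/ymm6/ymm8) are computed once and reused for the second half.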
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpbroadcastb %xmm2, %ymm2
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm8
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm3
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = lshr <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;

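; Constant per-element amounts are folded into a constant-pool memory
; operand on the variable shift (the (%rip) load below).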
define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrlvq {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512DQ-NEXT:    vpsrlvd %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT:    vpsrlvd %zmm2, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

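; With constant byte amounts the vpsllw $5 step folds away: the blend
; selector of word constants (8192,24640,...) is just the per-byte amounts
; pre-shifted left by 5, materialized directly as a constant.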
define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm4, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm8
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsrlw $1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;

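; Uniform constant amounts use the immediate-count shift forms
; (e.g. vpsrlq $7), the cheapest encoding of all.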
define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrlq $7, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrld $5, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

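; A uniform byte shift by 3 is done as a word shift by 3 followed by one
; vpand with 31 (0xFF >> 3) to clear the bits shifted in from the adjacent
; byte.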
define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}