; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

;
; Variable Shifts
;

define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, %b
  ret <8 x i64> %shift
}

define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, %b
  ret <16 x i32> %shift
}

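; Without AVX-512BW there is no variable 16-bit shift (vpsrlvw), so the AVX512DQ
; lowering zero-extends each 256-bit half to 32-bit lanes with vpunpcklwd and
; vpunpckhwd, shifts with vpsrlvd, and narrows back via vpsrld $16 and
; vpackusdw.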
define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm6 = ymm4[4],ymm0[4],ymm4[5],ymm0[5],ymm4[6],ymm0[6],ymm4[7],ymm0[7],ymm4[12],ymm0[12],ymm4[13],ymm0[13],ymm4[14],ymm0[14],ymm4[15],ymm0[15]
; AVX512DQ-NEXT:    vpsrlvd %ymm5, %ymm6, %ymm5
; AVX512DQ-NEXT:    vpsrld $16, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11]
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm4[0],ymm0[0],ymm4[1],ymm0[1],ymm4[2],ymm0[2],ymm4[3],ymm0[3],ymm4[8],ymm0[8],ymm4[9],ymm0[9],ymm4[10],ymm0[10],ymm4[11],ymm0[11]
; AVX512DQ-NEXT:    vpsrlvd %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackusdw %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm2 = ymm3[4],ymm4[4],ymm3[5],ymm4[5],ymm3[6],ymm4[6],ymm3[7],ymm4[7],ymm3[12],ymm4[12],ymm3[13],ymm4[13],ymm3[14],ymm4[14],ymm3[15],ymm4[15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm4[4],ymm1[4],ymm4[5],ymm1[5],ymm4[6],ymm1[6],ymm4[7],ymm1[7],ymm4[12],ymm1[12],ymm4[13],ymm1[13],ymm4[14],ymm1[14],ymm4[15],ymm1[15]
; AVX512DQ-NEXT:    vpsrlvd %ymm2, %ymm5, %ymm2
; AVX512DQ-NEXT:    vpsrld $16, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[8],ymm4[8],ymm3[9],ymm4[9],ymm3[10],ymm4[10],ymm3[11],ymm4[11]
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[8],ymm1[8],ymm4[9],ymm1[9],ymm4[10],ymm1[10],ymm4[11],ymm1[11]
; AVX512DQ-NEXT:    vpsrlvd %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpackusdw %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, %b
  ret <32 x i16> %shift
}

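; x86 has no per-byte shift instruction, so the byte shift amounts are moved
; into each byte's sign bit with vpsllw $5 and a vpblendvb ladder conditionally
; applies shifts of 4, 2 and 1, masking off the bits shifted in across byte
; boundaries.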
define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm6, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm6, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq

  %shift = lshr <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;

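; Splatted shift amounts are lowered to the scalar-count shift forms (vpsrlq,
; vpsrld, vpsrlw), which take the count from the low 64 bits of an XMM register,
; so only element 0 of %b needs to be zero-extended.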
define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = lshr <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; ALL-NEXT:    vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; ALL-NEXT:    vpsrld %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = lshr <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vmovd %xmm2, %eax
; AVX512DQ-NEXT:    movzwl %ax, %eax
; AVX512DQ-NEXT:    vmovd %eax, %xmm2
; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vmovd %xmm1, %eax
; AVX512BW-NEXT:    movzwl %ax, %eax
; AVX512BW-NEXT:    vmovd %eax, %xmm1
; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = lshr <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpbroadcastb %xmm2, %ymm2
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm8
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm3
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = lshr <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;

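; Per-element constant shift amounts are loaded from the constant pool (the
; RIP-relative operand in the checks) and used with the variable-shift
; instructions.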
define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlvq {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm3[4],ymm2[4],ymm3[5],ymm2[5],ymm3[6],ymm2[6],ymm3[7],ymm2[7],ymm3[12],ymm2[12],ymm3[13],ymm2[13],ymm3[14],ymm2[14],ymm3[15],ymm2[15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; AVX512DQ-NEXT:    vpsrlvd %ymm4, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpsrld $16, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm2[0],ymm3[1],ymm2[1],ymm3[2],ymm2[2],ymm3[3],ymm2[3],ymm3[8],ymm2[8],ymm3[9],ymm2[9],ymm3[10],ymm2[10],ymm3[11],ymm2[11]
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; AVX512DQ-NEXT:    vpsrlvd %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackusdw %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15]
; AVX512DQ-NEXT:    vpsrlvd %ymm4, %ymm5, %ymm4
; AVX512DQ-NEXT:    vpsrld $16, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11]
; AVX512DQ-NEXT:    vpsrlvd %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpackusdw %ymm4, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; AVX512DQ-NEXT:    vpsllw $5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm4, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm8
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
  %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;

define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlq $7, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrld $5, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

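; Byte elements have no shift-by-immediate; a 16-bit vpsrlw is used instead and
; the bits shifted in from the adjacent byte are cleared with an AND of 31.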
define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}