; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512DQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512BWVL
;
; Just one 32-bit run to make sure we do reasonable things for i64 shifts.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32-SSE --check-prefix=X32-SSE2

;
; Variable Shifts
;

; Per-element variable logical right shift of <2 x i64>. The CHECK blocks
; below were autogenerated by utils/update_llc_test_checks.py -- do not edit
; them by hand; rerun the script after codegen changes.
define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-LABEL: var_shift_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrlq %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT:    psrlq %xmm1, %xmm0
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: var_shift_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrlq %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE41-NEXT:    psrlq %xmm1, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: var_shift_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shift_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: var_shift_v2i64:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT:    vpsubq %xmm1, %xmm2, %xmm1
; XOPAVX1-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: var_shift_v2i64:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: var_shift_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: var_shift_v2i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; X32-SSE-LABEL: var_shift_v2i64:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
; X32-SSE-NEXT:    psrlq %xmm1, %xmm2
; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X32-SSE-NEXT:    psrlq %xmm1, %xmm0
; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X32-SSE-NEXT:    retl
  %shift = lshr <2 x i64> %a, %b
  ret <2 x i64> %shift
}

; Per-element variable logical right shift of <4 x i32>. The CHECK blocks
; below were autogenerated by utils/update_llc_test_checks.py -- do not edit
; them by hand; rerun the script after codegen changes.
define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: var_shift_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    psrld %xmm2, %xmm3
; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm1[0,1,1,1,4,5,6,7]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrld %xmm4, %xmm2
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm1[2,3,3,3,4,5,6,7]
; SSE2-NEXT:    movdqa %xmm0, %xmm4
; SSE2-NEXT:    psrld %xmm3, %xmm4
; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
; SSE2-NEXT:    psrld %xmm1, %xmm0
; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm4[1]
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,3],xmm0[0,3]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: var_shift_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    psrld %xmm2, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; SSE41-NEXT:    pshuflw {{.*#+}} xmm4 = xmm2[2,3,3,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm0, %xmm5
; SSE41-NEXT:    psrld %xmm4, %xmm5
; SSE41-NEXT:    pblendw {{.*#+}} xmm5 = xmm3[0,1,2,3],xmm5[4,5,6,7]
; SSE41-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    psrld %xmm1, %xmm3
; SSE41-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,1,1,1,4,5,6,7]
; SSE41-NEXT:    psrld %xmm1, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm5[2,3],xmm0[4,5],xmm5[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: var_shift_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrldq {{.*#+}} xmm2 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpsrld %xmm2, %xmm0, %xmm2
; AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm3
; AVX1-NEXT:    vpsrld %xmm3, %xmm0, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm3 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; AVX1-NEXT:    vpsrld %xmm3, %xmm0, %xmm3
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shift_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: var_shift_v4i32:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; XOPAVX1-NEXT:    vpshld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: var_shift_v4i32:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: var_shift_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: var_shift_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; X32-SSE-LABEL: var_shift_v4i32:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
; X32-SSE-NEXT:    movdqa %xmm0, %xmm3
; X32-SSE-NEXT:    psrld %xmm2, %xmm3
; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm4 = xmm1[0,1,1,1,4,5,6,7]
; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
; X32-SSE-NEXT:    psrld %xmm4, %xmm2
; X32-SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm1[2,3,3,3,4,5,6,7]
; X32-SSE-NEXT:    movdqa %xmm0, %xmm4
; X32-SSE-NEXT:    psrld %xmm3, %xmm4
; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
; X32-SSE-NEXT:    psrld %xmm1, %xmm0
; X32-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm4[1]
; X32-SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,3],xmm0[0,3]
; X32-SSE-NEXT:    movaps %xmm2, %xmm0
; X32-SSE-NEXT:    retl
  %shift = lshr <4 x i32> %a, %b
  ret <4 x i32> %shift
}

; Per-element variable logical right shift of <8 x i16>. Targets without a
; native variable word shift lower this as a bit-serial blend ladder. The
; CHECK blocks below were autogenerated by utils/update_llc_test_checks.py --
; do not edit them by hand; rerun the script after codegen changes.
define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: var_shift_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psllw $12, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psraw $15, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm3
; SSE2-NEXT:    pandn %xmm0, %xmm3
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    paddw %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psraw $15, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm3
; SSE2-NEXT:    pandn %xmm0, %xmm3
; SSE2-NEXT:    psrlw $4, %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    paddw %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psraw $15, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm3
; SSE2-NEXT:    pandn %xmm0, %xmm3
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    paddw %xmm1, %xmm1
; SSE2-NEXT:    psraw $15, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm2
; SSE2-NEXT:    psrlw $1, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: var_shift_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    psllw $12, %xmm0
; SSE41-NEXT:    psllw $4, %xmm1
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm3
; SSE41-NEXT:    paddw %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm2, %xmm4
; SSE41-NEXT:    psrlw $8, %xmm4
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm4, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm1
; SSE41-NEXT:    psrlw $4, %xmm1
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm1
; SSE41-NEXT:    psrlw $2, %xmm1
; SSE41-NEXT:    paddw %xmm3, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm1
; SSE41-NEXT:    psrlw $1, %xmm1
; SSE41-NEXT:    paddw %xmm3, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: var_shift_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllw $12, %xmm1, %xmm2
; AVX1-NEXT:    vpsllw $4, %xmm1, %xmm1
; AVX1-NEXT:    vpor %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpaddw %xmm1, %xmm1, %xmm2
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm3
; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm1
; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm1
; AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm1
; AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shift_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; XOP-LABEL: var_shift_v8i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOP-NEXT:    vpsubw %xmm1, %xmm2, %xmm1
; XOP-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX512DQ-LABEL: var_shift_v8i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQ-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v8i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQVL-LABEL: var_shift_v8i16:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512DQVL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQVL-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512DQVL-NEXT:    vzeroupper
; AVX512DQVL-NEXT:    retq
;
; AVX512BWVL-LABEL: var_shift_v8i16:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT:    retq
;
; X32-SSE-LABEL: var_shift_v8i16:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    psllw $12, %xmm1
; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
; X32-SSE-NEXT:    psraw $15, %xmm2
; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
; X32-SSE-NEXT:    pandn %xmm0, %xmm3
; X32-SSE-NEXT:    psrlw $8, %xmm0
; X32-SSE-NEXT:    pand %xmm2, %xmm0
; X32-SSE-NEXT:    por %xmm3, %xmm0
; X32-SSE-NEXT:    paddw %xmm1, %xmm1
; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
; X32-SSE-NEXT:    psraw $15, %xmm2
; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
; X32-SSE-NEXT:    pandn %xmm0, %xmm3
; X32-SSE-NEXT:    psrlw $4, %xmm0
; X32-SSE-NEXT:    pand %xmm2, %xmm0
; X32-SSE-NEXT:    por %xmm3, %xmm0
; X32-SSE-NEXT:    paddw %xmm1, %xmm1
; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
; X32-SSE-NEXT:    psraw $15, %xmm2
; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
; X32-SSE-NEXT:    pandn %xmm0, %xmm3
; X32-SSE-NEXT:    psrlw $2, %xmm0
; X32-SSE-NEXT:    pand %xmm2, %xmm0
; X32-SSE-NEXT:    por %xmm3, %xmm0
; X32-SSE-NEXT:    paddw %xmm1, %xmm1
; X32-SSE-NEXT:    psraw $15, %xmm1
; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
; X32-SSE-NEXT:    pandn %xmm0, %xmm2
; X32-SSE-NEXT:    psrlw $1, %xmm0
; X32-SSE-NEXT:    pand %xmm1, %xmm0
; X32-SSE-NEXT:    por %xmm2, %xmm0
; X32-SSE-NEXT:    retl
  %shift = lshr <8 x i16> %a, %b
  ret <8 x i16> %shift
}

; Per-element variable logical right shift of <16 x i8>. x86 has no native
; byte shift, so targets lower this via a sign-bit-driven blend ladder or by
; widening. The CHECK blocks below were autogenerated by
; utils/update_llc_test_checks.py -- do not edit them by hand.
define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE2-LABEL: var_shift_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psllw $5, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    pxor %xmm3, %xmm3
; SSE2-NEXT:    pcmpgtb %xmm1, %xmm3
; SSE2-NEXT:    movdqa %xmm3, %xmm4
; SSE2-NEXT:    pandn %xmm0, %xmm4
; SSE2-NEXT:    psrlw $4, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    paddb %xmm1, %xmm1
; SSE2-NEXT:    pxor %xmm3, %xmm3
; SSE2-NEXT:    pcmpgtb %xmm1, %xmm3
; SSE2-NEXT:    movdqa %xmm3, %xmm4
; SSE2-NEXT:    pandn %xmm0, %xmm4
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    paddb %xmm1, %xmm1
; SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    pandn %xmm0, %xmm1
; SSE2-NEXT:    psrlw $1, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: var_shift_v16i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psllw $5, %xmm1
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    psrlw $4, %xmm3
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm3
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm3
; SSE41-NEXT:    psrlw $2, %xmm3
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm3
; SSE41-NEXT:    paddb %xmm1, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm3
; SSE41-NEXT:    psrlw $1, %xmm3
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm3
; SSE41-NEXT:    paddb %xmm1, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: var_shift_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllw $5, %xmm1, %xmm1
; AVX-NEXT:    vpsrlw $4, %xmm0, %xmm2
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $2, %xmm0, %xmm2
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm2
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; XOP-LABEL: var_shift_v16i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOP-NEXT:    vpsubb %xmm1, %xmm2, %xmm1
; XOP-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX512DQ-LABEL: var_shift_v16i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512DQ-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQ-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v16i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQVL-LABEL: var_shift_v16i8:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512DQVL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQVL-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT:    vzeroupper
; AVX512DQVL-NEXT:    retq
;
; AVX512BWVL-LABEL: var_shift_v16i8:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512BWVL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512BWVL-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT:    vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
;
; X32-SSE-LABEL: var_shift_v16i8:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    psllw $5, %xmm1
; X32-SSE-NEXT:    pxor %xmm2, %xmm2
; X32-SSE-NEXT:    pxor %xmm3, %xmm3
; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
; X32-SSE-NEXT:    pandn %xmm0, %xmm4
; X32-SSE-NEXT:    psrlw $4, %xmm0
; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT:    pand %xmm3, %xmm0
; X32-SSE-NEXT:    por %xmm4, %xmm0
; X32-SSE-NEXT:    paddb %xmm1, %xmm1
; X32-SSE-NEXT:    pxor %xmm3, %xmm3
; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
; X32-SSE-NEXT:    pandn %xmm0, %xmm4
; X32-SSE-NEXT:    psrlw $2, %xmm0
; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT:    pand %xmm3, %xmm0
; X32-SSE-NEXT:    por %xmm4, %xmm0
; X32-SSE-NEXT:    paddb %xmm1, %xmm1
; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm2
; X32-SSE-NEXT:    movdqa %xmm2, %xmm1
; X32-SSE-NEXT:    pandn %xmm0, %xmm1
; X32-SSE-NEXT:    psrlw $1, %xmm0
; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT:    pand %xmm2, %xmm0
; X32-SSE-NEXT:    por %xmm1, %xmm0
; X32-SSE-NEXT:    retl
  %shift = lshr <16 x i8> %a, %b
  ret <16 x i8> %shift
}

;
; Uniform Variable Shifts
;

; Logical right shift of <2 x i64> by a shift amount splatted from element 0
; of %b. The CHECK blocks below were autogenerated by
; utils/update_llc_test_checks.py -- do not edit them by hand.
define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE-LABEL: splatvar_shift_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlq %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splatvar_shift_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; XOP-LABEL: splatvar_shift_v2i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX512-LABEL: splatvar_shift_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_shift_v2i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; X32-SSE-LABEL: splatvar_shift_v2i64:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    psrlq %xmm1, %xmm0
; X32-SSE-NEXT:    retl
  %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
  %shift = lshr <2 x i64> %a, %splat
  ret <2 x i64> %shift
}

; Logical right shift of <4 x i32> by a shift amount splatted from element 0
; of %b. The CHECK blocks below were autogenerated by
; utils/update_llc_test_checks.py -- do not edit them by hand.
define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: splatvar_shift_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; SSE2-NEXT:    psrld %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: splatvar_shift_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT:    psrld %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: splatvar_shift_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; XOP-LABEL: splatvar_shift_v4i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOP-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX512-LABEL: splatvar_shift_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_shift_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512VL-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; X32-SSE-LABEL: splatvar_shift_v4i32:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    xorps %xmm2, %xmm2
; X32-SSE-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; X32-SSE-NEXT:    psrld %xmm2, %xmm0
; X32-SSE-NEXT:    retl
  %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
  %shift = lshr <4 x i32> %a, %splat
  ret <4 x i32> %shift
}

; Logical right shift of <8 x i16> by a shift amount splatted from element 0
; of %b. The CHECK blocks below were autogenerated by
; utils/update_llc_test_checks.py -- do not edit them by hand.
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: splatvar_shift_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pextrw $0, %xmm1, %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    psrlw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: splatvar_shift_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT:    psrlw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: splatvar_shift_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; XOP-LABEL: splatvar_shift_v8i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOP-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX512-LABEL: splatvar_shift_v8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_shift_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; X32-SSE-LABEL: splatvar_shift_v8i16:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    pextrw $0, %xmm1, %eax
; X32-SSE-NEXT:    movd %eax, %xmm1
; X32-SSE-NEXT:    psrlw %xmm1, %xmm0
; X32-SSE-NEXT:    retl
  %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
  %shift = lshr <8 x i16> %a, %splat
  ret <8 x i16> %shift
}

652define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
653; SSE2-LABEL: splatvar_shift_v16i8:
654; SSE2:       # %bb.0:
655; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
656; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
657; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,0,0,0]
658; SSE2-NEXT:    psllw $5, %xmm2
659; SSE2-NEXT:    pxor %xmm1, %xmm1
660; SSE2-NEXT:    pxor %xmm3, %xmm3
661; SSE2-NEXT:    pcmpgtb %xmm2, %xmm3
662; SSE2-NEXT:    movdqa %xmm3, %xmm4
663; SSE2-NEXT:    pandn %xmm0, %xmm4
664; SSE2-NEXT:    psrlw $4, %xmm0
665; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
666; SSE2-NEXT:    pand %xmm3, %xmm0
667; SSE2-NEXT:    por %xmm4, %xmm0
668; SSE2-NEXT:    paddb %xmm2, %xmm2
669; SSE2-NEXT:    pxor %xmm3, %xmm3
670; SSE2-NEXT:    pcmpgtb %xmm2, %xmm3
671; SSE2-NEXT:    movdqa %xmm3, %xmm4
672; SSE2-NEXT:    pandn %xmm0, %xmm4
673; SSE2-NEXT:    psrlw $2, %xmm0
674; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
675; SSE2-NEXT:    pand %xmm3, %xmm0
676; SSE2-NEXT:    por %xmm4, %xmm0
677; SSE2-NEXT:    paddb %xmm2, %xmm2
678; SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
679; SSE2-NEXT:    movdqa %xmm1, %xmm2
680; SSE2-NEXT:    pandn %xmm0, %xmm2
681; SSE2-NEXT:    psrlw $1, %xmm0
682; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
683; SSE2-NEXT:    pand %xmm1, %xmm0
684; SSE2-NEXT:    por %xmm2, %xmm0
685; SSE2-NEXT:    retq
686;
687; SSE41-LABEL: splatvar_shift_v16i8:
688; SSE41:       # %bb.0:
689; SSE41-NEXT:    movdqa %xmm0, %xmm2
690; SSE41-NEXT:    pxor %xmm0, %xmm0
691; SSE41-NEXT:    pshufb %xmm0, %xmm1
692; SSE41-NEXT:    psllw $5, %xmm1
693; SSE41-NEXT:    movdqa %xmm1, %xmm3
694; SSE41-NEXT:    paddb %xmm1, %xmm3
695; SSE41-NEXT:    movdqa %xmm2, %xmm4
696; SSE41-NEXT:    psrlw $4, %xmm4
697; SSE41-NEXT:    pand {{.*}}(%rip), %xmm4
698; SSE41-NEXT:    movdqa %xmm1, %xmm0
699; SSE41-NEXT:    pblendvb %xmm0, %xmm4, %xmm2
700; SSE41-NEXT:    movdqa %xmm2, %xmm1
701; SSE41-NEXT:    psrlw $2, %xmm1
702; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
703; SSE41-NEXT:    movdqa %xmm3, %xmm0
704; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
705; SSE41-NEXT:    movdqa %xmm2, %xmm1
706; SSE41-NEXT:    psrlw $1, %xmm1
707; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
708; SSE41-NEXT:    paddb %xmm3, %xmm3
709; SSE41-NEXT:    movdqa %xmm3, %xmm0
710; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
711; SSE41-NEXT:    movdqa %xmm2, %xmm0
712; SSE41-NEXT:    retq
713;
714; AVX1-LABEL: splatvar_shift_v16i8:
715; AVX1:       # %bb.0:
716; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
717; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
718; AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
719; AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm2
720; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm3
721; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm3, %xmm3
722; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
723; AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm1
724; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
725; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
726; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm1
727; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
728; AVX1-NEXT:    vpaddb %xmm2, %xmm2, %xmm2
729; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
730; AVX1-NEXT:    retq
731;
732; AVX2-LABEL: splatvar_shift_v16i8:
733; AVX2:       # %bb.0:
734; AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
735; AVX2-NEXT:    vpsllw $5, %xmm1, %xmm1
736; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm2
737; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
738; AVX2-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
739; AVX2-NEXT:    vpsrlw $2, %xmm0, %xmm2
740; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
741; AVX2-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
742; AVX2-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
743; AVX2-NEXT:    vpsrlw $1, %xmm0, %xmm2
744; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
745; AVX2-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
746; AVX2-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
747; AVX2-NEXT:    retq
748;
749; XOPAVX1-LABEL: splatvar_shift_v16i8:
750; XOPAVX1:       # %bb.0:
751; XOPAVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
752; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
753; XOPAVX1-NEXT:    vpsubb %xmm1, %xmm2, %xmm1
754; XOPAVX1-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
755; XOPAVX1-NEXT:    retq
756;
757; XOPAVX2-LABEL: splatvar_shift_v16i8:
758; XOPAVX2:       # %bb.0:
759; XOPAVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
760; XOPAVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
761; XOPAVX2-NEXT:    vpsubb %xmm1, %xmm2, %xmm1
762; XOPAVX2-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
763; XOPAVX2-NEXT:    retq
764;
765; AVX512DQ-LABEL: splatvar_shift_v16i8:
766; AVX512DQ:       # %bb.0:
767; AVX512DQ-NEXT:    vpbroadcastb %xmm1, %xmm1
768; AVX512DQ-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
769; AVX512DQ-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
770; AVX512DQ-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
771; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
772; AVX512DQ-NEXT:    vzeroupper
773; AVX512DQ-NEXT:    retq
774;
775; AVX512BW-LABEL: splatvar_shift_v16i8:
776; AVX512BW:       # %bb.0:
777; AVX512BW-NEXT:    vpbroadcastb %xmm1, %xmm1
778; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
779; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
780; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
781; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
782; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
783; AVX512BW-NEXT:    vzeroupper
784; AVX512BW-NEXT:    retq
785;
786; AVX512DQVL-LABEL: splatvar_shift_v16i8:
787; AVX512DQVL:       # %bb.0:
788; AVX512DQVL-NEXT:    vpbroadcastb %xmm1, %xmm1
789; AVX512DQVL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
790; AVX512DQVL-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
791; AVX512DQVL-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
792; AVX512DQVL-NEXT:    vpmovdb %zmm0, %xmm0
793; AVX512DQVL-NEXT:    vzeroupper
794; AVX512DQVL-NEXT:    retq
795;
796; AVX512BWVL-LABEL: splatvar_shift_v16i8:
797; AVX512BWVL:       # %bb.0:
798; AVX512BWVL-NEXT:    vpbroadcastb %xmm1, %xmm1
799; AVX512BWVL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
800; AVX512BWVL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
801; AVX512BWVL-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0
802; AVX512BWVL-NEXT:    vpmovwb %ymm0, %xmm0
803; AVX512BWVL-NEXT:    vzeroupper
804; AVX512BWVL-NEXT:    retq
805;
806; X32-SSE-LABEL: splatvar_shift_v16i8:
807; X32-SSE:       # %bb.0:
808; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
809; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
810; X32-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,0,0,0]
811; X32-SSE-NEXT:    psllw $5, %xmm2
812; X32-SSE-NEXT:    pxor %xmm1, %xmm1
813; X32-SSE-NEXT:    pxor %xmm3, %xmm3
814; X32-SSE-NEXT:    pcmpgtb %xmm2, %xmm3
815; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
816; X32-SSE-NEXT:    pandn %xmm0, %xmm4
817; X32-SSE-NEXT:    psrlw $4, %xmm0
818; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
819; X32-SSE-NEXT:    pand %xmm3, %xmm0
820; X32-SSE-NEXT:    por %xmm4, %xmm0
821; X32-SSE-NEXT:    paddb %xmm2, %xmm2
822; X32-SSE-NEXT:    pxor %xmm3, %xmm3
823; X32-SSE-NEXT:    pcmpgtb %xmm2, %xmm3
824; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
825; X32-SSE-NEXT:    pandn %xmm0, %xmm4
826; X32-SSE-NEXT:    psrlw $2, %xmm0
827; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
828; X32-SSE-NEXT:    pand %xmm3, %xmm0
829; X32-SSE-NEXT:    por %xmm4, %xmm0
830; X32-SSE-NEXT:    paddb %xmm2, %xmm2
831; X32-SSE-NEXT:    pcmpgtb %xmm2, %xmm1
832; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
833; X32-SSE-NEXT:    pandn %xmm0, %xmm2
834; X32-SSE-NEXT:    psrlw $1, %xmm0
835; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
836; X32-SSE-NEXT:    pand %xmm1, %xmm0
837; X32-SSE-NEXT:    por %xmm2, %xmm0
838; X32-SSE-NEXT:    retl
839  %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
840  %shift = lshr <16 x i8> %a, %splat
841  ret <16 x i8> %shift
842}
843
844;
845; Constant Shifts
846;
847
848define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
849; SSE2-LABEL: constant_shift_v2i64:
850; SSE2:       # %bb.0:
851; SSE2-NEXT:    movdqa %xmm0, %xmm1
852; SSE2-NEXT:    psrlq $1, %xmm1
853; SSE2-NEXT:    psrlq $7, %xmm0
854; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
855; SSE2-NEXT:    retq
856;
857; SSE41-LABEL: constant_shift_v2i64:
858; SSE41:       # %bb.0:
859; SSE41-NEXT:    movdqa %xmm0, %xmm1
860; SSE41-NEXT:    psrlq $7, %xmm1
861; SSE41-NEXT:    psrlq $1, %xmm0
862; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
863; SSE41-NEXT:    retq
864;
865; AVX1-LABEL: constant_shift_v2i64:
866; AVX1:       # %bb.0:
867; AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm1
868; AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm0
869; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
870; AVX1-NEXT:    retq
871;
872; AVX2-LABEL: constant_shift_v2i64:
873; AVX2:       # %bb.0:
874; AVX2-NEXT:    vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
875; AVX2-NEXT:    retq
876;
877; XOPAVX1-LABEL: constant_shift_v2i64:
878; XOPAVX1:       # %bb.0:
879; XOPAVX1-NEXT:    vpshlq {{.*}}(%rip), %xmm0, %xmm0
880; XOPAVX1-NEXT:    retq
881;
882; XOPAVX2-LABEL: constant_shift_v2i64:
883; XOPAVX2:       # %bb.0:
884; XOPAVX2-NEXT:    vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
885; XOPAVX2-NEXT:    retq
886;
887; AVX512-LABEL: constant_shift_v2i64:
888; AVX512:       # %bb.0:
889; AVX512-NEXT:    vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
890; AVX512-NEXT:    retq
891;
892; AVX512VL-LABEL: constant_shift_v2i64:
893; AVX512VL:       # %bb.0:
894; AVX512VL-NEXT:    vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
895; AVX512VL-NEXT:    retq
896;
897; X32-SSE-LABEL: constant_shift_v2i64:
898; X32-SSE:       # %bb.0:
899; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
900; X32-SSE-NEXT:    psrlq $1, %xmm1
901; X32-SSE-NEXT:    psrlq $7, %xmm0
902; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
903; X32-SSE-NEXT:    retl
904  %shift = lshr <2 x i64> %a, <i64 1, i64 7>
905  ret <2 x i64> %shift
906}
907
; Logical shift right of <4 x i32> by the non-uniform constant <4, 5, 6, 7>.
; Without a variable 32-bit shift (pre-AVX2), the checked code performs four
; immediate psrld shifts and recombines the lanes with unpck/shufps (SSE2) or
; pblendw (SSE4.1/AVX1); AVX2/XOP/AVX512 emit one vpsrlvd/vpshld instruction.
define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind {
; SSE2-LABEL: constant_shift_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $7, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrld $6, %xmm2
; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $5, %xmm1
; SSE2-NEXT:    psrld $4, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: constant_shift_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $7, %xmm1
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrld $5, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $6, %xmm1
; SSE41-NEXT:    psrld $4, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: constant_shift_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld $7, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $5, %xmm0, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpsrld $6, %xmm0, %xmm2
; AVX1-NEXT:    vpsrld $4, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: constant_shift_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: constant_shift_v4i32:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpshld {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: constant_shift_v4i32:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: constant_shift_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: constant_shift_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; X32-SSE-LABEL: constant_shift_v4i32:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
; X32-SSE-NEXT:    psrld $7, %xmm1
; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
; X32-SSE-NEXT:    psrld $6, %xmm2
; X32-SSE-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
; X32-SSE-NEXT:    psrld $5, %xmm1
; X32-SSE-NEXT:    psrld $4, %xmm0
; X32-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
; X32-SSE-NEXT:    retl
  %shift = lshr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %shift
}
989
; Logical shift right of <8 x i16> by the ascending constant <0..7>.
; SSE2 builds the result from repeated uniform psrlw shifts plus shuffles and
; a mask/merge; SSE4.1/AVX/AVX512DQ use the pmulhuw-by-power-of-two trick
; (multiplying by 2^(16-n) and keeping the high half equals lshr by n) with a
; pblendw to keep the unshifted lane 0; AVX512BW(VL) emit vpsrlvw directly.
define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
; SSE2-LABEL: constant_shift_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $4, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm2
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[2,3]
; SSE2-NEXT:    psrlw $2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,3,2,3]
; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE2-NEXT:    movaps {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    andps %xmm1, %xmm0
; SSE2-NEXT:    psrlw $1, %xmm2
; SSE2-NEXT:    andnps %xmm2, %xmm1
; SSE2-NEXT:    orps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: constant_shift_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = <u,32768,16384,8192,4096,2048,1024,512>
; SSE41-NEXT:    pmulhuw %xmm0, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: constant_shift_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX-NEXT:    retq
;
; XOP-LABEL: constant_shift_v8i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vpshlw {{.*}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX512DQ-LABEL: constant_shift_v8i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
; AVX512DQ-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v8i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQVL-LABEL: constant_shift_v8i16:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
; AVX512DQVL-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX512DQVL-NEXT:    retq
;
; AVX512BWVL-LABEL: constant_shift_v8i16:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpsrlvw {{.*}}(%rip), %xmm0, %xmm0
; AVX512BWVL-NEXT:    retq
;
; X32-SSE-LABEL: constant_shift_v8i16:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
; X32-SSE-NEXT:    psrlw $4, %xmm1
; X32-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; X32-SSE-NEXT:    movapd %xmm1, %xmm2
; X32-SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[2,3]
; X32-SSE-NEXT:    psrlw $2, %xmm1
; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,3,2,3]
; X32-SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X32-SSE-NEXT:    movaps {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0]
; X32-SSE-NEXT:    movaps %xmm2, %xmm0
; X32-SSE-NEXT:    andps %xmm1, %xmm0
; X32-SSE-NEXT:    psrlw $1, %xmm2
; X32-SSE-NEXT:    andnps %xmm2, %xmm1
; X32-SSE-NEXT:    orps %xmm1, %xmm0
; X32-SSE-NEXT:    retl
  %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  ret <8 x i16> %shift
}
1073
; Logical shift right of <16 x i8> by the non-uniform constant
; <0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0>. x86 has no native byte shifts, so the
; checked SSE/AVX code shifts by 4/2/1 with psrlw, masks off bits shifted in
; from neighboring bytes, and selects per byte using the shift-amount bits
; (pcmpgtb sign-bit test on SSE2, pblendvb on SSE4.1/AVX); XOP uses vpshlb,
; and AVX512 widens to i32/i16 elements for a true variable shift.
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
; SSE2-LABEL: constant_shift_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32]
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pxor %xmm3, %xmm3
; SSE2-NEXT:    pcmpgtb %xmm2, %xmm3
; SSE2-NEXT:    movdqa %xmm3, %xmm4
; SSE2-NEXT:    pandn %xmm0, %xmm4
; SSE2-NEXT:    psrlw $4, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm2
; SSE2-NEXT:    pxor %xmm3, %xmm3
; SSE2-NEXT:    pcmpgtb %xmm2, %xmm3
; SSE2-NEXT:    movdqa %xmm3, %xmm4
; SSE2-NEXT:    pandn %xmm0, %xmm4
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    paddb %xmm2, %xmm2
; SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm2
; SSE2-NEXT:    psrlw $1, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: constant_shift_v16i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrlw $4, %xmm2
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [8192,24640,41088,57536,49376,32928,16480,32]
; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    psrlw $2, %xmm2
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm2
; SSE41-NEXT:    paddb %xmm0, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    psrlw $1, %xmm2
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm2
; SSE41-NEXT:    paddb %xmm0, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: constant_shift_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $4, %xmm0, %xmm1
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32]
; AVX-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $2, %xmm0, %xmm1
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vpaddb %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm1
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vpaddb %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; XOP-LABEL: constant_shift_v16i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vpshlb {{.*}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX512DQ-LABEL: constant_shift_v16i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQ-NEXT:    vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v16i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQVL-LABEL: constant_shift_v16i8:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQVL-NEXT:    vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQVL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT:    vzeroupper
; AVX512DQVL-NEXT:    retq
;
; AVX512BWVL-LABEL: constant_shift_v16i8:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512BWVL-NEXT:    vpsrlvw {{.*}}(%rip), %ymm0, %ymm0
; AVX512BWVL-NEXT:    vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
;
; X32-SSE-LABEL: constant_shift_v16i8:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32]
; X32-SSE-NEXT:    pxor %xmm1, %xmm1
; X32-SSE-NEXT:    pxor %xmm3, %xmm3
; X32-SSE-NEXT:    pcmpgtb %xmm2, %xmm3
; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
; X32-SSE-NEXT:    pandn %xmm0, %xmm4
; X32-SSE-NEXT:    psrlw $4, %xmm0
; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT:    pand %xmm3, %xmm0
; X32-SSE-NEXT:    por %xmm4, %xmm0
; X32-SSE-NEXT:    paddb %xmm2, %xmm2
; X32-SSE-NEXT:    pxor %xmm3, %xmm3
; X32-SSE-NEXT:    pcmpgtb %xmm2, %xmm3
; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
; X32-SSE-NEXT:    pandn %xmm0, %xmm4
; X32-SSE-NEXT:    psrlw $2, %xmm0
; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT:    pand %xmm3, %xmm0
; X32-SSE-NEXT:    por %xmm4, %xmm0
; X32-SSE-NEXT:    paddb %xmm2, %xmm2
; X32-SSE-NEXT:    pcmpgtb %xmm2, %xmm1
; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
; X32-SSE-NEXT:    pandn %xmm0, %xmm2
; X32-SSE-NEXT:    psrlw $1, %xmm0
; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT:    pand %xmm1, %xmm0
; X32-SSE-NEXT:    por %xmm2, %xmm0
; X32-SSE-NEXT:    retl
  %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <16 x i8> %shift
}
1215
1216;
1217; Uniform Constant Shifts
1218;
1219
; Uniform (splat) constant lshr of <2 x i64> by 7. Every target should fold
; this to a single immediate-count psrlq/vpsrlq instruction.
define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlq $7, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splatconstant_shift_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $7, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; XOP-LABEL: splatconstant_shift_v2i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vpsrlq $7, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX512-LABEL: splatconstant_shift_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrlq $7, %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_shift_v2i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlq $7, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; X32-SSE-LABEL: splatconstant_shift_v2i64:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    psrlq $7, %xmm0
; X32-SSE-NEXT:    retl
  %shift = lshr <2 x i64> %a, <i64 7, i64 7>
  ret <2 x i64> %shift
}
1253
; Uniform (splat) constant lshr of <4 x i32> by 5. Every target should fold
; this to a single immediate-count psrld/vpsrld instruction.
define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $5, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splatconstant_shift_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $5, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; XOP-LABEL: splatconstant_shift_v4i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vpsrld $5, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX512-LABEL: splatconstant_shift_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrld $5, %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_shift_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrld $5, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; X32-SSE-LABEL: splatconstant_shift_v4i32:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    psrld $5, %xmm0
; X32-SSE-NEXT:    retl
  %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %shift
}
1287
; Uniform (splat) constant lshr of <8 x i16> by 3. Every target should fold
; this to a single immediate-count psrlw/vpsrlw instruction.
define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $3, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splatconstant_shift_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $3, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; XOP-LABEL: splatconstant_shift_v8i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vpsrlw $3, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX512-LABEL: splatconstant_shift_v8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrlw $3, %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_shift_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlw $3, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; X32-SSE-LABEL: splatconstant_shift_v8i16:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    psrlw $3, %xmm0
; X32-SSE-NEXT:    retl
  %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}
1321
; Uniform (splat) constant lshr of <16 x i8> by 3. Since x86 lacks byte
; shifts, the expected lowering is a word shift (psrlw) followed by a pand
; mask clearing the bits shifted in from the adjacent byte; XOP uses its
; native per-byte vpshlb with a constant-pool shift-amount vector.
define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $3, %xmm0
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splatconstant_shift_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $3, %xmm0, %xmm0
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; XOP-LABEL: splatconstant_shift_v16i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vpshlb {{.*}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX512-LABEL: splatconstant_shift_v16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrlw $3, %xmm0, %xmm0
; AVX512-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_shift_v16i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlw $3, %xmm0, %xmm0
; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; X32-SSE-LABEL: splatconstant_shift_v16i8:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    psrlw $3, %xmm0
; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT:    retl
  %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %shift
}
1360