1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s
3
4; AVX2 Logical Shift Left
5
define <16 x i16> @test_sllw_1(<16 x i16> %InVec) {
; Shift-left of i16 lanes by 0 is a no-op; it must fold away to just 'retq'.
; CHECK-LABEL: test_sllw_1:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    retq
entry:
  %shl = shl <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <16 x i16> %shl
}
14
define <16 x i16> @test_sllw_2(<16 x i16> %InVec) {
; Shift-left by 1 should lower to vpaddw (x + x) rather than a shift instruction.
; CHECK-LABEL: test_sllw_2:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpaddw %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = shl <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %shl
}
24
define <16 x i16> @test_sllw_3(<16 x i16> %InVec) {
; Uniform shift by 15 (the max in-range i16 amount) uses an immediate vpsllw.
; CHECK-LABEL: test_sllw_3:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsllw $15, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = shl <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <16 x i16> %shl
}
34
define <8 x i32> @test_slld_1(<8 x i32> %InVec) {
; Shift-left of i32 lanes by 0 is a no-op; it must fold away to just 'retq'.
; CHECK-LABEL: test_slld_1:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    retq
entry:
  %shl = shl <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %shl
}
43
define <8 x i32> @test_slld_2(<8 x i32> %InVec) {
; Shift-left by 1 should lower to vpaddd (x + x) rather than a shift instruction.
; CHECK-LABEL: test_slld_2:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = shl <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %shl
}
53
define <8 x i32> @test_vpslld_var(i32 %shift) {
; Variable scalar shift amount: only lane 0 of %amt is defined (rest undef),
; so the lowering may broadcast the scalar amount via vpslld %xmm0 to all lanes.
; CHECK-LABEL: test_vpslld_var:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovd %edi, %xmm0
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
; CHECK-NEXT:    vpslld %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %amt = insertelement <8 x i32> undef, i32 %shift, i32 0
  %tmp = shl <8 x i32> <i32 192, i32 193, i32 194, i32 195, i32 196, i32 197, i32 198, i32 199>, %amt
  ret <8 x i32> %tmp
}
65
define <8 x i32> @test_slld_3(<8 x i32> %InVec) {
; Uniform shift by 31 (the max in-range i32 amount) uses an immediate vpslld.
; CHECK-LABEL: test_slld_3:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpslld $31, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = shl <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ret <8 x i32> %shl
}
75
define <4 x i64> @test_sllq_1(<4 x i64> %InVec) {
; Shift-left of i64 lanes by 0 is a no-op; it must fold away to just 'retq'.
; CHECK-LABEL: test_sllq_1:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    retq
entry:
  %shl = shl <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
  ret <4 x i64> %shl
}
84
define <4 x i64> @test_sllq_2(<4 x i64> %InVec) {
; Shift-left by 1 should lower to vpaddq (x + x) rather than a shift instruction.
; CHECK-LABEL: test_sllq_2:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpaddq %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = shl <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %shl
}
94
define <4 x i64> @test_sllq_3(<4 x i64> %InVec) {
; Uniform shift by 63 (the max in-range i64 amount) uses an immediate vpsllq.
; CHECK-LABEL: test_sllq_3:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsllq $63, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = shl <4 x i64> %InVec, <i64 63, i64 63, i64 63, i64 63>
  ret <4 x i64> %shl
}
104
105; AVX2 Arithmetic Shift
106
define <16 x i16> @test_sraw_1(<16 x i16> %InVec) {
; Arithmetic shift-right by 0 is a no-op; it must fold away to just 'retq'.
; (The result is named %shl for uniformity with the other tests, but this is an ashr.)
; CHECK-LABEL: test_sraw_1:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    retq
entry:
  %shl = ashr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <16 x i16> %shl
}
115
define <16 x i16> @test_sraw_2(<16 x i16> %InVec) {
; Uniform arithmetic shift-right by 1 uses an immediate vpsraw.
; CHECK-LABEL: test_sraw_2:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsraw $1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = ashr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %shl
}
125
define <16 x i16> @test_sraw_3(<16 x i16> %InVec) {
; Uniform arithmetic shift-right by 15 (max in-range amount) uses an immediate vpsraw.
; CHECK-LABEL: test_sraw_3:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsraw $15, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = ashr <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <16 x i16> %shl
}
135
define <8 x i32> @test_srad_1(<8 x i32> %InVec) {
; Arithmetic shift-right by 0 is a no-op; it must fold away to just 'retq'.
; CHECK-LABEL: test_srad_1:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    retq
entry:
  %shl = ashr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %shl
}
144
define <8 x i32> @test_srad_2(<8 x i32> %InVec) {
; Uniform arithmetic shift-right by 1 uses an immediate vpsrad.
; CHECK-LABEL: test_srad_2:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsrad $1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = ashr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %shl
}
154
define <8 x i32> @test_srad_3(<8 x i32> %InVec) {
; Uniform arithmetic shift-right by 31 (max in-range amount) uses an immediate vpsrad.
; CHECK-LABEL: test_srad_3:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsrad $31, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = ashr <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ret <8 x i32> %shl
}
164
; AVX2 Logical Shift Right
166
define <16 x i16> @test_srlw_1(<16 x i16> %InVec) {
; Logical shift-right by 0 is a no-op; it must fold away to just 'retq'.
; CHECK-LABEL: test_srlw_1:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    retq
entry:
  %shl = lshr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <16 x i16> %shl
}
175
define <16 x i16> @test_srlw_2(<16 x i16> %InVec) {
; Uniform logical shift-right by 1 uses an immediate vpsrlw.
; CHECK-LABEL: test_srlw_2:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsrlw $1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = lshr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %shl
}
185
define <16 x i16> @test_srlw_3(<16 x i16> %InVec) {
; Uniform logical shift-right by 15 (max in-range amount) uses an immediate vpsrlw.
; CHECK-LABEL: test_srlw_3:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsrlw $15, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = lshr <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <16 x i16> %shl
}
195
define <8 x i32> @test_srld_1(<8 x i32> %InVec) {
; Logical shift-right by 0 is a no-op; it must fold away to just 'retq'.
; CHECK-LABEL: test_srld_1:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    retq
entry:
  %shl = lshr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %shl
}
204
define <8 x i32> @test_srld_2(<8 x i32> %InVec) {
; Uniform logical shift-right by 1 uses an immediate vpsrld.
; CHECK-LABEL: test_srld_2:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsrld $1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = lshr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %shl
}
214
define <8 x i32> @test_srld_3(<8 x i32> %InVec) {
; Uniform logical shift-right by 31 (max in-range amount) uses an immediate vpsrld.
; CHECK-LABEL: test_srld_3:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsrld $31, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = lshr <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ret <8 x i32> %shl
}
224
define <4 x i64> @test_srlq_1(<4 x i64> %InVec) {
; Logical shift-right by 0 is a no-op; it must fold away to just 'retq'.
; CHECK-LABEL: test_srlq_1:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    retq
entry:
  %shl = lshr <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
  ret <4 x i64> %shl
}
233
define <4 x i64> @test_srlq_2(<4 x i64> %InVec) {
; Uniform logical shift-right by 1 uses an immediate vpsrlq.
; CHECK-LABEL: test_srlq_2:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsrlq $1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = lshr <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %shl
}
243
define <4 x i64> @test_srlq_3(<4 x i64> %InVec) {
; Uniform logical shift-right by 63 (max in-range amount) uses an immediate vpsrlq.
; CHECK-LABEL: test_srlq_3:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsrlq $63, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = lshr <4 x i64> %InVec, <i64 63, i64 63, i64 63, i64 63>
  ret <4 x i64> %shl
}
253
define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
; The per-lane shift amounts come from masking 4 x i64 with 8 and truncating to
; i32. The lowering should shuffle/truncate the i64 amounts down to xmm width,
; apply the mask there, and use the variable shift vpsrlvd on 128-bit vectors.
; (The result is named %sra for historical reasons, but this is an lshr.)
; CHECK-LABEL: srl_trunc_and_v4i64:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
; CHECK-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm2
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %and = and <4 x i64> %y, <i64 8, i64 8, i64 8, i64 8>
  %trunc = trunc <4 x i64> %and to <4 x i32>
  %sra = lshr <4 x i32> %x, %trunc
  ret <4 x i32> %sra
}
269
270;
271; Vectorized byte shifts
272;
273
define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; Per-lane variable i16 shift: both operands are zero-extended to i32 lanes so
; the AVX2 variable shift vpsllvd can be used, then the results are packed back
; down to i16 via vpshufb + vpermq.
; CHECK-LABEL: shl_8i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shl = shl <8 x i16> %r, %a
  ret <8 x i16> %shl
}
288
define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; Per-lane variable i16 shift on a full ymm: the lanes are widened to i32 by
; interleaving with zero (unpckh/unpckl), shifted with vpsllvd in two halves,
; then narrowed back with vpsrld $16 + vpackusdw.
; CHECK-LABEL: shl_16i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; CHECK-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; CHECK-NEXT:    vpsllvd %ymm3, %ymm4, %ymm3
; CHECK-NEXT:    vpsrld $16, %ymm3, %ymm3
; CHECK-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; CHECK-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; CHECK-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpsrld $16, %ymm0, %ymm0
; CHECK-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %shl = shl <16 x i16> %r, %a
  ret <16 x i16> %shl
}
306
define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; Per-lane variable i8 shift: vpsllw $5 moves the 3 shift-amount bits into the
; byte MSB so vpblendvb can select; three blend rounds apply shifts of 4, 2 and
; 1 (the latter two via vpand masking, the last via vpaddb for shift-by-1).
; CHECK-LABEL: shl_32i8:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $5, %ymm1, %ymm1
; CHECK-NEXT:    vpsllw $4, %ymm0, %ymm2
; CHECK-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    vpsllw $2, %ymm0, %ymm2
; CHECK-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; CHECK-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %shl = shl <32 x i8> %r, %a
  ret <32 x i8> %shl
}
325
define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; Per-lane variable arithmetic i16 shift: the value is sign-extended
; (vpmovsxwd) and the amount zero-extended (vpmovzxwd) to i32 lanes, shifted
; with vpsravd, then packed back to i16.
; CHECK-LABEL: ashr_8i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT:    vpmovsxwd %xmm0, %ymm0
; CHECK-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %ashr = ashr <8 x i16> %r, %a
  ret <8 x i16> %ashr
}
340
define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; Per-lane variable arithmetic i16 shift on a full ymm: widen to i32 by
; interleaving (value in the high half-word so vpsravd shifts in sign bits),
; shift both halves with vpsravd, narrow with vpsrld $16 + vpackusdw.
; CHECK-LABEL: ashr_16i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; CHECK-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; CHECK-NEXT:    vpsravd %ymm3, %ymm4, %ymm3
; CHECK-NEXT:    vpsrld $16, %ymm3, %ymm3
; CHECK-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; CHECK-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; CHECK-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpsrld $16, %ymm0, %ymm0
; CHECK-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %ashr = ashr <16 x i16> %r, %a
  ret <16 x i16> %ashr
}
358
define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; Per-lane variable arithmetic i8 shift: bytes are unpacked to words (value in
; the high byte so vpsraw shifts in sign bits), the amount's bits are stepped
; into the MSB for vpblendvb, and shifts of 4/2/1 are blended in per round for
; each half before repacking with vpsrlw $8 + vpackuswb.
; CHECK-LABEL: ashr_32i8:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $5, %ymm1, %ymm1
; CHECK-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; CHECK-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; CHECK-NEXT:    vpsraw $4, %ymm3, %ymm4
; CHECK-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; CHECK-NEXT:    vpsraw $2, %ymm3, %ymm4
; CHECK-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; CHECK-NEXT:    vpsraw $1, %ymm3, %ymm4
; CHECK-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; CHECK-NEXT:    vpsrlw $8, %ymm2, %ymm2
; CHECK-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; CHECK-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; CHECK-NEXT:    vpsraw $4, %ymm0, %ymm3
; CHECK-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; CHECK-NEXT:    vpsraw $2, %ymm0, %ymm3
; CHECK-NEXT:    vpaddw %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; CHECK-NEXT:    vpsraw $1, %ymm0, %ymm3
; CHECK-NEXT:    vpaddw %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; CHECK-NEXT:    vpsrlw $8, %ymm0, %ymm0
; CHECK-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %ashr = ashr <32 x i8> %r, %a
  ret <32 x i8> %ashr
}
390
define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; Per-lane variable logical i16 shift: both operands are zero-extended to i32
; lanes, shifted with vpsrlvd, then packed back to i16 via vpshufb + vpermq.
; CHECK-LABEL: lshr_8i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %lshr = lshr <8 x i16> %r, %a
  ret <8 x i16> %lshr
}
405
define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; Per-lane variable logical i16 shift on a full ymm: widen to i32 by
; interleaving with zero, shift both halves with vpsrlvd, then narrow back
; with vpsrld $16 + vpackusdw.
; CHECK-LABEL: lshr_16i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; CHECK-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; CHECK-NEXT:    vpsrlvd %ymm3, %ymm4, %ymm3
; CHECK-NEXT:    vpsrld $16, %ymm3, %ymm3
; CHECK-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; CHECK-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; CHECK-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpsrld $16, %ymm0, %ymm0
; CHECK-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %lshr = lshr <16 x i16> %r, %a
  ret <16 x i16> %lshr
}
423
define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; Per-lane variable logical i8 shift: vpsllw $5 steps the shift-amount bits
; into the byte MSB for vpblendvb; three blend rounds apply vpsrlw by 4, 2 and
; 1, each masked with vpand to clear bits shifted in from neighboring bytes.
; CHECK-LABEL: lshr_32i8:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $5, %ymm1, %ymm1
; CHECK-NEXT:    vpsrlw $4, %ymm0, %ymm2
; CHECK-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    vpsrlw $2, %ymm0, %ymm2
; CHECK-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    vpsrlw $1, %ymm0, %ymm2
; CHECK-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %lshr = lshr <32 x i8> %r, %a
  ret <32 x i8> %lshr
}
443