; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,MASK
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2,+fast-vector-shift-masks | FileCheck %s --check-prefixes=CHECK,SHIFT
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver1 | FileCheck %s --check-prefixes=CHECK,SHIFT

; SSE2 Logical Shift Left

; shl of every i16 lane by 0 is a no-op: the assertions expect nothing but
; the return instruction.
define <8 x i16> @test_sllw_1(<8 x i16> %InVec) {
; CHECK-LABEL: test_sllw_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    retq
entry:
  %shl = shl <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <8 x i16> %shl
}

; shl of every i16 lane by 1: the assertions expect the shift to be emitted
; as a self-add (paddw) rather than a psllw.
define <8 x i16> @test_sllw_2(<8 x i16> %InVec) {
; CHECK-LABEL: test_sllw_2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    paddw %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %shl = shl <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %shl
}

; shl of every i16 lane by 15 (element width minus one): the assertions
; expect a single psllw with immediate 15.
define <8 x i16> @test_sllw_3(<8 x i16> %InVec) {
; CHECK-LABEL: test_sllw_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    psllw $15, %xmm0
; CHECK-NEXT:    retq
entry:
  %shl = shl <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <8 x i16> %shl
}

; shl of every i32 lane by 0 is a no-op: the assertions expect nothing but
; the return instruction.
define <4 x i32> @test_slld_1(<4 x i32> %InVec) {
; CHECK-LABEL: test_slld_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    retq
entry:
  %shl = shl <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %shl
}

; shl of every i32 lane by 1: the assertions expect the shift to be emitted
; as a self-add (paddd) rather than a pslld.
define <4 x i32> @test_slld_2(<4 x i32> %InVec) {
; CHECK-LABEL: test_slld_2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    paddd %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %shl = shl <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shl
}

; shl of every i32 lane by 31 (element width minus one): the assertions
; expect a single pslld with immediate 31.
define <4 x i32> @test_slld_3(<4 x i32> %InVec) {
; CHECK-LABEL: test_slld_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pslld $31, %xmm0
; CHECK-NEXT:    retq
entry:
  %shl = shl <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %shl
}

; shl of every i64 lane by 0 is a no-op: the assertions expect nothing but
; the return instruction.
define <2 x i64> @test_sllq_1(<2 x i64> %InVec) {
; CHECK-LABEL: test_sllq_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    retq
entry:
  %shl = shl <2 x i64> %InVec, <i64 0, i64 0>
  ret <2 x i64> %shl
}

; shl of every i64 lane by 1: the assertions expect the shift to be emitted
; as a self-add (paddq) rather than a psllq.
define <2 x i64> @test_sllq_2(<2 x i64> %InVec) {
; CHECK-LABEL: test_sllq_2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    paddq %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %shl = shl <2 x i64> %InVec, <i64 1, i64 1>
  ret <2 x i64> %shl
}

; shl of every i64 lane by 63 (element width minus one): the assertions
; expect a single psllq with immediate 63.
define <2 x i64> @test_sllq_3(<2 x i64> %InVec) {
; CHECK-LABEL: test_sllq_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    psllq $63, %xmm0
; CHECK-NEXT:    retq
entry:
  %shl = shl <2 x i64> %InVec, <i64 63, i64 63>
  ret <2 x i64> %shl
}

; SSE2 Arithmetic Shift Right

; ashr of every i16 lane by 0 is a no-op: the assertions expect nothing but
; the return instruction.
define <8 x i16> @test_sraw_1(<8 x i16> %InVec) {
; CHECK-LABEL: test_sraw_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    retq
entry:
  %sra = ashr <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <8 x i16> %sra
}

; ashr of every i16 lane by 1: the assertions expect a single psraw with
; immediate 1.
define <8 x i16> @test_sraw_2(<8 x i16> %InVec) {
; CHECK-LABEL: test_sraw_2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    psraw $1, %xmm0
; CHECK-NEXT:    retq
entry:
  %sra = ashr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %sra
}

; ashr of every i16 lane by 15 (element width minus one): the assertions
; expect a single psraw with immediate 15.
define <8 x i16> @test_sraw_3(<8 x i16> %InVec) {
; CHECK-LABEL: test_sraw_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    psraw $15, %xmm0
; CHECK-NEXT:    retq
entry:
  %sra = ashr <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <8 x i16> %sra
}

; ashr of every i32 lane by 0 is a no-op: the assertions expect nothing but
; the return instruction.
define <4 x i32> @test_srad_1(<4 x i32> %InVec) {
; CHECK-LABEL: test_srad_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    retq
entry:
  %sra = ashr <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %sra
}

; ashr of every i32 lane by 1: the assertions expect a single psrad with
; immediate 1.
define <4 x i32> @test_srad_2(<4 x i32> %InVec) {
; CHECK-LABEL: test_srad_2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    psrad $1, %xmm0
; CHECK-NEXT:    retq
entry:
  %sra = ashr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %sra
}

; ashr of every i32 lane by 31 (element width minus one): the assertions
; expect a single psrad with immediate 31.
define <4 x i32> @test_srad_3(<4 x i32> %InVec) {
; CHECK-LABEL: test_srad_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    psrad $31, %xmm0
; CHECK-NEXT:    retq
entry:
  %sra = ashr <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %sra
}

; SSE2 Logical Shift Right

; lshr of every i16 lane by 0 is a no-op: the assertions expect nothing but
; the return instruction.
define <8 x i16> @test_srlw_1(<8 x i16> %InVec) {
; CHECK-LABEL: test_srlw_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    retq
entry:
  %srl = lshr <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <8 x i16> %srl
}

; lshr of every i16 lane by 1: the assertions expect a single psrlw with
; immediate 1.
define <8 x i16> @test_srlw_2(<8 x i16> %InVec) {
; CHECK-LABEL: test_srlw_2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    psrlw $1, %xmm0
; CHECK-NEXT:    retq
entry:
  %srl = lshr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %srl
}

; lshr of every i16 lane by 15 (element width minus one): the assertions
; expect a single psrlw with immediate 15.
define <8 x i16> @test_srlw_3(<8 x i16> %InVec) {
; CHECK-LABEL: test_srlw_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    psrlw $15, %xmm0
; CHECK-NEXT:    retq
entry:
  %srl = lshr <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <8 x i16> %srl
}

; lshr of every i32 lane by 0 is a no-op: the assertions expect nothing but
; the return instruction.
define <4 x i32> @test_srld_1(<4 x i32> %InVec) {
; CHECK-LABEL: test_srld_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    retq
entry:
  %srl = lshr <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %srl
}

; lshr of every i32 lane by 1: the assertions expect a single psrld with
; immediate 1.
define <4 x i32> @test_srld_2(<4 x i32> %InVec) {
; CHECK-LABEL: test_srld_2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    psrld $1, %xmm0
; CHECK-NEXT:    retq
entry:
  %srl = lshr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %srl
}

; lshr of every i32 lane by 31 (element width minus one): the assertions
; expect a single psrld with immediate 31.
define <4 x i32> @test_srld_3(<4 x i32> %InVec) {
; CHECK-LABEL: test_srld_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    psrld $31, %xmm0
; CHECK-NEXT:    retq
entry:
  %srl = lshr <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %srl
}
214
; lshr of every i64 lane by 0 is a no-op: the assertions expect nothing but
; the return instruction.
define <2 x i64> @test_srlq_1(<2 x i64> %InVec) {
; CHECK-LABEL: test_srlq_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    retq
entry:
  %srl = lshr <2 x i64> %InVec, <i64 0, i64 0>
  ret <2 x i64> %srl
}

; lshr of every i64 lane by 1: the assertions expect a single psrlq with
; immediate 1.
define <2 x i64> @test_srlq_2(<2 x i64> %InVec) {
; CHECK-LABEL: test_srlq_2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    psrlq $1, %xmm0
; CHECK-NEXT:    retq
entry:
  %srl = lshr <2 x i64> %InVec, <i64 1, i64 1>
  ret <2 x i64> %srl
}

; lshr of every i64 lane by 63 (element width minus one): the assertions
; expect a single psrlq with immediate 63.
define <2 x i64> @test_srlq_3(<2 x i64> %InVec) {
; CHECK-LABEL: test_srlq_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    psrlq $63, %xmm0
; CHECK-NEXT:    retq
entry:
  %srl = lshr <2 x i64> %InVec, <i64 63, i64 63>
  ret <2 x i64> %srl
}

; Two chained ashr by splat 2 and splat 4: the assertions expect them to be
; merged into one psrad by 6.
define <4 x i32> @sra_sra_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: sra_sra_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    psrad $6, %xmm0
; CHECK-NEXT:    retq
  %sra0 = ashr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %sra1 = ashr <4 x i32> %sra0, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %sra1
}

; Two chained lshr by splat 2 and splat 4: the assertions expect them to be
; merged into one psrld by 6.
define <4 x i32> @srl_srl_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: srl_srl_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    psrld $6, %xmm0
; CHECK-NEXT:    retq
  %srl0 = lshr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %srl1 = lshr <4 x i32> %srl0, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %srl1
}

; lshr by 4 of a shl by 4 only clears the top four bits of each lane: the
; assertions expect a single andps with a constant-pool mask and no shifts.
define <4 x i32> @srl_shl_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: srl_shl_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
  %shl = shl <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
  %srl = lshr <4 x i32> %shl, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %srl
}

; lshr by 31 of an ashr by the variable amount %y: the assertions expect the
; variable ashr to disappear, leaving only a psrld by 31 on %x.
define <4 x i32> @srl_sra_31_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: srl_sra_31_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    psrld $31, %xmm0
; CHECK-NEXT:    retq
  %sra = ashr <4 x i32> %x, %y
  %srl1 = lshr <4 x i32> %sra, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %srl1
}

; Two chained shl by splat 2 and splat 4: the assertions expect them to be
; merged into one pslld by 6.
define <4 x i32> @shl_shl_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: shl_shl_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pslld $6, %xmm0
; CHECK-NEXT:    retq
  %shl0 = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %shl1 = shl <4 x i32> %shl0, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %shl1
}

; shl by 4 of an ashr by 4 only clears the low four bits of each lane: the
; assertions expect a single andps with a constant-pool mask and no shifts.
define <4 x i32> @shl_sra_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: shl_sra_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
  %sra = ashr <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
  %shl = shl <4 x i32> %sra, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %shl
}

; shl by 5 of an lshr by 2. The expected code depends on the run
; configuration: under the MASK prefix (plain +sse2) a single pslld by 3
; followed by a pand with a constant-pool mask; under the SHIFT prefix
; (+fast-vector-shift-masks or btver1) the two shifts are kept as
; psrld by 2 and pslld by 5.
define <4 x i32> @shl_srl_v4i32(<4 x i32> %x) nounwind {
; MASK-LABEL: shl_srl_v4i32:
; MASK:       # %bb.0:
; MASK-NEXT:    pslld $3, %xmm0
; MASK-NEXT:    pand {{.*}}(%rip), %xmm0
; MASK-NEXT:    retq
;
; SHIFT-LABEL: shl_srl_v4i32:
; SHIFT:       # %bb.0:
; SHIFT-NEXT:    psrld $2, %xmm0
; SHIFT-NEXT:    pslld $5, %xmm0
; SHIFT-NEXT:    retq
  %srl = lshr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %shl = shl <4 x i32> %srl, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %shl
}

; lshr by 2 on <4 x i16>, zero-extend to <4 x i32>, then shl by 2. The
; assertions expect no shift instructions at all: a pand with a constant-pool
; mask followed by a zero-extension built from pxor + punpcklwd.
define <4 x i32> @shl_zext_srl_v4i32(<4 x i16> %x) nounwind {
; CHECK-LABEL: shl_zext_srl_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pand {{.*}}(%rip), %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm1
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT:    retq
  %srl = lshr <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
  %zext = zext <4 x i16> %srl to <4 x i32>
  %shl = shl <4 x i32> %zext, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %shl
}

; lshr by 16 on <4 x i32>, truncate to <4 x i16>, then ashr by 3. The
; assertions expect the two shifts to be merged into one psrad by 19 on the
; wide type, followed by packssdw for the truncation.
define <4 x i16> @sra_trunc_srl_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: sra_trunc_srl_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    psrad $19, %xmm0
; CHECK-NEXT:    packssdw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %srl = lshr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
  %trunc = trunc <4 x i32> %srl to <4 x i16>
  %sra = ashr <4 x i16> %trunc, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %sra
}

; shl by 2 on <4 x i16>, zero-extend to <4 x i32>, then shl by 17. The
; assertions expect a punpcklwd widening followed by one pslld by 19, i.e.
; the two shifts merged on the wide type.
define <4 x i32> @shl_zext_shl_v4i32(<4 x i16> %x) nounwind {
; CHECK-LABEL: shl_zext_shl_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; CHECK-NEXT:    pslld $19, %xmm0
; CHECK-NEXT:    retq
  %shl0 = shl <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
  %ext = zext <4 x i16> %shl0 to <4 x i32>
  %shl1 = shl <4 x i32> %ext, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %shl1
}

; Single ashr of every i32 lane by 3: the assertions expect one psrad with
; immediate 3 under every run configuration.
define <4 x i32> @sra_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: sra_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    psrad $3, %xmm0
; CHECK-NEXT:    retq
  %sra = ashr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %sra
}

; Single lshr of every i32 lane by 3: the assertions expect one psrld with
; immediate 3 under every run configuration.
define <4 x i32> @srl_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: srl_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    psrld $3, %xmm0
; CHECK-NEXT:    retq
  %srl = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %srl
}

; Single shl of every i32 lane by 3: the assertions expect one pslld with
; immediate 3 under every run configuration.
define <4 x i32> @shl_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: shl_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pslld $3, %xmm0
; CHECK-NEXT:    retq
  %shl = shl <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %shl
}
