; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64

; Verify that we correctly fold target-specific packed vector shifts by
; immediate count into a simple build_vector when all the elements of the
; input vector to the packed shift are constants or undef.

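; For example, in test1 below, pslli.w by 3 on the constant vector
; <1,2,4,8,1,2,4,8> folds to a load of the constant <8,16,32,64,8,16,32,64>,
; and no shift instruction is emitted at all.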
define <8 x i16> @test1() {
; X32-LABEL: test1:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [8,16,32,64,8,16,32,64]
; X32-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [8,16,32,64,8,16,32,64]
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> <i16 1, i16 2, i16 4, i16 8, i16 1, i16 2, i16 4, i16 8>, i32 3)
  ret <8 x i16> %1
}

define <8 x i16> @test2() {
; X32-LABEL: test2:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X32-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i16 8, i16 16, i16 32>, i32 3)
  ret <8 x i16> %1
}

define <8 x i16> @test3() {
; X32-LABEL: test3:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X32-NEXT:    retl
;
; X64-LABEL: test3:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i16 8, i16 16, i16 32>, i32 3)
  ret <8 x i16> %1
}

define <4 x i32> @test4() {
; X32-LABEL: test4:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [8,16,32,64]
; X32-NEXT:    retl
;
; X64-LABEL: test4:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [8,16,32,64]
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 1, i32 2, i32 4, i32 8>, i32 3)
  ret <4 x i32> %1
}

define <4 x i32> @test5() {
; X32-LABEL: test5:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4]
; X32-NEXT:    retl
;
; X64-LABEL: test5:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4]
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
  ret <4 x i32> %1
}

define <4 x i32> @test6() {
; X32-LABEL: test6:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4]
; X32-NEXT:    retl
;
; X64-LABEL: test6:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4]
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
  ret <4 x i32> %1
}

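; The 64-bit element shifts below are only folded on X64. On X32 the CHECK
; lines show the unshifted constant being materialized with movdqa (each i64
; printed as a pair of i32 elements) and shifted at run time with psllq/psrlq.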
define <2 x i64> @test7() {
; X32-LABEL: test7:
; X32:       # BB#0:
; X32-NEXT:    movdqa {{.*#+}} xmm0 = [1,0,2,0]
; X32-NEXT:    psllq $3, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test7:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [8,16]
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 1, i64 2>, i32 3)
  ret <2 x i64> %1
}

define <2 x i64> @test8() {
; X32-LABEL: test8:
; X32:       # BB#0:
; X32-NEXT:    movdqa {{.*#+}} xmm0 = [8,0,16,0]
; X32-NEXT:    psrlq $3, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test8:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [1,2]
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 8, i64 16>, i32 3)
  ret <2 x i64> %1
}

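; Tests 9 through 16 feed vectors containing undef elements into the shifts.
; The fold keeps the undef lanes undef in the resulting constant, which the
; autogenerated checks print as 'u'.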
define <8 x i16> @test9() {
; X32-LABEL: test9:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X32-NEXT:    retl
;
; X64-LABEL: test9:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
  ret <8 x i16> %1
}

define <4 x i32> @test10() {
; X32-LABEL: test10:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <u,1,u,4>
; X32-NEXT:    retl
;
; X64-LABEL: test10:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,1,u,4>
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
  ret <4 x i32> %1
}

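; Tests 11 and 16 combine the two cases above: 64-bit element shifts whose
; input vector contains undef elements. As before, the fold only fires on X64.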
define <2 x i64> @test11() {
; X32-LABEL: test11:
; X32:       # BB#0:
; X32-NEXT:    movdqa {{.*#+}} xmm0 = <u,u,31,0>
; X32-NEXT:    psrlq $3, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test11:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,3>
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 undef, i64 31>, i32 3)
  ret <2 x i64> %1
}

define <8 x i16> @test12() {
; X32-LABEL: test12:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X32-NEXT:    retl
;
; X64-LABEL: test12:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
  ret <8 x i16> %1
}

define <4 x i32> @test13() {
; X32-LABEL: test13:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <u,1,u,4>
; X32-NEXT:    retl
;
; X64-LABEL: test13:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,1,u,4>
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
  ret <4 x i32> %1
}

define <8 x i16> @test14() {
; X32-LABEL: test14:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X32-NEXT:    retl
;
; X64-LABEL: test14:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
  ret <8 x i16> %1
}

define <4 x i32> @test15() {
; X32-LABEL: test15:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <u,64,u,256>
; X32-NEXT:    retl
;
; X64-LABEL: test15:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,64,u,256>
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
  ret <4 x i32> %1
}

define <2 x i64> @test16() {
; X32-LABEL: test16:
; X32:       # BB#0:
; X32-NEXT:    movdqa {{.*#+}} xmm0 = <u,u,31,0>
; X32-NEXT:    psllq $3, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test16:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,248>
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 undef, i64 31>, i32 3)
  ret <2 x i64> %1
}

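; SSE2 packed shift-by-immediate intrinsics exercised above.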
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32)
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32)
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32)
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32)
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32)
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32)
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32)
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32)