; RUN: opt < %s -instcombine -S | FileCheck %s
2
;
; EXTRQ
;
6
; Baseline for the variable-control form: both operands are opaque function
; arguments, so the checks expect the extrq call to come through untouched.
define <2 x i64> @test_extrq_call(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_call
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
  ret <2 x i64> %1
}
14
; Extracting from an all-zero source: whatever the (unknown) control vector %y
; selects, the checks expect the low element to fold to 0 and the high element
; to be undef.
define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_zero_arg0
; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> zeroinitializer, <16 x i8> %y) nounwind
  ret <2 x i64> %1
}
21
; All-zero control vector (both control bytes are 0): the checks expect the
; call to disappear and %x to be returned directly.
define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_zero_arg1
; CHECK-NEXT: ret <2 x i64> %x
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind
  ret <2 x i64> %1
}
28
; Constant control vector with a variable source: the checks expect extrq to
; be rewritten as the immediate form extrqi, with control byte 0 (8) and
; control byte 1 (15) becoming the two i8 immediates in that order.
define <2 x i64> @test_extrq_to_extqi(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_to_extqi
; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 15)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
  ret <2 x i64> %1
}
36
; Fully constant operands: an 8-bit field taken at bit 15 of an all-ones low
; element is 0xFF, so the checks expect <255, undef>. The source's high
; element (55) does not show up in the result.
define <2 x i64> @test_extrq_constant(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_constant
; CHECK-NEXT: ret <2 x i64> <i64 255, i64 undef>
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 55>, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
  ret <2 x i64> %1
}
43
; Same as test_extrq_constant but with an undef high source element and a
; 16-bit field: the undef upper half must not block the fold, and the checks
; expect <65535, undef> (0xFFFF).
define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_constant_undef
; CHECK-NEXT: ret <2 x i64> <i64 65535, i64 undef>
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 undef>, <16 x i8> <i8 16, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
  ret <2 x i64> %1
}
50
;
; EXTRQI
;
54
; Baseline for the immediate form: length 8 at bit index 23 with a variable
; source. The checks expect the call to be left alone.
; NOTE(review): index 23 is not a multiple of 8, which is presumably why no
; byte shuffle is formed here, unlike the shuffle tests below.
define <2 x i64> @test_extrqi_call(<2 x i64> %x) {
; CHECK-LABEL: @test_extrqi_call
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
  ret <2 x i64> %1
}
62
; Byte-aligned extract (length 32, index 32): the checks expect a byte
; shufflevector instead of the intrinsic. Result bytes 0-3 come from source
; bytes 4-7, bytes 4-7 are taken from the zero lanes of the constant second
; operand (mask indices 20-23), and the upper 8 bytes are undef.
define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) {
; CHECK-LABEL: @test_extrqi_shuffle_1zuu
; CHECK-NEXT: %1 = bitcast <2 x i64> %x to <16 x i8>
; CHECK-NEXT: %2 = shufflevector <16 x i8> %1, <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64>
; CHECK-NEXT: ret <2 x i64> %3
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 32, i8 32)
  ret <2 x i64> %1
}
72
; Byte-aligned extract (length 8, index 16): the checks expect a byte
; shufflevector whose result byte 0 is source byte 2, bytes 1-7 are zero
; (mask indices 17-23 into the constant operand), and bytes 8-15 are undef.
define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) {
; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu
; CHECK-NEXT: %1 = bitcast <2 x i64> %x to <16 x i8>
; CHECK-NEXT: %2 = shufflevector <16 x i8> %1, <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 2, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64>
; CHECK-NEXT: ret <2 x i64> %3
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 16)
  ret <2 x i64> %1
}
82
; Out-of-range field: length 32 + index 33 = 65 runs past bit 63, and the
; checks expect the whole result to fold to undef even though the source is a
; known constant.
define <2 x i64> @test_extrqi_undef(<2 x i64> %x) {
; CHECK-LABEL: @test_extrqi_undef
; CHECK-NEXT: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 32, i8 33)
  ret <2 x i64> %1
}
89
; In-range field (length 3, index 18) taken from an all-zero source: the
; checks expect <0, undef>.
define <2 x i64> @test_extrqi_zero(<2 x i64> %x) {
; CHECK-LABEL: @test_extrqi_zero
; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 3, i8 18)
  ret <2 x i64> %1
}
96
; Constant fold: a 3-bit field at bit 18 of an all-ones low element is 0b111,
; so the checks expect <7, undef>. The source's high element (55) does not
; show up in the result.
define <2 x i64> @test_extrqi_constant(<2 x i64> %x) {
; CHECK-LABEL: @test_extrqi_constant
; CHECK-NEXT: ret <2 x i64> <i64 7, i64 undef>
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 55>, i8 3, i8 18)
  ret <2 x i64> %1
}
103
; Constant fold with an undef high source element: a 4-bit field at bit 18 of
; an all-ones low element is 0b1111, so the checks expect <15, undef>.
define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) {
; CHECK-LABEL: @test_extrqi_constant_undef
; CHECK-NEXT: ret <2 x i64> <i64 15, i64 undef>
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 undef>, i8 4, i8 18)
  ret <2 x i64> %1
}
110
;
; INSERTQ
;
114
; Baseline for the variable-control form: both operands are opaque function
; arguments, so the checks expect the insertq call to come through untouched.
define <2 x i64> @test_insertq_call(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertq_call
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
  ret <2 x i64> %1
}
122
; Constant second operand with a variable destination: the checks expect
; insertq to become the immediate form insertqi. The control word is the high
; element 658 = 0x0292: low 6 bits of byte 0 (0x92 & 0x3F) give 18, and byte 1
; gives 2, which appear as the i8 immediates 18 and 2. The consumed high
; element is replaced by undef in the rewritten operand.
define <2 x i64> @test_insertq_to_insertqi(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertq_to_insertqi
; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 8, i64 undef>, i8 18, i8 2)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> <i64 8, i64 658>) nounwind
  ret <2 x i64> %1
}
130
; Fully constant operands: with the same control word as
; test_insertq_to_insertqi (length 18, index 2), inserting the value 8 into a
; zero destination yields 8 << 2 = 32, so the checks expect <32, undef>.
define <2 x i64> @test_insertq_constant(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertq_constant
; CHECK-NEXT: ret <2 x i64> <i64 32, i64 undef>
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 8, i64 658>) nounwind
  ret <2 x i64> %1
}
137
; Constant fold with an undef high destination element: destination bit 0 lies
; outside the inserted field (bits 2..19) and survives, so the result is
; 1 | (8 << 2) = 33 and the checks expect <33, undef>.
define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertq_constant_undef
; CHECK-NEXT: ret <2 x i64> <i64 33, i64 undef>
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 8, i64 658>) nounwind
  ret <2 x i64> %1
}
144
;
; INSERTQI
;
148
; Byte-aligned insert (length 32, index 32): the checks expect a single byte
; shufflevector of %v and %i, with the surrounding bitcasts gone. Result bytes
; 0-3 stay from %v, bytes 4-7 are bytes 0-3 of %i (mask indices 16-19), and
; the upper 8 bytes are undef.
define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
; CHECK-LABEL: @test_insertqi_shuffle_04uu
; CHECK-NEXT: %1 = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <16 x i8> %1
  %1 = bitcast <16 x i8> %v to <2 x i64>
  %2 = bitcast <16 x i8> %i to <2 x i64>
  %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 32, i8 32)
  %4 = bitcast <2 x i64> %3 to <16 x i8>
  ret <16 x i8> %4
}
159
; Byte-aligned insert (length 16, index 0): the checks expect a single byte
; shufflevector of %v and %i. Result bytes 0-1 are bytes 0-1 of %i (mask
; indices 16-17), bytes 2-7 stay from %v, and the upper 8 bytes are undef.
define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu
; CHECK-NEXT: %1 = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <16 x i8> %1
  %1 = bitcast <16 x i8> %v to <2 x i64>
  %2 = bitcast <16 x i8> %i to <2 x i64>
  %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 16, i8 0)
  %4 = bitcast <2 x i64> %3 to <16 x i8>
  ret <16 x i8> %4
}
170
; Constant fold: inserting the low 16 bits of 8 at bit 1 of an all-ones
; destination clears bits 1..16 and ORs in 8 << 1 = 16. That leaves
; 0xFFFFFFFFFFFE0011, i.e. -131055, so the checks expect <-131055, undef>.
define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
; CHECK-LABEL: @test_insertqi_constant
; CHECK-NEXT: ret <2 x i64> <i64 -131055, i64 undef>
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 8, i64 0>, i8 16, i8 1)
  ret <2 x i64> %1
}
177
; The result of this insert is the second arg: the top 64 bits of the result
; are undefined, and the bottom 64 bits are copied from the second arg
; (length 64 at index 0 replaces the entire low element).
define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
; CHECK-LABEL: @testInsert64Bits
; CHECK-NEXT: ret <2 x i64> %i
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
  ret <2 x i64> %1
}
187
; Length 0 at index 0: the checks expect the same result as the explicit
; 64-bit insert in testInsert64Bits, i.e. %i is returned directly, so a zero
; length field is treated as a full 64-bit insert.
define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
; CHECK-LABEL: @testZeroLength
; CHECK-NEXT: ret <2 x i64> %i
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
  ret <2 x i64> %1
}
194
; Length 0 with a non-zero index (16): since length 0 acts as a 64-bit insert
; (see testZeroLength), the field overruns bit 63 and the checks expect the
; whole result to be undef.
define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
; CHECK-LABEL: @testUndefinedInsertq_1
; CHECK-NEXT: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
  ret <2 x i64> %1
}
201
; Length 48 at index 32: 48 + 32 = 80 overruns the 64-bit low element, so the
; checks expect the whole result to be undef.
define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
; CHECK-LABEL: @testUndefinedInsertq_2
; CHECK-NEXT: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
  ret <2 x i64> %1
}
208
; Length 64 at index 16: the field overruns bit 63, so the checks expect the
; whole result to be undef (compare testInsert64Bits, where index 0 is valid).
define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
; CHECK-LABEL: @testUndefinedInsertq_3
; CHECK-NEXT: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
  ret <2 x i64> %1
}
215
;
; Vector Demanded Bits
;
219
; The source operand is a splat of element 0 of %x. The checks expect the
; shuffle to vanish and the call to take %x directly, i.e. only element 0 of
; extrq's first operand is demanded.
define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_arg0
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind
  ret <2 x i64> %2
}
228
; The control operand keeps bytes 0-7 of %y and overwrites bytes 8-15 with
; byte 0. The checks expect the shuffle to vanish and the call to take %y
; directly, i.e. only the low 8 bytes of extrq's second operand are demanded.
define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_arg1
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
  ret <2 x i64> %2
}
237
; Combines test_extrq_arg0 and test_extrq_arg1: both operands go through
; shuffles that only disturb their upper halves, and the checks expect both
; shuffles to vanish so the call uses %x and %y directly.
define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_args01
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %3 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %2) nounwind
  ret <2 x i64> %3
}
247
; Only element 1 of the extrq result is used (splatted by the shuffle). The
; checks expect the function to fold to undef, i.e. the upper element of the
; intrinsic's result is treated as undefined.
define <2 x i64> @test_extrq_ret(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_ret
; CHECK-NEXT: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %2
}
255
; The source operand is a splat of element 0 of %x. The checks expect the
; shuffle to vanish and the extrqi call to take %x directly, i.e. only
; element 0 of the source is demanded.
define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) {
; CHECK-LABEL: @test_extrqi_arg0
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2)
  ret <2 x i64> %2
}
264
; Only element 1 of the extrqi result is used (splatted by the shuffle). The
; checks expect the function to fold to undef, i.e. the upper element of the
; intrinsic's result is treated as undefined.
define <2 x i64> @test_extrqi_ret(<2 x i64> %x) {
; CHECK-LABEL: @test_extrqi_ret
; CHECK-NEXT: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) nounwind
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %2
}
272
; The destination operand is a splat of element 0 of %x. The checks expect
; the shuffle to vanish and the insertq call to take %x directly, i.e. only
; element 0 of the first operand is demanded.
define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertq_arg0
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind
  ret <2 x i64> %2
}
281
; Only element 1 of the insertq result is used (splatted by the shuffle). The
; checks expect the function to fold to undef, i.e. the upper element of the
; intrinsic's result is treated as undefined.
define <2 x i64> @test_insertq_ret(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertq_ret
; CHECK-NEXT: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %2
}
289
; The destination operand is a splat of element 0 of %x. The checks expect
; the shuffle to vanish and the insertqi call to take %x directly, i.e. only
; element 0 of the first operand is demanded.
define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertqi_arg0
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind
  ret <2 x i64> %2
}
298
; The inserted-value operand is a splat of element 0 of %y. The checks expect
; the shuffle to vanish and the insertqi call to take %y directly, i.e. only
; element 0 of the second operand is demanded.
define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertqi_arg1
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind
  ret <2 x i64> %2
}
307
; Combines test_insertqi_arg0 and test_insertqi_arg1: both vector operands
; are splats of their element 0, and the checks expect both shuffles to
; vanish so the call uses %x and %y directly.
define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertqi_args01
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 3, i8 2) nounwind
  ret <2 x i64> %3
}
317
; Only element 1 of the insertqi result is used (splatted by the shuffle).
; The checks expect the function to fold to undef, i.e. the upper element of
; the intrinsic's result is treated as undefined.
define <2 x i64> @test_insertqi_ret(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertqi_ret
; CHECK-NEXT: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) nounwind
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %2
}
325
; Declarations of the four SSE4A intrinsics exercised by the tests in this
; file. Each is paired with a check that the declaration is still present in
; the optimized output.

; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrq
declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind

; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrqi
declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind

; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertq
declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind

; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
337