• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s
2; RUN: llc -mtriple=x86_64-pc-linux -mattr=-sse4.1 -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s
3; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7-avx < %s | FileCheck -check-prefix=CHECK -check-prefix=AVX %s
4
5; Ensure that the backend selects SSE/AVX scalar fp instructions
6; from a packed fp instruction plus a vector insert.
7
8
; Packed fadd of %a and %b. The shuffle keeps lane 0 of the sum and takes
; lanes 1-3 from %a (mask indices 5, 6, 7 select from the second operand).
define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
  %sum = fadd <4 x float> %a, %b
  %merged = shufflevector <4 x float> %sum, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %merged
}
14
15; CHECK-LABEL: test_add_ss
16; SSE2: addss   %xmm1, %xmm0
17; AVX: vaddss   %xmm1, %xmm0, %xmm0
18; CHECK-NOT: movss
19; CHECK: ret
20
21
; Packed fsub (%a - %b). The shuffle keeps lane 0 of the difference and takes
; lanes 1-3 from %a (mask indices 5, 6, 7 select from the second operand).
define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
  %diff = fsub <4 x float> %a, %b
  %merged = shufflevector <4 x float> %diff, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %merged
}
27
28; CHECK-LABEL: test_sub_ss
29; SSE2: subss   %xmm1, %xmm0
30; AVX: vsubss   %xmm1, %xmm0, %xmm0
31; CHECK-NOT: movss
32; CHECK: ret
33
34
; Packed fmul of %a and %b. The shuffle keeps lane 0 of the product and takes
; lanes 1-3 from %a (mask indices 5, 6, 7 select from the second operand).
define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
  %prod = fmul <4 x float> %a, %b
  %merged = shufflevector <4 x float> %prod, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %merged
}
40
41; CHECK-LABEL: test_mul_ss
42; SSE2: mulss   %xmm1, %xmm0
43; AVX: vmulss   %xmm1, %xmm0, %xmm0
44; CHECK-NOT: movss
45; CHECK: ret
46
47
; Packed fdiv (%a / %b). The shuffle keeps lane 0 of the quotient and takes
; lanes 1-3 from %a (mask indices 5, 6, 7 select from the second operand).
define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
  %quot = fdiv <4 x float> %a, %b
  %merged = shufflevector <4 x float> %quot, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %merged
}
53
54; CHECK-LABEL: test_div_ss
55; SSE2: divss   %xmm1, %xmm0
56; AVX: vdivss   %xmm1, %xmm0, %xmm0
57; CHECK-NOT: movss
58; CHECK: ret
59
60
; Packed fadd of %a and %b. The shuffle keeps lane 0 of the sum and takes
; lane 1 from %a (mask index 3 is element 1 of the second operand).
define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
  %sum = fadd <2 x double> %a, %b
  %merged = shufflevector <2 x double> %sum, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %merged
}
66
67; CHECK-LABEL: test_add_sd
68; SSE2: addsd   %xmm1, %xmm0
69; AVX: vaddsd   %xmm1, %xmm0, %xmm0
70; CHECK-NOT: movsd
71; CHECK: ret
72
73
; Packed fsub (%a - %b). The shuffle keeps lane 0 of the difference and takes
; lane 1 from %a (mask index 3 is element 1 of the second operand).
define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
  %diff = fsub <2 x double> %a, %b
  %merged = shufflevector <2 x double> %diff, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %merged
}
79
80; CHECK-LABEL: test_sub_sd
81; SSE2: subsd   %xmm1, %xmm0
82; AVX: vsubsd   %xmm1, %xmm0, %xmm0
83; CHECK-NOT: movsd
84; CHECK: ret
85
86
; Packed fmul of %a and %b. The shuffle keeps lane 0 of the product and takes
; lane 1 from %a (mask index 3 is element 1 of the second operand).
define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
  %prod = fmul <2 x double> %a, %b
  %merged = shufflevector <2 x double> %prod, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %merged
}
92
93; CHECK-LABEL: test_mul_sd
94; SSE2: mulsd   %xmm1, %xmm0
95; AVX: vmulsd   %xmm1, %xmm0, %xmm0
96; CHECK-NOT: movsd
97; CHECK: ret
98
99
; Packed fdiv (%a / %b). The shuffle keeps lane 0 of the quotient and takes
; lane 1 from %a (mask index 3 is element 1 of the second operand).
define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
  %quot = fdiv <2 x double> %a, %b
  %merged = shufflevector <2 x double> %quot, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %merged
}
105
106; CHECK-LABEL: test_div_sd
107; SSE2: divsd   %xmm1, %xmm0
108; AVX: vdivsd   %xmm1, %xmm0, %xmm0
109; CHECK-NOT: movsd
110; CHECK: ret
111
112
; Operand-swapped variant: packed fadd of %b and %a. The shuffle keeps lane 0
; of the sum and takes lanes 1-3 from %b (mask indices 5, 6, 7).
define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
  %sum = fadd <4 x float> %b, %a
  %merged = shufflevector <4 x float> %sum, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %merged
}
118
119; CHECK-LABEL: test2_add_ss
120; SSE2: addss   %xmm0, %xmm1
121; AVX: vaddss   %xmm0, %xmm1, %xmm0
122; CHECK-NOT: movss
123; CHECK: ret
124
125
; Operand-swapped variant: packed fsub (%b - %a). The shuffle keeps lane 0 of
; the difference and takes lanes 1-3 from %b (mask indices 5, 6, 7).
define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
  %diff = fsub <4 x float> %b, %a
  %merged = shufflevector <4 x float> %diff, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %merged
}
131
132; CHECK-LABEL: test2_sub_ss
133; SSE2: subss   %xmm0, %xmm1
134; AVX: vsubss   %xmm0, %xmm1, %xmm0
135; CHECK-NOT: movss
136; CHECK: ret
137
138
; Operand-swapped variant: packed fmul of %b and %a. The shuffle keeps lane 0
; of the product and takes lanes 1-3 from %b (mask indices 5, 6, 7).
define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
  %prod = fmul <4 x float> %b, %a
  %merged = shufflevector <4 x float> %prod, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %merged
}
144
145; CHECK-LABEL: test2_mul_ss
146; SSE2: mulss   %xmm0, %xmm1
147; AVX: vmulss   %xmm0, %xmm1, %xmm0
148; CHECK-NOT: movss
149; CHECK: ret
150
151
; Operand-swapped variant: packed fdiv (%b / %a). The shuffle keeps lane 0 of
; the quotient and takes lanes 1-3 from %b (mask indices 5, 6, 7).
define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
  %quot = fdiv <4 x float> %b, %a
  %merged = shufflevector <4 x float> %quot, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %merged
}
157
158; CHECK-LABEL: test2_div_ss
159; SSE2: divss   %xmm0, %xmm1
160; AVX: vdivss   %xmm0, %xmm1, %xmm0
161; CHECK-NOT: movss
162; CHECK: ret
163
164
; Operand-swapped variant: packed fadd of %b and %a. The shuffle keeps lane 0
; of the sum and takes lane 1 from %b (mask index 3).
define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
  %sum = fadd <2 x double> %b, %a
  %merged = shufflevector <2 x double> %sum, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %merged
}
170
171; CHECK-LABEL: test2_add_sd
172; SSE2: addsd   %xmm0, %xmm1
173; AVX: vaddsd   %xmm0, %xmm1, %xmm0
174; CHECK-NOT: movsd
175; CHECK: ret
176
177
; Operand-swapped variant: packed fsub (%b - %a). The shuffle keeps lane 0 of
; the difference and takes lane 1 from %b (mask index 3).
define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
  %diff = fsub <2 x double> %b, %a
  %merged = shufflevector <2 x double> %diff, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %merged
}
183
184; CHECK-LABEL: test2_sub_sd
185; SSE2: subsd   %xmm0, %xmm1
186; AVX: vsubsd   %xmm0, %xmm1, %xmm0
187; CHECK-NOT: movsd
188; CHECK: ret
189
190
; Operand-swapped variant: packed fmul of %b and %a. The shuffle keeps lane 0
; of the product and takes lane 1 from %b (mask index 3).
define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
  %prod = fmul <2 x double> %b, %a
  %merged = shufflevector <2 x double> %prod, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %merged
}
196
197; CHECK-LABEL: test2_mul_sd
198; SSE2: mulsd   %xmm0, %xmm1
199; AVX: vmulsd   %xmm0, %xmm1, %xmm0
200; CHECK-NOT: movsd
201; CHECK: ret
202
203
; Operand-swapped variant: packed fdiv (%b / %a). The shuffle keeps lane 0 of
; the quotient and takes lane 1 from %b (mask index 3).
define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
  %quot = fdiv <2 x double> %b, %a
  %merged = shufflevector <2 x double> %quot, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %merged
}
209
210; CHECK-LABEL: test2_div_sd
211; SSE2: divsd   %xmm0, %xmm1
212; AVX: vdivsd   %xmm0, %xmm1, %xmm0
213; CHECK-NOT: movsd
214; CHECK: ret
215
216
; Select-based variant: packed fadd of %a and %b, then a constant-mask select
; that takes %a in lanes 1-3 (mask true) and the sum in lane 0 (mask false).
define <4 x float> @test3_add_ss(<4 x float> %a, <4 x float> %b) {
  %sum = fadd <4 x float> %a, %b
  %picked = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %sum
  ret <4 x float> %picked
}
222
223; CHECK-LABEL: test3_add_ss
224; SSE2: addss   %xmm1, %xmm0
225; AVX: vaddss   %xmm1, %xmm0, %xmm0
226; CHECK-NOT: movss
227; CHECK: ret
228
229
; Select-based variant: packed fsub (%a - %b), then a constant-mask select
; that takes %a in lanes 1-3 (mask true) and the difference in lane 0.
define <4 x float> @test3_sub_ss(<4 x float> %a, <4 x float> %b) {
  %diff = fsub <4 x float> %a, %b
  %picked = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %diff
  ret <4 x float> %picked
}
235
236; CHECK-LABEL: test3_sub_ss
237; SSE2: subss   %xmm1, %xmm0
238; AVX: vsubss   %xmm1, %xmm0, %xmm0
239; CHECK-NOT: movss
240; CHECK: ret
241
242
; Select-based variant: packed fmul of %a and %b, then a constant-mask select
; that takes %a in lanes 1-3 (mask true) and the product in lane 0.
define <4 x float> @test3_mul_ss(<4 x float> %a, <4 x float> %b) {
  %prod = fmul <4 x float> %a, %b
  %picked = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %prod
  ret <4 x float> %picked
}
248
249; CHECK-LABEL: test3_mul_ss
250; SSE2: mulss   %xmm1, %xmm0
251; AVX: vmulss   %xmm1, %xmm0, %xmm0
252; CHECK-NOT: movss
253; CHECK: ret
254
255
; Select-based variant: packed fdiv (%a / %b), then a constant-mask select
; that takes %a in lanes 1-3 (mask true) and the quotient in lane 0.
define <4 x float> @test3_div_ss(<4 x float> %a, <4 x float> %b) {
  %quot = fdiv <4 x float> %a, %b
  %picked = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %quot
  ret <4 x float> %picked
}
261
262; CHECK-LABEL: test3_div_ss
263; SSE2: divss   %xmm1, %xmm0
264; AVX: vdivss   %xmm1, %xmm0, %xmm0
265; CHECK-NOT: movss
266; CHECK: ret
267
268
; Select-based variant: packed fadd of %a and %b, then a constant-mask select
; that takes %a in lane 1 (mask true) and the sum in lane 0 (mask false).
define <2 x double> @test3_add_sd(<2 x double> %a, <2 x double> %b) {
  %sum = fadd <2 x double> %a, %b
  %picked = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %sum
  ret <2 x double> %picked
}
274
275; CHECK-LABEL: test3_add_sd
276; SSE2: addsd   %xmm1, %xmm0
277; AVX: vaddsd   %xmm1, %xmm0, %xmm0
278; CHECK-NOT: movsd
279; CHECK: ret
280
281
; Select-based variant: packed fsub (%a - %b), then a constant-mask select
; that takes %a in lane 1 (mask true) and the difference in lane 0.
define <2 x double> @test3_sub_sd(<2 x double> %a, <2 x double> %b) {
  %diff = fsub <2 x double> %a, %b
  %picked = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %diff
  ret <2 x double> %picked
}
287
288; CHECK-LABEL: test3_sub_sd
289; SSE2: subsd   %xmm1, %xmm0
290; AVX: vsubsd   %xmm1, %xmm0, %xmm0
291; CHECK-NOT: movsd
292; CHECK: ret
293
294
; Select-based variant: packed fmul of %a and %b, then a constant-mask select
; that takes %a in lane 1 (mask true) and the product in lane 0.
define <2 x double> @test3_mul_sd(<2 x double> %a, <2 x double> %b) {
  %prod = fmul <2 x double> %a, %b
  %picked = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %prod
  ret <2 x double> %picked
}
300
301; CHECK-LABEL: test3_mul_sd
302; SSE2: mulsd   %xmm1, %xmm0
303; AVX: vmulsd   %xmm1, %xmm0, %xmm0
304; CHECK-NOT: movsd
305; CHECK: ret
306
307
; Select-based variant: packed fdiv (%a / %b), then a constant-mask select
; that takes %a in lane 1 (mask true) and the quotient in lane 0.
define <2 x double> @test3_div_sd(<2 x double> %a, <2 x double> %b) {
  %quot = fdiv <2 x double> %a, %b
  %picked = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %quot
  ret <2 x double> %picked
}
313
314; CHECK-LABEL: test3_div_sd
315; SSE2: divsd   %xmm1, %xmm0
316; AVX: vdivsd   %xmm1, %xmm0, %xmm0
317; CHECK-NOT: movsd
318; CHECK: ret
319
320
; Select-based, operand-swapped variant: packed fadd of %b and %a, then a
; constant-mask select taking %b in lanes 1-3 and the sum in lane 0.
define <4 x float> @test4_add_ss(<4 x float> %a, <4 x float> %b) {
  %sum = fadd <4 x float> %b, %a
  %picked = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %sum
  ret <4 x float> %picked
}
326
327; CHECK-LABEL: test4_add_ss
328; SSE2: addss   %xmm0, %xmm1
329; AVX: vaddss   %xmm0, %xmm1, %xmm0
330; CHECK-NOT: movss
331; CHECK: ret
332
333
; Select-based, operand-swapped variant: packed fsub (%b - %a), then a
; constant-mask select taking %b in lanes 1-3 and the difference in lane 0.
define <4 x float> @test4_sub_ss(<4 x float> %a, <4 x float> %b) {
  %diff = fsub <4 x float> %b, %a
  %picked = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %diff
  ret <4 x float> %picked
}
339
340; CHECK-LABEL: test4_sub_ss
341; SSE2: subss   %xmm0, %xmm1
342; AVX: vsubss   %xmm0, %xmm1, %xmm0
343; CHECK-NOT: movss
344; CHECK: ret
345
346
; Select-based, operand-swapped variant: packed fmul of %b and %a, then a
; constant-mask select taking %b in lanes 1-3 and the product in lane 0.
define <4 x float> @test4_mul_ss(<4 x float> %a, <4 x float> %b) {
  %prod = fmul <4 x float> %b, %a
  %picked = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %prod
  ret <4 x float> %picked
}
352
353; CHECK-LABEL: test4_mul_ss
354; SSE2: mulss   %xmm0, %xmm1
355; AVX: vmulss   %xmm0, %xmm1, %xmm0
356; CHECK-NOT: movss
357; CHECK: ret
358
359
; Select-based, operand-swapped variant: packed fdiv (%b / %a), then a
; constant-mask select taking %b in lanes 1-3 and the quotient in lane 0.
define <4 x float> @test4_div_ss(<4 x float> %a, <4 x float> %b) {
  %quot = fdiv <4 x float> %b, %a
  %picked = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %quot
  ret <4 x float> %picked
}
365
366; CHECK-LABEL: test4_div_ss
367; SSE2: divss   %xmm0, %xmm1
368; AVX: vdivss   %xmm0, %xmm1, %xmm0
369; CHECK-NOT: movss
370; CHECK: ret
371
372
; Select-based, operand-swapped variant: packed fadd of %b and %a, then a
; constant-mask select taking %b in lane 1 and the sum in lane 0.
define <2 x double> @test4_add_sd(<2 x double> %a, <2 x double> %b) {
  %sum = fadd <2 x double> %b, %a
  %picked = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %sum
  ret <2 x double> %picked
}
378
379; CHECK-LABEL: test4_add_sd
380; SSE2: addsd   %xmm0, %xmm1
381; AVX: vaddsd   %xmm0, %xmm1, %xmm0
382; CHECK-NOT: movsd
383; CHECK: ret
384
385
; Select-based, operand-swapped variant: packed fsub (%b - %a), then a
; constant-mask select taking %b in lane 1 and the difference in lane 0.
define <2 x double> @test4_sub_sd(<2 x double> %a, <2 x double> %b) {
  %diff = fsub <2 x double> %b, %a
  %picked = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %diff
  ret <2 x double> %picked
}
391
392; CHECK-LABEL: test4_sub_sd
393; SSE2: subsd   %xmm0, %xmm1
394; AVX: vsubsd   %xmm0, %xmm1, %xmm0
395; CHECK-NOT: movsd
396; CHECK: ret
397
398
; Select-based, operand-swapped variant: packed fmul of %b and %a, then a
; constant-mask select taking %b in lane 1 and the product in lane 0.
define <2 x double> @test4_mul_sd(<2 x double> %a, <2 x double> %b) {
  %prod = fmul <2 x double> %b, %a
  %picked = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %prod
  ret <2 x double> %picked
}
404
405; CHECK-LABEL: test4_mul_sd
406; SSE2: mulsd   %xmm0, %xmm1
407; AVX: vmulsd   %xmm0, %xmm1, %xmm0
408; CHECK-NOT: movsd
409; CHECK: ret
410
411
; Select-based, operand-swapped variant: packed fdiv (%b / %a), then a
; constant-mask select taking %b in lane 1 and the quotient in lane 0.
define <2 x double> @test4_div_sd(<2 x double> %a, <2 x double> %b) {
  %quot = fdiv <2 x double> %b, %a
  %picked = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %quot
  ret <2 x double> %picked
}
417
418; CHECK-LABEL: test4_div_sd
419; SSE2: divsd   %xmm0, %xmm1
420; AVX: vdivsd   %xmm0, %xmm1, %xmm0
421; CHECK-NOT: movsd
422; CHECK: ret
423
424