; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 | FileCheck %s
; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE


; test1: reinterpret the 64-bit double %A as <2 x i32>, add the constant
; vector <3, 5> lane-wise, and reinterpret the 64-bit sum as a double again.
; The FileCheck directives after the function pin the expected instructions.
define double @test1(double %A) {
  %1 = bitcast double %A to <2 x i32>
  %add = add <2 x i32> %1, <i32 3, i32 5>
  %2 = bitcast <2 x i32> %add to double
  ret double %2
}
; FIXME: Ideally we should be able to fold the entire body of @test1 into a
; single paddd instruction. At the moment we produce the sequence
; pshufd+paddd+pshufd. This is fixed with the widening legalization.
;
; CHECK-LABEL: test1
; CHECK-NOT: movsd
; CHECK: pshufd
; CHECK-NEXT: paddd
; CHECK-NEXT: pshufd
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test1
; CHECK-WIDE-NOT: movsd
; CHECK-WIDE: paddd
; CHECK-WIDE-NEXT: ret


; test2: reinterpret both 64-bit double arguments as <2 x i32>, add them
; lane-wise, and reinterpret the 64-bit sum as a double again.
; The FileCheck directives after the function pin the expected instructions.
define double @test2(double %A, double %B) {
  %1 = bitcast double %A to <2 x i32>
  %2 = bitcast double %B to <2 x i32>
  %add = add <2 x i32> %1, %2
  %3 = bitcast <2 x i32> %add to double
  ret double %3
}
; CHECK-LABEL: test2
; CHECK-NOT: movsd
; CHECK: paddd
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test2
; CHECK-WIDE-NOT: movsd
; CHECK-WIDE: paddd
; CHECK-WIDE-NEXT: ret


; test3: reinterpret the i64 %A as <2 x float>, add the constant vector
; <3.0, 5.0> lane-wise, and reinterpret the 64-bit sum as an i64 again.
; The FileCheck directives after the function pin the expected instructions.
define i64 @test3(i64 %A) {
  %1 = bitcast i64 %A to <2 x float>
  %add = fadd <2 x float> %1, <float 3.0, float 5.0>
  %2 = bitcast <2 x float> %add to i64
  ret i64 %2
}
; CHECK-LABEL: test3
; CHECK-NOT: pshufd
; CHECK: addps
; CHECK-NOT: pshufd
; CHECK: ret
;
; CHECK-WIDE-LABEL: test3
; CHECK-WIDE-NOT: pshufd
; CHECK-WIDE: addps
; CHECK-WIDE-NOT: pshufd
; CHECK-WIDE: ret


; test4: reinterpret the i64 %A as <2 x i32>, add the constant vector <3, 5>
; lane-wise, and reinterpret the 64-bit sum as an i64 again.
; The FileCheck directives after the function pin the expected instructions.
define i64 @test4(i64 %A) {
  %1 = bitcast i64 %A to <2 x i32>
  %add = add <2 x i32> %1, <i32 3, i32 5>
  %2 = bitcast <2 x i32> %add to i64
  ret i64 %2
}
; FIXME: At the moment we still produce the sequence pshufd+paddd+pshufd.
; Ideally, we should fold that sequence into a single paddd. This is fixed with
; the widening legalization.
;
; CHECK-LABEL: test4
; CHECK: pshufd
; CHECK-NEXT: paddd
; CHECK-NEXT: pshufd
; CHECK: ret
;
; CHECK-WIDE-LABEL: test4
; CHECK-WIDE: movd %{{rdi|rcx}},
; CHECK-WIDE-NEXT: paddd
; CHECK-WIDE-NEXT: movd {{.*}}, %rax
; CHECK-WIDE: ret


; test5: reinterpret the 64-bit double %A as <2 x float>, add the constant
; vector <3.0, 5.0> lane-wise, and reinterpret the sum as a double again.
; The FileCheck directives after the function pin the expected instructions.
define double @test5(double %A) {
  %1 = bitcast double %A to <2 x float>
  %add = fadd <2 x float> %1, <float 3.0, float 5.0>
  %2 = bitcast <2 x float> %add to double
  ret double %2
}
; CHECK-LABEL: test5
; CHECK: addps
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test5
; CHECK-WIDE: addps
; CHECK-WIDE-NEXT: ret


; test6: reinterpret the 64-bit double %A as <4 x i16>, add the constant
; vector <3, 4, 5, 6> lane-wise, and reinterpret the sum as a double again.
; The FileCheck directives after the function pin the expected instructions.
define double @test6(double %A) {
  %1 = bitcast double %A to <4 x i16>
  %add = add <4 x i16> %1, <i16 3, i16 4, i16 5, i16 6>
  %2 = bitcast <4 x i16> %add to double
  ret double %2
}
; FIXME: Ideally we should be able to fold the entire body of @test6 into a
; single paddw instruction. This is fixed with the widening legalization.
;
; CHECK-LABEL: test6
; CHECK-NOT: movsd
; CHECK: punpcklwd
; CHECK-NEXT: paddw
; CHECK-NEXT: pshufb
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test6
; CHECK-WIDE-NOT: mov
; CHECK-WIDE-NOT: punpcklwd
; CHECK-WIDE: paddw
; CHECK-WIDE-NEXT: ret


; test7: reinterpret both 64-bit double arguments as <4 x i16>, add them
; lane-wise, and reinterpret the 64-bit sum as a double again.
; The FileCheck directives after the function pin the expected instructions.
define double @test7(double %A, double %B) {
  %1 = bitcast double %A to <4 x i16>
  %2 = bitcast double %B to <4 x i16>
  %add = add <4 x i16> %1, %2
  %3 = bitcast <4 x i16> %add to double
  ret double %3
}
; CHECK-LABEL: test7
; CHECK-NOT: movsd
; CHECK-NOT: punpcklwd
; CHECK: paddw
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test7
; CHECK-WIDE-NOT: movsd
; CHECK-WIDE-NOT: punpcklwd
; CHECK-WIDE: paddw
; CHECK-WIDE-NEXT: ret


; test8: reinterpret the 64-bit double %A as <8 x i8>, add the constant
; vector <3, 4, 5, 6, 7, 8, 9, 10> lane-wise, and reinterpret the sum as a
; double again.
; The FileCheck directives after the function pin the expected instructions.
define double @test8(double %A) {
  %1 = bitcast double %A to <8 x i8>
  %add = add <8 x i8> %1, <i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10>
  %2 = bitcast <8 x i8> %add to double
  ret double %2
}
; FIXME: Ideally we should be able to fold the entire body of @test8 into a
; single paddb instruction. At the moment we produce the sequence
; punpcklbw+paddb+pshufb. This is fixed with the widening legalization.
;
; CHECK-LABEL: test8
; CHECK-NOT: movsd
; CHECK: punpcklbw
; CHECK-NEXT: paddb
; CHECK-NEXT: pshufb
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test8
; CHECK-WIDE-NOT: movsd
; CHECK-WIDE-NOT: punpcklbw
; CHECK-WIDE: paddb
; CHECK-WIDE-NEXT: ret


; test9: reinterpret both 64-bit double arguments as <8 x i8>, add them
; lane-wise, and reinterpret the 64-bit sum as a double again.
; The FileCheck directives after the function pin the expected instructions.
define double @test9(double %A, double %B) {
  %1 = bitcast double %A to <8 x i8>
  %2 = bitcast double %B to <8 x i8>
  %add = add <8 x i8> %1, %2
  %3 = bitcast <8 x i8> %add to double
  ret double %3
}
; CHECK-LABEL: test9
; CHECK-NOT: movsd
; CHECK-NOT: punpcklbw
; CHECK: paddb
; CHECK-NEXT: ret
;
; CHECK-WIDE-LABEL: test9
; CHECK-WIDE-NOT: movsd
; CHECK-WIDE-NOT: punpcklbw
; CHECK-WIDE: paddb
; CHECK-WIDE-NEXT: ret

