; Cost-model checks for vector sitofp conversions on x86-64 (core2 and knl).
; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze < %s | FileCheck --check-prefix=AVX512F %s

; Signed <2 x i8> -> <2 x double> conversion; the core2 run expects a reported cost of 20.
define <2 x double> @sitofpv2i8v2double(<2 x i8> %a) {
  ; SSE2: sitofpv2i8v2double
  ; SSE2: cost of 20 {{.*}} sitofp
  %1 = sitofp <2 x i8> %a to <2 x double>
  ret <2 x double> %1
}

; Signed <4 x i8> -> <4 x double> conversion; the core2 run expects a reported cost of 40.
define <4 x double> @sitofpv4i8v4double(<4 x i8> %a) {
  ; SSE2: sitofpv4i8v4double
  ; SSE2: cost of 40 {{.*}} sitofp
  %1 = sitofp <4 x i8> %a to <4 x double>
  ret <4 x double> %1
}

; Signed <8 x i8> -> <8 x double> conversion; the core2 run expects a reported cost of 80.
define <8 x double> @sitofpv8i8v8double(<8 x i8> %a) {
  ; SSE2: sitofpv8i8v8double
  ; SSE2: cost of 80 {{.*}} sitofp
  %1 = sitofp <8 x i8> %a to <8 x double>
  ret <8 x double> %1
}

; Signed <16 x i8> -> <16 x double> conversion; the core2 run expects a reported cost of 160.
define <16 x double> @sitofpv16i8v16double(<16 x i8> %a) {
  ; SSE2: sitofpv16i8v16double
  ; SSE2: cost of 160 {{.*}} sitofp
  %1 = sitofp <16 x i8> %a to <16 x double>
  ret <16 x double> %1
}

; Signed <32 x i8> -> <32 x double> conversion; the core2 run expects a reported cost of 320.
define <32 x double> @sitofpv32i8v32double(<32 x i8> %a) {
  ; SSE2: sitofpv32i8v32double
  ; SSE2: cost of 320 {{.*}} sitofp
  %1 = sitofp <32 x i8> %a to <32 x double>
  ret <32 x double> %1
}

; Signed <2 x i16> -> <2 x double> conversion; the core2 run expects a reported cost of 20.
define <2 x double> @sitofpv2i16v2double(<2 x i16> %a) {
  ; SSE2: sitofpv2i16v2double
  ; SSE2: cost of 20 {{.*}} sitofp
  %1 = sitofp <2 x i16> %a to <2 x double>
  ret <2 x double> %1
}

; Signed <4 x i16> -> <4 x double> conversion; the core2 run expects a reported cost of 40.
define <4 x double> @sitofpv4i16v4double(<4 x i16> %a) {
  ; SSE2: sitofpv4i16v4double
  ; SSE2: cost of 40 {{.*}} sitofp
  %1 = sitofp <4 x i16> %a to <4 x double>
  ret <4 x double> %1
}

; Signed <8 x i16> -> <8 x double> conversion; the core2 run expects a reported cost of 80.
define <8 x double> @sitofpv8i16v8double(<8 x i16> %a) {
  ; SSE2: sitofpv8i16v8double
  ; SSE2: cost of 80 {{.*}} sitofp
  %1 = sitofp <8 x i16> %a to <8 x double>
  ret <8 x double> %1
}

; Signed <16 x i16> -> <16 x double> conversion; the core2 run expects a reported cost of 160.
define <16 x double> @sitofpv16i16v16double(<16 x i16> %a) {
  ; SSE2: sitofpv16i16v16double
  ; SSE2: cost of 160 {{.*}} sitofp
  %1 = sitofp <16 x i16> %a to <16 x double>
  ret <16 x double> %1
}

; Signed <32 x i16> -> <32 x double> conversion; the core2 run expects a reported cost of 320.
define <32 x double> @sitofpv32i16v32double(<32 x i16> %a) {
  ; SSE2: sitofpv32i16v32double
  ; SSE2: cost of 320 {{.*}} sitofp
  %1 = sitofp <32 x i16> %a to <32 x double>
  ret <32 x double> %1
}

; Signed <2 x i32> -> <2 x double> conversion; the core2 run expects a reported cost of 20.
define <2 x double> @sitofpv2i32v2double(<2 x i32> %a) {
  ; SSE2: sitofpv2i32v2double
  ; SSE2: cost of 20 {{.*}} sitofp
  %1 = sitofp <2 x i32> %a to <2 x double>
  ret <2 x double> %1
}

; Signed <4 x i32> -> <4 x double> conversion; the core2 run expects a reported cost of 40.
define <4 x double> @sitofpv4i32v4double(<4 x i32> %a) {
  ; SSE2: sitofpv4i32v4double
  ; SSE2: cost of 40 {{.*}} sitofp
  %1 = sitofp <4 x i32> %a to <4 x double>
  ret <4 x double> %1
}

; Signed <8 x i32> -> <8 x double> conversion; the core2 run expects a reported cost of 80.
define <8 x double> @sitofpv8i32v8double(<8 x i32> %a) {
  ; SSE2: sitofpv8i32v8double
  ; SSE2: cost of 80 {{.*}} sitofp
  %1 = sitofp <8 x i32> %a to <8 x double>
  ret <8 x double> %1
}

; Signed <16 x i32> -> <16 x double> conversion; the core2 run expects a reported cost of 160.
define <16 x double> @sitofpv16i32v16double(<16 x i32> %a) {
  ; SSE2: sitofpv16i32v16double
  ; SSE2: cost of 160 {{.*}} sitofp
  %1 = sitofp <16 x i32> %a to <16 x double>
  ret <16 x double> %1
}

; Signed <32 x i32> -> <32 x double> conversion; the core2 run expects a reported cost of 320.
define <32 x double> @sitofpv32i32v32double(<32 x i32> %a) {
  ; SSE2: sitofpv32i32v32double
  ; SSE2: cost of 320 {{.*}} sitofp
  %1 = sitofp <32 x i32> %a to <32 x double>
  ret <32 x double> %1
}

; Signed <2 x i64> -> <2 x double> conversion; the core2 run expects a reported cost of 20.
define <2 x double> @sitofpv2i64v2double(<2 x i64> %a) {
  ; SSE2: sitofpv2i64v2double
  ; SSE2: cost of 20 {{.*}} sitofp
  %1 = sitofp <2 x i64> %a to <2 x double>
  ret <2 x double> %1
}

; Signed <4 x i64> -> <4 x double> conversion; the core2 run expects a reported cost of 40.
define <4 x double> @sitofpv4i64v4double(<4 x i64> %a) {
  ; SSE2: sitofpv4i64v4double
  ; SSE2: cost of 40 {{.*}} sitofp
  %1 = sitofp <4 x i64> %a to <4 x double>
  ret <4 x double> %1
}

; Signed <8 x i64> -> <8 x double> conversion; the core2 run expects a reported cost of 80.
; The check comments are placed ahead of the instruction, matching the other functions in this file.
define <8 x double> @sitofpv8i64v8double(<8 x i64> %a) {
  ; SSE2: sitofpv8i64v8double
  ; SSE2: cost of 80 {{.*}} sitofp
  %1 = sitofp <8 x i64> %a to <8 x double>
  ret <8 x double> %1
}

; Signed <16 x i64> -> <16 x double> conversion; the core2 run expects a reported cost of 160.
define <16 x double> @sitofpv16i64v16double(<16 x i64> %a) {
  ; SSE2: sitofpv16i64v16double
  ; SSE2: cost of 160 {{.*}} sitofp
  %1 = sitofp <16 x i64> %a to <16 x double>
  ret <16 x double> %1
}

; Signed <32 x i64> -> <32 x double> conversion; the core2 run expects a reported cost of 320.
define <32 x double> @sitofpv32i64v32double(<32 x i64> %a) {
  ; SSE2: sitofpv32i64v32double
  ; SSE2: cost of 320 {{.*}} sitofp
  %1 = sitofp <32 x i64> %a to <32 x double>
  ret <32 x double> %1
}

; Signed <2 x i8> -> <2 x float> conversion; the core2 run expects a reported cost of 15.
define <2 x float> @sitofpv2i8v2float(<2 x i8> %a) {
  ; SSE2: sitofpv2i8v2float
  ; SSE2: cost of 15 {{.*}} sitofp
  %1 = sitofp <2 x i8> %a to <2 x float>
  ret <2 x float> %1
}

; Signed <4 x i8> -> <4 x float> conversion; the core2 run expects a reported cost of 15.
define <4 x float> @sitofpv4i8v4float(<4 x i8> %a) {
  ; SSE2: sitofpv4i8v4float
  ; SSE2: cost of 15 {{.*}} sitofp
  %1 = sitofp <4 x i8> %a to <4 x float>
  ret <4 x float> %1
}

; Signed <8 x i8> -> <8 x float> conversion; the core2 run expects a reported cost of 15.
define <8 x float> @sitofpv8i8v8float(<8 x i8> %a) {
  ; SSE2: sitofpv8i8v8float
  ; SSE2: cost of 15 {{.*}} sitofp
  %1 = sitofp <8 x i8> %a to <8 x float>
  ret <8 x float> %1
}

; Signed <16 x i8> -> <16 x float> conversion; the core2 run expects a reported cost of 8.
define <16 x float> @sitofpv16i8v16float(<16 x i8> %a) {
  ; SSE2: sitofpv16i8v16float
  ; SSE2: cost of 8 {{.*}} sitofp
  %1 = sitofp <16 x i8> %a to <16 x float>
  ret <16 x float> %1
}

; Signed <32 x i8> -> <32 x float> conversion; the core2 run expects a reported cost of 16.
define <32 x float> @sitofpv32i8v32float(<32 x i8> %a) {
  ; SSE2: sitofpv32i8v32float
  ; SSE2: cost of 16 {{.*}} sitofp
  %1 = sitofp <32 x i8> %a to <32 x float>
  ret <32 x float> %1
}

; Signed <2 x i16> -> <2 x float> conversion; the core2 run expects a reported cost of 15.
define <2 x float> @sitofpv2i16v2float(<2 x i16> %a) {
  ; SSE2: sitofpv2i16v2float
  ; SSE2: cost of 15 {{.*}} sitofp
  %1 = sitofp <2 x i16> %a to <2 x float>
  ret <2 x float> %1
}

; Signed <4 x i16> -> <4 x float> conversion; the core2 run expects a reported cost of 15.
define <4 x float> @sitofpv4i16v4float(<4 x i16> %a) {
  ; SSE2: sitofpv4i16v4float
  ; SSE2: cost of 15 {{.*}} sitofp
  %1 = sitofp <4 x i16> %a to <4 x float>
  ret <4 x float> %1
}

; Signed <8 x i16> -> <8 x float> conversion; the core2 run expects a reported cost of 15.
define <8 x float> @sitofpv8i16v8float(<8 x i16> %a) {
  ; SSE2: sitofpv8i16v8float
  ; SSE2: cost of 15 {{.*}} sitofp
  %1 = sitofp <8 x i16> %a to <8 x float>
  ret <8 x float> %1
}

; Signed <16 x i16> -> <16 x float> conversion; the core2 run expects a reported cost of 30.
define <16 x float> @sitofpv16i16v16float(<16 x i16> %a) {
  ; SSE2: sitofpv16i16v16float
  ; SSE2: cost of 30 {{.*}} sitofp
  %1 = sitofp <16 x i16> %a to <16 x float>
  ret <16 x float> %1
}

; Signed <32 x i16> -> <32 x float> conversion; the core2 run expects a reported cost of 60.
define <32 x float> @sitofpv32i16v32float(<32 x i16> %a) {
  ; SSE2: sitofpv32i16v32float
  ; SSE2: cost of 60 {{.*}} sitofp
  %1 = sitofp <32 x i16> %a to <32 x float>
  ret <32 x float> %1
}

; Signed <2 x i32> -> <2 x float> conversion; the core2 run expects a reported cost of 15.
define <2 x float> @sitofpv2i32v2float(<2 x i32> %a) {
  ; SSE2: sitofpv2i32v2float
  ; SSE2: cost of 15 {{.*}} sitofp
  %1 = sitofp <2 x i32> %a to <2 x float>
  ret <2 x float> %1
}

; Signed <4 x i32> -> <4 x float> conversion; the core2 run expects a reported cost of 15.
define <4 x float> @sitofpv4i32v4float(<4 x i32> %a) {
  ; SSE2: sitofpv4i32v4float
  ; SSE2: cost of 15 {{.*}} sitofp
  %1 = sitofp <4 x i32> %a to <4 x float>
  ret <4 x float> %1
}

; Signed <8 x i32> -> <8 x float> conversion; the core2 run expects a reported cost of 30.
define <8 x float> @sitofpv8i32v8float(<8 x i32> %a) {
  ; SSE2: sitofpv8i32v8float
  ; SSE2: cost of 30 {{.*}} sitofp
  %1 = sitofp <8 x i32> %a to <8 x float>
  ret <8 x float> %1
}

; Signed <16 x i32> -> <16 x float> conversion; the core2 run expects a reported cost of 60.
define <16 x float> @sitofpv16i32v16float(<16 x i32> %a) {
  ; SSE2: sitofpv16i32v16float
  ; SSE2: cost of 60 {{.*}} sitofp
  %1 = sitofp <16 x i32> %a to <16 x float>
  ret <16 x float> %1
}

; Signed <32 x i32> -> <32 x float> conversion; the core2 run expects a reported cost of 120.
define <32 x float> @sitofpv32i32v32float(<32 x i32> %a) {
  ; SSE2: sitofpv32i32v32float
  ; SSE2: cost of 120 {{.*}} sitofp
  %1 = sitofp <32 x i32> %a to <32 x float>
  ret <32 x float> %1
}

; Signed <2 x i64> -> <2 x float> conversion; the core2 run expects a reported cost of 15.
define <2 x float> @sitofpv2i64v2float(<2 x i64> %a) {
  ; SSE2: sitofpv2i64v2float
  ; SSE2: cost of 15 {{.*}} sitofp
  %1 = sitofp <2 x i64> %a to <2 x float>
  ret <2 x float> %1
}

; Signed <4 x i64> -> <4 x float> conversion; the core2 run expects a reported cost of 30.
define <4 x float> @sitofpv4i64v4float(<4 x i64> %a) {
  ; SSE2: sitofpv4i64v4float
  ; SSE2: cost of 30 {{.*}} sitofp
  %1 = sitofp <4 x i64> %a to <4 x float>
  ret <4 x float> %1
}

; Signed <8 x i64> -> <8 x float> conversion; the core2 run expects a reported cost of 60.
define <8 x float> @sitofpv8i64v8float(<8 x i64> %a) {
  ; SSE2: sitofpv8i64v8float
  ; SSE2: cost of 60 {{.*}} sitofp
  %1 = sitofp <8 x i64> %a to <8 x float>
  ret <8 x float> %1
}

; Signed <16 x i64> -> <16 x float> conversion; the core2 run expects a reported cost of 120.
define <16 x float> @sitofpv16i64v16float(<16 x i64> %a) {
  ; SSE2: sitofpv16i64v16float
  ; SSE2: cost of 120 {{.*}} sitofp
  %1 = sitofp <16 x i64> %a to <16 x float>
  ret <16 x float> %1
}

; Signed <32 x i64> -> <32 x float> conversion; the core2 run expects a reported cost of 240.
define <32 x float> @sitofpv32i64v32float(<32 x i64> %a) {
  ; SSE2: sitofpv32i64v32float
  ; SSE2: cost of 240 {{.*}} sitofp
  %1 = sitofp <32 x i64> %a to <32 x float>
  ret <32 x float> %1
}

; Signed <16 x i8> -> <16 x float> conversion; the knl run expects a reported cost of 2.
; AVX512F-LABEL: sitofp_16i8_float
; AVX512F: cost of 2 {{.*}} sitofp
define <16 x float> @sitofp_16i8_float(<16 x i8> %a) {
  %1 = sitofp <16 x i8> %a to <16 x float>
  ret <16 x float> %1
}

; Signed <16 x i16> -> <16 x float> conversion; the knl run expects a reported cost of 2.
define <16 x float> @sitofp_16i16_float(<16 x i16> %a) {
  ; AVX512F-LABEL: sitofp_16i16_float
  ; AVX512F: cost of 2 {{.*}} sitofp
  %1 = sitofp <16 x i16> %a to <16 x float>
  ret <16 x float> %1
}

; Signed <8 x i8> -> <8 x double> conversion; the knl run expects a reported cost of 2.
; AVX512F-LABEL: sitofp_8i8_double
; AVX512F: cost of 2 {{.*}} sitofp
define <8 x double> @sitofp_8i8_double(<8 x i8> %a) {
  %1 = sitofp <8 x i8> %a to <8 x double>
  ret <8 x double> %1
}

; Signed <8 x i16> -> <8 x double> conversion; the knl run expects a reported cost of 2.
; AVX512F-LABEL: sitofp_8i16_double
; AVX512F: cost of 2 {{.*}} sitofp
define <8 x double> @sitofp_8i16_double(<8 x i16> %a) {
  %1 = sitofp <8 x i16> %a to <8 x double>
  ret <8 x double> %1
}

; Signed <8 x i1> -> <8 x double> conversion; the knl run expects a reported cost of 4.
; The <8 x i1> operand is produced by an `fcmp ogt` of the argument against zero.
; AVX512F-LABEL: sitofp_8i1_double
; AVX512F: cost of 4 {{.*}} sitofp
define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
  %cmpres = fcmp ogt <8 x double> %a, zeroinitializer
  %1 = sitofp <8 x i1> %cmpres to <8 x double>
  ret <8 x double> %1
}

; Signed <16 x i1> -> <16 x float> conversion; the knl run expects a reported cost of 3.
; The <16 x i1> operand is produced by an `fcmp ogt` of the argument against zero.
; AVX512F-LABEL: sitofp_16i1_float
; AVX512F: cost of 3 {{.*}} sitofp
define <16 x float> @sitofp_16i1_float(<16 x float> %a) {
  %cmpres = fcmp ogt <16 x float> %a, zeroinitializer
  %1 = sitofp <16 x i1> %cmpres to <16 x float>
  ret <16 x float> %1
}
