; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s

; fadd on <4 x float>: expects one ld.w per operand, a single fadd.w, and the
; result written back with st.w.
define void @add_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: add_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fadd <4 x float> %1, %2
  ; CHECK-DAG: fadd.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v4f32
}

; fadd on <2 x double>: expects one ld.d per operand, a single fadd.d, and the
; result written back with st.d.
define void @add_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: add_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fadd <2 x double> %1, %2
  ; CHECK-DAG: fadd.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v2f64
}

; fsub on <4 x float>: expects one ld.w per operand, a single fsub.w with the
; operands in source order, and the result written back with st.w.
define void @sub_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: sub_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fsub <4 x float> %1, %2
  ; CHECK-DAG: fsub.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v4f32
}

; fsub on <2 x double>: expects one ld.d per operand, a single fsub.d with the
; operands in source order, and the result written back with st.d.
define void @sub_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: sub_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fsub <2 x double> %1, %2
  ; CHECK-DAG: fsub.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v2f64
}

; fmul on <4 x float>: expects one ld.w per operand, a single fmul.w, and the
; result written back with st.w.
define void @mul_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: mul_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fmul <4 x float> %1, %2
  ; CHECK-DAG: fmul.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v4f32
}

; fmul on <2 x double>: expects one ld.d per operand, a single fmul.d, and the
; result written back with st.d.
define void @mul_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: mul_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fmul <2 x double> %1, %2
  ; CHECK-DAG: fmul.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v2f64
}

; llvm.fma on <4 x float>: expects the three operands loaded with ld.w, a single
; fmadd.w that accumulates into one of the loaded registers, and st.w of that
; same register.
;
; NOTE(review): llvm.fma(%1, %2, %3) is defined as %1 * %2 + %3. If fmadd.w
; computes wd = wd + ws * wt (as the MSA manual appears to specify), the
; expected `fmadd.w [[R1]], [[R2]], [[R3]]` below describes %1 + %2 * %3
; instead. Confirm the operand order against the MSA specification and the
; backend pattern before relying on this expectation.
define void @fma_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                       <4 x float>* %c) nounwind {
  ; CHECK: fma_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x float>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = tail call <4 x float> @llvm.fma.v4f32 (<4 x float> %1, <4 x float> %2,
                                              <4 x float> %3)
  ; CHECK-DAG: fmadd.w [[R1]], [[R2]], [[R3]]
  store <4 x float> %4, <4 x float>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size fma_v4f32
}

; llvm.fma on <2 x double>: expects the three operands loaded with ld.d, a
; single fmadd.d that accumulates into one of the loaded registers, and st.d of
; that same register.
;
; NOTE(review): llvm.fma(%1, %2, %3) is defined as %1 * %2 + %3. If fmadd.d
; computes wd = wd + ws * wt (as the MSA manual appears to specify), the
; expected `fmadd.d [[R1]], [[R2]], [[R3]]` below describes %1 + %2 * %3
; instead. Confirm the operand order against the MSA specification and the
; backend pattern before relying on this expectation.
define void @fma_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                       <2 x double>* %c) nounwind {
  ; CHECK: fma_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x double>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = tail call <2 x double> @llvm.fma.v2f64 (<2 x double> %1, <2 x double> %2,
                                               <2 x double> %3)
  ; CHECK-DAG: fmadd.d [[R1]], [[R2]], [[R3]]
  store <2 x double> %4, <2 x double>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size fma_v2f64
}

; %1 - (%2 * %3) on <4 x float>: the separate fmul/fsub are expected to fuse
; into a single fmsub.w whose destination is the register holding the minuend
; (%1), which is then stored with st.w.
define void @fmsub_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                       <4 x float>* %c) nounwind {
  ; CHECK: fmsub_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x float>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = fmul <4 x float> %2, %3
  %5 = fsub <4 x float> %1, %4
  ; CHECK-DAG: fmsub.w [[R1]], [[R2]], [[R3]]
  store <4 x float> %5, <4 x float>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size fmsub_v4f32
}

; %1 - (%2 * %3) on <2 x double>: the separate fmul/fsub are expected to fuse
; into a single fmsub.d whose destination is the register holding the minuend
; (%1), which is then stored with st.d.
define void @fmsub_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                       <2 x double>* %c) nounwind {
  ; CHECK: fmsub_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x double>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = fmul <2 x double> %2, %3
  %5 = fsub <2 x double> %1, %4
  ; CHECK-DAG: fmsub.d [[R1]], [[R2]], [[R3]]
  store <2 x double> %5, <2 x double>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size fmsub_v2f64
}

; fdiv on <4 x float>: expects one ld.w per operand, a single fdiv.w with the
; operands in source order, and the result written back with st.w.
define void @fdiv_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: fdiv_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fdiv <4 x float> %1, %2
  ; CHECK-DAG: fdiv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fdiv_v4f32
}

; fdiv on <2 x double>: expects one ld.d per operand, a single fdiv.d with the
; operands in source order, and the result written back with st.d.
define void @fdiv_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: fdiv_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fdiv <2 x double> %1, %2
  ; CHECK-DAG: fdiv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fdiv_v2f64
}

; llvm.fabs on <4 x float>: expected to be emitted as fmax_a.w with the loaded
; value used for both source operands.
define void @fabs_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fabs_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.fabs.v4f32 (<4 x float> %1)
  ; CHECK-DAG: fmax_a.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fabs_v4f32
}

; llvm.fabs on <2 x double>: expected to be emitted as fmax_a.d with the loaded
; value used for both source operands.
define void @fabs_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fabs_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.fabs.v2f64 (<2 x double> %1)
  ; CHECK-DAG: fmax_a.d [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fabs_v2f64
}

; llvm.exp2 on <4 x float>: expected as fexp2.w whose multiplicand is a splat
; built by ldi.w 1 followed by ffint_u.w (integer 1 converted to float).
;
; Each instruction result gets its own FileCheck variable so that fexp2.w is
; tied to the ffint_u.w output rather than to the raw integer splat from ldi.w;
; the expectation therefore does not depend on the register allocator reusing
; one register for both.
define void @fexp2_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fexp2_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
  ; CHECK-DAG: ldi.w [[R2:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.w [[R3:\$w[0-9]+]], [[R2]]
  ; CHECK-DAG: fexp2.w [[R4:\$w[0-9]+]], [[R3]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32
}

; llvm.exp2 on <2 x double>: expected as fexp2.d whose multiplicand is a splat
; built by ldi.d 1 followed by ffint_u.d (integer 1 converted to double).
;
; Each instruction result gets its own FileCheck variable so that fexp2.d is
; tied to the ffint_u.d output rather than to the raw integer splat from ldi.d;
; the expectation therefore does not depend on the register allocator reusing
; one register for both.
define void @fexp2_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fexp2_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
  ; CHECK-DAG: ldi.d [[R2:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R2]]
  ; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64
}

; 2.0 * exp2(x) on <4 x float>: the constant multiply is expected to fold into
; fexp2.w as its multiplicand. The 2.0 splat is expected to be built in a GPR
; with lui 16384 (0x4000 in the upper half-word, i.e. 0x40000000 = 2.0f) and
; broadcast with fill.w.
define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fexp2_v4f32_2:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
  %3 = fmul <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, %2
  ; CHECK-DAG: lui [[R3:\$[0-9]+]], 16384
  ; CHECK-DAG: fill.w [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32_2
}

; 2.0 * exp2(x) on <2 x double>: the constant multiply is expected to fold into
; fexp2.d as its multiplicand. The 2.0 splat is expected to come from memory:
; two .8byte entries of 4611686018427387904 (0x4000000000000000, the bit
; pattern of double 2.0) emitted ahead of the function label, addressed via an
; addiu with a %lo relocation and loaded with ld.d.
define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK:      .8byte 4611686018427387904
  ; CHECK-NEXT: .8byte 4611686018427387904
  ; CHECK: fexp2_v2f64_2:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
  %3 = fmul <2 x double> <double 2.0, double 2.0>, %2
  ; CHECK-DAG: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[G_PTR]])
  ; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64_2
}

; llvm.sqrt on <4 x float>: expects ld.w, a single fsqrt.w, and st.w of the
; result.
define void @fsqrt_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fsqrt_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %1)
  ; CHECK-DAG: fsqrt.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v4f32
}

; llvm.sqrt on <2 x double>: expects ld.d, a single fsqrt.d, and st.d of the
; result.
define void @fsqrt_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fsqrt_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %1)
  ; CHECK-DAG: fsqrt.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v2f64
}

; uitofp <4 x i32> to <4 x float>: expects ld.w, a single ffint_u.w, and st.w
; of the result.
define void @ffint_u_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ffint_u_v4f32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = uitofp <4 x i32> %1 to <4 x float>
  ; CHECK-DAG: ffint_u.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v4f32
}

; uitofp <2 x i64> to <2 x double>: expects ld.d, a single ffint_u.d, and st.d
; of the result.
define void @ffint_u_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ffint_u_v2f64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = uitofp <2 x i64> %1 to <2 x double>
  ; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v2f64
}

; sitofp <4 x i32> to <4 x float>: expects ld.w, a single ffint_s.w, and st.w
; of the result.
define void @ffint_s_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ffint_s_v4f32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = sitofp <4 x i32> %1 to <4 x float>
  ; CHECK-DAG: ffint_s.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v4f32
}

; sitofp <2 x i64> to <2 x double>: expects ld.d, a single ffint_s.d, and st.d
; of the result.
define void @ffint_s_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ffint_s_v2f64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = sitofp <2 x i64> %1 to <2 x double>
  ; CHECK-DAG: ffint_s.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v2f64
}

; fptoui <4 x float> to <4 x i32>: expects ld.w, a single ftrunc_u.w, and st.w
; of the result.
define void @ftrunc_u_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK: ftrunc_u_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = fptoui <4 x float> %1 to <4 x i32>
  ; CHECK-DAG: ftrunc_u.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v4f32
}

; fptoui <2 x double> to <2 x i64>: expects ld.d, a single ftrunc_u.d, and st.d
; of the result.
define void @ftrunc_u_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK: ftrunc_u_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = fptoui <2 x double> %1 to <2 x i64>
  ; CHECK-DAG: ftrunc_u.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v2f64
}

; fptosi <4 x float> to <4 x i32>: expects ld.w, a single ftrunc_s.w, and st.w
; of the result.
define void @ftrunc_s_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK: ftrunc_s_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = fptosi <4 x float> %1 to <4 x i32>
  ; CHECK-DAG: ftrunc_s.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v4f32
}

; fptosi <2 x double> to <2 x i64>: expects ld.d, a single ftrunc_s.d, and st.d
; of the result.
define void @ftrunc_s_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK: ftrunc_s_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = fptosi <2 x double> %1 to <2 x i64>
  ; CHECK-DAG: ftrunc_s.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v2f64
}

; Declarations of the vector intrinsics called by the fabs, fexp2, fma and
; fsqrt tests in this file.
declare <4 x float>  @llvm.fabs.v4f32(<4 x float>  %Val)
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %Val)
declare <4 x float>  @llvm.exp2.v4f32(<4 x float>  %val)
declare <2 x double> @llvm.exp2.v2f64(<2 x double> %val)
declare <4 x float>  @llvm.fma.v4f32(<4 x float>  %a, <4 x float>  %b,
                                     <4 x float>  %c)
declare <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b,
                                     <2 x double> %c)
declare <4 x float>  @llvm.sqrt.v4f32(<4 x float>  %Val)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %Val)