• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
3; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
4
; llvm.ceil on <4 x float>. With +mve,+fullfp16 (prefix CHECK-MVE) the expected
; code is four scalar vrintp.f32 ops writing s4-s7, then a copy of q1 into q0.
; With +mve.fp (prefix CHECK-MVEFP) a single vector vrintp.f32 q0, q0 is expected.
5define arm_aapcs_vfpcc <4 x float> @fceil_float32_t(<4 x float> %src) {
6; CHECK-MVE-LABEL: fceil_float32_t:
7; CHECK-MVE:       @ %bb.0: @ %entry
8; CHECK-MVE-NEXT:    vrintp.f32 s7, s3
9; CHECK-MVE-NEXT:    vrintp.f32 s6, s2
10; CHECK-MVE-NEXT:    vrintp.f32 s5, s1
11; CHECK-MVE-NEXT:    vrintp.f32 s4, s0
12; CHECK-MVE-NEXT:    vmov q0, q1
13; CHECK-MVE-NEXT:    bx lr
14;
15; CHECK-MVEFP-LABEL: fceil_float32_t:
16; CHECK-MVEFP:       @ %bb.0: @ %entry
17; CHECK-MVEFP-NEXT:    vrintp.f32 q0, q0
18; CHECK-MVEFP-NEXT:    bx lr
19entry:
20  %0 = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> %src)
21  ret <4 x float> %0
22}
23
; llvm.ceil on <8 x half>. With +mve,+fullfp16 (prefix CHECK-MVE) the expected
; code rounds each of the eight lanes with a scalar vrintp.f16 (vmovx.f16
; feeds the odd-numbered lanes) and rebuilds the vector in q1 through vmov.16
; lane inserts. With +mve.fp (prefix CHECK-MVEFP) a single vector
; vrintp.f16 q0, q0 is expected.
24define arm_aapcs_vfpcc <8 x half> @fceil_float16_t(<8 x half> %src) {
25; CHECK-MVE-LABEL: fceil_float16_t:
26; CHECK-MVE:       @ %bb.0: @ %entry
27; CHECK-MVE-NEXT:    vmovx.f16 s4, s0
28; CHECK-MVE-NEXT:    vrintp.f16 s8, s1
29; CHECK-MVE-NEXT:    vrintp.f16 s4, s4
30; CHECK-MVE-NEXT:    vmov r0, s4
31; CHECK-MVE-NEXT:    vrintp.f16 s4, s0
32; CHECK-MVE-NEXT:    vmov r1, s4
33; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
34; CHECK-MVE-NEXT:    vmov.16 q1[0], r1
35; CHECK-MVE-NEXT:    vrintp.f16 s0, s0
36; CHECK-MVE-NEXT:    vmov.16 q1[1], r0
37; CHECK-MVE-NEXT:    vmov r0, s8
38; CHECK-MVE-NEXT:    vmovx.f16 s8, s1
39; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
40; CHECK-MVE-NEXT:    vrintp.f16 s8, s8
41; CHECK-MVE-NEXT:    vmov r0, s8
42; CHECK-MVE-NEXT:    vrintp.f16 s8, s2
43; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
44; CHECK-MVE-NEXT:    vmov r0, s8
45; CHECK-MVE-NEXT:    vmovx.f16 s8, s2
46; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
47; CHECK-MVE-NEXT:    vrintp.f16 s8, s8
48; CHECK-MVE-NEXT:    vmov r0, s8
49; CHECK-MVE-NEXT:    vrintp.f16 s8, s3
50; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
51; CHECK-MVE-NEXT:    vmov r0, s8
52; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
53; CHECK-MVE-NEXT:    vmov r0, s0
54; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
55; CHECK-MVE-NEXT:    vmov q0, q1
56; CHECK-MVE-NEXT:    bx lr
57;
58; CHECK-MVEFP-LABEL: fceil_float16_t:
59; CHECK-MVEFP:       @ %bb.0: @ %entry
60; CHECK-MVEFP-NEXT:    vrintp.f16 q0, q0
61; CHECK-MVEFP-NEXT:    bx lr
62entry:
63  %0 = call fast <8 x half> @llvm.ceil.v8f16(<8 x half> %src)
64  ret <8 x half> %0
65}
66
; llvm.ceil on <2 x double>. Both run configurations share the plain CHECK
; prefix here: the expected code saves the input in q4 (d8/d9 are pushed and
; popped around the body) and issues two `bl ceil` calls, first for d9 and
; then for d8, moving each operand and result through r0/r1.
67define arm_aapcs_vfpcc <2 x double> @fceil_float64_t(<2 x double> %src) {
68; CHECK-LABEL: fceil_float64_t:
69; CHECK:       @ %bb.0: @ %entry
70; CHECK-NEXT:    .save {r7, lr}
71; CHECK-NEXT:    push {r7, lr}
72; CHECK-NEXT:    .vsave {d8, d9}
73; CHECK-NEXT:    vpush {d8, d9}
74; CHECK-NEXT:    vmov q4, q0
75; CHECK-NEXT:    vmov r0, r1, d9
76; CHECK-NEXT:    bl ceil
77; CHECK-NEXT:    vmov r2, r3, d8
78; CHECK-NEXT:    vmov d9, r0, r1
79; CHECK-NEXT:    mov r0, r2
80; CHECK-NEXT:    mov r1, r3
81; CHECK-NEXT:    bl ceil
82; CHECK-NEXT:    vmov d8, r0, r1
83; CHECK-NEXT:    vmov q0, q4
84; CHECK-NEXT:    vpop {d8, d9}
85; CHECK-NEXT:    pop {r7, pc}
86entry:
87  %0 = call fast <2 x double> @llvm.ceil.v2f64(<2 x double> %src)
88  ret <2 x double> %0
89}
90
; llvm.trunc on <4 x float>. With +mve,+fullfp16 (prefix CHECK-MVE) the expected
; code is four scalar vrintz.f32 ops writing s4-s7, then a copy of q1 into q0.
; With +mve.fp (prefix CHECK-MVEFP) a single vector vrintz.f32 q0, q0 is expected.
91define arm_aapcs_vfpcc <4 x float> @ftrunc_float32_t(<4 x float> %src) {
92; CHECK-MVE-LABEL: ftrunc_float32_t:
93; CHECK-MVE:       @ %bb.0: @ %entry
94; CHECK-MVE-NEXT:    vrintz.f32 s7, s3
95; CHECK-MVE-NEXT:    vrintz.f32 s6, s2
96; CHECK-MVE-NEXT:    vrintz.f32 s5, s1
97; CHECK-MVE-NEXT:    vrintz.f32 s4, s0
98; CHECK-MVE-NEXT:    vmov q0, q1
99; CHECK-MVE-NEXT:    bx lr
100;
101; CHECK-MVEFP-LABEL: ftrunc_float32_t:
102; CHECK-MVEFP:       @ %bb.0: @ %entry
103; CHECK-MVEFP-NEXT:    vrintz.f32 q0, q0
104; CHECK-MVEFP-NEXT:    bx lr
105entry:
106  %0 = call fast <4 x float> @llvm.trunc.v4f32(<4 x float> %src)
107  ret <4 x float> %0
108}
109
; llvm.trunc on <8 x half>. With +mve,+fullfp16 (prefix CHECK-MVE) the expected
; code rounds each of the eight lanes with a scalar vrintz.f16 (vmovx.f16
; feeds the odd-numbered lanes) and rebuilds the vector in q1 through vmov.16
; lane inserts. With +mve.fp (prefix CHECK-MVEFP) a single vector
; vrintz.f16 q0, q0 is expected.
110define arm_aapcs_vfpcc <8 x half> @ftrunc_float16_t(<8 x half> %src) {
111; CHECK-MVE-LABEL: ftrunc_float16_t:
112; CHECK-MVE:       @ %bb.0: @ %entry
113; CHECK-MVE-NEXT:    vmovx.f16 s4, s0
114; CHECK-MVE-NEXT:    vrintz.f16 s8, s1
115; CHECK-MVE-NEXT:    vrintz.f16 s4, s4
116; CHECK-MVE-NEXT:    vmov r0, s4
117; CHECK-MVE-NEXT:    vrintz.f16 s4, s0
118; CHECK-MVE-NEXT:    vmov r1, s4
119; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
120; CHECK-MVE-NEXT:    vmov.16 q1[0], r1
121; CHECK-MVE-NEXT:    vrintz.f16 s0, s0
122; CHECK-MVE-NEXT:    vmov.16 q1[1], r0
123; CHECK-MVE-NEXT:    vmov r0, s8
124; CHECK-MVE-NEXT:    vmovx.f16 s8, s1
125; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
126; CHECK-MVE-NEXT:    vrintz.f16 s8, s8
127; CHECK-MVE-NEXT:    vmov r0, s8
128; CHECK-MVE-NEXT:    vrintz.f16 s8, s2
129; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
130; CHECK-MVE-NEXT:    vmov r0, s8
131; CHECK-MVE-NEXT:    vmovx.f16 s8, s2
132; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
133; CHECK-MVE-NEXT:    vrintz.f16 s8, s8
134; CHECK-MVE-NEXT:    vmov r0, s8
135; CHECK-MVE-NEXT:    vrintz.f16 s8, s3
136; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
137; CHECK-MVE-NEXT:    vmov r0, s8
138; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
139; CHECK-MVE-NEXT:    vmov r0, s0
140; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
141; CHECK-MVE-NEXT:    vmov q0, q1
142; CHECK-MVE-NEXT:    bx lr
143;
144; CHECK-MVEFP-LABEL: ftrunc_float16_t:
145; CHECK-MVEFP:       @ %bb.0: @ %entry
146; CHECK-MVEFP-NEXT:    vrintz.f16 q0, q0
147; CHECK-MVEFP-NEXT:    bx lr
148entry:
149  %0 = call fast <8 x half> @llvm.trunc.v8f16(<8 x half> %src)
150  ret <8 x half> %0
151}
152
; llvm.trunc on <2 x double>. Both run configurations share the plain CHECK
; prefix here: the expected code saves the input in q4 (d8/d9 are pushed and
; popped around the body) and issues two `bl trunc` calls, first for d9 and
; then for d8, moving each operand and result through r0/r1.
153define arm_aapcs_vfpcc <2 x double> @ftrunc_float64_t(<2 x double> %src) {
154; CHECK-LABEL: ftrunc_float64_t:
155; CHECK:       @ %bb.0: @ %entry
156; CHECK-NEXT:    .save {r7, lr}
157; CHECK-NEXT:    push {r7, lr}
158; CHECK-NEXT:    .vsave {d8, d9}
159; CHECK-NEXT:    vpush {d8, d9}
160; CHECK-NEXT:    vmov q4, q0
161; CHECK-NEXT:    vmov r0, r1, d9
162; CHECK-NEXT:    bl trunc
163; CHECK-NEXT:    vmov r2, r3, d8
164; CHECK-NEXT:    vmov d9, r0, r1
165; CHECK-NEXT:    mov r0, r2
166; CHECK-NEXT:    mov r1, r3
167; CHECK-NEXT:    bl trunc
168; CHECK-NEXT:    vmov d8, r0, r1
169; CHECK-NEXT:    vmov q0, q4
170; CHECK-NEXT:    vpop {d8, d9}
171; CHECK-NEXT:    pop {r7, pc}
172entry:
173  %0 = call fast <2 x double> @llvm.trunc.v2f64(<2 x double> %src)
174  ret <2 x double> %0
175}
176
; llvm.rint on <4 x float>. With +mve,+fullfp16 (prefix CHECK-MVE) the expected
; code is four scalar vrintx.f32 ops writing s4-s7, then a copy of q1 into q0.
; With +mve.fp (prefix CHECK-MVEFP) a single vector vrintx.f32 q0, q0 is expected.
177define arm_aapcs_vfpcc <4 x float> @frint_float32_t(<4 x float> %src) {
178; CHECK-MVE-LABEL: frint_float32_t:
179; CHECK-MVE:       @ %bb.0: @ %entry
180; CHECK-MVE-NEXT:    vrintx.f32 s7, s3
181; CHECK-MVE-NEXT:    vrintx.f32 s6, s2
182; CHECK-MVE-NEXT:    vrintx.f32 s5, s1
183; CHECK-MVE-NEXT:    vrintx.f32 s4, s0
184; CHECK-MVE-NEXT:    vmov q0, q1
185; CHECK-MVE-NEXT:    bx lr
186;
187; CHECK-MVEFP-LABEL: frint_float32_t:
188; CHECK-MVEFP:       @ %bb.0: @ %entry
189; CHECK-MVEFP-NEXT:    vrintx.f32 q0, q0
190; CHECK-MVEFP-NEXT:    bx lr
191entry:
192  %0 = call fast <4 x float> @llvm.rint.v4f32(<4 x float> %src)
193  ret <4 x float> %0
194}
195
; llvm.rint on <8 x half>. With +mve,+fullfp16 (prefix CHECK-MVE) the expected
; code rounds each of the eight lanes with a scalar vrintx.f16 (vmovx.f16
; feeds the odd-numbered lanes) and rebuilds the vector in q1 through vmov.16
; lane inserts. With +mve.fp (prefix CHECK-MVEFP) a single vector
; vrintx.f16 q0, q0 is expected.
196define arm_aapcs_vfpcc <8 x half> @frint_float16_t(<8 x half> %src) {
197; CHECK-MVE-LABEL: frint_float16_t:
198; CHECK-MVE:       @ %bb.0: @ %entry
199; CHECK-MVE-NEXT:    vmovx.f16 s4, s0
200; CHECK-MVE-NEXT:    vrintx.f16 s8, s1
201; CHECK-MVE-NEXT:    vrintx.f16 s4, s4
202; CHECK-MVE-NEXT:    vmov r0, s4
203; CHECK-MVE-NEXT:    vrintx.f16 s4, s0
204; CHECK-MVE-NEXT:    vmov r1, s4
205; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
206; CHECK-MVE-NEXT:    vmov.16 q1[0], r1
207; CHECK-MVE-NEXT:    vrintx.f16 s0, s0
208; CHECK-MVE-NEXT:    vmov.16 q1[1], r0
209; CHECK-MVE-NEXT:    vmov r0, s8
210; CHECK-MVE-NEXT:    vmovx.f16 s8, s1
211; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
212; CHECK-MVE-NEXT:    vrintx.f16 s8, s8
213; CHECK-MVE-NEXT:    vmov r0, s8
214; CHECK-MVE-NEXT:    vrintx.f16 s8, s2
215; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
216; CHECK-MVE-NEXT:    vmov r0, s8
217; CHECK-MVE-NEXT:    vmovx.f16 s8, s2
218; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
219; CHECK-MVE-NEXT:    vrintx.f16 s8, s8
220; CHECK-MVE-NEXT:    vmov r0, s8
221; CHECK-MVE-NEXT:    vrintx.f16 s8, s3
222; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
223; CHECK-MVE-NEXT:    vmov r0, s8
224; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
225; CHECK-MVE-NEXT:    vmov r0, s0
226; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
227; CHECK-MVE-NEXT:    vmov q0, q1
228; CHECK-MVE-NEXT:    bx lr
229;
230; CHECK-MVEFP-LABEL: frint_float16_t:
231; CHECK-MVEFP:       @ %bb.0: @ %entry
232; CHECK-MVEFP-NEXT:    vrintx.f16 q0, q0
233; CHECK-MVEFP-NEXT:    bx lr
234entry:
235  %0 = call fast <8 x half> @llvm.rint.v8f16(<8 x half> %src)
236  ret <8 x half> %0
237}
238
; llvm.rint on <2 x double>. Both run configurations share the plain CHECK
; prefix here: the expected code saves the input in q4 (d8/d9 are pushed and
; popped around the body) and issues two `bl rint` calls, first for d9 and
; then for d8, moving each operand and result through r0/r1.
239define arm_aapcs_vfpcc <2 x double> @frint_float64_t(<2 x double> %src) {
240; CHECK-LABEL: frint_float64_t:
241; CHECK:       @ %bb.0: @ %entry
242; CHECK-NEXT:    .save {r7, lr}
243; CHECK-NEXT:    push {r7, lr}
244; CHECK-NEXT:    .vsave {d8, d9}
245; CHECK-NEXT:    vpush {d8, d9}
246; CHECK-NEXT:    vmov q4, q0
247; CHECK-NEXT:    vmov r0, r1, d9
248; CHECK-NEXT:    bl rint
249; CHECK-NEXT:    vmov r2, r3, d8
250; CHECK-NEXT:    vmov d9, r0, r1
251; CHECK-NEXT:    mov r0, r2
252; CHECK-NEXT:    mov r1, r3
253; CHECK-NEXT:    bl rint
254; CHECK-NEXT:    vmov d8, r0, r1
255; CHECK-NEXT:    vmov q0, q4
256; CHECK-NEXT:    vpop {d8, d9}
257; CHECK-NEXT:    pop {r7, pc}
258entry:
259  %0 = call fast <2 x double> @llvm.rint.v2f64(<2 x double> %src)
260  ret <2 x double> %0
261}
262
; llvm.nearbyint on <4 x float>. Unlike the other float32 tests in this file,
; the checks use the shared CHECK prefix, so both run configurations expect
; the same output: four scalar vrintr.f32 ops writing s4-s7, then a copy of
; q1 into q0.
263define arm_aapcs_vfpcc <4 x float> @fnearbyint_float32_t(<4 x float> %src) {
264; CHECK-LABEL: fnearbyint_float32_t:
265; CHECK:       @ %bb.0: @ %entry
266; CHECK-NEXT:    vrintr.f32 s7, s3
267; CHECK-NEXT:    vrintr.f32 s6, s2
268; CHECK-NEXT:    vrintr.f32 s5, s1
269; CHECK-NEXT:    vrintr.f32 s4, s0
270; CHECK-NEXT:    vmov q0, q1
271; CHECK-NEXT:    bx lr
272entry:
273  %0 = call fast <4 x float> @llvm.nearbyint.v4f32(<4 x float> %src)
274  ret <4 x float> %0
275}
276
; llvm.nearbyint on <8 x half>. The checks use the shared CHECK prefix, so both
; run configurations expect the same output: each of the eight lanes is
; rounded with a scalar vrintr.f16 (vmovx.f16 feeds the odd-numbered lanes)
; and the vector is rebuilt in q1 through vmov.16 lane inserts.
277define arm_aapcs_vfpcc <8 x half> @fnearbyint_float16_t(<8 x half> %src) {
278; CHECK-LABEL: fnearbyint_float16_t:
279; CHECK:       @ %bb.0: @ %entry
280; CHECK-NEXT:    vmovx.f16 s4, s0
281; CHECK-NEXT:    vrintr.f16 s8, s1
282; CHECK-NEXT:    vrintr.f16 s4, s4
283; CHECK-NEXT:    vmov r0, s4
284; CHECK-NEXT:    vrintr.f16 s4, s0
285; CHECK-NEXT:    vmov r1, s4
286; CHECK-NEXT:    vmovx.f16 s0, s3
287; CHECK-NEXT:    vmov.16 q1[0], r1
288; CHECK-NEXT:    vrintr.f16 s0, s0
289; CHECK-NEXT:    vmov.16 q1[1], r0
290; CHECK-NEXT:    vmov r0, s8
291; CHECK-NEXT:    vmovx.f16 s8, s1
292; CHECK-NEXT:    vmov.16 q1[2], r0
293; CHECK-NEXT:    vrintr.f16 s8, s8
294; CHECK-NEXT:    vmov r0, s8
295; CHECK-NEXT:    vrintr.f16 s8, s2
296; CHECK-NEXT:    vmov.16 q1[3], r0
297; CHECK-NEXT:    vmov r0, s8
298; CHECK-NEXT:    vmovx.f16 s8, s2
299; CHECK-NEXT:    vmov.16 q1[4], r0
300; CHECK-NEXT:    vrintr.f16 s8, s8
301; CHECK-NEXT:    vmov r0, s8
302; CHECK-NEXT:    vrintr.f16 s8, s3
303; CHECK-NEXT:    vmov.16 q1[5], r0
304; CHECK-NEXT:    vmov r0, s8
305; CHECK-NEXT:    vmov.16 q1[6], r0
306; CHECK-NEXT:    vmov r0, s0
307; CHECK-NEXT:    vmov.16 q1[7], r0
308; CHECK-NEXT:    vmov q0, q1
309; CHECK-NEXT:    bx lr
310entry:
311  %0 = call fast <8 x half> @llvm.nearbyint.v8f16(<8 x half> %src)
312  ret <8 x half> %0
313}
314
; llvm.nearbyint on <2 x double>. Both run configurations share the plain CHECK
; prefix here: the expected code saves the input in q4 (d8/d9 are pushed and
; popped around the body) and issues two `bl nearbyint` calls, first for d9
; and then for d8, moving each operand and result through r0/r1.
315define arm_aapcs_vfpcc <2 x double> @fnearbyint_float64_t(<2 x double> %src) {
316; CHECK-LABEL: fnearbyint_float64_t:
317; CHECK:       @ %bb.0: @ %entry
318; CHECK-NEXT:    .save {r7, lr}
319; CHECK-NEXT:    push {r7, lr}
320; CHECK-NEXT:    .vsave {d8, d9}
321; CHECK-NEXT:    vpush {d8, d9}
322; CHECK-NEXT:    vmov q4, q0
323; CHECK-NEXT:    vmov r0, r1, d9
324; CHECK-NEXT:    bl nearbyint
325; CHECK-NEXT:    vmov r2, r3, d8
326; CHECK-NEXT:    vmov d9, r0, r1
327; CHECK-NEXT:    mov r0, r2
328; CHECK-NEXT:    mov r1, r3
329; CHECK-NEXT:    bl nearbyint
330; CHECK-NEXT:    vmov d8, r0, r1
331; CHECK-NEXT:    vmov q0, q4
332; CHECK-NEXT:    vpop {d8, d9}
333; CHECK-NEXT:    pop {r7, pc}
334entry:
335  %0 = call fast <2 x double> @llvm.nearbyint.v2f64(<2 x double> %src)
336  ret <2 x double> %0
337}
338
; llvm.floor on <4 x float>. With +mve,+fullfp16 (prefix CHECK-MVE) the expected
; code is four scalar vrintm.f32 ops writing s4-s7, then a copy of q1 into q0.
; With +mve.fp (prefix CHECK-MVEFP) a single vector vrintm.f32 q0, q0 is expected.
339define arm_aapcs_vfpcc <4 x float> @ffloor_float32_t(<4 x float> %src) {
340; CHECK-MVE-LABEL: ffloor_float32_t:
341; CHECK-MVE:       @ %bb.0: @ %entry
342; CHECK-MVE-NEXT:    vrintm.f32 s7, s3
343; CHECK-MVE-NEXT:    vrintm.f32 s6, s2
344; CHECK-MVE-NEXT:    vrintm.f32 s5, s1
345; CHECK-MVE-NEXT:    vrintm.f32 s4, s0
346; CHECK-MVE-NEXT:    vmov q0, q1
347; CHECK-MVE-NEXT:    bx lr
348;
349; CHECK-MVEFP-LABEL: ffloor_float32_t:
350; CHECK-MVEFP:       @ %bb.0: @ %entry
351; CHECK-MVEFP-NEXT:    vrintm.f32 q0, q0
352; CHECK-MVEFP-NEXT:    bx lr
353entry:
354  %0 = call fast <4 x float> @llvm.floor.v4f32(<4 x float> %src)
355  ret <4 x float> %0
356}
357
; llvm.floor on <8 x half>. With +mve,+fullfp16 (prefix CHECK-MVE) the expected
; code rounds each of the eight lanes with a scalar vrintm.f16 (vmovx.f16
; feeds the odd-numbered lanes) and rebuilds the vector in q1 through vmov.16
; lane inserts. With +mve.fp (prefix CHECK-MVEFP) a single vector
; vrintm.f16 q0, q0 is expected.
358define arm_aapcs_vfpcc <8 x half> @ffloor_float16_t(<8 x half> %src) {
359; CHECK-MVE-LABEL: ffloor_float16_t:
360; CHECK-MVE:       @ %bb.0: @ %entry
361; CHECK-MVE-NEXT:    vmovx.f16 s4, s0
362; CHECK-MVE-NEXT:    vrintm.f16 s8, s1
363; CHECK-MVE-NEXT:    vrintm.f16 s4, s4
364; CHECK-MVE-NEXT:    vmov r0, s4
365; CHECK-MVE-NEXT:    vrintm.f16 s4, s0
366; CHECK-MVE-NEXT:    vmov r1, s4
367; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
368; CHECK-MVE-NEXT:    vmov.16 q1[0], r1
369; CHECK-MVE-NEXT:    vrintm.f16 s0, s0
370; CHECK-MVE-NEXT:    vmov.16 q1[1], r0
371; CHECK-MVE-NEXT:    vmov r0, s8
372; CHECK-MVE-NEXT:    vmovx.f16 s8, s1
373; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
374; CHECK-MVE-NEXT:    vrintm.f16 s8, s8
375; CHECK-MVE-NEXT:    vmov r0, s8
376; CHECK-MVE-NEXT:    vrintm.f16 s8, s2
377; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
378; CHECK-MVE-NEXT:    vmov r0, s8
379; CHECK-MVE-NEXT:    vmovx.f16 s8, s2
380; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
381; CHECK-MVE-NEXT:    vrintm.f16 s8, s8
382; CHECK-MVE-NEXT:    vmov r0, s8
383; CHECK-MVE-NEXT:    vrintm.f16 s8, s3
384; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
385; CHECK-MVE-NEXT:    vmov r0, s8
386; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
387; CHECK-MVE-NEXT:    vmov r0, s0
388; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
389; CHECK-MVE-NEXT:    vmov q0, q1
390; CHECK-MVE-NEXT:    bx lr
391;
392; CHECK-MVEFP-LABEL: ffloor_float16_t:
393; CHECK-MVEFP:       @ %bb.0: @ %entry
394; CHECK-MVEFP-NEXT:    vrintm.f16 q0, q0
395; CHECK-MVEFP-NEXT:    bx lr
396entry:
397  %0 = call fast <8 x half> @llvm.floor.v8f16(<8 x half> %src)
398  ret <8 x half> %0
399}
400
; llvm.floor on <2 x double>. Both run configurations share the plain CHECK
; prefix here: the expected code saves the input in q4 (d8/d9 are pushed and
; popped around the body) and issues two `bl floor` calls, first for d9 and
; then for d8, moving each operand and result through r0/r1.
401define arm_aapcs_vfpcc <2 x double> @ffloor_float64_t(<2 x double> %src) {
402; CHECK-LABEL: ffloor_float64_t:
403; CHECK:       @ %bb.0: @ %entry
404; CHECK-NEXT:    .save {r7, lr}
405; CHECK-NEXT:    push {r7, lr}
406; CHECK-NEXT:    .vsave {d8, d9}
407; CHECK-NEXT:    vpush {d8, d9}
408; CHECK-NEXT:    vmov q4, q0
409; CHECK-NEXT:    vmov r0, r1, d9
410; CHECK-NEXT:    bl floor
411; CHECK-NEXT:    vmov r2, r3, d8
412; CHECK-NEXT:    vmov d9, r0, r1
413; CHECK-NEXT:    mov r0, r2
414; CHECK-NEXT:    mov r1, r3
415; CHECK-NEXT:    bl floor
416; CHECK-NEXT:    vmov d8, r0, r1
417; CHECK-NEXT:    vmov q0, q4
418; CHECK-NEXT:    vpop {d8, d9}
419; CHECK-NEXT:    pop {r7, pc}
420entry:
421  %0 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %src)
422  ret <2 x double> %0
423}
424
; llvm.round on <4 x float>. With +mve,+fullfp16 (prefix CHECK-MVE) the expected
; code is four scalar vrinta.f32 ops writing s4-s7, then a copy of q1 into q0.
; With +mve.fp (prefix CHECK-MVEFP) a single vector vrinta.f32 q0, q0 is expected.
425define arm_aapcs_vfpcc <4 x float> @fround_float32_t(<4 x float> %src) {
426; CHECK-MVE-LABEL: fround_float32_t:
427; CHECK-MVE:       @ %bb.0: @ %entry
428; CHECK-MVE-NEXT:    vrinta.f32 s7, s3
429; CHECK-MVE-NEXT:    vrinta.f32 s6, s2
430; CHECK-MVE-NEXT:    vrinta.f32 s5, s1
431; CHECK-MVE-NEXT:    vrinta.f32 s4, s0
432; CHECK-MVE-NEXT:    vmov q0, q1
433; CHECK-MVE-NEXT:    bx lr
434;
435; CHECK-MVEFP-LABEL: fround_float32_t:
436; CHECK-MVEFP:       @ %bb.0: @ %entry
437; CHECK-MVEFP-NEXT:    vrinta.f32 q0, q0
438; CHECK-MVEFP-NEXT:    bx lr
439entry:
440  %0 = call fast <4 x float> @llvm.round.v4f32(<4 x float> %src)
441  ret <4 x float> %0
442}
443
; llvm.round on <8 x half>. With +mve,+fullfp16 (prefix CHECK-MVE) the expected
; code rounds each of the eight lanes with a scalar vrinta.f16 (vmovx.f16
; feeds the odd-numbered lanes) and rebuilds the vector in q1 through vmov.16
; lane inserts. With +mve.fp (prefix CHECK-MVEFP) a single vector
; vrinta.f16 q0, q0 is expected.
444define arm_aapcs_vfpcc <8 x half> @fround_float16_t(<8 x half> %src) {
445; CHECK-MVE-LABEL: fround_float16_t:
446; CHECK-MVE:       @ %bb.0: @ %entry
447; CHECK-MVE-NEXT:    vmovx.f16 s4, s0
448; CHECK-MVE-NEXT:    vrinta.f16 s8, s1
449; CHECK-MVE-NEXT:    vrinta.f16 s4, s4
450; CHECK-MVE-NEXT:    vmov r0, s4
451; CHECK-MVE-NEXT:    vrinta.f16 s4, s0
452; CHECK-MVE-NEXT:    vmov r1, s4
453; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
454; CHECK-MVE-NEXT:    vmov.16 q1[0], r1
455; CHECK-MVE-NEXT:    vrinta.f16 s0, s0
456; CHECK-MVE-NEXT:    vmov.16 q1[1], r0
457; CHECK-MVE-NEXT:    vmov r0, s8
458; CHECK-MVE-NEXT:    vmovx.f16 s8, s1
459; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
460; CHECK-MVE-NEXT:    vrinta.f16 s8, s8
461; CHECK-MVE-NEXT:    vmov r0, s8
462; CHECK-MVE-NEXT:    vrinta.f16 s8, s2
463; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
464; CHECK-MVE-NEXT:    vmov r0, s8
465; CHECK-MVE-NEXT:    vmovx.f16 s8, s2
466; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
467; CHECK-MVE-NEXT:    vrinta.f16 s8, s8
468; CHECK-MVE-NEXT:    vmov r0, s8
469; CHECK-MVE-NEXT:    vrinta.f16 s8, s3
470; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
471; CHECK-MVE-NEXT:    vmov r0, s8
472; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
473; CHECK-MVE-NEXT:    vmov r0, s0
474; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
475; CHECK-MVE-NEXT:    vmov q0, q1
476; CHECK-MVE-NEXT:    bx lr
477;
478; CHECK-MVEFP-LABEL: fround_float16_t:
479; CHECK-MVEFP:       @ %bb.0: @ %entry
480; CHECK-MVEFP-NEXT:    vrinta.f16 q0, q0
481; CHECK-MVEFP-NEXT:    bx lr
482entry:
483  %0 = call fast <8 x half> @llvm.round.v8f16(<8 x half> %src)
484  ret <8 x half> %0
485}
486
; llvm.round on <2 x double>. Both run configurations share the plain CHECK
; prefix here: the expected code saves the input in q4 (d8/d9 are pushed and
; popped around the body) and issues two `bl round` calls, first for d9 and
; then for d8, moving each operand and result through r0/r1.
487define arm_aapcs_vfpcc <2 x double> @fround_float64_t(<2 x double> %src) {
488; CHECK-LABEL: fround_float64_t:
489; CHECK:       @ %bb.0: @ %entry
490; CHECK-NEXT:    .save {r7, lr}
491; CHECK-NEXT:    push {r7, lr}
492; CHECK-NEXT:    .vsave {d8, d9}
493; CHECK-NEXT:    vpush {d8, d9}
494; CHECK-NEXT:    vmov q4, q0
495; CHECK-NEXT:    vmov r0, r1, d9
496; CHECK-NEXT:    bl round
497; CHECK-NEXT:    vmov r2, r3, d8
498; CHECK-NEXT:    vmov d9, r0, r1
499; CHECK-NEXT:    mov r0, r2
500; CHECK-NEXT:    mov r1, r3
501; CHECK-NEXT:    bl round
502; CHECK-NEXT:    vmov d8, r0, r1
503; CHECK-NEXT:    vmov q0, q4
504; CHECK-NEXT:    vpop {d8, d9}
505; CHECK-NEXT:    pop {r7, pc}
506entry:
507  %0 = call fast <2 x double> @llvm.round.v2f64(<2 x double> %src)
508  ret <2 x double> %0
509}
510
; Declarations of the rounding intrinsics (ceil, trunc, rint, nearbyint, floor,
; round) called by the test functions in this file, one set each for
; <4 x float>, <8 x half> and <2 x double>.
511declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
512declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
513declare <4 x float> @llvm.rint.v4f32(<4 x float>)
514declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
515declare <4 x float> @llvm.floor.v4f32(<4 x float>)
516declare <4 x float> @llvm.round.v4f32(<4 x float>)
517declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
518declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
519declare <8 x half> @llvm.rint.v8f16(<8 x half>)
520declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
521declare <8 x half> @llvm.floor.v8f16(<8 x half>)
522declare <8 x half> @llvm.round.v8f16(<8 x half>)
523declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
524declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
525declare <2 x double> @llvm.rint.v2f64(<2 x double>)
526declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
527declare <2 x double> @llvm.floor.v2f64(<2 x double>)
528declare <2 x double> @llvm.round.v2f64(<2 x double>)
529