; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
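
; This file checks the lowering of simple vector add/sub/mul operations for MVE.
; The CHECK-MVE run (+mve,+fullfp16) has no vector floating-point support, so
; the f32/f16 cases are scalarised and f64 is lowered to __aeabi_* libcalls;
; the CHECK-MVEFP run (+mve.fp) selects the vector floating-point instructions.
; Integer operations on 8/16/32-bit lanes select a single MVE instruction in
; both runs, while 64-bit lanes have no MVE equivalent and are expanded.
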
define arm_aapcs_vfpcc <16 x i8> @add_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: add_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add <16 x i8> %src1, %src2
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @add_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: add_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add <8 x i16> %src1, %src2
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @add_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: add_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add nsw <4 x i32> %src1, %src2
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <2 x i64> @add_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: add_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    vmov r3, s2
; CHECK-NEXT:    vmov r0, s7
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    adds.w lr, r3, r2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    vmov r3, s1
; CHECK-NEXT:    adc.w r12, r1, r0
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov r1, s5
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov.32 q0[0], r0
; CHECK-NEXT:    vmov.32 q0[1], r1
; CHECK-NEXT:    vmov.32 q0[2], lr
; CHECK-NEXT:    vmov.32 q0[3], r12
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = add nsw <2 x i64> %src1, %src2
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <4 x float> @add_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: add_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vadd.f32 s11, s7, s3
; CHECK-MVE-NEXT:    vadd.f32 s10, s6, s2
; CHECK-MVE-NEXT:    vadd.f32 s9, s5, s1
; CHECK-MVE-NEXT:    vadd.f32 s8, s4, s0
; CHECK-MVE-NEXT:    vmov q0, q2
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: add_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vadd.f32 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fadd nnan ninf nsz <4 x float> %src2, %src1
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @add_float16_t(<8 x half> %src1, <8 x half> %src2) {
; CHECK-MVE-LABEL: add_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vadd.f16 s8, s4, s0
; CHECK-MVE-NEXT:    vmovx.f16 s10, s4
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s0
; CHECK-MVE-NEXT:    vadd.f16 s8, s10, s8
; CHECK-MVE-NEXT:    vadd.f16 s12, s5, s1
; CHECK-MVE-NEXT:    vmov r1, s8
; CHECK-MVE-NEXT:    vmov.16 q2[0], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s1
; CHECK-MVE-NEXT:    vmovx.f16 s14, s5
; CHECK-MVE-NEXT:    vmov.16 q2[1], r1
; CHECK-MVE-NEXT:    vadd.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vmov.16 q2[2], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vadd.f16 s12, s6, s2
; CHECK-MVE-NEXT:    vmov.16 q2[3], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s2
; CHECK-MVE-NEXT:    vmovx.f16 s14, s6
; CHECK-MVE-NEXT:    vadd.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vmov.16 q2[4], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
; CHECK-MVE-NEXT:    vmovx.f16 s2, s7
; CHECK-MVE-NEXT:    vadd.f16 s12, s7, s3
; CHECK-MVE-NEXT:    vmov.16 q2[5], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vadd.f16 s0, s2, s0
; CHECK-MVE-NEXT:    vmov.16 q2[6], r0
; CHECK-MVE-NEXT:    vmov r0, s0
; CHECK-MVE-NEXT:    vmov.16 q2[7], r0
; CHECK-MVE-NEXT:    vmov q0, q2
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: add_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vadd.f16 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fadd nnan ninf nsz <8 x half> %src2, %src1
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <2 x double> @add_float64_t(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: add_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q1
; CHECK-NEXT:    vmov q5, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    vmov r2, r3, d11
; CHECK-NEXT:    bl __aeabi_dadd
; CHECK-NEXT:    vmov lr, r12, d8
; CHECK-NEXT:    vmov r2, r3, d10
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, lr
; CHECK-NEXT:    mov r1, r12
; CHECK-NEXT:    bl __aeabi_dadd
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = fadd nnan ninf nsz <2 x double> %src2, %src1
  ret <2 x double> %0
}

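
; Subtraction: the IR computes %src2 - %src1, so the checked instructions take
; q1 (%src2) as the first source operand, e.g. vsub.i8 q0, q1, q0. As with
; addition, the 64-bit integer lanes are expanded to scalar subs/sbc sequences
; and the f64 case is lowered to __aeabi_dsub libcalls.
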
define arm_aapcs_vfpcc <16 x i8> @sub_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: sub_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.i8 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = sub <16 x i8> %src2, %src1
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @sub_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: sub_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.i16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = sub <8 x i16> %src2, %src1
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @sub_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: sub_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.i32 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = sub nsw <4 x i32> %src2, %src1
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <2 x i64> @sub_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: sub_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s6
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s7
; CHECK-NEXT:    subs.w lr, r3, r2
; CHECK-NEXT:    vmov r2, s4
; CHECK-NEXT:    vmov r3, s5
; CHECK-NEXT:    sbc.w r12, r1, r0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    subs r0, r2, r0
; CHECK-NEXT:    sbc.w r1, r3, r1
; CHECK-NEXT:    vmov.32 q0[0], r0
; CHECK-NEXT:    vmov.32 q0[1], r1
; CHECK-NEXT:    vmov.32 q0[2], lr
; CHECK-NEXT:    vmov.32 q0[3], r12
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = sub nsw <2 x i64> %src2, %src1
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <4 x float> @sub_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: sub_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vsub.f32 s11, s7, s3
; CHECK-MVE-NEXT:    vsub.f32 s10, s6, s2
; CHECK-MVE-NEXT:    vsub.f32 s9, s5, s1
; CHECK-MVE-NEXT:    vsub.f32 s8, s4, s0
; CHECK-MVE-NEXT:    vmov q0, q2
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: sub_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vsub.f32 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fsub nnan ninf nsz <4 x float> %src2, %src1
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @sub_float16_t(<8 x half> %src1, <8 x half> %src2) {
; CHECK-MVE-LABEL: sub_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vsub.f16 s8, s4, s0
; CHECK-MVE-NEXT:    vmovx.f16 s10, s4
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s0
; CHECK-MVE-NEXT:    vsub.f16 s8, s10, s8
; CHECK-MVE-NEXT:    vsub.f16 s12, s5, s1
; CHECK-MVE-NEXT:    vmov r1, s8
; CHECK-MVE-NEXT:    vmov.16 q2[0], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s1
; CHECK-MVE-NEXT:    vmovx.f16 s14, s5
; CHECK-MVE-NEXT:    vmov.16 q2[1], r1
; CHECK-MVE-NEXT:    vsub.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vmov.16 q2[2], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vsub.f16 s12, s6, s2
; CHECK-MVE-NEXT:    vmov.16 q2[3], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s2
; CHECK-MVE-NEXT:    vmovx.f16 s14, s6
; CHECK-MVE-NEXT:    vsub.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vmov.16 q2[4], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
; CHECK-MVE-NEXT:    vmovx.f16 s2, s7
; CHECK-MVE-NEXT:    vsub.f16 s12, s7, s3
; CHECK-MVE-NEXT:    vmov.16 q2[5], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vsub.f16 s0, s2, s0
; CHECK-MVE-NEXT:    vmov.16 q2[6], r0
; CHECK-MVE-NEXT:    vmov r0, s0
; CHECK-MVE-NEXT:    vmov.16 q2[7], r0
; CHECK-MVE-NEXT:    vmov q0, q2
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: sub_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vsub.f16 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fsub nnan ninf nsz <8 x half> %src2, %src1
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <2 x double> @sub_float64_t(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: sub_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q1
; CHECK-NEXT:    vmov q5, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    vmov r2, r3, d11
; CHECK-NEXT:    bl __aeabi_dsub
; CHECK-NEXT:    vmov lr, r12, d8
; CHECK-NEXT:    vmov r2, r3, d10
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, lr
; CHECK-NEXT:    mov r1, r12
; CHECK-NEXT:    bl __aeabi_dsub
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = fsub nnan ninf nsz <2 x double> %src2, %src1
  ret <2 x double> %0
}

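
; Multiplication: 8/16/32-bit integer lanes select vmul.iN. MVE has no 64-bit
; vector multiply, so mul_int64_t is expanded into umull/mla sequences, and
; mul_float64_t is lowered to __aeabi_dmul libcalls in both runs.
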
define arm_aapcs_vfpcc <16 x i8> @mul_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: mul_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = mul <16 x i8> %src1, %src2
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @mul_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: mul_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = mul <8 x i16> %src1, %src2
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @mul_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: mul_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = mul nsw <4 x i32> %src1, %src2
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <2 x i64> @mul_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: mul_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov r2, s5
; CHECK-NEXT:    umull r12, r3, r1, r0
; CHECK-NEXT:    mla lr, r1, r2, r3
; CHECK-NEXT:    vmov r3, s6
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s7
; CHECK-NEXT:    umull r4, r5, r1, r3
; CHECK-NEXT:    mla r1, r1, r2, r5
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    mla r0, r2, r0, lr
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    vmov.32 q0[0], r12
; CHECK-NEXT:    vmov.32 q0[1], r0
; CHECK-NEXT:    vmov.32 q0[2], r4
; CHECK-NEXT:    mla r1, r2, r3, r1
; CHECK-NEXT:    vmov.32 q0[3], r1
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = mul nsw <2 x i64> %src1, %src2
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <8 x half> @mul_float16_t(<8 x half> %src1, <8 x half> %src2) {
; CHECK-MVE-LABEL: mul_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmul.f16 s8, s4, s0
; CHECK-MVE-NEXT:    vmovx.f16 s10, s4
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s0
; CHECK-MVE-NEXT:    vmul.f16 s8, s10, s8
; CHECK-MVE-NEXT:    vmul.f16 s12, s5, s1
; CHECK-MVE-NEXT:    vmov r1, s8
; CHECK-MVE-NEXT:    vmov.16 q2[0], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s1
; CHECK-MVE-NEXT:    vmovx.f16 s14, s5
; CHECK-MVE-NEXT:    vmov.16 q2[1], r1
; CHECK-MVE-NEXT:    vmul.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vmov.16 q2[2], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmul.f16 s12, s6, s2
; CHECK-MVE-NEXT:    vmov.16 q2[3], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s2
; CHECK-MVE-NEXT:    vmovx.f16 s14, s6
; CHECK-MVE-NEXT:    vmul.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vmov.16 q2[4], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
; CHECK-MVE-NEXT:    vmovx.f16 s2, s7
; CHECK-MVE-NEXT:    vmul.f16 s12, s7, s3
; CHECK-MVE-NEXT:    vmov.16 q2[5], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmul.f16 s0, s2, s0
; CHECK-MVE-NEXT:    vmov.16 q2[6], r0
; CHECK-MVE-NEXT:    vmov r0, s0
; CHECK-MVE-NEXT:    vmov.16 q2[7], r0
; CHECK-MVE-NEXT:    vmov q0, q2
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: mul_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vmul.f16 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fmul nnan ninf nsz <8 x half> %src2, %src1
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @mul_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: mul_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmul.f32 s11, s7, s3
; CHECK-MVE-NEXT:    vmul.f32 s10, s6, s2
; CHECK-MVE-NEXT:    vmul.f32 s9, s5, s1
; CHECK-MVE-NEXT:    vmul.f32 s8, s4, s0
; CHECK-MVE-NEXT:    vmov q0, q2
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: mul_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vmul.f32 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fmul nnan ninf nsz <4 x float> %src2, %src1
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <2 x double> @mul_float64_t(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: mul_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q1
; CHECK-NEXT:    vmov q5, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    vmov r2, r3, d11
; CHECK-NEXT:    bl __aeabi_dmul
; CHECK-NEXT:    vmov lr, r12, d8
; CHECK-NEXT:    vmov r2, r3, d10
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, lr
; CHECK-NEXT:    mov r1, r12
; CHECK-NEXT:    bl __aeabi_dmul
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = fmul nnan ninf nsz <2 x double> %src2, %src1
  ret <2 x double> %0
}