; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP

; Lowering of simple whole-vector add/sub/mul for integer (i8..i64) and
; floating-point (f16/f32/f64) vectors. Integer ops of 32 bits and below
; select single MVE q-register instructions; i64 vectors are expanded to
; scalar GPR arithmetic. FP vectors use MVE FP q-instructions with +mve.fp,
; per-lane scalar ops with +mve,+fullfp16, and __aeabi_* libcalls for f64.

define arm_aapcs_vfpcc <16 x i8> @add_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: add_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add <16 x i8> %src1, %src2
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @add_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: add_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add <8 x i16> %src1, %src2
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @add_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: add_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add nsw <4 x i32> %src1, %src2
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <2 x i64> @add_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: add_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    vmov r3, s2
; CHECK-NEXT:    vmov r0, s7
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    adds.w lr, r3, r2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    vmov r3, s1
; CHECK-NEXT:    adc.w r12, r1, r0
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov r1, s5
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov.32 q0[0], r0
; CHECK-NEXT:    vmov.32 q0[1], r1
; CHECK-NEXT:    vmov.32 q0[2], lr
; CHECK-NEXT:    vmov.32 q0[3], r12
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = add nsw <2 x i64> %src1, %src2
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <4 x float> @add_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: add_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vadd.f32 s11, s7, s3
; CHECK-MVE-NEXT:    vadd.f32 s10, s6, s2
; CHECK-MVE-NEXT:    vadd.f32 s9, s5, s1
; CHECK-MVE-NEXT:    vadd.f32 s8, s4, s0
; CHECK-MVE-NEXT:    vmov q0, q2
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: add_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vadd.f32 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fadd nnan ninf nsz <4 x float> %src2, %src1
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @add_float16_t(<8 x half> %src1, <8 x half> %src2) {
; CHECK-MVE-LABEL: add_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vadd.f16 s8, s4, s0
; CHECK-MVE-NEXT:    vmovx.f16 s10, s4
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s0
; CHECK-MVE-NEXT:    vadd.f16 s8, s10, s8
; CHECK-MVE-NEXT:    vadd.f16 s12, s5, s1
; CHECK-MVE-NEXT:    vmov r1, s8
; CHECK-MVE-NEXT:    vmov.16 q2[0], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s1
; CHECK-MVE-NEXT:    vmovx.f16 s14, s5
; CHECK-MVE-NEXT:    vmov.16 q2[1], r1
; CHECK-MVE-NEXT:    vadd.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vmov.16 q2[2], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vadd.f16 s12, s6, s2
; CHECK-MVE-NEXT:    vmov.16 q2[3], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s2
; CHECK-MVE-NEXT:    vmovx.f16 s14, s6
; CHECK-MVE-NEXT:    vadd.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vmov.16 q2[4], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
; CHECK-MVE-NEXT:    vmovx.f16 s2, s7
; CHECK-MVE-NEXT:    vadd.f16 s12, s7, s3
; CHECK-MVE-NEXT:    vmov.16 q2[5], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vadd.f16 s0, s2, s0
; CHECK-MVE-NEXT:    vmov.16 q2[6], r0
; CHECK-MVE-NEXT:    vmov r0, s0
; CHECK-MVE-NEXT:    vmov.16 q2[7], r0
; CHECK-MVE-NEXT:    vmov q0, q2
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: add_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vadd.f16 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fadd nnan ninf nsz <8 x half> %src2, %src1
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <2 x double> @add_float64_t(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: add_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q1
; CHECK-NEXT:    vmov q5, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    vmov r2, r3, d11
; CHECK-NEXT:    bl __aeabi_dadd
; CHECK-NEXT:    vmov lr, r12, d8
; CHECK-NEXT:    vmov r2, r3, d10
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, lr
; CHECK-NEXT:    mov r1, r12
; CHECK-NEXT:    bl __aeabi_dadd
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = fadd nnan ninf nsz <2 x double> %src2, %src1
  ret <2 x double> %0
}


define arm_aapcs_vfpcc <16 x i8> @sub_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: sub_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.i8 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = sub <16 x i8> %src2, %src1
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @sub_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: sub_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.i16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = sub <8 x i16> %src2, %src1
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @sub_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: sub_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.i32 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = sub nsw <4 x i32> %src2, %src1
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <2 x i64> @sub_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: sub_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s6
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s7
; CHECK-NEXT:    subs.w lr, r3, r2
; CHECK-NEXT:    vmov r2, s4
; CHECK-NEXT:    vmov r3, s5
; CHECK-NEXT:    sbc.w r12, r1, r0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    subs r0, r2, r0
; CHECK-NEXT:    sbc.w r1, r3, r1
; CHECK-NEXT:    vmov.32 q0[0], r0
; CHECK-NEXT:    vmov.32 q0[1], r1
; CHECK-NEXT:    vmov.32 q0[2], lr
; CHECK-NEXT:    vmov.32 q0[3], r12
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = sub nsw <2 x i64> %src2, %src1
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <4 x float> @sub_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: sub_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vsub.f32 s11, s7, s3
; CHECK-MVE-NEXT:    vsub.f32 s10, s6, s2
; CHECK-MVE-NEXT:    vsub.f32 s9, s5, s1
; CHECK-MVE-NEXT:    vsub.f32 s8, s4, s0
; CHECK-MVE-NEXT:    vmov q0, q2
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: sub_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vsub.f32 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fsub nnan ninf nsz <4 x float> %src2, %src1
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @sub_float16_t(<8 x half> %src1, <8 x half> %src2) {
; CHECK-MVE-LABEL: sub_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vsub.f16 s8, s4, s0
; CHECK-MVE-NEXT:    vmovx.f16 s10, s4
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s0
; CHECK-MVE-NEXT:    vsub.f16 s8, s10, s8
; CHECK-MVE-NEXT:    vsub.f16 s12, s5, s1
; CHECK-MVE-NEXT:    vmov r1, s8
; CHECK-MVE-NEXT:    vmov.16 q2[0], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s1
; CHECK-MVE-NEXT:    vmovx.f16 s14, s5
; CHECK-MVE-NEXT:    vmov.16 q2[1], r1
; CHECK-MVE-NEXT:    vsub.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vmov.16 q2[2], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vsub.f16 s12, s6, s2
; CHECK-MVE-NEXT:    vmov.16 q2[3], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s2
; CHECK-MVE-NEXT:    vmovx.f16 s14, s6
; CHECK-MVE-NEXT:    vsub.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vmov.16 q2[4], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
; CHECK-MVE-NEXT:    vmovx.f16 s2, s7
; CHECK-MVE-NEXT:    vsub.f16 s12, s7, s3
; CHECK-MVE-NEXT:    vmov.16 q2[5], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vsub.f16 s0, s2, s0
; CHECK-MVE-NEXT:    vmov.16 q2[6], r0
; CHECK-MVE-NEXT:    vmov r0, s0
; CHECK-MVE-NEXT:    vmov.16 q2[7], r0
; CHECK-MVE-NEXT:    vmov q0, q2
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: sub_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vsub.f16 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fsub nnan ninf nsz <8 x half> %src2, %src1
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <2 x double> @sub_float64_t(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: sub_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q1
; CHECK-NEXT:    vmov q5, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    vmov r2, r3, d11
; CHECK-NEXT:    bl __aeabi_dsub
; CHECK-NEXT:    vmov lr, r12, d8
; CHECK-NEXT:    vmov r2, r3, d10
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, lr
; CHECK-NEXT:    mov r1, r12
; CHECK-NEXT:    bl __aeabi_dsub
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = fsub nnan ninf nsz <2 x double> %src2, %src1
  ret <2 x double> %0
}


define arm_aapcs_vfpcc <16 x i8> @mul_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: mul_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = mul <16 x i8> %src1, %src2
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @mul_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: mul_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = mul <8 x i16> %src1, %src2
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @mul_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: mul_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = mul nsw <4 x i32> %src1, %src2
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <2 x i64> @mul_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: mul_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov r2, s5
; CHECK-NEXT:    umull r12, r3, r1, r0
; CHECK-NEXT:    mla lr, r1, r2, r3
; CHECK-NEXT:    vmov r3, s6
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s7
; CHECK-NEXT:    umull r4, r5, r1, r3
; CHECK-NEXT:    mla r1, r1, r2, r5
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    mla r0, r2, r0, lr
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    vmov.32 q0[0], r12
; CHECK-NEXT:    vmov.32 q0[1], r0
; CHECK-NEXT:    vmov.32 q0[2], r4
; CHECK-NEXT:    mla r1, r2, r3, r1
; CHECK-NEXT:    vmov.32 q0[3], r1
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = mul nsw <2 x i64> %src1, %src2
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <8 x half> @mul_float16_t(<8 x half> %src1, <8 x half> %src2) {
; CHECK-MVE-LABEL: mul_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmul.f16 s8, s4, s0
; CHECK-MVE-NEXT:    vmovx.f16 s10, s4
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s0
; CHECK-MVE-NEXT:    vmul.f16 s8, s10, s8
; CHECK-MVE-NEXT:    vmul.f16 s12, s5, s1
; CHECK-MVE-NEXT:    vmov r1, s8
; CHECK-MVE-NEXT:    vmov.16 q2[0], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s1
; CHECK-MVE-NEXT:    vmovx.f16 s14, s5
; CHECK-MVE-NEXT:    vmov.16 q2[1], r1
; CHECK-MVE-NEXT:    vmul.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vmov.16 q2[2], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmul.f16 s12, s6, s2
; CHECK-MVE-NEXT:    vmov.16 q2[3], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s2
; CHECK-MVE-NEXT:    vmovx.f16 s14, s6
; CHECK-MVE-NEXT:    vmul.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vmov.16 q2[4], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
; CHECK-MVE-NEXT:    vmovx.f16 s2, s7
; CHECK-MVE-NEXT:    vmul.f16 s12, s7, s3
; CHECK-MVE-NEXT:    vmov.16 q2[5], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmul.f16 s0, s2, s0
; CHECK-MVE-NEXT:    vmov.16 q2[6], r0
; CHECK-MVE-NEXT:    vmov r0, s0
; CHECK-MVE-NEXT:    vmov.16 q2[7], r0
; CHECK-MVE-NEXT:    vmov q0, q2
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: mul_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vmul.f16 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fmul nnan ninf nsz <8 x half> %src2, %src1
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @mul_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: mul_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmul.f32 s11, s7, s3
; CHECK-MVE-NEXT:    vmul.f32 s10, s6, s2
; CHECK-MVE-NEXT:    vmul.f32 s9, s5, s1
; CHECK-MVE-NEXT:    vmul.f32 s8, s4, s0
; CHECK-MVE-NEXT:    vmov q0, q2
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: mul_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vmul.f32 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fmul nnan ninf nsz <4 x float> %src2, %src1
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <2 x double> @mul_float64_t(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: mul_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q1
; CHECK-NEXT:    vmov q5, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    vmov r2, r3, d11
; CHECK-NEXT:    bl __aeabi_dmul
; CHECK-NEXT:    vmov lr, r12, d8
; CHECK-NEXT:    vmov r2, r3, d10
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, lr
; CHECK-NEXT:    mov r1, r12
; CHECK-NEXT:    bl __aeabi_dmul
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = fmul nnan ninf nsz <2 x double> %src2, %src1
  ret <2 x double> %0
}