; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-MVE
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-MVEFP

; Code generation test for the vector rounding intrinsics (ceil, trunc, rint,
; nearbyint, floor, round) on <4 x float>, <8 x half> and <2 x double>.
;
; Two configurations are exercised:
;   * -mattr=+mve,+fullfp16 : the f32/f16 checks expect one scalar vrint per
;     lane, with the result rebuilt in q1 and copied back to q0.
;   * -mattr=+mve.fp        : the f32/f16 checks expect a single q-register
;     vrint (except for nearbyint, see below).
; The <2 x double> variants are expected to become two library calls in both
; configurations, so they share the common prefix.

; ceil, v4f32: vrintp per lane without mve.fp, one q-register vrintp with it.
define arm_aapcs_vfpcc <4 x float> @fceil_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: fceil_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vrintp.f32 s7, s3
; CHECK-MVE-NEXT:    vrintp.f32 s6, s2
; CHECK-MVE-NEXT:    vrintp.f32 s5, s1
; CHECK-MVE-NEXT:    vrintp.f32 s4, s0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: fceil_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrintp.f32 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; ceil, v8f16: without mve.fp each half lane is extracted (vmovx/vmov), rounded
; with scalar vrintp.f16 and reinserted with vmov.16; with mve.fp one vrintp.
define arm_aapcs_vfpcc <8 x half> @fceil_float16_t(<8 x half> %src) {
; CHECK-MVE-LABEL: fceil_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmovx.f16 s4, s0
; CHECK-MVE-NEXT:    vrintp.f16 s8, s1
; CHECK-MVE-NEXT:    vrintp.f16 s4, s4
; CHECK-MVE-NEXT:    vmov r0, s4
; CHECK-MVE-NEXT:    vrintp.f16 s4, s0
; CHECK-MVE-NEXT:    vmov r1, s4
; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
; CHECK-MVE-NEXT:    vmov.16 q1[0], r1
; CHECK-MVE-NEXT:    vrintp.f16 s0, s0
; CHECK-MVE-NEXT:    vmov.16 q1[1], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s1
; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
; CHECK-MVE-NEXT:    vrintp.f16 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vrintp.f16 s8, s2
; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s2
; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
; CHECK-MVE-NEXT:    vrintp.f16 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vrintp.f16 s8, s3
; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
; CHECK-MVE-NEXT:    vmov r0, s0
; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: fceil_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrintp.f16 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <8 x half> @llvm.ceil.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; ceil, v2f64: expected to be two calls to ceil, one per element, with the
; input kept in callee-saved q4 (d8/d9) across the calls.
define arm_aapcs_vfpcc <2 x double> @fceil_float64_t(<2 x double> %src) {
; CHECK-LABEL: fceil_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl ceil
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl ceil
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.ceil.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; trunc, v4f32: vrintz per lane without mve.fp, one q-register vrintz with it.
define arm_aapcs_vfpcc <4 x float> @ftrunc_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: ftrunc_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vrintz.f32 s7, s3
; CHECK-MVE-NEXT:    vrintz.f32 s6, s2
; CHECK-MVE-NEXT:    vrintz.f32 s5, s1
; CHECK-MVE-NEXT:    vrintz.f32 s4, s0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: ftrunc_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrintz.f32 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <4 x float> @llvm.trunc.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; trunc, v8f16: per-lane scalar vrintz.f16 without mve.fp, one vrintz with it.
define arm_aapcs_vfpcc <8 x half> @ftrunc_float16_t(<8 x half> %src) {
; CHECK-MVE-LABEL: ftrunc_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmovx.f16 s4, s0
; CHECK-MVE-NEXT:    vrintz.f16 s8, s1
; CHECK-MVE-NEXT:    vrintz.f16 s4, s4
; CHECK-MVE-NEXT:    vmov r0, s4
; CHECK-MVE-NEXT:    vrintz.f16 s4, s0
; CHECK-MVE-NEXT:    vmov r1, s4
; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
; CHECK-MVE-NEXT:    vmov.16 q1[0], r1
; CHECK-MVE-NEXT:    vrintz.f16 s0, s0
; CHECK-MVE-NEXT:    vmov.16 q1[1], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s1
; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
; CHECK-MVE-NEXT:    vrintz.f16 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vrintz.f16 s8, s2
; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s2
; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
; CHECK-MVE-NEXT:    vrintz.f16 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vrintz.f16 s8, s3
; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
; CHECK-MVE-NEXT:    vmov r0, s0
; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: ftrunc_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrintz.f16 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <8 x half> @llvm.trunc.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; trunc, v2f64: expected to be two calls to trunc, one per element.
define arm_aapcs_vfpcc <2 x double> @ftrunc_float64_t(<2 x double> %src) {
; CHECK-LABEL: ftrunc_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl trunc
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl trunc
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.trunc.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; rint, v4f32: vrintx per lane without mve.fp, one q-register vrintx with it.
define arm_aapcs_vfpcc <4 x float> @frint_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: frint_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vrintx.f32 s7, s3
; CHECK-MVE-NEXT:    vrintx.f32 s6, s2
; CHECK-MVE-NEXT:    vrintx.f32 s5, s1
; CHECK-MVE-NEXT:    vrintx.f32 s4, s0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: frint_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrintx.f32 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <4 x float> @llvm.rint.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; rint, v8f16: per-lane scalar vrintx.f16 without mve.fp, one vrintx with it.
define arm_aapcs_vfpcc <8 x half> @frint_float16_t(<8 x half> %src) {
; CHECK-MVE-LABEL: frint_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmovx.f16 s4, s0
; CHECK-MVE-NEXT:    vrintx.f16 s8, s1
; CHECK-MVE-NEXT:    vrintx.f16 s4, s4
; CHECK-MVE-NEXT:    vmov r0, s4
; CHECK-MVE-NEXT:    vrintx.f16 s4, s0
; CHECK-MVE-NEXT:    vmov r1, s4
; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
; CHECK-MVE-NEXT:    vmov.16 q1[0], r1
; CHECK-MVE-NEXT:    vrintx.f16 s0, s0
; CHECK-MVE-NEXT:    vmov.16 q1[1], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s1
; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
; CHECK-MVE-NEXT:    vrintx.f16 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vrintx.f16 s8, s2
; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s2
; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
; CHECK-MVE-NEXT:    vrintx.f16 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vrintx.f16 s8, s3
; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
; CHECK-MVE-NEXT:    vmov r0, s0
; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: frint_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrintx.f16 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <8 x half> @llvm.rint.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; rint, v2f64: expected to be two calls to rint, one per element.
define arm_aapcs_vfpcc <2 x double> @frint_float64_t(<2 x double> %src) {
; CHECK-LABEL: frint_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl rint
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl rint
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.rint.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; nearbyint, v4f32: both configurations share one set of checks and expect
; scalar vrintr per lane; no q-register form is checked for this intrinsic.
define arm_aapcs_vfpcc <4 x float> @fnearbyint_float32_t(<4 x float> %src) {
; CHECK-LABEL: fnearbyint_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintr.f32 s7, s3
; CHECK-NEXT:    vrintr.f32 s6, s2
; CHECK-NEXT:    vrintr.f32 s5, s1
; CHECK-NEXT:    vrintr.f32 s4, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call fast <4 x float> @llvm.nearbyint.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; nearbyint, v8f16: both configurations expect the per-lane extract / scalar
; vrintr.f16 / reinsert sequence.
define arm_aapcs_vfpcc <8 x half> @fnearbyint_float16_t(<8 x half> %src) {
; CHECK-LABEL: fnearbyint_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovx.f16 s4, s0
; CHECK-NEXT:    vrintr.f16 s8, s1
; CHECK-NEXT:    vrintr.f16 s4, s4
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vrintr.f16 s4, s0
; CHECK-NEXT:    vmov r1, s4
; CHECK-NEXT:    vmovx.f16 s0, s3
; CHECK-NEXT:    vmov.16 q1[0], r1
; CHECK-NEXT:    vrintr.f16 s0, s0
; CHECK-NEXT:    vmov.16 q1[1], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmovx.f16 s8, s1
; CHECK-NEXT:    vmov.16 q1[2], r0
; CHECK-NEXT:    vrintr.f16 s8, s8
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vrintr.f16 s8, s2
; CHECK-NEXT:    vmov.16 q1[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmovx.f16 s8, s2
; CHECK-NEXT:    vmov.16 q1[4], r0
; CHECK-NEXT:    vrintr.f16 s8, s8
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vrintr.f16 s8, s3
; CHECK-NEXT:    vmov.16 q1[5], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vmov.16 q1[6], r0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vmov.16 q1[7], r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call fast <8 x half> @llvm.nearbyint.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; nearbyint, v2f64: expected to be two calls to nearbyint, one per element.
define arm_aapcs_vfpcc <2 x double> @fnearbyint_float64_t(<2 x double> %src) {
; CHECK-LABEL: fnearbyint_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl nearbyint
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl nearbyint
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.nearbyint.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; floor, v4f32: vrintm per lane without mve.fp, one q-register vrintm with it.
define arm_aapcs_vfpcc <4 x float> @ffloor_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: ffloor_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vrintm.f32 s7, s3
; CHECK-MVE-NEXT:    vrintm.f32 s6, s2
; CHECK-MVE-NEXT:    vrintm.f32 s5, s1
; CHECK-MVE-NEXT:    vrintm.f32 s4, s0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: ffloor_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrintm.f32 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <4 x float> @llvm.floor.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; floor, v8f16: per-lane scalar vrintm.f16 without mve.fp, one vrintm with it.
define arm_aapcs_vfpcc <8 x half> @ffloor_float16_t(<8 x half> %src) {
; CHECK-MVE-LABEL: ffloor_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmovx.f16 s4, s0
; CHECK-MVE-NEXT:    vrintm.f16 s8, s1
; CHECK-MVE-NEXT:    vrintm.f16 s4, s4
; CHECK-MVE-NEXT:    vmov r0, s4
; CHECK-MVE-NEXT:    vrintm.f16 s4, s0
; CHECK-MVE-NEXT:    vmov r1, s4
; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
; CHECK-MVE-NEXT:    vmov.16 q1[0], r1
; CHECK-MVE-NEXT:    vrintm.f16 s0, s0
; CHECK-MVE-NEXT:    vmov.16 q1[1], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s1
; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
; CHECK-MVE-NEXT:    vrintm.f16 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vrintm.f16 s8, s2
; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s2
; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
; CHECK-MVE-NEXT:    vrintm.f16 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vrintm.f16 s8, s3
; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
; CHECK-MVE-NEXT:    vmov r0, s0
; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: ffloor_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrintm.f16 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <8 x half> @llvm.floor.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; floor, v2f64: expected to be two calls to floor, one per element.
define arm_aapcs_vfpcc <2 x double> @ffloor_float64_t(<2 x double> %src) {
; CHECK-LABEL: ffloor_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl floor
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl floor
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; round, v4f32: vrinta per lane without mve.fp, one q-register vrinta with it.
define arm_aapcs_vfpcc <4 x float> @fround_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: fround_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vrinta.f32 s7, s3
; CHECK-MVE-NEXT:    vrinta.f32 s6, s2
; CHECK-MVE-NEXT:    vrinta.f32 s5, s1
; CHECK-MVE-NEXT:    vrinta.f32 s4, s0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: fround_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrinta.f32 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <4 x float> @llvm.round.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; round, v8f16: per-lane scalar vrinta.f16 without mve.fp, one vrinta with it.
define arm_aapcs_vfpcc <8 x half> @fround_float16_t(<8 x half> %src) {
; CHECK-MVE-LABEL: fround_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmovx.f16 s4, s0
; CHECK-MVE-NEXT:    vrinta.f16 s8, s1
; CHECK-MVE-NEXT:    vrinta.f16 s4, s4
; CHECK-MVE-NEXT:    vmov r0, s4
; CHECK-MVE-NEXT:    vrinta.f16 s4, s0
; CHECK-MVE-NEXT:    vmov r1, s4
; CHECK-MVE-NEXT:    vmovx.f16 s0, s3
; CHECK-MVE-NEXT:    vmov.16 q1[0], r1
; CHECK-MVE-NEXT:    vrinta.f16 s0, s0
; CHECK-MVE-NEXT:    vmov.16 q1[1], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s1
; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
; CHECK-MVE-NEXT:    vrinta.f16 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vrinta.f16 s8, s2
; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s2
; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
; CHECK-MVE-NEXT:    vrinta.f16 s8, s8
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vrinta.f16 s8, s3
; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
; CHECK-MVE-NEXT:    vmov r0, s0
; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: fround_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrinta.f16 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <8 x half> @llvm.round.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; round, v2f64: expected to be two calls to round, one per element.
define arm_aapcs_vfpcc <2 x double> @fround_float64_t(<2 x double> %src) {
; CHECK-LABEL: fround_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl round
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl round
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.round.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; Intrinsic declarations, grouped by element type (f32, f16, f64).
declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
declare <4 x float> @llvm.rint.v4f32(<4 x float>)
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
declare <4 x float> @llvm.floor.v4f32(<4 x float>)
declare <4 x float> @llvm.round.v4f32(<4 x float>)
declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
declare <8 x half> @llvm.rint.v8f16(<8 x half>)
declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
declare <8 x half> @llvm.floor.v8f16(<8 x half>)
declare <8 x half> @llvm.round.v8f16(<8 x half>)
declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
declare <2 x double> @llvm.rint.v2f64(<2 x double>)
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
declare <2 x double> @llvm.floor.v2f64(<2 x double>)
declare <2 x double> @llvm.round.v2f64(<2 x double>)