; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck %s -check-prefix=CHECK-FP16 -check-prefix=CHECK-ALL
; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=CHECK-LIBCALL -check-prefix=CHECK-ALL

; Checks ARM lowering of operations on the IR 'half' type in two
; configurations: with VFP3 and the fp16 conversion instructions enabled
; (first run line), and with default features, where the conversions are
; expected to be library calls (second run line). Expectations shared by both
; configurations use the common "ALL" prefix.

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
target triple = "armv7-eabihf"

; Binary arithmetic. The expected sequence is: convert both operands to f32,
; perform the operation in f32, convert the result back to half.

; CHECK-FP16-LABEL: test_fadd:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vadd.f32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fadd:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: vadd.f32
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_fadd(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = fadd half %a, %b
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_fsub:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vsub.f32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fsub:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: vsub.f32
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_fsub(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = fsub half %a, %b
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_fmul:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vmul.f32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fmul:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: vmul.f32
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_fmul(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = fmul half %a, %b
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_fdiv:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vdiv.f32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fdiv:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: vdiv.f32
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_fdiv(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = fdiv half %a, %b
  store half %r, half* %p
  ret void
}

; frem is expected to become a call to fmodf in both configurations.
; CHECK-FP16-LABEL: test_frem:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl fmodf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_frem:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl fmodf
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_frem(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = frem half %a, %b
  store half %r, half* %p
  ret void
}

; A plain load/store pair is expected to move the 16-bit value with ldrh/strh
; and no conversion, identically in both configurations.
; CHECK-ALL-LABEL: test_load_store:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: ldrh r0, [r0]
; CHECK-ALL-NEXT: strh r0, [r1]
; CHECK-ALL-NEXT: bx lr
define void @test_load_store(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  store half %a, half* %q
  ret void
}

; Testing only successful compilation of function calls. In ARM ABI, half
; args and returns are handled as f32.
; External callee that takes and returns half; target of the call tests below.
declare half @test_callee(half %a, half %b) #0

; Arguments are forwarded in their incoming order, so the expected body is
; just the call wrapped in a push/pop of {r11, lr}.
; CHECK-ALL-LABEL: test_call:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: push {r11, lr}
; CHECK-ALL-NEXT: bl test_callee
; CHECK-ALL-NEXT: pop {r11, pc}
define half @test_call(half %a, half %b) #0 {
  %ret = call half @test_callee(half %a, half %b)
  ret half %ret
}

; Same call with the two arguments exchanged: the expected code swaps r0 and
; r1 through r2 before the call.
; CHECK-ALL-LABEL: test_call_flipped:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: push {r11, lr}
; CHECK-ALL-NEXT: mov r2, r0
; CHECK-ALL-NEXT: mov r0, r1
; CHECK-ALL-NEXT: mov r1, r2
; CHECK-ALL-NEXT: bl test_callee
; CHECK-ALL-NEXT: pop {r11, pc}
define half @test_call_flipped(half %a, half %b) #0 {
  %ret = call half @test_callee(half %b, half %a)
  ret half %ret
}

; Tail-call variant of the swapped call: the same register swap, followed by a
; plain branch to the callee instead of bl.
; CHECK-ALL-LABEL: test_tailcall_flipped:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: mov r2, r0
; CHECK-ALL-NEXT: mov r0, r1
; CHECK-ALL-NEXT: mov r1, r2
; CHECK-ALL-NEXT: b test_callee
define half @test_tailcall_flipped(half %a, half %b) #0 {
  %ret = tail call half @test_callee(half %b, half %a)
  ret half %ret
}

; The optimizer selects between the pointers %p and %q based on %c and loads
; only the chosen value, so no half<->float conversion is needed.
; CHECK-ALL-LABEL: test_select:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: cmp r2, #0
; CHECK-ALL-NEXT: movne r1, r0
; CHECK-ALL-NEXT: ldrh r1, [r1]
; CHECK-ALL-NEXT: strh r1, [r0]
; CHECK-ALL-NEXT: bx lr
define void @test_select(half* %p, half* %q, i1 zeroext %c) #0 {
  %from_p = load half, half* %p, align 2
  %from_q = load half, half* %q, align 2
  %picked = select i1 %c, half %from_p, half %from_q
  store half %picked, half* %p
  ret void
}

; Test only two variants of fcmp. These get translated to f32 vcmpe
; instructions anyway.
; Comparisons: both operands are expected to be converted to f32 and compared
; with vcmpe.f32; the result is then materialized with conditional moves.

; CHECK-FP16-LABEL: test_fcmp_une:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcmpe.f32
; CHECK-FP16: vmrs APSR_nzcv, fpscr
; CHECK-FP16: movwne
; CHECK-LIBCALL-LABEL: test_fcmp_une:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: vcmpe.f32
; CHECK-LIBCALL: movwne
define i1 @test_fcmp_une(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = fcmp une half %a, %b
  ret i1 %r
}

; CHECK-FP16-LABEL: test_fcmp_ueq:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcmpe.f32
; CHECK-FP16: vmrs APSR_nzcv, fpscr
; CHECK-FP16: movweq
; CHECK-FP16: movwvs
; CHECK-LIBCALL-LABEL: test_fcmp_ueq:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: vcmpe.f32
; CHECK-LIBCALL: movweq
define i1 @test_fcmp_ueq(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = fcmp ueq half %a, %b
  ret i1 %r
}

; Branch on a half comparison. The two stores in the 'then' and 'else' blocks
; are expected to show up as the predicated stores strmi / strpl.
; CHECK-FP16-LABEL: test_br_cc:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcmpe.f32
; CHECK-FP16: vmrs APSR_nzcv, fpscr
; CHECK-FP16: strmi
; CHECK-FP16: strpl
; CHECK-LIBCALL-LABEL: test_br_cc:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: vcmpe.f32
; CHECK-LIBCALL: strmi
; CHECK-LIBCALL: strpl
define void @test_br_cc(half* %p, half* %q, i32* %p1, i32* %p2) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %c = fcmp uge half %a, %b
  br i1 %c, label %then, label %else
then:
  store i32 0, i32* %p1
  ret void
else:
  store i32 0, i32* %p2
  ret void
}

; A half value carried around a loop through a phi. LOOP captures the label of
; the loop block so the back-edge branch can be matched against it. The
; character class accepts every decimal digit, so the match keeps working when
; the function or block index in the label contains a 0 (e.g. ".LBB10_1").
declare i1 @test_dummy(half* %p) #0
; CHECK-FP16-LABEL: test_phi:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: [[LOOP:.LBB[0-9_]+]]:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl test_dummy
; CHECK-FP16: bne [[LOOP]]
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_phi:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: [[LOOP:.LBB[0-9_]+]]:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl test_dummy
; CHECK-LIBCALL: bne [[LOOP]]
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_phi(half* %p) #0 {
entry:
  %a = load half, half* %p
  br label %loop
loop:
  %r = phi half [%a, %entry], [%b, %loop]
  %b = load half, half* %p
  %c = call i1 @test_dummy(half* %p)
  br i1 %c, label %loop, label %return
return:
  store half %r, half* %p
  ret void
}

; half -> integer conversions. The value is first extended to f32; the 32-bit
; cases then use vcvt, the 64-bit cases call the __aeabi_f2lz / __aeabi_f2ulz
; helpers.

; CHECK-FP16-LABEL: test_fptosi_i32:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvt.s32.f32
; CHECK-LIBCALL-LABEL: test_fptosi_i32:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: vcvt.s32.f32
define i32 @test_fptosi_i32(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = fptosi half %a to i32
  ret i32 %r
}

; CHECK-FP16-LABEL: test_fptosi_i64:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl __aeabi_f2lz
; CHECK-LIBCALL-LABEL: test_fptosi_i64:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __aeabi_f2lz
define i64 @test_fptosi_i64(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = fptosi half %a to i64
  ret i64 %r
}

; CHECK-FP16-LABEL: test_fptoui_i32:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvt.u32.f32
; CHECK-LIBCALL-LABEL: test_fptoui_i32:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: vcvt.u32.f32
define i32 @test_fptoui_i32(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = fptoui half %a to i32
  ret i32 %r
}

; CHECK-FP16-LABEL: test_fptoui_i64:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl __aeabi_f2ulz
; CHECK-LIBCALL-LABEL: test_fptoui_i64:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __aeabi_f2ulz
define i64 @test_fptoui_i64(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = fptoui half %a to i64
  ret i64 %r
}

; integer -> half conversions. The integer is first converted to f32 (vcvt for
; i32, __aeabi_l2f / __aeabi_ul2f for i64) and the f32 is then truncated to
; half.

; CHECK-FP16-LABEL: test_sitofp_i32:
; CHECK-FP16: vcvt.f32.s32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_sitofp_i32:
; CHECK-LIBCALL: vcvt.f32.s32
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_sitofp_i32(i32 %a, half* %p) #0 {
  %r = sitofp i32 %a to half
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_uitofp_i32:
; CHECK-FP16: vcvt.f32.u32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_uitofp_i32:
; CHECK-LIBCALL: vcvt.f32.u32
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_uitofp_i32(i32 %a, half* %p) #0 {
  %r = uitofp i32 %a to half
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_sitofp_i64:
; CHECK-FP16: bl __aeabi_l2f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_sitofp_i64:
; CHECK-LIBCALL: bl __aeabi_l2f
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_sitofp_i64(i64 %a, half* %p) #0 {
  %r = sitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_uitofp_i64:
; CHECK-FP16: bl __aeabi_ul2f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_uitofp_i64:
; CHECK-LIBCALL: bl __aeabi_ul2f
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_uitofp_i64(i64 %a, half* %p) #0 {
  %r = uitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; Truncation and extension between half and float/double. Truncating a double
; is expected to call __aeabi_d2h in both configurations.

; CHECK-FP16-LABEL: test_fptrunc_float:
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fptrunc_float:
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_fptrunc_float(float %f, half* %p) #0 {
  %a = fptrunc float %f to half
  store half %a, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_fptrunc_double:
; CHECK-FP16: bl __aeabi_d2h
; CHECK-LIBCALL-LABEL: test_fptrunc_double:
; CHECK-LIBCALL: bl __aeabi_d2h
define void @test_fptrunc_double(double %d, half* %p) #0 {
  %a = fptrunc double %d to half
  store half %a, half* %p
  ret void
}

; In the libcall configuration the conversion helper is expected to be reached
; with a plain branch (b) rather than bl.
; CHECK-FP16-LABEL: test_fpextend_float:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL-LABEL: test_fpextend_float:
; CHECK-LIBCALL: b __gnu_h2f_ieee
define float @test_fpextend_float(half* %p) {
  %a = load half, half* %p, align 2
  %r = fpext half %a to float
  ret float %r
}

; CHECK-FP16-LABEL: test_fpextend_double:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvt.f64.f32
; CHECK-LIBCALL-LABEL: test_fpextend_double:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: vcvt.f64.f32
define double @test_fpextend_double(half* %p) {
  %a = load half, half* %p, align 2
  %r = fpext half %a to double
  ret double %r
}

; Bitcasts between half and i16 are expected to need no conversion: only a
; 16-bit load or store remains.

; CHECK-ALL-LABEL: test_bitcast_halftoi16:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: ldrh r0, [r0]
; CHECK-ALL-NEXT: bx lr
define i16 @test_bitcast_halftoi16(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = bitcast half %a to i16
  ret i16 %r
}

; CHECK-ALL-LABEL: test_bitcast_i16tohalf:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: strh r0, [r1]
; CHECK-ALL-NEXT: bx lr
define void @test_bitcast_i16tohalf(i16 %a, half* %p) #0 {
  %r = bitcast i16 %a to half
  store half %r, half* %p
  ret void
}

; Intrinsics exercised on half by the tests that follow.
declare half @llvm.sqrt.f16(half %a) #0
declare half @llvm.powi.f16(half %a, i32 %b) #0
declare half @llvm.sin.f16(half %a) #0
declare half @llvm.cos.f16(half %a) #0
declare half @llvm.pow.f16(half %a, half %b) #0
declare half @llvm.exp.f16(half %a) #0
declare half @llvm.exp2.f16(half %a) #0
declare half @llvm.log.f16(half %a) #0
declare half @llvm.log10.f16(half %a) #0
declare half @llvm.log2.f16(half %a) #0
declare half @llvm.fma.f16(half %a, half %b, half %c) #0
declare half @llvm.fabs.f16(half %a) #0
declare half @llvm.minnum.f16(half %a, half %b) #0
declare half @llvm.maxnum.f16(half %a, half %b) #0
declare half @llvm.copysign.f16(half %a, half %b) #0
declare half @llvm.floor.f16(half %a) #0
declare half @llvm.ceil.f16(half %a) #0
declare half @llvm.trunc.f16(half %a) #0
declare half @llvm.rint.f16(half %a) #0
declare half @llvm.nearbyint.f16(half %a) #0
declare half @llvm.round.f16(half %a) #0
declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0

; Each intrinsic test expects the operands to be extended to f32, the f32
; instruction or f32 library routine to be used, and the result to be
; truncated back to half.

; CHECK-FP16-LABEL: test_sqrt:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vsqrt.f32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_sqrt:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: vsqrt.f32
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_sqrt(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.sqrt.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_fpowi:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl __powisf2
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fpowi:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __powisf2
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_fpowi(half* %p, i32 %b) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.powi.f16(half %a, i32 %b)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_sin:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl sinf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_sin:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl sinf
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_sin(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.sin.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_cos:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl cosf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_cos:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl cosf
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_cos(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.cos.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_pow:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl powf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_pow:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl powf
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_pow(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = call half @llvm.pow.f16(half %a, half %b)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_exp:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl expf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_exp:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl expf
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_exp(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.exp.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_exp2:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl exp2f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_exp2:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl exp2f
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_exp2(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.exp2.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_log:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl logf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_log:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl logf
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_log(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.log.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_log10:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl log10f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_log10:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl log10f
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_log10(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.log10.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_log2:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl log2f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_log2:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl log2f
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_log2(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.log2.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_fma:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl fmaf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fma:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl fmaf
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_fma(half* %p, half* %q, half* %r) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %c = load half, half* %r, align 2
  %v = call half @llvm.fma.f16(half %a, half %b, half %c)
  store half %v, half* %p
  ret void
}

; The two configurations differ here: vabs.f32 is expected with fp16 enabled,
; a bfc instruction in the libcall configuration.
; CHECK-FP16-LABEL: test_fabs:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vabs.f32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fabs:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bfc
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_fabs(half* %p) {
  %a = load half, half* %p, align 2
  %r = call half @llvm.fabs.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_minnum:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl fminf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_minnum:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl fminf
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_minnum(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = call half @llvm.minnum.f16(half %a, half %b)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_maxnum:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl fmaxf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_maxnum:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl fmaxf
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_maxnum(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = call half @llvm.maxnum.f16(half %a, half %b)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_copysign:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vbsl
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_copysign:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: vbsl
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_copysign(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = call half @llvm.copysign.f16(half %a, half %b)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_floor:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl floorf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_floor:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl floorf
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_floor(half* %p) {
  %a = load half, half* %p, align 2
  %r = call half @llvm.floor.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_ceil:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl ceilf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_ceil:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl ceilf
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_ceil(half* %p) {
  %a = load half, half* %p, align 2
  %r = call half @llvm.ceil.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_trunc:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl truncf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_trunc:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl truncf
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_trunc(half* %p) {
  %a = load half, half* %p, align 2
  %r = call half @llvm.trunc.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_rint:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl rintf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_rint:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl rintf
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_rint(half* %p) {
  %a = load half, half* %p, align 2
  %r = call half @llvm.rint.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_nearbyint:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl nearbyintf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_nearbyint:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl nearbyintf
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_nearbyint(half* %p) {
  %a = load half, half* %p, align 2
  %r = call half @llvm.nearbyint.f16(half %a)
  store half %r, half* %p
  ret void
}
; llvm.round on half: extend to f32, call roundf, truncate the result.
; CHECK-FP16-LABEL: test_round:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl roundf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_round:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl roundf
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_round(half* %p) {
  %in = load half, half* %p, align 2
  %rounded = call half @llvm.round.f16(half %in)
  store half %rounded, half* %p
  ret void
}

; llvm.fmuladd on half: all three operands are extended to f32 and a single
; vmla.f32 is expected, followed by truncation of the result.
; CHECK-FP16-LABEL: test_fmuladd:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vmla.f32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fmuladd:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: bl __gnu_h2f_ieee
; CHECK-LIBCALL: vmla.f32
; CHECK-LIBCALL: bl __gnu_f2h_ieee
define void @test_fmuladd(half* %p, half* %q, half* %r) #0 {
  %x = load half, half* %p, align 2
  %y = load half, half* %q, align 2
  %z = load half, half* %r, align 2
  %res = call half @llvm.fmuladd.f16(half %x, half %y, half %z)
  store half %res, half* %p
  ret void
}

; f16 vectors are not legal in the backend. Vector elements are not assigned
; to the register, but are stored in the stack instead. Hence insertelement
; and extractelement have these extra loads and stores.
; Insert a half into a <4 x half> at a variable index. The expected code
; reserves 8 bytes of stack and moves the elements with halfword loads and
; stores.
; CHECK-ALL-LABEL: test_insertelement:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: sub sp, sp, #8
; CHECK-ALL-NEXT: ldrh
; CHECK-ALL-NEXT: strh
; CHECK-ALL-NEXT: ldrh
; CHECK-ALL-NEXT: strh
; CHECK-ALL-NEXT: ldrh
; CHECK-ALL-NEXT: strh
; CHECK-ALL-NEXT: ldrh
; CHECK-ALL-NEXT: strh
; CHECK-ALL-NEXT: mov
; CHECK-ALL-NEXT: ldrh
; CHECK-ALL-NEXT: add
; CHECK-ALL-NEXT: strh
; CHECK-ALL-NEXT: ldrh
; CHECK-ALL-NEXT: strh
; CHECK-ALL-NEXT: ldrh
; CHECK-ALL-NEXT: strh
; CHECK-ALL-NEXT: ldrh
; CHECK-ALL-NEXT: strh
; CHECK-ALL-NEXT: ldrh
; CHECK-ALL-NEXT: strh
; CHECK-ALL-NEXT: add sp, sp, #8
; CHECK-ALL-NEXT: bx lr
define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 {
  %elt = load half, half* %p, align 2
  %vec = load <4 x half>, <4 x half>* %q, align 8
  %updated = insertelement <4 x half> %vec, half %elt, i32 %i
  store <4 x half> %updated, <4 x half>* %q
  ret void
}

; Extract a half from a <4 x half> at a variable index, again through an
; 8-byte stack area.
; CHECK-ALL-LABEL: test_extractelement:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: sub sp, sp, #8
; CHECK-ALL-NEXT: ldrh
; CHECK-ALL-NEXT: ldrh
; CHECK-ALL-NEXT: orr
; CHECK-ALL-NEXT: str
; CHECK-ALL-NEXT: ldrh
; CHECK-ALL-NEXT: ldrh
; CHECK-ALL-NEXT: orr
; CHECK-ALL-NEXT: str
; CHECK-ALL-NEXT: mov
; CHECK-ALL-NEXT: add
; CHECK-ALL-NEXT: ldrh
; CHECK-ALL-NEXT: strh
; CHECK-ALL-NEXT: add sp, sp, #8
; CHECK-ALL-NEXT: bx lr
define void @test_extractelement(half* %p, <4 x half>* %q, i32 %i) #0 {
  %vec = load <4 x half>, <4 x half>* %q, align 8
  %elt = extractelement <4 x half> %vec, i32 %i
  store half %elt, half* %p
  ret void
}

; Struct operations on an aggregate with a half field.

%struct.dummy = type { i32, half }

; Replace the half field of a loaded struct and store the struct back.
; CHECK-ALL-LABEL: test_insertvalue:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: ldr
; CHECK-ALL-NEXT: ldrh
; CHECK-ALL-NEXT: strh
; CHECK-ALL-NEXT: str
; CHECK-ALL-NEXT: bx lr
define void @test_insertvalue(%struct.dummy* %p, half* %q) {
  %agg = load %struct.dummy, %struct.dummy* %p
  %h = load half, half* %q
  %merged = insertvalue %struct.dummy %agg, half %h, 1
  store %struct.dummy %merged, %struct.dummy* %p
  ret void
}

; Read the half field of a struct and store it: one halfword load and one
; halfword store are expected.
; CHECK-ALL-LABEL: test_extractvalue:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: ldrh
; CHECK-ALL-NEXT: strh
; CHECK-ALL-NEXT: bx lr
define void @test_extractvalue(%struct.dummy* %p, half* %q) {
  %agg = load %struct.dummy, %struct.dummy* %p
  %h = extractvalue %struct.dummy %agg, 1
  store half %h, half* %q
  ret void
}

; Returning the struct by value is expected to involve a half -> f32
; conversion of the half field.
; CHECK-FP16-LABEL: test_struct_return:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL-LABEL: test_struct_return:
; CHECK-LIBCALL: bl __gnu_h2f_ieee
define %struct.dummy @test_struct_return(%struct.dummy* %p) {
  %agg = load %struct.dummy, %struct.dummy* %p
  ret %struct.dummy %agg
}

; Taking the struct by value and returning its half field is expected to
; reduce to a single register move.
; CHECK-ALL-LABEL: test_struct_arg:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: mov r0, r1
; CHECK-ALL-NEXT: bx lr
define half @test_struct_arg(%struct.dummy %p) {
  %h = extractvalue %struct.dummy %p, 1
  ret half %h
}

attributes #0 = { nounwind }