; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s --check-prefix=X32-NOF16C
; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s --check-prefix=X32-F16C
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s --check-prefix=X64-NOF16C
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=f16c | FileCheck %s --check-prefix=X64-F16C

@a = global half 0xH0000, align 2
@b = global half 0xH0000, align 2
@c = global half 0xH0000, align 2

define float @half_to_float() strictfp {
; X32-NOF16C-LABEL: half_to_float:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    movzwl _a, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: half_to_float:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    subl $12, %esp
; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
; X32-F16C-NEXT:    movzwl _a, %eax
; X32-F16C-NEXT:    movl %eax, (%esp)
; X32-F16C-NEXT:    calll ___extendhfsf2
; X32-F16C-NEXT:    addl $12, %esp
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: half_to_float:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    movzwl {{.*}}(%rip), %edi
; X64-NOF16C-NEXT:    callq ___extendhfsf2
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: half_to_float:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    movzwl {{.*}}(%rip), %eax
; X64-F16C-NEXT:    vmovd %eax, %xmm0
; X64-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X64-F16C-NEXT:    retq
  %1 = load half, half* @a, align 2
  %2 = tail call float @llvm.experimental.constrained.fpext.f32.f16(half %1, metadata !"fpexcept.strict") #0
  ret float %2
}

define double @half_to_double() strictfp {
; X32-NOF16C-LABEL: half_to_double:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    movzwl _a, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: half_to_double:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    subl $12, %esp
; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
; X32-F16C-NEXT:    movzwl _a, %eax
; X32-F16C-NEXT:    movl %eax, (%esp)
; X32-F16C-NEXT:    calll ___extendhfsf2
; X32-F16C-NEXT:    addl $12, %esp
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: half_to_double:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    movzwl {{.*}}(%rip), %edi
; X64-NOF16C-NEXT:    callq ___extendhfsf2
; X64-NOF16C-NEXT:    cvtss2sd %xmm0, %xmm0
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: half_to_double:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    movzwl {{.*}}(%rip), %eax
; X64-F16C-NEXT:    vmovd %eax, %xmm0
; X64-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X64-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; X64-F16C-NEXT:    retq
  %1 = load half, half* @a, align 2
  %2 = tail call double @llvm.experimental.constrained.fpext.f64.f16(half %1, metadata !"fpexcept.strict") #0
  ret double %2
}

define x86_fp80 @half_to_fp80() strictfp {
; X32-NOF16C-LABEL: half_to_fp80:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    movzwl _a, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: half_to_fp80:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    subl $12, %esp
; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
; X32-F16C-NEXT:    movzwl _a, %eax
; X32-F16C-NEXT:    movl %eax, (%esp)
; X32-F16C-NEXT:    calll ___extendhfsf2
; X32-F16C-NEXT:    addl $12, %esp
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: half_to_fp80:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    movzwl {{.*}}(%rip), %edi
; X64-NOF16C-NEXT:    callq ___extendhfsf2
; X64-NOF16C-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp)
; X64-NOF16C-NEXT:    flds {{[0-9]+}}(%rsp)
; X64-NOF16C-NEXT:    wait
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: half_to_fp80:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    movzwl {{.*}}(%rip), %eax
; X64-F16C-NEXT:    vmovd %eax, %xmm0
; X64-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X64-F16C-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
; X64-F16C-NEXT:    flds -{{[0-9]+}}(%rsp)
; X64-F16C-NEXT:    wait
; X64-F16C-NEXT:    retq
  %1 = load half, half* @a, align 2
  %2 = tail call x86_fp80 @llvm.experimental.constrained.fpext.f80.f16(half %1, metadata !"fpexcept.strict") #0
  ret x86_fp80 %2
}

define void @float_to_half(float %0) strictfp {
; X32-NOF16C-LABEL: float_to_half:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    flds {{[0-9]+}}(%esp)
; X32-NOF16C-NEXT:    fstps (%esp)
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    calll ___truncsfhf2
; X32-NOF16C-NEXT:    movw %ax, _a
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: float_to_half:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    subl $12, %esp
; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
; X32-F16C-NEXT:    flds {{[0-9]+}}(%esp)
; X32-F16C-NEXT:    fstps (%esp)
; X32-F16C-NEXT:    wait
; X32-F16C-NEXT:    calll ___truncsfhf2
; X32-F16C-NEXT:    movw %ax, _a
; X32-F16C-NEXT:    addl $12, %esp
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: float_to_half:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    callq ___truncsfhf2
; X64-NOF16C-NEXT:    movw %ax, {{.*}}(%rip)
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: float_to_half:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; X64-F16C-NEXT:    vpextrw $0, %xmm0, {{.*}}(%rip)
; X64-F16C-NEXT:    retq
  %2 = tail call half @llvm.experimental.constrained.fptrunc.f16.f32(float %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %2, half* @a, align 2
  ret void
}

define void @double_to_half(double %0) strictfp {
; X32-NOF16C-LABEL: double_to_half:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    fldl {{[0-9]+}}(%esp)
; X32-NOF16C-NEXT:    fstpl (%esp)
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    calll ___truncdfhf2
; X32-NOF16C-NEXT:    movw %ax, _a
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: double_to_half:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    subl $12, %esp
; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
; X32-F16C-NEXT:    fldl {{[0-9]+}}(%esp)
; X32-F16C-NEXT:    fstpl (%esp)
; X32-F16C-NEXT:    wait
; X32-F16C-NEXT:    calll ___truncdfhf2
; X32-F16C-NEXT:    movw %ax, _a
; X32-F16C-NEXT:    addl $12, %esp
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: double_to_half:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    callq ___truncdfhf2
; X64-NOF16C-NEXT:    movw %ax, {{.*}}(%rip)
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: double_to_half:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    pushq %rax
; X64-F16C-NEXT:    .cfi_def_cfa_offset 16
; X64-F16C-NEXT:    callq ___truncdfhf2
; X64-F16C-NEXT:    movw %ax, {{.*}}(%rip)
; X64-F16C-NEXT:    popq %rax
; X64-F16C-NEXT:    retq
  %2 = tail call half @llvm.experimental.constrained.fptrunc.f16.f64(double %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %2, half* @a, align 2
  ret void
}

define void @fp80_to_half(x86_fp80 %0) strictfp {
; X32-NOF16C-LABEL: fp80_to_half:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $28, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 32
; X32-NOF16C-NEXT:    fldt {{[0-9]+}}(%esp)
; X32-NOF16C-NEXT:    fstpt (%esp)
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    calll ___truncxfhf2
; X32-NOF16C-NEXT:    movw %ax, _a
; X32-NOF16C-NEXT:    addl $28, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: fp80_to_half:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    subl $28, %esp
; X32-F16C-NEXT:    .cfi_def_cfa_offset 32
; X32-F16C-NEXT:    fldt {{[0-9]+}}(%esp)
; X32-F16C-NEXT:    fstpt (%esp)
; X32-F16C-NEXT:    wait
; X32-F16C-NEXT:    calll ___truncxfhf2
; X32-F16C-NEXT:    movw %ax, _a
; X32-F16C-NEXT:    addl $28, %esp
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: fp80_to_half:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    subq $24, %rsp
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 32
; X64-NOF16C-NEXT:    fldt {{[0-9]+}}(%rsp)
; X64-NOF16C-NEXT:    fstpt (%rsp)
; X64-NOF16C-NEXT:    wait
; X64-NOF16C-NEXT:    callq ___truncxfhf2
; X64-NOF16C-NEXT:    movw %ax, {{.*}}(%rip)
; X64-NOF16C-NEXT:    addq $24, %rsp
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: fp80_to_half:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    subq $24, %rsp
; X64-F16C-NEXT:    .cfi_def_cfa_offset 32
; X64-F16C-NEXT:    fldt {{[0-9]+}}(%rsp)
; X64-F16C-NEXT:    fstpt (%rsp)
; X64-F16C-NEXT:    wait
; X64-F16C-NEXT:    callq ___truncxfhf2
; X64-F16C-NEXT:    movw %ax, {{.*}}(%rip)
; X64-F16C-NEXT:    addq $24, %rsp
; X64-F16C-NEXT:    retq
  %2 = tail call half @llvm.experimental.constrained.fptrunc.f16.f80(x86_fp80 %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %2, half* @a, align 2
  ret void
}

define void @add() strictfp {
; X32-NOF16C-LABEL: add:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    movzwl _a, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    movzwl _b, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Reload
; X32-NOF16C-NEXT:    faddp %st, %st(1)
; X32-NOF16C-NEXT:    fstps (%esp)
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    calll ___truncsfhf2
; X32-NOF16C-NEXT:    movw %ax, _c
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: add:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    subl $12, %esp
; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
; X32-F16C-NEXT:    movzwl _a, %eax
; X32-F16C-NEXT:    movl %eax, (%esp)
; X32-F16C-NEXT:    calll ___extendhfsf2
; X32-F16C-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X32-F16C-NEXT:    wait
; X32-F16C-NEXT:    movzwl _b, %eax
; X32-F16C-NEXT:    movl %eax, (%esp)
; X32-F16C-NEXT:    calll ___extendhfsf2
; X32-F16C-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Reload
; X32-F16C-NEXT:    faddp %st, %st(1)
; X32-F16C-NEXT:    fstps (%esp)
; X32-F16C-NEXT:    wait
; X32-F16C-NEXT:    calll ___truncsfhf2
; X32-F16C-NEXT:    movw %ax, _c
; X32-F16C-NEXT:    addl $12, %esp
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: add:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    movzwl {{.*}}(%rip), %edi
; X64-NOF16C-NEXT:    callq ___extendhfsf2
; X64-NOF16C-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; X64-NOF16C-NEXT:    movzwl {{.*}}(%rip), %edi
; X64-NOF16C-NEXT:    callq ___extendhfsf2
; X64-NOF16C-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 4-byte Folded Reload
; X64-NOF16C-NEXT:    callq ___truncsfhf2
; X64-NOF16C-NEXT:    movw %ax, {{.*}}(%rip)
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: add:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    movzwl {{.*}}(%rip), %eax
; X64-F16C-NEXT:    vmovd %eax, %xmm0
; X64-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X64-F16C-NEXT:    movzwl {{.*}}(%rip), %eax
; X64-F16C-NEXT:    vmovd %eax, %xmm1
; X64-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; X64-F16C-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; X64-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; X64-F16C-NEXT:    vpextrw $0, %xmm0, {{.*}}(%rip)
; X64-F16C-NEXT:    retq
  %1 = load half, half* @a, align 2
  %2 = tail call float @llvm.experimental.constrained.fpext.f32.f16(half %1, metadata !"fpexcept.strict") #0
  %3 = load half, half* @b, align 2
  %4 = tail call float @llvm.experimental.constrained.fpext.f32.f16(half %3, metadata !"fpexcept.strict") #0
  %5 = tail call float @llvm.experimental.constrained.fadd.f32(float %2, float %4, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %6 = tail call half @llvm.experimental.constrained.fptrunc.f16.f32(float %5, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %6, half* @c, align 2
  ret void
}

declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata)
declare x86_fp80 @llvm.experimental.constrained.fpext.f80.f16(half, metadata)
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f80(x86_fp80, metadata, metadata)

attributes #0 = { strictfp }