; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c | FileCheck %s --check-prefix=F16C

; Without F16C, half is legalized through float via the __gnu_f2h_ieee /
; __gnu_h2f_ieee soft-float libcalls; with +f16c it lowers to
; vcvtps2ph/vcvtph2ps register conversions (see CHECK lines below).
define <1 x half> @ir_fadd_v1f16(<1 x half> %arg0, <1 x half> %arg1) nounwind {
; X86-LABEL: ir_fadd_v1f16:
; X86: # %bb.0:
; X86-NEXT: subl $28, %esp
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movss %xmm0, (%esp)
; X86-NEXT: calll __gnu_f2h_ieee
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __gnu_h2f_ieee
; X86-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movss %xmm0, (%esp)
; X86-NEXT: calll __gnu_f2h_ieee
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: calll __gnu_h2f_ieee
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: addss {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT: flds {{[0-9]+}}(%esp)
; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
;
; X64-LABEL: ir_fadd_v1f16:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: callq __gnu_f2h_ieee
; X64-NEXT: movzwl %ax, %edi
; X64-NEXT: callq __gnu_h2f_ieee
; X64-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
; X64-NEXT: movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-NEXT: callq __gnu_f2h_ieee
; X64-NEXT: movzwl %ax, %edi
; X64-NEXT: callq __gnu_h2f_ieee
; X64-NEXT: addss (%rsp), %xmm0 # 4-byte Folded Reload
; X64-NEXT: popq %rax
; X64-NEXT: retq
;
; F16C-LABEL: ir_fadd_v1f16:
; F16C: # %bb.0:
; F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vaddss %xmm1, %xmm0, %xmm0
; F16C-NEXT: retq
  %retval = fadd <1 x half> %arg0, %arg1
  ret <1 x half> %retval
}

; <2 x half> is scalarized: each lane is round-tripped through the libcalls
; (or F16C conversions) independently and added with scalar addss/vaddss.
define <2 x half> @ir_fadd_v2f16(<2 x half> %arg0, <2 x half> %arg1) nounwind {
; X86-LABEL: ir_fadd_v2f16:
; X86: # %bb.0:
; X86-NEXT: subl $64, %esp
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movss %xmm0, (%esp)
; X86-NEXT: calll __gnu_f2h_ieee
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __gnu_h2f_ieee
; X86-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movss %xmm0, (%esp)
; X86-NEXT: calll __gnu_f2h_ieee
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __gnu_h2f_ieee
; X86-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movss %xmm0, (%esp)
; X86-NEXT: calll __gnu_f2h_ieee
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __gnu_h2f_ieee
; X86-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movss %xmm0, (%esp)
; X86-NEXT: calll __gnu_f2h_ieee
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: calll __gnu_h2f_ieee
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: addss {{[0-9]+}}(%esp), %xmm1
; X86-NEXT: addss {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; X86-NEXT: flds {{[0-9]+}}(%esp)
; X86-NEXT: flds {{[0-9]+}}(%esp)
; X86-NEXT: addl $64, %esp
; X86-NEXT: retl
;
; X64-LABEL: ir_fadd_v2f16:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: movss %xmm2, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT: movss %xmm1, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT: movaps %xmm3, %xmm0
; X64-NEXT: callq __gnu_f2h_ieee
; X64-NEXT: movzwl %ax, %edi
; X64-NEXT: callq __gnu_h2f_ieee
; X64-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT: movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-NEXT: callq __gnu_f2h_ieee
; X64-NEXT: movzwl %ax, %edi
; X64-NEXT: callq __gnu_h2f_ieee
; X64-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT: movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-NEXT: callq __gnu_f2h_ieee
; X64-NEXT: movzwl %ax, %edi
; X64-NEXT: callq __gnu_h2f_ieee
; X64-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT: movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-NEXT: callq __gnu_f2h_ieee
; X64-NEXT: movzwl %ax, %edi
; X64-NEXT: callq __gnu_h2f_ieee
; X64-NEXT: addss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload
; X64-NEXT: movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload
; X64-NEXT: # xmm1 = mem[0],zero,zero,zero
; X64-NEXT: addss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload
; X64-NEXT: addq $24, %rsp
; X64-NEXT: retq
;
; F16C-LABEL: ir_fadd_v2f16:
; F16C: # %bb.0:
; F16C-NEXT: vcvtps2ph $4, %xmm3, %xmm3
; F16C-NEXT: vcvtph2ps %xmm3, %xmm3
; F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; F16C-NEXT: vcvtps2ph $4, %xmm2, %xmm2
; F16C-NEXT: vcvtph2ps %xmm2, %xmm2
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vaddss %xmm2, %xmm0, %xmm0
; F16C-NEXT: vaddss %xmm3, %xmm1, %xmm1
; F16C-NEXT: retq
  %retval = fadd <2 x half> %arg0, %arg1
  ret <2 x half> %retval
}