1# exp(x) = 2^hi + 2^hi (2^lo - 1) 2# where hi+lo = log2e*x with 128bit precision 3# exact log2e*x calculation depends on nearest rounding mode 4# using the exact multiplication method of Dekker and Veltkamp 5 6.global expl 7.type expl,@function 8expl: 9 fldt 8(%rsp) 10 11 # interesting case: 0x1p-32 <= |x| < 16384 12 # check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13] 13 mov 16(%rsp), %ax 14 or $0x8000, %ax 15 sub $0xbfdf, %ax 16 cmp $45, %ax 17 jbe 2f 18 test %ax, %ax 19 fld1 20 js 1f 21 # if |x|>=0x1p14 or nan return 2^trunc(x) 22 fscale 23 fstp %st(1) 24 ret 25 # if |x|<0x1p-32 return 1+x 261: faddp 27 ret 28 29 # should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc 30 # it will be wrong on non-nearest rounding mode 312: fldl2e 32 subq $48, %rsp 33 # hi = log2e_hi*x 34 # 2^hi = exp2l(hi) 35 fmul %st(1),%st 36 fld %st(0) 37 fstpt (%rsp) 38 fstpt 16(%rsp) 39 fstpt 32(%rsp) 40 call exp2l@PLT 41 # if 2^hi == inf return 2^hi 42 fld %st(0) 43 fstpt (%rsp) 44 cmpw $0x7fff, 8(%rsp) 45 je 1f 46 fldt 32(%rsp) 47 fldt 16(%rsp) 48 # fpu stack: 2^hi x hi 49 # exact mult: x*log2e 50 fld %st(1) 51 # c = 0x1p32+1 52 movq $0x41f0000000100000,%rax 53 pushq %rax 54 fldl (%rsp) 55 # xh = x - c*x + c*x 56 # xl = x - xh 57 fmulp 58 fld %st(2) 59 fsub %st(1), %st 60 faddp 61 fld %st(2) 62 fsub %st(1), %st 63 # yh = log2e_hi - c*log2e_hi + c*log2e_hi 64 movq $0x3ff7154765200000,%rax 65 pushq %rax 66 fldl (%rsp) 67 # fpu stack: 2^hi x hi xh xl yh 68 # lo = hi - xh*yh + xl*yh 69 fld %st(2) 70 fmul %st(1), %st 71 fsubp %st, %st(4) 72 fmul %st(1), %st 73 faddp %st, %st(3) 74 # yl = log2e_hi - yh 75 movq $0x3de705fc2f000000,%rax 76 pushq %rax 77 fldl (%rsp) 78 # fpu stack: 2^hi x lo xh xl yl 79 # lo += xh*yl + xl*yl 80 fmul %st, %st(2) 81 fmulp %st, %st(1) 82 fxch %st(2) 83 faddp 84 faddp 85 # log2e_lo 86 movq $0xbfbe,%rax 87 pushq %rax 88 movq $0x82f0025f2dc582ee,%rax 89 pushq %rax 90 fldt (%rsp) 91 addq $40,%rsp 92 # fpu stack: 2^hi x lo log2e_lo 93 # lo += log2e_lo*x 94 # return 2^hi + 2^hi (2^lo - 1) 95 fmulp %st, %st(2) 96 faddp 97 f2xm1 98 fmul %st(1), %st 99 faddp 1001: addq $48, %rsp 101 ret 102