1# exp(x) = 2^hi + 2^hi (2^lo - 1) 2# where hi+lo = log2e*x with 128bit precision 3# exact log2e*x calculation depends on nearest rounding mode 4# using the exact multiplication method of Dekker and Veltkamp 5 6.global expl 7.type expl,@function 8expl: 9 fldt 4(%esp) 10 11 # interesting case: 0x1p-32 <= |x| < 16384 12 # check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13] 13 mov 12(%esp), %ax 14 or $0x8000, %ax 15 sub $0xbfdf, %ax 16 cmp $45, %ax 17 jbe 2f 18 test %ax, %ax 19 fld1 20 js 1f 21 # if |x|>=0x1p14 or nan return 2^trunc(x) 22 fscale 23 fstp %st(1) 24 ret 25 # if |x|<0x1p-32 return 1+x 261: faddp 27 ret 28 29 # should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc 30 # it will be wrong on non-nearest rounding mode 312: fldl2e 32 subl $44, %esp 33 # hi = log2e_hi*x 34 # 2^hi = exp2l(hi) 35 fmul %st(1),%st 36 fld %st(0) 37 fstpt (%esp) 38 fstpt 16(%esp) 39 fstpt 32(%esp) 40.hidden __exp2l 41 call __exp2l 42 # if 2^hi == inf return 2^hi 43 fld %st(0) 44 fstpt (%esp) 45 cmpw $0x7fff, 8(%esp) 46 je 1f 47 fldt 32(%esp) 48 fldt 16(%esp) 49 # fpu stack: 2^hi x hi 50 # exact mult: x*log2e 51 fld %st(1) 52 # c = 0x1p32+1 53 pushl $0x41f00000 54 pushl $0x00100000 55 fldl (%esp) 56 # xh = x - c*x + c*x 57 # xl = x - xh 58 fmulp 59 fld %st(2) 60 fsub %st(1), %st 61 faddp 62 fld %st(2) 63 fsub %st(1), %st 64 # yh = log2e_hi - c*log2e_hi + c*log2e_hi 65 pushl $0x3ff71547 66 pushl $0x65200000 67 fldl (%esp) 68 # fpu stack: 2^hi x hi xh xl yh 69 # lo = hi - xh*yh + xl*yh 70 fld %st(2) 71 fmul %st(1), %st 72 fsubp %st, %st(4) 73 fmul %st(1), %st 74 faddp %st, %st(3) 75 # yl = log2e_hi - yh 76 pushl $0x3de705fc 77 pushl $0x2f000000 78 fldl (%esp) 79 # fpu stack: 2^hi x lo xh xl yl 80 # lo += xh*yl + xl*yl 81 fmul %st, %st(2) 82 fmulp %st, %st(1) 83 fxch %st(2) 84 faddp 85 faddp 86 # log2e_lo 87 pushl $0xbfbe 88 pushl $0x82f0025f 89 pushl $0x2dc582ee 90 fldt (%esp) 91 addl $36,%esp 92 # fpu stack: 2^hi x lo log2e_lo 93 # lo += log2e_lo*x 94 # return 2^hi + 2^hi (2^lo - 1) 95 fmulp %st, %st(2) 96 faddp 97 f2xm1 98 fmul %st(1), %st 99 faddp 1001: addl $44, %esp 101 ret 102