• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).
2 //
3 // ====================================================
4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5 //
6 // Developed at SunSoft, a Sun Microsystems, Inc. business.
7 // Permission to use, copy, modify, and distribute this
8 // software is freely granted, provided that this notice
9 // is preserved.
10 // ====================================================
11 //
12 // The original source code covered by the above license above has been
13 // modified significantly by Google Inc.
14 // Copyright 2016 the V8 project authors. All rights reserved.
15 
16 #include "src/base/ieee754.h"
17 
18 #include <cmath>
19 #include <limits>
20 
21 #include "src/base/build_config.h"
22 #include "src/base/macros.h"
23 #include "src/base/overflowing-math.h"
24 
25 namespace v8 {
26 namespace base {
27 namespace ieee754 {
28 
29 namespace {
30 
31 /* Disable "potential divide by 0" warning in Visual Studio compiler. */
32 
33 #if V8_CC_MSVC
34 
35 #pragma warning(disable : 4723)
36 
37 #endif
38 
39 /*
40  * The original fdlibm code used statements like:
41  *  n0 = ((*(int*)&one)>>29)^1;   * index of high word *
42  *  ix0 = *(n0+(int*)&x);     * high word of x *
43  *  ix1 = *((1-n0)+(int*)&x);   * low word of x *
44  * to dig two 32 bit words out of the 64 bit IEEE floating point
45  * value.  That is non-ANSI, and, moreover, the gcc instruction
46  * scheduler gets it wrong.  We instead use the following macros.
47  * Unlike the original code, we determine the endianness at compile
48  * time, not at run time; I don't see much benefit to selecting
49  * endianness at run time.
50  */
51 
52 /* Get two 32 bit ints from a double.  */
53 
54 #define EXTRACT_WORDS(ix0, ix1, d)         \
55   do {                                     \
56     uint64_t bits = bit_cast<uint64_t>(d); \
57     (ix0) = bits >> 32;                    \
58     (ix1) = bits & 0xFFFFFFFFu;            \
59   } while (false)
60 
61 /* Get the more significant 32 bit int from a double.  */
62 
63 #define GET_HIGH_WORD(i, d)                \
64   do {                                     \
65     uint64_t bits = bit_cast<uint64_t>(d); \
66     (i) = bits >> 32;                      \
67   } while (false)
68 
69 /* Get the less significant 32 bit int from a double.  */
70 
71 #define GET_LOW_WORD(i, d)                 \
72   do {                                     \
73     uint64_t bits = bit_cast<uint64_t>(d); \
74     (i) = bits & 0xFFFFFFFFu;              \
75   } while (false)
76 
77 /* Set a double from two 32 bit ints.  */
78 
79 #define INSERT_WORDS(d, ix0, ix1)             \
80   do {                                        \
81     uint64_t bits = 0;                        \
82     bits |= static_cast<uint64_t>(ix0) << 32; \
83     bits |= static_cast<uint32_t>(ix1);       \
84     (d) = bit_cast<double>(bits);             \
85   } while (false)
86 
87 /* Set the more significant 32 bits of a double from an int.  */
88 
89 #define SET_HIGH_WORD(d, v)                 \
90   do {                                      \
91     uint64_t bits = bit_cast<uint64_t>(d);  \
92     bits &= 0x0000'0000'FFFF'FFFF;          \
93     bits |= static_cast<uint64_t>(v) << 32; \
94     (d) = bit_cast<double>(bits);           \
95   } while (false)
96 
97 /* Set the less significant 32 bits of a double from an int.  */
98 
99 #define SET_LOW_WORD(d, v)                 \
100   do {                                     \
101     uint64_t bits = bit_cast<uint64_t>(d); \
102     bits &= 0xFFFF'FFFF'0000'0000;         \
103     bits |= static_cast<uint32_t>(v);      \
104     (d) = bit_cast<double>(bits);          \
105   } while (false)
106 
107 int32_t __ieee754_rem_pio2(double x, double* y) V8_WARN_UNUSED_RESULT;
108 double __kernel_cos(double x, double y) V8_WARN_UNUSED_RESULT;
109 int __kernel_rem_pio2(double* x, double* y, int e0, int nx, int prec,
110                       const int32_t* ipio2) V8_WARN_UNUSED_RESULT;
111 double __kernel_sin(double x, double y, int iy) V8_WARN_UNUSED_RESULT;
112 
113 /* __ieee754_rem_pio2(x,y)
114  *
115  * return the remainder of x rem pi/2 in y[0]+y[1]
116  * use __kernel_rem_pio2()
117  */
__ieee754_rem_pio2(double x,double * y)118 int32_t __ieee754_rem_pio2(double x, double *y) {
119   /*
120    * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
121    */
122   static const int32_t two_over_pi[] = {
123       0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C,
124       0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649,
125       0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 0xA73EE8, 0x8235F5, 0x2EBB44,
126       0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, 0x845F8B,
127       0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D,
128       0x367ECF, 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
129       0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330,
130       0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 0x91615E, 0xE61B08,
131       0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA,
132       0x73A8C9, 0x60E27B, 0xC08C6B,
133   };
134 
135   static const int32_t npio2_hw[] = {
136       0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
137       0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
138       0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
139       0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
140       0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
141       0x404858EB, 0x404921FB,
142   };
143 
144   /*
145    * invpio2:  53 bits of 2/pi
146    * pio2_1:   first  33 bit of pi/2
147    * pio2_1t:  pi/2 - pio2_1
148    * pio2_2:   second 33 bit of pi/2
149    * pio2_2t:  pi/2 - (pio2_1+pio2_2)
150    * pio2_3:   third  33 bit of pi/2
151    * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
152    */
153 
154   static const double
155       zero = 0.00000000000000000000e+00,    /* 0x00000000, 0x00000000 */
156       half = 5.00000000000000000000e-01,    /* 0x3FE00000, 0x00000000 */
157       two24 = 1.67772160000000000000e+07,   /* 0x41700000, 0x00000000 */
158       invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
159       pio2_1 = 1.57079632673412561417e+00,  /* 0x3FF921FB, 0x54400000 */
160       pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
161       pio2_2 = 6.07710050630396597660e-11,  /* 0x3DD0B461, 0x1A600000 */
162       pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
163       pio2_3 = 2.02226624871116645580e-21,  /* 0x3BA3198A, 0x2E000000 */
164       pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
165 
166   double z, w, t, r, fn;
167   double tx[3];
168   int32_t e0, i, j, nx, n, ix, hx;
169   uint32_t low;
170 
171   z = 0;
172   GET_HIGH_WORD(hx, x); /* high word of x */
173   ix = hx & 0x7FFFFFFF;
174   if (ix <= 0x3FE921FB) { /* |x| ~<= pi/4 , no need for reduction */
175     y[0] = x;
176     y[1] = 0;
177     return 0;
178   }
179   if (ix < 0x4002D97C) { /* |x| < 3pi/4, special case with n=+-1 */
180     if (hx > 0) {
181       z = x - pio2_1;
182       if (ix != 0x3FF921FB) { /* 33+53 bit pi is good enough */
183         y[0] = z - pio2_1t;
184         y[1] = (z - y[0]) - pio2_1t;
185       } else { /* near pi/2, use 33+33+53 bit pi */
186         z -= pio2_2;
187         y[0] = z - pio2_2t;
188         y[1] = (z - y[0]) - pio2_2t;
189       }
190       return 1;
191     } else { /* negative x */
192       z = x + pio2_1;
193       if (ix != 0x3FF921FB) { /* 33+53 bit pi is good enough */
194         y[0] = z + pio2_1t;
195         y[1] = (z - y[0]) + pio2_1t;
196       } else { /* near pi/2, use 33+33+53 bit pi */
197         z += pio2_2;
198         y[0] = z + pio2_2t;
199         y[1] = (z - y[0]) + pio2_2t;
200       }
201       return -1;
202     }
203   }
204   if (ix <= 0x413921FB) { /* |x| ~<= 2^19*(pi/2), medium size */
205     t = fabs(x);
206     n = static_cast<int32_t>(t * invpio2 + half);
207     fn = static_cast<double>(n);
208     r = t - fn * pio2_1;
209     w = fn * pio2_1t; /* 1st round good to 85 bit */
210     if (n < 32 && ix != npio2_hw[n - 1]) {
211       y[0] = r - w; /* quick check no cancellation */
212     } else {
213       uint32_t high;
214       j = ix >> 20;
215       y[0] = r - w;
216       GET_HIGH_WORD(high, y[0]);
217       i = j - ((high >> 20) & 0x7FF);
218       if (i > 16) { /* 2nd iteration needed, good to 118 */
219         t = r;
220         w = fn * pio2_2;
221         r = t - w;
222         w = fn * pio2_2t - ((t - r) - w);
223         y[0] = r - w;
224         GET_HIGH_WORD(high, y[0]);
225         i = j - ((high >> 20) & 0x7FF);
226         if (i > 49) { /* 3rd iteration need, 151 bits acc */
227           t = r;      /* will cover all possible cases */
228           w = fn * pio2_3;
229           r = t - w;
230           w = fn * pio2_3t - ((t - r) - w);
231           y[0] = r - w;
232         }
233       }
234     }
235     y[1] = (r - y[0]) - w;
236     if (hx < 0) {
237       y[0] = -y[0];
238       y[1] = -y[1];
239       return -n;
240     } else {
241       return n;
242     }
243   }
244   /*
245    * all other (large) arguments
246    */
247   if (ix >= 0x7FF00000) { /* x is inf or NaN */
248     y[0] = y[1] = x - x;
249     return 0;
250   }
251   /* set z = scalbn(|x|,ilogb(x)-23) */
252   GET_LOW_WORD(low, x);
253   SET_LOW_WORD(z, low);
254   e0 = (ix >> 20) - 1046; /* e0 = ilogb(z)-23; */
255   SET_HIGH_WORD(z, ix - static_cast<int32_t>(static_cast<uint32_t>(e0) << 20));
256   for (i = 0; i < 2; i++) {
257     tx[i] = static_cast<double>(static_cast<int32_t>(z));
258     z = (z - tx[i]) * two24;
259   }
260   tx[2] = z;
261   nx = 3;
262   while (tx[nx - 1] == zero) nx--; /* skip zero term */
263   n = __kernel_rem_pio2(tx, y, e0, nx, 2, two_over_pi);
264   if (hx < 0) {
265     y[0] = -y[0];
266     y[1] = -y[1];
267     return -n;
268   }
269   return n;
270 }
271 
272 /* __kernel_cos( x,  y )
273  * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
274  * Input x is assumed to be bounded by ~pi/4 in magnitude.
275  * Input y is the tail of x.
276  *
277  * Algorithm
278  *      1. Since cos(-x) = cos(x), we need only to consider positive x.
279  *      2. if x < 2^-27 (hx<0x3E400000 0), return 1 with inexact if x!=0.
280  *      3. cos(x) is approximated by a polynomial of degree 14 on
281  *         [0,pi/4]
282  *                                       4            14
283  *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
284  *         where the remez error is
285  *
286  *      |              2     4     6     8     10    12     14 |     -58
287  *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
288  *      |                                                      |
289  *
290  *                     4     6     8     10    12     14
291  *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
292  *             cos(x) = 1 - x*x/2 + r
293  *         since cos(x+y) ~ cos(x) - sin(x)*y
294  *                        ~ cos(x) - x*y,
295  *         a correction term is necessary in cos(x) and hence
296  *              cos(x+y) = 1 - (x*x/2 - (r - x*y))
297  *         For better accuracy when x > 0.3, let qx = |x|/4 with
298  *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
299  *         Then
300  *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
301  *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the
302  *         magnitude of the latter is at least a quarter of x*x/2,
303  *         thus, reducing the rounding error in the subtraction.
304  */
__kernel_cos(double x,double y)305 V8_INLINE double __kernel_cos(double x, double y) {
306   static const double
307       one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
308       C1 = 4.16666666666666019037e-02,  /* 0x3FA55555, 0x5555554C */
309       C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
310       C3 = 2.48015872894767294178e-05,  /* 0x3EFA01A0, 0x19CB1590 */
311       C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
312       C5 = 2.08757232129817482790e-09,  /* 0x3E21EE9E, 0xBDB4B1C4 */
313       C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
314 
315   double a, iz, z, r, qx;
316   int32_t ix;
317   GET_HIGH_WORD(ix, x);
318   ix &= 0x7FFFFFFF;                           /* ix = |x|'s high word*/
319   if (ix < 0x3E400000) {                      /* if x < 2**27 */
320     if (static_cast<int>(x) == 0) return one; /* generate inexact */
321   }
322   z = x * x;
323   r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6)))));
324   if (ix < 0x3FD33333) { /* if |x| < 0.3 */
325     return one - (0.5 * z - (z * r - x * y));
326   } else {
327     if (ix > 0x3FE90000) { /* x > 0.78125 */
328       qx = 0.28125;
329     } else {
330       INSERT_WORDS(qx, ix - 0x00200000, 0); /* x/4 */
331     }
332     iz = 0.5 * z - qx;
333     a = one - qx;
334     return a - (iz - (z * r - x * y));
335   }
336 }
337 
338 /* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
339  * double x[],y[]; int e0,nx,prec; int ipio2[];
340  *
341  * __kernel_rem_pio2 return the last three digits of N with
342  *              y = x - N*pi/2
343  * so that |y| < pi/2.
344  *
345  * The method is to compute the integer (mod 8) and fraction parts of
346  * (2/pi)*x without doing the full multiplication. In general we
347  * skip the part of the product that are known to be a huge integer (
348  * more accurately, = 0 mod 8 ). Thus the number of operations are
349  * independent of the exponent of the input.
350  *
351  * (2/pi) is represented by an array of 24-bit integers in ipio2[].
352  *
353  * Input parameters:
354  *      x[]     The input value (must be positive) is broken into nx
355  *              pieces of 24-bit integers in double precision format.
356  *              x[i] will be the i-th 24 bit of x. The scaled exponent
357  *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
358  *              match x's up to 24 bits.
359  *
360  *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
361  *                      e0 = ilogb(z)-23
362  *                      z  = scalbn(z,-e0)
363  *              for i = 0,1,2
364  *                      x[i] = floor(z)
365  *                      z    = (z-x[i])*2**24
366  *
367  *
368  *      y[]     output result in an array of double precision numbers.
369  *              The dimension of y[] is:
370  *                      24-bit  precision       1
371  *                      53-bit  precision       2
372  *                      64-bit  precision       2
373  *                      113-bit precision       3
374  *              The actual value is the sum of them. Thus for 113-bit
375  *              precison, one may have to do something like:
376  *
377  *              long double t,w,r_head, r_tail;
378  *              t = (long double)y[2] + (long double)y[1];
379  *              w = (long double)y[0];
380  *              r_head = t+w;
381  *              r_tail = w - (r_head - t);
382  *
383  *      e0      The exponent of x[0]
384  *
385  *      nx      dimension of x[]
386  *
387  *      prec    an integer indicating the precision:
388  *                      0       24  bits (single)
389  *                      1       53  bits (double)
390  *                      2       64  bits (extended)
391  *                      3       113 bits (quad)
392  *
393  *      ipio2[]
394  *              integer array, contains the (24*i)-th to (24*i+23)-th
395  *              bit of 2/pi after binary point. The corresponding
396  *              floating value is
397  *
398  *                      ipio2[i] * 2^(-24(i+1)).
399  *
400  * External function:
401  *      double scalbn(), floor();
402  *
403  *
404  * Here is the description of some local variables:
405  *
406  *      jk      jk+1 is the initial number of terms of ipio2[] needed
407  *              in the computation. The recommended value is 2,3,4,
408  *              6 for single, double, extended,and quad.
409  *
410  *      jz      local integer variable indicating the number of
411  *              terms of ipio2[] used.
412  *
413  *      jx      nx - 1
414  *
415  *      jv      index for pointing to the suitable ipio2[] for the
416  *              computation. In general, we want
417  *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
418  *              is an integer. Thus
419  *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
420  *              Hence jv = max(0,(e0-3)/24).
421  *
422  *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
423  *
424  *      q[]     double array with integral value, representing the
425  *              24-bits chunk of the product of x and 2/pi.
426  *
427  *      q0      the corresponding exponent of q[0]. Note that the
428  *              exponent for q[i] would be q0-24*i.
429  *
430  *      PIo2[]  double precision array, obtained by cutting pi/2
431  *              into 24 bits chunks.
432  *
433  *      f[]     ipio2[] in floating point
434  *
435  *      iq[]    integer array by breaking up q[] in 24-bits chunk.
436  *
437  *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
438  *
439  *      ih      integer. If >0 it indicates q[] is >= 0.5, hence
440  *              it also indicates the *sign* of the result.
441  *
442  */
__kernel_rem_pio2(double * x,double * y,int e0,int nx,int prec,const int32_t * ipio2)443 int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
444                       const int32_t *ipio2) {
445   /* Constants:
446    * The hexadecimal values are the intended ones for the following
447    * constants. The decimal values may be used, provided that the
448    * compiler will convert from decimal to binary accurately enough
449    * to produce the hexadecimal values shown.
450    */
451   static const int init_jk[] = {2, 3, 4, 6}; /* initial value for jk */
452 
453   static const double PIo2[] = {
454       1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
455       7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
456       5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
457       3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
458       1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
459       1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
460       2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
461       2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
462   };
463 
464   static const double
465       zero = 0.0,
466       one = 1.0,
467       two24 = 1.67772160000000000000e+07,  /* 0x41700000, 0x00000000 */
468       twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
469 
470   int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
471   double z, fw, f[20], fq[20], q[20];
472 
473   /* initialize jk*/
474   jk = init_jk[prec];
475   jp = jk;
476 
477   /* determine jx,jv,q0, note that 3>q0 */
478   jx = nx - 1;
479   jv = (e0 - 3) / 24;
480   if (jv < 0) jv = 0;
481   q0 = e0 - 24 * (jv + 1);
482 
483   /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
484   j = jv - jx;
485   m = jx + jk;
486   for (i = 0; i <= m; i++, j++) {
487     f[i] = (j < 0) ? zero : static_cast<double>(ipio2[j]);
488   }
489 
490   /* compute q[0],q[1],...q[jk] */
491   for (i = 0; i <= jk; i++) {
492     for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
493     q[i] = fw;
494   }
495 
496   jz = jk;
497 recompute:
498   /* distill q[] into iq[] reversingly */
499   for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
500     fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
501     iq[i] = static_cast<int32_t>(z - two24 * fw);
502     z = q[j - 1] + fw;
503   }
504 
505   /* compute n */
506   z = scalbn(z, q0);           /* actual value of z */
507   z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */
508   n = static_cast<int32_t>(z);
509   z -= static_cast<double>(n);
510   ih = 0;
511   if (q0 > 0) { /* need iq[jz-1] to determine n */
512     i = (iq[jz - 1] >> (24 - q0));
513     n += i;
514     iq[jz - 1] -= i << (24 - q0);
515     ih = iq[jz - 1] >> (23 - q0);
516   } else if (q0 == 0) {
517     ih = iq[jz - 1] >> 23;
518   } else if (z >= 0.5) {
519     ih = 2;
520   }
521 
522   if (ih > 0) { /* q > 0.5 */
523     n += 1;
524     carry = 0;
525     for (i = 0; i < jz; i++) { /* compute 1-q */
526       j = iq[i];
527       if (carry == 0) {
528         if (j != 0) {
529           carry = 1;
530           iq[i] = 0x1000000 - j;
531         }
532       } else {
533         iq[i] = 0xFFFFFF - j;
534       }
535     }
536     if (q0 > 0) { /* rare case: chance is 1 in 12 */
537       switch (q0) {
538         case 1:
539           iq[jz - 1] &= 0x7FFFFF;
540           break;
541         case 2:
542           iq[jz - 1] &= 0x3FFFFF;
543           break;
544       }
545     }
546     if (ih == 2) {
547       z = one - z;
548       if (carry != 0) z -= scalbn(one, q0);
549     }
550   }
551 
552   /* check if recomputation is needed */
553   if (z == zero) {
554     j = 0;
555     for (i = jz - 1; i >= jk; i--) j |= iq[i];
556     if (j == 0) { /* need recomputation */
557       for (k = 1; jk >= k && iq[jk - k] == 0; k++) {
558         /* k = no. of terms needed */
559       }
560 
561       for (i = jz + 1; i <= jz + k; i++) { /* add q[jz+1] to q[jz+k] */
562         f[jx + i] = ipio2[jv + i];
563         for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
564         q[i] = fw;
565       }
566       jz += k;
567       goto recompute;
568     }
569   }
570 
571   /* chop off zero terms */
572   if (z == 0.0) {
573     jz -= 1;
574     q0 -= 24;
575     while (iq[jz] == 0) {
576       jz--;
577       q0 -= 24;
578     }
579   } else { /* break z into 24-bit if necessary */
580     z = scalbn(z, -q0);
581     if (z >= two24) {
582       fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
583       iq[jz] = z - two24 * fw;
584       jz += 1;
585       q0 += 24;
586       iq[jz] = fw;
587     } else {
588       iq[jz] = z;
589     }
590   }
591 
592   /* convert integer "bit" chunk to floating-point value */
593   fw = scalbn(one, q0);
594   for (i = jz; i >= 0; i--) {
595     q[i] = fw * iq[i];
596     fw *= twon24;
597   }
598 
599   /* compute PIo2[0,...,jp]*q[jz,...,0] */
600   for (i = jz; i >= 0; i--) {
601     for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++) fw += PIo2[k] * q[i + k];
602     fq[jz - i] = fw;
603   }
604 
605   /* compress fq[] into y[] */
606   switch (prec) {
607     case 0:
608       fw = 0.0;
609       for (i = jz; i >= 0; i--) fw += fq[i];
610       y[0] = (ih == 0) ? fw : -fw;
611       break;
612     case 1:
613     case 2:
614       fw = 0.0;
615       for (i = jz; i >= 0; i--) fw += fq[i];
616       y[0] = (ih == 0) ? fw : -fw;
617       fw = fq[0] - fw;
618       for (i = 1; i <= jz; i++) fw += fq[i];
619       y[1] = (ih == 0) ? fw : -fw;
620       break;
621     case 3: /* painful */
622       for (i = jz; i > 0; i--) {
623         fw = fq[i - 1] + fq[i];
624         fq[i] += fq[i - 1] - fw;
625         fq[i - 1] = fw;
626       }
627       for (i = jz; i > 1; i--) {
628         fw = fq[i - 1] + fq[i];
629         fq[i] += fq[i - 1] - fw;
630         fq[i - 1] = fw;
631       }
632       for (fw = 0.0, i = jz; i >= 2; i--) fw += fq[i];
633       if (ih == 0) {
634         y[0] = fq[0];
635         y[1] = fq[1];
636         y[2] = fw;
637       } else {
638         y[0] = -fq[0];
639         y[1] = -fq[1];
640         y[2] = -fw;
641       }
642   }
643   return n & 7;
644 }
645 
646 /* __kernel_sin( x, y, iy)
647  * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
648  * Input x is assumed to be bounded by ~pi/4 in magnitude.
649  * Input y is the tail of x.
650  * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
651  *
652  * Algorithm
653  *      1. Since sin(-x) = -sin(x), we need only to consider positive x.
654  *      2. if x < 2^-27 (hx<0x3E400000 0), return x with inexact if x!=0.
655  *      3. sin(x) is approximated by a polynomial of degree 13 on
656  *         [0,pi/4]
657  *                               3            13
658  *              sin(x) ~ x + S1*x + ... + S6*x
659  *         where
660  *
661  *      |sin(x)         2     4     6     8     10     12  |     -58
662  *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
663  *      |  x                                               |
664  *
665  *      4. sin(x+y) = sin(x) + sin'(x')*y
666  *                  ~ sin(x) + (1-x*x/2)*y
667  *         For better accuracy, let
668  *                   3      2      2      2      2
669  *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
670  *         then                   3    2
671  *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
672  */
__kernel_sin(double x,double y,int iy)673 V8_INLINE double __kernel_sin(double x, double y, int iy) {
674   static const double
675       half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
676       S1 = -1.66666666666666324348e-01,  /* 0xBFC55555, 0x55555549 */
677       S2 = 8.33333333332248946124e-03,   /* 0x3F811111, 0x1110F8A6 */
678       S3 = -1.98412698298579493134e-04,  /* 0xBF2A01A0, 0x19C161D5 */
679       S4 = 2.75573137070700676789e-06,   /* 0x3EC71DE3, 0x57B1FE7D */
680       S5 = -2.50507602534068634195e-08,  /* 0xBE5AE5E6, 0x8A2B9CEB */
681       S6 = 1.58969099521155010221e-10;   /* 0x3DE5D93A, 0x5ACFD57C */
682 
683   double z, r, v;
684   int32_t ix;
685   GET_HIGH_WORD(ix, x);
686   ix &= 0x7FFFFFFF;      /* high word of x */
687   if (ix < 0x3E400000) { /* |x| < 2**-27 */
688     if (static_cast<int>(x) == 0) return x;
689   } /* generate inexact */
690   z = x * x;
691   v = z * x;
692   r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6)));
693   if (iy == 0) {
694     return x + v * (S1 + z * r);
695   } else {
696     return x - ((z * (half * y - v * r) - y) - v * S1);
697   }
698 }
699 
700 /* __kernel_tan( x, y, k )
701  * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
702  * Input x is assumed to be bounded by ~pi/4 in magnitude.
703  * Input y is the tail of x.
704  * Input k indicates whether tan (if k=1) or
705  * -1/tan (if k= -1) is returned.
706  *
707  * Algorithm
708  *      1. Since tan(-x) = -tan(x), we need only to consider positive x.
709  *      2. if x < 2^-28 (hx<0x3E300000 0), return x with inexact if x!=0.
710  *      3. tan(x) is approximated by a odd polynomial of degree 27 on
711  *         [0,0.67434]
712  *                               3             27
713  *              tan(x) ~ x + T1*x + ... + T13*x
714  *         where
715  *
716  *              |tan(x)         2     4            26   |     -59.2
717  *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
718  *              |  x                                    |
719  *
720  *         Note: tan(x+y) = tan(x) + tan'(x)*y
721  *                        ~ tan(x) + (1+x*x)*y
722  *         Therefore, for better accuracy in computing tan(x+y), let
723  *                   3      2      2       2       2
724  *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
725  *         then
726  *                                  3    2
727  *              tan(x+y) = x + (T1*x + (x *(r+y)+y))
728  *
729  *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
730  *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
731  *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
732  */
__kernel_tan(double x,double y,int iy)733 double __kernel_tan(double x, double y, int iy) {
734   static const double xxx[] = {
735       3.33333333333334091986e-01,             /* 3FD55555, 55555563 */
736       1.33333333333201242699e-01,             /* 3FC11111, 1110FE7A */
737       5.39682539762260521377e-02,             /* 3FABA1BA, 1BB341FE */
738       2.18694882948595424599e-02,             /* 3F9664F4, 8406D637 */
739       8.86323982359930005737e-03,             /* 3F8226E3, E96E8493 */
740       3.59207910759131235356e-03,             /* 3F6D6D22, C9560328 */
741       1.45620945432529025516e-03,             /* 3F57DBC8, FEE08315 */
742       5.88041240820264096874e-04,             /* 3F4344D8, F2F26501 */
743       2.46463134818469906812e-04,             /* 3F3026F7, 1A8D1068 */
744       7.81794442939557092300e-05,             /* 3F147E88, A03792A6 */
745       7.14072491382608190305e-05,             /* 3F12B80F, 32F0A7E9 */
746       -1.85586374855275456654e-05,            /* BEF375CB, DB605373 */
747       2.59073051863633712884e-05,             /* 3EFB2A70, 74BF7AD4 */
748       /* one */ 1.00000000000000000000e+00,   /* 3FF00000, 00000000 */
749       /* pio4 */ 7.85398163397448278999e-01,  /* 3FE921FB, 54442D18 */
750       /* pio4lo */ 3.06161699786838301793e-17 /* 3C81A626, 33145C07 */
751   };
752 #define one xxx[13]
753 #define pio4 xxx[14]
754 #define pio4lo xxx[15]
755 #define T xxx
756 
757   double z, r, v, w, s;
758   int32_t ix, hx;
759 
760   GET_HIGH_WORD(hx, x);             /* high word of x */
761   ix = hx & 0x7FFFFFFF;             /* high word of |x| */
762   if (ix < 0x3E300000) {            /* x < 2**-28 */
763     if (static_cast<int>(x) == 0) { /* generate inexact */
764       uint32_t low;
765       GET_LOW_WORD(low, x);
766       if (((ix | low) | (iy + 1)) == 0) {
767         return one / fabs(x);
768       } else {
769         if (iy == 1) {
770           return x;
771         } else { /* compute -1 / (x+y) carefully */
772           double a, t;
773 
774           z = w = x + y;
775           SET_LOW_WORD(z, 0);
776           v = y - (z - x);
777           t = a = -one / w;
778           SET_LOW_WORD(t, 0);
779           s = one + t * z;
780           return t + a * (s + t * v);
781         }
782       }
783     }
784   }
785   if (ix >= 0x3FE59428) { /* |x| >= 0.6744 */
786     if (hx < 0) {
787       x = -x;
788       y = -y;
789     }
790     z = pio4 - x;
791     w = pio4lo - y;
792     x = z + w;
793     y = 0.0;
794   }
795   z = x * x;
796   w = z * z;
797   /*
798    * Break x^5*(T[1]+x^2*T[2]+...) into
799    * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
800    * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
801    */
802   r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11]))));
803   v = z *
804       (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12])))));
805   s = z * x;
806   r = y + z * (s * (r + v) + y);
807   r += T[0] * s;
808   w = x + r;
809   if (ix >= 0x3FE59428) {
810     v = iy;
811     return (1 - ((hx >> 30) & 2)) * (v - 2.0 * (x - (w * w / (w + v) - r)));
812   }
813   if (iy == 1) {
814     return w;
815   } else {
816     /*
817      * if allow error up to 2 ulp, simply return
818      * -1.0 / (x+r) here
819      */
820     /* compute -1.0 / (x+r) accurately */
821     double a, t;
822     z = w;
823     SET_LOW_WORD(z, 0);
824     v = r - (z - x);  /* z+v = r+x */
825     t = a = -1.0 / w; /* a = -1.0/w */
826     SET_LOW_WORD(t, 0);
827     s = 1.0 + t * z;
828     return t + a * (s + t * v);
829   }
830 
831 #undef one
832 #undef pio4
833 #undef pio4lo
834 #undef T
835 }
836 
837 }  // namespace
838 
839 /* acos(x)
840  * Method :
841  *      acos(x)  = pi/2 - asin(x)
842  *      acos(-x) = pi/2 + asin(x)
843  * For |x|<=0.5
844  *      acos(x) = pi/2 - (x + x*x^2*R(x^2))     (see asin.c)
845  * For x>0.5
846  *      acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2)))
847  *              = 2asin(sqrt((1-x)/2))
848  *              = 2s + 2s*z*R(z)        ...z=(1-x)/2, s=sqrt(z)
849  *              = 2f + (2c + 2s*z*R(z))
850  *     where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term
851  *     for f so that f+c ~ sqrt(z).
852  * For x<-0.5
853  *      acos(x) = pi - 2asin(sqrt((1-|x|)/2))
854  *              = pi - 0.5*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z)
855  *
856  * Special cases:
857  *      if x is NaN, return x itself;
858  *      if |x|>1, return NaN with invalid signal.
859  *
860  * Function needed: sqrt
861  */
acos(double x)862 double acos(double x) {
863   static const double
864       one = 1.00000000000000000000e+00,     /* 0x3FF00000, 0x00000000 */
865       pi = 3.14159265358979311600e+00,      /* 0x400921FB, 0x54442D18 */
866       pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
867       pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
868       pS0 = 1.66666666666666657415e-01,     /* 0x3FC55555, 0x55555555 */
869       pS1 = -3.25565818622400915405e-01,    /* 0xBFD4D612, 0x03EB6F7D */
870       pS2 = 2.01212532134862925881e-01,     /* 0x3FC9C155, 0x0E884455 */
871       pS3 = -4.00555345006794114027e-02,    /* 0xBFA48228, 0xB5688F3B */
872       pS4 = 7.91534994289814532176e-04,     /* 0x3F49EFE0, 0x7501B288 */
873       pS5 = 3.47933107596021167570e-05,     /* 0x3F023DE1, 0x0DFDF709 */
874       qS1 = -2.40339491173441421878e+00,    /* 0xC0033A27, 0x1C8A2D4B */
875       qS2 = 2.02094576023350569471e+00,     /* 0x40002AE5, 0x9C598AC8 */
876       qS3 = -6.88283971605453293030e-01,    /* 0xBFE6066C, 0x1B8D0159 */
877       qS4 = 7.70381505559019352791e-02;     /* 0x3FB3B8C5, 0xB12E9282 */
878 
879   double z, p, q, r, w, s, c, df;
880   int32_t hx, ix;
881   GET_HIGH_WORD(hx, x);
882   ix = hx & 0x7FFFFFFF;
883   if (ix >= 0x3FF00000) { /* |x| >= 1 */
884     uint32_t lx;
885     GET_LOW_WORD(lx, x);
886     if (((ix - 0x3FF00000) | lx) == 0) { /* |x|==1 */
887       if (hx > 0)
888         return 0.0; /* acos(1) = 0  */
889       else
890         return pi + 2.0 * pio2_lo; /* acos(-1)= pi */
891     }
892     return std::numeric_limits<double>::signaling_NaN();  // acos(|x|>1) is NaN
893   }
894   if (ix < 0x3FE00000) {                            /* |x| < 0.5 */
895     if (ix <= 0x3C600000) return pio2_hi + pio2_lo; /*if|x|<2**-57*/
896     z = x * x;
897     p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
898     q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
899     r = p / q;
900     return pio2_hi - (x - (pio2_lo - x * r));
901   } else if (hx < 0) { /* x < -0.5 */
902     z = (one + x) * 0.5;
903     p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
904     q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
905     s = sqrt(z);
906     r = p / q;
907     w = r * s - pio2_lo;
908     return pi - 2.0 * (s + w);
909   } else { /* x > 0.5 */
910     z = (one - x) * 0.5;
911     s = sqrt(z);
912     df = s;
913     SET_LOW_WORD(df, 0);
914     c = (z - df * df) / (s + df);
915     p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
916     q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
917     r = p / q;
918     w = r * s + c;
919     return 2.0 * (df + w);
920   }
921 }
922 
923 /* acosh(x)
924  * Method :
925  *      Based on
926  *              acosh(x) = log [ x + sqrt(x*x-1) ]
927  *      we have
928  *              acosh(x) := log(x)+ln2, if x is large; else
929  *              acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x>2; else
930  *              acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t=x-1.
931  *
932  * Special cases:
933  *      acosh(x) is NaN with signal if x<1.
934  *      acosh(NaN) is NaN without signal.
935  */
acosh(double x)936 double acosh(double x) {
937   static const double
938       one = 1.0,
939       ln2 = 6.93147180559945286227e-01; /* 0x3FE62E42, 0xFEFA39EF */
940   double t;
941   int32_t hx;
942   uint32_t lx;
943   EXTRACT_WORDS(hx, lx, x);
944   if (hx < 0x3FF00000) { /* x < 1 */
945     return std::numeric_limits<double>::signaling_NaN();
946   } else if (hx >= 0x41B00000) { /* x > 2**28 */
947     if (hx >= 0x7FF00000) {      /* x is inf of NaN */
948       return x + x;
949     } else {
950       return log(x) + ln2; /* acosh(huge)=log(2x) */
951     }
952   } else if (((hx - 0x3FF00000) | lx) == 0) {
953     return 0.0;                 /* acosh(1) = 0 */
954   } else if (hx > 0x40000000) { /* 2**28 > x > 2 */
955     t = x * x;
956     return log(2.0 * x - one / (x + sqrt(t - one)));
957   } else { /* 1<x<2 */
958     t = x - one;
959     return log1p(t + sqrt(2.0 * t + t * t));
960   }
961 }
962 
963 /* asin(x)
964  * Method :
965  *      Since  asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ...
966  *      we approximate asin(x) on [0,0.5] by
967  *              asin(x) = x + x*x^2*R(x^2)
968  *      where
969  *              R(x^2) is a rational approximation of (asin(x)-x)/x^3
970  *      and its remez error is bounded by
971  *              |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75)
972  *
973  *      For x in [0.5,1]
974  *              asin(x) = pi/2-2*asin(sqrt((1-x)/2))
975  *      Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2;
976  *      then for x>0.98
977  *              asin(x) = pi/2 - 2*(s+s*z*R(z))
978  *                      = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo)
979  *      For x<=0.98, let pio4_hi = pio2_hi/2, then
980  *              f = hi part of s;
981  *              c = sqrt(z) - f = (z-f*f)/(s+f)         ...f+c=sqrt(z)
982  *      and
983  *              asin(x) = pi/2 - 2*(s+s*z*R(z))
984  *                      = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo)
985  *                      = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c))
986  *
987  * Special cases:
988  *      if x is NaN, return x itself;
989  *      if |x|>1, return NaN with invalid signal.
990  */
asin(double x)991 double asin(double x) {
992   static const double
993       one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
994       huge = 1.000e+300,
995       pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
996       pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
997       pio4_hi = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
998                                             /* coefficient for R(x^2) */
999       pS0 = 1.66666666666666657415e-01,     /* 0x3FC55555, 0x55555555 */
1000       pS1 = -3.25565818622400915405e-01,    /* 0xBFD4D612, 0x03EB6F7D */
1001       pS2 = 2.01212532134862925881e-01,     /* 0x3FC9C155, 0x0E884455 */
1002       pS3 = -4.00555345006794114027e-02,    /* 0xBFA48228, 0xB5688F3B */
1003       pS4 = 7.91534994289814532176e-04,     /* 0x3F49EFE0, 0x7501B288 */
1004       pS5 = 3.47933107596021167570e-05,     /* 0x3F023DE1, 0x0DFDF709 */
1005       qS1 = -2.40339491173441421878e+00,    /* 0xC0033A27, 0x1C8A2D4B */
1006       qS2 = 2.02094576023350569471e+00,     /* 0x40002AE5, 0x9C598AC8 */
1007       qS3 = -6.88283971605453293030e-01,    /* 0xBFE6066C, 0x1B8D0159 */
1008       qS4 = 7.70381505559019352791e-02;     /* 0x3FB3B8C5, 0xB12E9282 */
1009 
1010   double t, w, p, q, c, r, s;
1011   int32_t hx, ix;
1012 
1013   t = 0;
1014   GET_HIGH_WORD(hx, x);
1015   ix = hx & 0x7FFFFFFF;
1016   if (ix >= 0x3FF00000) { /* |x|>= 1 */
1017     uint32_t lx;
1018     GET_LOW_WORD(lx, x);
1019     if (((ix - 0x3FF00000) | lx) == 0) { /* asin(1)=+-pi/2 with inexact */
1020       return x * pio2_hi + x * pio2_lo;
1021     }
1022     return std::numeric_limits<double>::signaling_NaN();  // asin(|x|>1) is NaN
1023   } else if (ix < 0x3FE00000) {     /* |x|<0.5 */
1024     if (ix < 0x3E400000) {          /* if |x| < 2**-27 */
1025       if (huge + x > one) return x; /* return x with inexact if x!=0*/
1026     } else {
1027       t = x * x;
1028     }
1029     p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + t * (pS4 + t * pS5)))));
1030     q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4)));
1031     w = p / q;
1032     return x + x * w;
1033   }
1034   /* 1> |x|>= 0.5 */
1035   w = one - fabs(x);
1036   t = w * 0.5;
1037   p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + t * (pS4 + t * pS5)))));
1038   q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4)));
1039   s = sqrt(t);
1040   if (ix >= 0x3FEF3333) { /* if |x| > 0.975 */
1041     w = p / q;
1042     t = pio2_hi - (2.0 * (s + s * w) - pio2_lo);
1043   } else {
1044     w = s;
1045     SET_LOW_WORD(w, 0);
1046     c = (t - w * w) / (s + w);
1047     r = p / q;
1048     p = 2.0 * s * r - (pio2_lo - 2.0 * c);
1049     q = pio4_hi - 2.0 * w;
1050     t = pio4_hi - (p - q);
1051   }
1052   if (hx > 0)
1053     return t;
1054   else
1055     return -t;
1056 }
1057 /* asinh(x)
1058  * Method :
1059  *      Based on
1060  *              asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ]
1061  *      we have
1062  *      asinh(x) := x  if  1+x*x=1,
1063  *               := sign(x)*(log(x)+ln2)) for large |x|, else
1064  *               := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x|>2, else
1065  *               := sign(x)*log1p(|x| + x^2/(1 + sqrt(1+x^2)))
1066  */
asinh(double x)1067 double asinh(double x) {
1068   static const double
1069       one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
1070       ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
1071       huge = 1.00000000000000000000e+300;
1072 
1073   double t, w;
1074   int32_t hx, ix;
1075   GET_HIGH_WORD(hx, x);
1076   ix = hx & 0x7FFFFFFF;
1077   if (ix >= 0x7FF00000) return x + x; /* x is inf or NaN */
1078   if (ix < 0x3E300000) {              /* |x|<2**-28 */
1079     if (huge + x > one) return x;     /* return x inexact except 0 */
1080   }
1081   if (ix > 0x41B00000) { /* |x| > 2**28 */
1082     w = log(fabs(x)) + ln2;
1083   } else if (ix > 0x40000000) { /* 2**28 > |x| > 2.0 */
1084     t = fabs(x);
1085     w = log(2.0 * t + one / (sqrt(x * x + one) + t));
1086   } else { /* 2.0 > |x| > 2**-28 */
1087     t = x * x;
1088     w = log1p(fabs(x) + t / (one + sqrt(one + t)));
1089   }
1090   if (hx > 0) {
1091     return w;
1092   } else {
1093     return -w;
1094   }
1095 }
1096 
1097 /* atan(x)
1098  * Method
1099  *   1. Reduce x to positive by atan(x) = -atan(-x).
1100  *   2. According to the integer k=4t+0.25 chopped, t=x, the argument
1101  *      is further reduced to one of the following intervals and the
1102  *      arctangent of t is evaluated by the corresponding formula:
1103  *
1104  *      [0,7/16]      atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
1105  *      [7/16,11/16]  atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) )
1106  *      [11/16.19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) )
1107  *      [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) )
1108  *      [39/16,INF]   atan(x) = atan(INF) + atan( -1/t )
1109  *
1110  * Constants:
1111  * The hexadecimal values are the intended ones for the following
1112  * constants. The decimal values may be used, provided that the
1113  * compiler will convert from decimal to binary accurately enough
1114  * to produce the hexadecimal values shown.
1115  */
atan(double x)1116 double atan(double x) {
1117   static const double atanhi[] = {
1118       4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */
1119       7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */
1120       9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */
1121       1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */
1122   };
1123 
1124   static const double atanlo[] = {
1125       2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */
1126       3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */
1127       1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */
1128       6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */
1129   };
1130 
1131   static const double aT[] = {
1132       3.33333333333329318027e-01,  /* 0x3FD55555, 0x5555550D */
1133       -1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */
1134       1.42857142725034663711e-01,  /* 0x3FC24924, 0x920083FF */
1135       -1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */
1136       9.09088713343650656196e-02,  /* 0x3FB745CD, 0xC54C206E */
1137       -7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */
1138       6.66107313738753120669e-02,  /* 0x3FB10D66, 0xA0D03D51 */
1139       -5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */
1140       4.97687799461593236017e-02,  /* 0x3FA97B4B, 0x24760DEB */
1141       -3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */
1142       1.62858201153657823623e-02,  /* 0x3F90AD3A, 0xE322DA11 */
1143   };
1144 
1145   static const double one = 1.0, huge = 1.0e300;
1146 
1147   double w, s1, s2, z;
1148   int32_t ix, hx, id;
1149 
1150   GET_HIGH_WORD(hx, x);
1151   ix = hx & 0x7FFFFFFF;
1152   if (ix >= 0x44100000) { /* if |x| >= 2^66 */
1153     uint32_t low;
1154     GET_LOW_WORD(low, x);
1155     if (ix > 0x7FF00000 || (ix == 0x7FF00000 && (low != 0)))
1156       return x + x; /* NaN */
1157     if (hx > 0)
1158       return atanhi[3] + *const_cast<volatile double*>(&atanlo[3]);
1159     else
1160       return -atanhi[3] - *const_cast<volatile double*>(&atanlo[3]);
1161   }
1162   if (ix < 0x3FDC0000) {            /* |x| < 0.4375 */
1163     if (ix < 0x3E400000) {          /* |x| < 2^-27 */
1164       if (huge + x > one) return x; /* raise inexact */
1165     }
1166     id = -1;
1167   } else {
1168     x = fabs(x);
1169     if (ix < 0x3FF30000) {   /* |x| < 1.1875 */
1170       if (ix < 0x3FE60000) { /* 7/16 <=|x|<11/16 */
1171         id = 0;
1172         x = (2.0 * x - one) / (2.0 + x);
1173       } else { /* 11/16<=|x|< 19/16 */
1174         id = 1;
1175         x = (x - one) / (x + one);
1176       }
1177     } else {
1178       if (ix < 0x40038000) { /* |x| < 2.4375 */
1179         id = 2;
1180         x = (x - 1.5) / (one + 1.5 * x);
1181       } else { /* 2.4375 <= |x| < 2^66 */
1182         id = 3;
1183         x = -1.0 / x;
1184       }
1185     }
1186   }
1187   /* end of argument reduction */
1188   z = x * x;
1189   w = z * z;
1190   /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
1191   s1 = z * (aT[0] +
1192             w * (aT[2] + w * (aT[4] + w * (aT[6] + w * (aT[8] + w * aT[10])))));
1193   s2 = w * (aT[1] + w * (aT[3] + w * (aT[5] + w * (aT[7] + w * aT[9]))));
1194   if (id < 0) {
1195     return x - x * (s1 + s2);
1196   } else {
1197     z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x);
1198     return (hx < 0) ? -z : z;
1199   }
1200 }
1201 
1202 /* atan2(y,x)
1203  * Method :
1204  *  1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
1205  *  2. Reduce x to positive by (if x and y are unexceptional):
1206  *    ARG (x+iy) = arctan(y/x)       ... if x > 0,
1207  *    ARG (x+iy) = pi - arctan[y/(-x)]   ... if x < 0,
1208  *
1209  * Special cases:
1210  *
1211  *  ATAN2((anything), NaN ) is NaN;
1212  *  ATAN2(NAN , (anything) ) is NaN;
1213  *  ATAN2(+-0, +(anything but NaN)) is +-0  ;
1214  *  ATAN2(+-0, -(anything but NaN)) is +-pi ;
1215  *  ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2;
1216  *  ATAN2(+-(anything but INF and NaN), +INF) is +-0 ;
1217  *  ATAN2(+-(anything but INF and NaN), -INF) is +-pi;
1218  *  ATAN2(+-INF,+INF ) is +-pi/4 ;
1219  *  ATAN2(+-INF,-INF ) is +-3pi/4;
1220  *  ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
1221  *
1222  * Constants:
1223  * The hexadecimal values are the intended ones for the following
1224  * constants. The decimal values may be used, provided that the
1225  * compiler will convert from decimal to binary accurately enough
1226  * to produce the hexadecimal values shown.
1227  */
atan2(double y,double x)1228 double atan2(double y, double x) {
1229   static volatile double tiny = 1.0e-300;
1230   static const double
1231       zero = 0.0,
1232       pi_o_4 = 7.8539816339744827900E-01, /* 0x3FE921FB, 0x54442D18 */
1233       pi_o_2 = 1.5707963267948965580E+00, /* 0x3FF921FB, 0x54442D18 */
1234       pi = 3.1415926535897931160E+00;     /* 0x400921FB, 0x54442D18 */
1235   static volatile double pi_lo =
1236       1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
1237 
1238   double z;
1239   int32_t k, m, hx, hy, ix, iy;
1240   uint32_t lx, ly;
1241 
1242   EXTRACT_WORDS(hx, lx, x);
1243   ix = hx & 0x7FFFFFFF;
1244   EXTRACT_WORDS(hy, ly, y);
1245   iy = hy & 0x7FFFFFFF;
1246   if (((ix | ((lx | NegateWithWraparound<int32_t>(lx)) >> 31)) > 0x7FF00000) ||
1247       ((iy | ((ly | NegateWithWraparound<int32_t>(ly)) >> 31)) > 0x7FF00000)) {
1248     return x + y; /* x or y is NaN */
1249   }
1250   if ((SubWithWraparound(hx, 0x3FF00000) | lx) == 0) {
1251     return atan(y); /* x=1.0 */
1252   }
1253   m = ((hy >> 31) & 1) | ((hx >> 30) & 2);           /* 2*sign(x)+sign(y) */
1254 
1255   /* when y = 0 */
1256   if ((iy | ly) == 0) {
1257     switch (m) {
1258       case 0:
1259       case 1:
1260         return y; /* atan(+-0,+anything)=+-0 */
1261       case 2:
1262         return pi + tiny; /* atan(+0,-anything) = pi */
1263       case 3:
1264         return -pi - tiny; /* atan(-0,-anything) =-pi */
1265     }
1266   }
1267   /* when x = 0 */
1268   if ((ix | lx) == 0) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
1269 
1270   /* when x is INF */
1271   if (ix == 0x7FF00000) {
1272     if (iy == 0x7FF00000) {
1273       switch (m) {
1274         case 0:
1275           return pi_o_4 + tiny; /* atan(+INF,+INF) */
1276         case 1:
1277           return -pi_o_4 - tiny; /* atan(-INF,+INF) */
1278         case 2:
1279           return 3.0 * pi_o_4 + tiny; /*atan(+INF,-INF)*/
1280         case 3:
1281           return -3.0 * pi_o_4 - tiny; /*atan(-INF,-INF)*/
1282       }
1283     } else {
1284       switch (m) {
1285         case 0:
1286           return zero; /* atan(+...,+INF) */
1287         case 1:
1288           return -zero; /* atan(-...,+INF) */
1289         case 2:
1290           return pi + tiny; /* atan(+...,-INF) */
1291         case 3:
1292           return -pi - tiny; /* atan(-...,-INF) */
1293       }
1294     }
1295   }
1296   /* when y is INF */
1297   if (iy == 0x7FF00000) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
1298 
1299   /* compute y/x */
1300   k = (iy - ix) >> 20;
1301   if (k > 60) { /* |y/x| >  2**60 */
1302     z = pi_o_2 + 0.5 * pi_lo;
1303     m &= 1;
1304   } else if (hx < 0 && k < -60) {
1305     z = 0.0; /* 0 > |y|/x > -2**-60 */
1306   } else {
1307     z = atan(fabs(y / x)); /* safe to do y/x */
1308   }
1309   switch (m) {
1310     case 0:
1311       return z; /* atan(+,+) */
1312     case 1:
1313       return -z; /* atan(-,+) */
1314     case 2:
1315       return pi - (z - pi_lo); /* atan(+,-) */
1316     default:                   /* case 3 */
1317       return (z - pi_lo) - pi; /* atan(-,-) */
1318   }
1319 }
1320 
1321 /* cos(x)
1322  * Return cosine function of x.
1323  *
1324  * kernel function:
1325  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
1326  *      __kernel_cos            ... cosine function on [-pi/4,pi/4]
1327  *      __ieee754_rem_pio2      ... argument reduction routine
1328  *
1329  * Method.
1330  *      Let S,C and T denote the sin, cos and tan respectively on
1331  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
1332  *      in [-pi/4 , +pi/4], and let n = k mod 4.
1333  *      We have
1334  *
1335  *          n        sin(x)      cos(x)        tan(x)
1336  *     ----------------------------------------------------------
1337  *          0          S           C             T
1338  *          1          C          -S            -1/T
1339  *          2         -S          -C             T
1340  *          3         -C           S            -1/T
1341  *     ----------------------------------------------------------
1342  *
1343  * Special cases:
1344  *      Let trig be any of sin, cos, or tan.
1345  *      trig(+-INF)  is NaN, with signals;
1346  *      trig(NaN)    is that NaN;
1347  *
1348  * Accuracy:
1349  *      TRIG(x) returns trig(x) nearly rounded
1350  */
cos(double x)1351 double cos(double x) {
1352   double y[2], z = 0.0;
1353   int32_t n, ix;
1354 
1355   /* High word of x. */
1356   GET_HIGH_WORD(ix, x);
1357 
1358   /* |x| ~< pi/4 */
1359   ix &= 0x7FFFFFFF;
1360   if (ix <= 0x3FE921FB) {
1361     return __kernel_cos(x, z);
1362   } else if (ix >= 0x7FF00000) {
1363     /* cos(Inf or NaN) is NaN */
1364     return x - x;
1365   } else {
1366     /* argument reduction needed */
1367     n = __ieee754_rem_pio2(x, y);
1368     switch (n & 3) {
1369       case 0:
1370         return __kernel_cos(y[0], y[1]);
1371       case 1:
1372         return -__kernel_sin(y[0], y[1], 1);
1373       case 2:
1374         return -__kernel_cos(y[0], y[1]);
1375       default:
1376         return __kernel_sin(y[0], y[1], 1);
1377     }
1378   }
1379 }
1380 
1381 /* exp(x)
1382  * Returns the exponential of x.
1383  *
1384  * Method
1385  *   1. Argument reduction:
1386  *      Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
1387  *      Given x, find r and integer k such that
1388  *
1389  *               x = k*ln2 + r,  |r| <= 0.5*ln2.
1390  *
1391  *      Here r will be represented as r = hi-lo for better
1392  *      accuracy.
1393  *
1394  *   2. Approximation of exp(r) by a special rational function on
1395  *      the interval [0,0.34658]:
1396  *      Write
1397  *          R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
1398  *      We use a special Remes algorithm on [0,0.34658] to generate
1399  *      a polynomial of degree 5 to approximate R. The maximum error
1400  *      of this polynomial approximation is bounded by 2**-59. In
1401  *      other words,
1402  *          R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
1403  *      (where z=r*r, and the values of P1 to P5 are listed below)
1404  *      and
1405  *          |                  5          |     -59
1406  *          | 2.0+P1*z+...+P5*z   -  R(z) | <= 2
1407  *          |                             |
1408  *      The computation of exp(r) thus becomes
1409  *                             2*r
1410  *              exp(r) = 1 + -------
1411  *                            R - r
1412  *                                 r*R1(r)
1413  *                     = 1 + r + ----------- (for better accuracy)
1414  *                                2 - R1(r)
1415  *      where
1416  *                               2       4             10
1417  *              R1(r) = r - (P1*r  + P2*r  + ... + P5*r   ).
1418  *
1419  *   3. Scale back to obtain exp(x):
1420  *      From step 1, we have
1421  *         exp(x) = 2^k * exp(r)
1422  *
1423  * Special cases:
1424  *      exp(INF) is INF, exp(NaN) is NaN;
1425  *      exp(-INF) is 0, and
1426  *      for finite argument, only exp(0)=1 is exact.
1427  *
1428  * Accuracy:
1429  *      according to an error analysis, the error is always less than
1430  *      1 ulp (unit in the last place).
1431  *
1432  * Misc. info.
1433  *      For IEEE double
1434  *          if x >  7.09782712893383973096e+02 then exp(x) overflow
1435  *          if x < -7.45133219101941108420e+02 then exp(x) underflow
1436  *
1437  * Constants:
1438  * The hexadecimal values are the intended ones for the following
1439  * constants. The decimal values may be used, provided that the
1440  * compiler will convert from decimal to binary accurately enough
1441  * to produce the hexadecimal values shown.
1442  */
exp(double x)1443 double exp(double x) {
1444   static const double
1445       one = 1.0,
1446       halF[2] = {0.5, -0.5},
1447       o_threshold = 7.09782712893383973096e+02,  /* 0x40862E42, 0xFEFA39EF */
1448       u_threshold = -7.45133219101941108420e+02, /* 0xC0874910, 0xD52D3051 */
1449       ln2HI[2] = {6.93147180369123816490e-01,    /* 0x3FE62E42, 0xFEE00000 */
1450                   -6.93147180369123816490e-01},  /* 0xBFE62E42, 0xFEE00000 */
1451       ln2LO[2] = {1.90821492927058770002e-10,    /* 0x3DEA39EF, 0x35793C76 */
1452                   -1.90821492927058770002e-10},  /* 0xBDEA39EF, 0x35793C76 */
1453       invln2 = 1.44269504088896338700e+00,       /* 0x3FF71547, 0x652B82FE */
1454       P1 = 1.66666666666666019037e-01,           /* 0x3FC55555, 0x5555553E */
1455       P2 = -2.77777777770155933842e-03,          /* 0xBF66C16C, 0x16BEBD93 */
1456       P3 = 6.61375632143793436117e-05,           /* 0x3F11566A, 0xAF25DE2C */
1457       P4 = -1.65339022054652515390e-06,          /* 0xBEBBBD41, 0xC5D26BF1 */
1458       P5 = 4.13813679705723846039e-08,           /* 0x3E663769, 0x72BEA4D0 */
1459       E = 2.718281828459045;                     /* 0x4005BF0A, 0x8B145769 */
1460 
1461   static volatile double
1462       huge = 1.0e+300,
1463       twom1000 = 9.33263618503218878990e-302, /* 2**-1000=0x01700000,0*/
1464       two1023 = 8.988465674311579539e307;     /* 0x1p1023 */
1465 
1466   double y, hi = 0.0, lo = 0.0, c, t, twopk;
1467   int32_t k = 0, xsb;
1468   uint32_t hx;
1469 
1470   GET_HIGH_WORD(hx, x);
1471   xsb = (hx >> 31) & 1; /* sign bit of x */
1472   hx &= 0x7FFFFFFF;     /* high word of |x| */
1473 
1474   /* filter out non-finite argument */
1475   if (hx >= 0x40862E42) { /* if |x|>=709.78... */
1476     if (hx >= 0x7FF00000) {
1477       uint32_t lx;
1478       GET_LOW_WORD(lx, x);
1479       if (((hx & 0xFFFFF) | lx) != 0)
1480         return x + x; /* NaN */
1481       else
1482         return (xsb == 0) ? x : 0.0; /* exp(+-inf)={inf,0} */
1483     }
1484     if (x > o_threshold) return huge * huge;         /* overflow */
1485     if (x < u_threshold) return twom1000 * twom1000; /* underflow */
1486   }
1487 
1488   /* argument reduction */
1489   if (hx > 0x3FD62E42) {   /* if  |x| > 0.5 ln2 */
1490     if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
1491       /* TODO(rtoy): We special case exp(1) here to return the correct
1492        * value of E, as the computation below would get the last bit
1493        * wrong. We should probably fix the algorithm instead.
1494        */
1495       if (x == 1.0) return E;
1496       hi = x - ln2HI[xsb];
1497       lo = ln2LO[xsb];
1498       k = 1 - xsb - xsb;
1499     } else {
1500       k = static_cast<int>(invln2 * x + halF[xsb]);
1501       t = k;
1502       hi = x - t * ln2HI[0]; /* t*ln2HI is exact here */
1503       lo = t * ln2LO[0];
1504     }
1505     x = hi - lo;
1506   } else if (hx < 0x3E300000) {         /* when |x|<2**-28 */
1507     if (huge + x > one) return one + x; /* trigger inexact */
1508   } else {
1509     k = 0;
1510   }
1511 
1512   /* x is now in primary range */
1513   t = x * x;
1514   if (k >= -1021) {
1515     INSERT_WORDS(
1516         twopk,
1517         0x3FF00000 + static_cast<int32_t>(static_cast<uint32_t>(k) << 20), 0);
1518   } else {
1519     INSERT_WORDS(twopk, 0x3FF00000 + (static_cast<uint32_t>(k + 1000) << 20),
1520                  0);
1521   }
1522   c = x - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
1523   if (k == 0) {
1524     return one - ((x * c) / (c - 2.0) - x);
1525   } else {
1526     y = one - ((lo - (x * c) / (2.0 - c)) - hi);
1527   }
1528   if (k >= -1021) {
1529     if (k == 1024) return y * 2.0 * two1023;
1530     return y * twopk;
1531   } else {
1532     return y * twopk * twom1000;
1533   }
1534 }
1535 
1536 /*
1537  * Method :
1538  *    1.Reduced x to positive by atanh(-x) = -atanh(x)
1539  *    2.For x>=0.5
1540  *              1              2x                          x
1541  *  atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------)
1542  *              2             1 - x                      1 - x
1543  *
1544  *   For x<0.5
1545  *  atanh(x) = 0.5*log1p(2x+2x*x/(1-x))
1546  *
1547  * Special cases:
1548  *  atanh(x) is NaN if |x| > 1 with signal;
1549  *  atanh(NaN) is that NaN with no signal;
1550  *  atanh(+-1) is +-INF with signal.
1551  *
1552  */
atanh(double x)1553 double atanh(double x) {
1554   static const double one = 1.0, huge = 1e300;
1555   static const double zero = 0.0;
1556 
1557   double t;
1558   int32_t hx, ix;
1559   uint32_t lx;
1560   EXTRACT_WORDS(hx, lx, x);
1561   ix = hx & 0x7FFFFFFF;
1562   if ((ix | ((lx | NegateWithWraparound<int32_t>(lx)) >> 31)) > 0x3FF00000) {
1563     /* |x|>1 */
1564     return std::numeric_limits<double>::signaling_NaN();
1565   }
1566   if (ix == 0x3FF00000) {
1567     return x > 0 ? std::numeric_limits<double>::infinity()
1568                  : -std::numeric_limits<double>::infinity();
1569   }
1570   if (ix < 0x3E300000 && (huge + x) > zero) return x; /* x<2**-28 */
1571   SET_HIGH_WORD(x, ix);
1572   if (ix < 0x3FE00000) { /* x < 0.5 */
1573     t = x + x;
1574     t = 0.5 * log1p(t + t * x / (one - x));
1575   } else {
1576     t = 0.5 * log1p((x + x) / (one - x));
1577   }
1578   if (hx >= 0)
1579     return t;
1580   else
1581     return -t;
1582 }
1583 
1584 /* log(x)
1585  * Return the logrithm of x
1586  *
1587  * Method :
1588  *   1. Argument Reduction: find k and f such that
1589  *     x = 2^k * (1+f),
1590  *     where  sqrt(2)/2 < 1+f < sqrt(2) .
1591  *
1592  *   2. Approximation of log(1+f).
1593  *  Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1594  *     = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1595  *         = 2s + s*R
1596  *      We use a special Reme algorithm on [0,0.1716] to generate
1597  *  a polynomial of degree 14 to approximate R The maximum error
1598  *  of this polynomial approximation is bounded by 2**-58.45. In
1599  *  other words,
1600  *            2      4      6      8      10      12      14
1601  *      R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
1602  *    (the values of Lg1 to Lg7 are listed in the program)
1603  *  and
1604  *      |      2          14          |     -58.45
1605  *      | Lg1*s +...+Lg7*s    -  R(z) | <= 2
1606  *      |                             |
1607  *  Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1608  *  In order to guarantee error in log below 1ulp, we compute log
1609  *  by
1610  *    log(1+f) = f - s*(f - R)  (if f is not too large)
1611  *    log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
1612  *
1613  *  3. Finally,  log(x) = k*ln2 + log(1+f).
1614  *          = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1615  *     Here ln2 is split into two floating point number:
1616  *      ln2_hi + ln2_lo,
1617  *     where n*ln2_hi is always exact for |n| < 2000.
1618  *
1619  * Special cases:
1620  *  log(x) is NaN with signal if x < 0 (including -INF) ;
1621  *  log(+INF) is +INF; log(0) is -INF with signal;
1622  *  log(NaN) is that NaN with no signal.
1623  *
1624  * Accuracy:
1625  *  according to an error analysis, the error is always less than
1626  *  1 ulp (unit in the last place).
1627  *
1628  * Constants:
1629  * The hexadecimal values are the intended ones for the following
1630  * constants. The decimal values may be used, provided that the
1631  * compiler will convert from decimal to binary accurately enough
1632  * to produce the hexadecimal values shown.
1633  */
log(double x)1634 double log(double x) {
1635   static const double                      /* -- */
1636       ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
1637       ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
1638       two54 = 1.80143985094819840000e+16,  /* 43500000 00000000 */
1639       Lg1 = 6.666666666666735130e-01,      /* 3FE55555 55555593 */
1640       Lg2 = 3.999999999940941908e-01,      /* 3FD99999 9997FA04 */
1641       Lg3 = 2.857142874366239149e-01,      /* 3FD24924 94229359 */
1642       Lg4 = 2.222219843214978396e-01,      /* 3FCC71C5 1D8E78AF */
1643       Lg5 = 1.818357216161805012e-01,      /* 3FC74664 96CB03DE */
1644       Lg6 = 1.531383769920937332e-01,      /* 3FC39A09 D078C69F */
1645       Lg7 = 1.479819860511658591e-01;      /* 3FC2F112 DF3E5244 */
1646 
1647   static const double zero = 0.0;
1648 
1649   double hfsq, f, s, z, R, w, t1, t2, dk;
1650   int32_t k, hx, i, j;
1651   uint32_t lx;
1652 
1653   EXTRACT_WORDS(hx, lx, x);
1654 
1655   k = 0;
1656   if (hx < 0x00100000) { /* x < 2**-1022  */
1657     if (((hx & 0x7FFFFFFF) | lx) == 0) {
1658       return -std::numeric_limits<double>::infinity(); /* log(+-0)=-inf */
1659     }
1660     if (hx < 0) {
1661       return std::numeric_limits<double>::signaling_NaN(); /* log(-#) = NaN */
1662     }
1663     k -= 54;
1664     x *= two54; /* subnormal number, scale up x */
1665     GET_HIGH_WORD(hx, x);
1666   }
1667   if (hx >= 0x7FF00000) return x + x;
1668   k += (hx >> 20) - 1023;
1669   hx &= 0x000FFFFF;
1670   i = (hx + 0x95F64) & 0x100000;
1671   SET_HIGH_WORD(x, hx | (i ^ 0x3FF00000)); /* normalize x or x/2 */
1672   k += (i >> 20);
1673   f = x - 1.0;
1674   if ((0x000FFFFF & (2 + hx)) < 3) { /* -2**-20 <= f < 2**-20 */
1675     if (f == zero) {
1676       if (k == 0) {
1677         return zero;
1678       } else {
1679         dk = static_cast<double>(k);
1680         return dk * ln2_hi + dk * ln2_lo;
1681       }
1682     }
1683     R = f * f * (0.5 - 0.33333333333333333 * f);
1684     if (k == 0) {
1685       return f - R;
1686     } else {
1687       dk = static_cast<double>(k);
1688       return dk * ln2_hi - ((R - dk * ln2_lo) - f);
1689     }
1690   }
1691   s = f / (2.0 + f);
1692   dk = static_cast<double>(k);
1693   z = s * s;
1694   i = hx - 0x6147A;
1695   w = z * z;
1696   j = 0x6B851 - hx;
1697   t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
1698   t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
1699   i |= j;
1700   R = t2 + t1;
1701   if (i > 0) {
1702     hfsq = 0.5 * f * f;
1703     if (k == 0)
1704       return f - (hfsq - s * (hfsq + R));
1705     else
1706       return dk * ln2_hi - ((hfsq - (s * (hfsq + R) + dk * ln2_lo)) - f);
1707   } else {
1708     if (k == 0)
1709       return f - s * (f - R);
1710     else
1711       return dk * ln2_hi - ((s * (f - R) - dk * ln2_lo) - f);
1712   }
1713 }
1714 
1715 /* double log1p(double x)
1716  *
1717  * Method :
1718  *   1. Argument Reduction: find k and f such that
1719  *      1+x = 2^k * (1+f),
1720  *     where  sqrt(2)/2 < 1+f < sqrt(2) .
1721  *
1722  *      Note. If k=0, then f=x is exact. However, if k!=0, then f
1723  *  may not be representable exactly. In that case, a correction
1724  *  term is need. Let u=1+x rounded. Let c = (1+x)-u, then
1725  *  log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
1726  *  and add back the correction term c/u.
1727  *  (Note: when x > 2**53, one can simply return log(x))
1728  *
1729  *   2. Approximation of log1p(f).
1730  *  Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1731  *     = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1732  *         = 2s + s*R
1733  *      We use a special Reme algorithm on [0,0.1716] to generate
1734  *  a polynomial of degree 14 to approximate R The maximum error
1735  *  of this polynomial approximation is bounded by 2**-58.45. In
1736  *  other words,
1737  *            2      4      6      8      10      12      14
1738  *      R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s  +Lp6*s  +Lp7*s
1739  *    (the values of Lp1 to Lp7 are listed in the program)
1740  *  and
1741  *      |      2          14          |     -58.45
1742  *      | Lp1*s +...+Lp7*s    -  R(z) | <= 2
1743  *      |                             |
1744  *  Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1745  *  In order to guarantee error in log below 1ulp, we compute log
1746  *  by
1747  *    log1p(f) = f - (hfsq - s*(hfsq+R)).
1748  *
1749  *  3. Finally, log1p(x) = k*ln2 + log1p(f).
1750  *           = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1751  *     Here ln2 is split into two floating point number:
1752  *      ln2_hi + ln2_lo,
1753  *     where n*ln2_hi is always exact for |n| < 2000.
1754  *
1755  * Special cases:
1756  *  log1p(x) is NaN with signal if x < -1 (including -INF) ;
1757  *  log1p(+INF) is +INF; log1p(-1) is -INF with signal;
1758  *  log1p(NaN) is that NaN with no signal.
1759  *
1760  * Accuracy:
1761  *  according to an error analysis, the error is always less than
1762  *  1 ulp (unit in the last place).
1763  *
1764  * Constants:
1765  * The hexadecimal values are the intended ones for the following
1766  * constants. The decimal values may be used, provided that the
1767  * compiler will convert from decimal to binary accurately enough
1768  * to produce the hexadecimal values shown.
1769  *
1770  * Note: Assuming log() return accurate answer, the following
1771  *   algorithm can be used to compute log1p(x) to within a few ULP:
1772  *
1773  *    u = 1+x;
1774  *    if(u==1.0) return x ; else
1775  *         return log(u)*(x/(u-1.0));
1776  *
1777  *   See HP-15C Advanced Functions Handbook, p.193.
1778  */
log1p(double x)1779 double log1p(double x) {
1780   static const double                      /* -- */
1781       ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
1782       ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
1783       two54 = 1.80143985094819840000e+16,  /* 43500000 00000000 */
1784       Lp1 = 6.666666666666735130e-01,      /* 3FE55555 55555593 */
1785       Lp2 = 3.999999999940941908e-01,      /* 3FD99999 9997FA04 */
1786       Lp3 = 2.857142874366239149e-01,      /* 3FD24924 94229359 */
1787       Lp4 = 2.222219843214978396e-01,      /* 3FCC71C5 1D8E78AF */
1788       Lp5 = 1.818357216161805012e-01,      /* 3FC74664 96CB03DE */
1789       Lp6 = 1.531383769920937332e-01,      /* 3FC39A09 D078C69F */
1790       Lp7 = 1.479819860511658591e-01;      /* 3FC2F112 DF3E5244 */
1791 
1792   static const double zero = 0.0;
1793 
1794   double hfsq, f, c, s, z, R, u;
1795   int32_t k, hx, hu, ax;
1796 
1797   GET_HIGH_WORD(hx, x);
1798   ax = hx & 0x7FFFFFFF;
1799 
1800   k = 1;
1801   if (hx < 0x3FDA827A) {    /* 1+x < sqrt(2)+ */
1802     if (ax >= 0x3FF00000) { /* x <= -1.0 */
1803       if (x == -1.0)
1804         return -std::numeric_limits<double>::infinity(); /* log1p(-1)=+inf */
1805       else
1806         return std::numeric_limits<double>::signaling_NaN();  // log1p(x<-1)=NaN
1807     }
1808     if (ax < 0x3E200000) {    /* |x| < 2**-29 */
1809       if (two54 + x > zero    /* raise inexact */
1810           && ax < 0x3C900000) /* |x| < 2**-54 */
1811         return x;
1812       else
1813         return x - x * x * 0.5;
1814     }
1815     if (hx > 0 || hx <= static_cast<int32_t>(0xBFD2BEC4)) {
1816       k = 0;
1817       f = x;
1818       hu = 1;
1819     } /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
1820   }
1821   if (hx >= 0x7FF00000) return x + x;
1822   if (k != 0) {
1823     if (hx < 0x43400000) {
1824       u = 1.0 + x;
1825       GET_HIGH_WORD(hu, u);
1826       k = (hu >> 20) - 1023;
1827       c = (k > 0) ? 1.0 - (u - x) : x - (u - 1.0); /* correction term */
1828       c /= u;
1829     } else {
1830       u = x;
1831       GET_HIGH_WORD(hu, u);
1832       k = (hu >> 20) - 1023;
1833       c = 0;
1834     }
1835     hu &= 0x000FFFFF;
1836     /*
1837      * The approximation to sqrt(2) used in thresholds is not
1838      * critical.  However, the ones used above must give less
1839      * strict bounds than the one here so that the k==0 case is
1840      * never reached from here, since here we have committed to
1841      * using the correction term but don't use it if k==0.
1842      */
1843     if (hu < 0x6A09E) {                  /* u ~< sqrt(2) */
1844       SET_HIGH_WORD(u, hu | 0x3FF00000); /* normalize u */
1845     } else {
1846       k += 1;
1847       SET_HIGH_WORD(u, hu | 0x3FE00000); /* normalize u/2 */
1848       hu = (0x00100000 - hu) >> 2;
1849     }
1850     f = u - 1.0;
1851   }
1852   hfsq = 0.5 * f * f;
1853   if (hu == 0) { /* |f| < 2**-20 */
1854     if (f == zero) {
1855       if (k == 0) {
1856         return zero;
1857       } else {
1858         c += k * ln2_lo;
1859         return k * ln2_hi + c;
1860       }
1861     }
1862     R = hfsq * (1.0 - 0.66666666666666666 * f);
1863     if (k == 0)
1864       return f - R;
1865     else
1866       return k * ln2_hi - ((R - (k * ln2_lo + c)) - f);
1867   }
1868   s = f / (2.0 + f);
1869   z = s * s;
1870   R = z * (Lp1 +
1871            z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7))))));
1872   if (k == 0)
1873     return f - (hfsq - s * (hfsq + R));
1874   else
1875     return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);
1876 }
1877 
1878 /*
1879  * k_log1p(f):
1880  * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)].
1881  *
1882  * The following describes the overall strategy for computing
1883  * logarithms in base e.  The argument reduction and adding the final
1884  * term of the polynomial are done by the caller for increased accuracy
1885  * when different bases are used.
1886  *
1887  * Method :
1888  *   1. Argument Reduction: find k and f such that
1889  *         x = 2^k * (1+f),
1890  *         where  sqrt(2)/2 < 1+f < sqrt(2) .
1891  *
1892  *   2. Approximation of log(1+f).
1893  *      Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1894  *            = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1895  *            = 2s + s*R
1896  *      We use a special Reme algorithm on [0,0.1716] to generate
1897  *      a polynomial of degree 14 to approximate R The maximum error
1898  *      of this polynomial approximation is bounded by 2**-58.45. In
1899  *      other words,
1900  *          2      4      6      8      10      12      14
1901  *          R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
1902  *      (the values of Lg1 to Lg7 are listed in the program)
1903  *      and
1904  *          |      2          14          |     -58.45
1905  *          | Lg1*s +...+Lg7*s    -  R(z) | <= 2
1906  *          |                             |
1907  *      Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1908  *      In order to guarantee error in log below 1ulp, we compute log
1909  *      by
1910  *          log(1+f) = f - s*(f - R)            (if f is not too large)
1911  *          log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
1912  *
1913  *   3. Finally,  log(x) = k*ln2 + log(1+f).
1914  *          = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1915  *      Here ln2 is split into two floating point number:
1916  *          ln2_hi + ln2_lo,
1917  *      where n*ln2_hi is always exact for |n| < 2000.
1918  *
1919  * Special cases:
1920  *      log(x) is NaN with signal if x < 0 (including -INF) ;
1921  *      log(+INF) is +INF; log(0) is -INF with signal;
1922  *      log(NaN) is that NaN with no signal.
1923  *
1924  * Accuracy:
1925  *      according to an error analysis, the error is always less than
1926  *      1 ulp (unit in the last place).
1927  *
1928  * Constants:
1929  * The hexadecimal values are the intended ones for the following
1930  * constants. The decimal values may be used, provided that the
1931  * compiler will convert from decimal to binary accurately enough
1932  * to produce the hexadecimal values shown.
1933  */
1934 
1935 static const double Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
1936     Lg2 = 3.999999999940941908e-01,                 /* 3FD99999 9997FA04 */
1937     Lg3 = 2.857142874366239149e-01,                 /* 3FD24924 94229359 */
1938     Lg4 = 2.222219843214978396e-01,                 /* 3FCC71C5 1D8E78AF */
1939     Lg5 = 1.818357216161805012e-01,                 /* 3FC74664 96CB03DE */
1940     Lg6 = 1.531383769920937332e-01,                 /* 3FC39A09 D078C69F */
1941     Lg7 = 1.479819860511658591e-01;                 /* 3FC2F112 DF3E5244 */
1942 
1943 /*
1944  * We always inline k_log1p(), since doing so produces a
1945  * substantial performance improvement (~40% on amd64).
1946  */
k_log1p(double f)1947 static inline double k_log1p(double f) {
1948   double hfsq, s, z, R, w, t1, t2;
1949 
1950   s = f / (2.0 + f);
1951   z = s * s;
1952   w = z * z;
1953   t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
1954   t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
1955   R = t2 + t1;
1956   hfsq = 0.5 * f * f;
1957   return s * (hfsq + R);
1958 }
1959 
1960 /*
1961  * Return the base 2 logarithm of x.  See e_log.c and k_log.h for most
1962  * comments.
1963  *
1964  * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel,
1965  * then does the combining and scaling steps
1966  *    log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k
1967  * in not-quite-routine extra precision.
1968  */
log2(double x)1969 double log2(double x) {
1970   static const double
1971       two54 = 1.80143985094819840000e+16,   /* 0x43500000, 0x00000000 */
1972       ivln2hi = 1.44269504072144627571e+00, /* 0x3FF71547, 0x65200000 */
1973       ivln2lo = 1.67517131648865118353e-10; /* 0x3DE705FC, 0x2EEFA200 */
1974 
1975   double f, hfsq, hi, lo, r, val_hi, val_lo, w, y;
1976   int32_t i, k, hx;
1977   uint32_t lx;
1978 
1979   EXTRACT_WORDS(hx, lx, x);
1980 
1981   k = 0;
1982   if (hx < 0x00100000) { /* x < 2**-1022  */
1983     if (((hx & 0x7FFFFFFF) | lx) == 0) {
1984       return -std::numeric_limits<double>::infinity(); /* log(+-0)=-inf */
1985     }
1986     if (hx < 0) {
1987       return std::numeric_limits<double>::signaling_NaN(); /* log(-#) = NaN */
1988     }
1989     k -= 54;
1990     x *= two54; /* subnormal number, scale up x */
1991     GET_HIGH_WORD(hx, x);
1992   }
1993   if (hx >= 0x7FF00000) return x + x;
1994   if (hx == 0x3FF00000 && lx == 0) return 0.0; /* log(1) = +0 */
1995   k += (hx >> 20) - 1023;
1996   hx &= 0x000FFFFF;
1997   i = (hx + 0x95F64) & 0x100000;
1998   SET_HIGH_WORD(x, hx | (i ^ 0x3FF00000)); /* normalize x or x/2 */
1999   k += (i >> 20);
2000   y = static_cast<double>(k);
2001   f = x - 1.0;
2002   hfsq = 0.5 * f * f;
2003   r = k_log1p(f);
2004 
2005   /*
2006    * f-hfsq must (for args near 1) be evaluated in extra precision
2007    * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2).
2008    * This is fairly efficient since f-hfsq only depends on f, so can
2009    * be evaluated in parallel with R.  Not combining hfsq with R also
2010    * keeps R small (though not as small as a true `lo' term would be),
2011    * so that extra precision is not needed for terms involving R.
2012    *
2013    * Compiler bugs involving extra precision used to break Dekker's
2014    * theorem for spitting f-hfsq as hi+lo, unless double_t was used
2015    * or the multi-precision calculations were avoided when double_t
2016    * has extra precision.  These problems are now automatically
2017    * avoided as a side effect of the optimization of combining the
2018    * Dekker splitting step with the clear-low-bits step.
2019    *
2020    * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra
2021    * precision to avoid a very large cancellation when x is very near
2022    * these values.  Unlike the above cancellations, this problem is
2023    * specific to base 2.  It is strange that adding +-1 is so much
2024    * harder than adding +-ln2 or +-log10_2.
2025    *
2026    * This uses Dekker's theorem to normalize y+val_hi, so the
2027    * compiler bugs are back in some configurations, sigh.  And I
2028    * don't want to used double_t to avoid them, since that gives a
2029    * pessimization and the support for avoiding the pessimization
2030    * is not yet available.
2031    *
2032    * The multi-precision calculations for the multiplications are
2033    * routine.
2034    */
2035   hi = f - hfsq;
2036   SET_LOW_WORD(hi, 0);
2037   lo = (f - hi) - hfsq + r;
2038   val_hi = hi * ivln2hi;
2039   val_lo = (lo + hi) * ivln2lo + lo * ivln2hi;
2040 
2041   /* spadd(val_hi, val_lo, y), except for not using double_t: */
2042   w = y + val_hi;
2043   val_lo += (y - w) + val_hi;
2044   val_hi = w;
2045 
2046   return val_lo + val_hi;
2047 }
2048 
2049 /*
2050  * Return the base 10 logarithm of x
2051  *
2052  * Method :
2053  *      Let log10_2hi = leading 40 bits of log10(2) and
2054  *          log10_2lo = log10(2) - log10_2hi,
2055  *          ivln10   = 1/log(10) rounded.
2056  *      Then
2057  *              n = ilogb(x),
2058  *              if(n<0)  n = n+1;
2059  *              x = scalbn(x,-n);
2060  *              log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x))
2061  *
2062  *  Note 1:
2063  *     To guarantee log10(10**n)=n, where 10**n is normal, the rounding
2064  *     mode must set to Round-to-Nearest.
2065  *  Note 2:
2066  *      [1/log(10)] rounded to 53 bits has error .198 ulps;
2067  *      log10 is monotonic at all binary break points.
2068  *
2069  *  Special cases:
2070  *      log10(x) is NaN if x < 0;
2071  *      log10(+INF) is +INF; log10(0) is -INF;
2072  *      log10(NaN) is that NaN;
2073  *      log10(10**N) = N  for N=0,1,...,22.
2074  */
log10(double x)2075 double log10(double x) {
2076   static const double
2077       two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
2078       ivln10 = 4.34294481903251816668e-01,
2079       log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
2080       log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
2081 
2082   double y;
2083   int32_t i, k, hx;
2084   uint32_t lx;
2085 
2086   EXTRACT_WORDS(hx, lx, x);
2087 
2088   k = 0;
2089   if (hx < 0x00100000) { /* x < 2**-1022  */
2090     if (((hx & 0x7FFFFFFF) | lx) == 0) {
2091       return -std::numeric_limits<double>::infinity(); /* log(+-0)=-inf */
2092     }
2093     if (hx < 0) {
2094       return std::numeric_limits<double>::quiet_NaN(); /* log(-#) = NaN */
2095     }
2096     k -= 54;
2097     x *= two54; /* subnormal number, scale up x */
2098     GET_HIGH_WORD(hx, x);
2099     GET_LOW_WORD(lx, x);
2100   }
2101   if (hx >= 0x7FF00000) return x + x;
2102   if (hx == 0x3FF00000 && lx == 0) return 0.0; /* log(1) = +0 */
2103   k += (hx >> 20) - 1023;
2104 
2105   i = (k & 0x80000000) >> 31;
2106   hx = (hx & 0x000FFFFF) | ((0x3FF - i) << 20);
2107   y = k + i;
2108   SET_HIGH_WORD(x, hx);
2109   SET_LOW_WORD(x, lx);
2110 
2111   double z = y * log10_2lo + ivln10 * log(x);
2112   return z + y * log10_2hi;
2113 }
2114 
2115 /* expm1(x)
2116  * Returns exp(x)-1, the exponential of x minus 1.
2117  *
2118  * Method
2119  *   1. Argument reduction:
2120  *  Given x, find r and integer k such that
2121  *
2122  *               x = k*ln2 + r,  |r| <= 0.5*ln2 ~ 0.34658
2123  *
2124  *      Here a correction term c will be computed to compensate
2125  *  the error in r when rounded to a floating-point number.
2126  *
2127  *   2. Approximating expm1(r) by a special rational function on
2128  *  the interval [0,0.34658]:
2129  *  Since
2130  *      r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ...
2131  *  we define R1(r*r) by
2132  *      r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r)
2133  *  That is,
2134  *      R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
2135  *         = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
2136  *         = 1 - r^2/60 + r^4/2520 - r^6/100800 + ...
2137  *      We use a special Reme algorithm on [0,0.347] to generate
2138  *   a polynomial of degree 5 in r*r to approximate R1. The
2139  *  maximum error of this polynomial approximation is bounded
2140  *  by 2**-61. In other words,
2141  *      R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
2142  *  where   Q1  =  -1.6666666666666567384E-2,
2143  *     Q2  =   3.9682539681370365873E-4,
2144  *     Q3  =  -9.9206344733435987357E-6,
2145  *     Q4  =   2.5051361420808517002E-7,
2146  *     Q5  =  -6.2843505682382617102E-9;
2147  *    z   =  r*r,
2148  *  with error bounded by
2149  *      |                  5           |     -61
2150  *      | 1.0+Q1*z+...+Q5*z   -  R1(z) | <= 2
2151  *      |                              |
2152  *
2153  *  expm1(r) = exp(r)-1 is then computed by the following
2154  *   specific way which minimize the accumulation rounding error:
2155  *             2     3
2156  *            r     r    [ 3 - (R1 + R1*r/2)  ]
2157  *        expm1(r) = r + --- + --- * [--------------------]
2158  *                  2     2    [ 6 - r*(3 - R1*r/2) ]
2159  *
2160  *  To compensate the error in the argument reduction, we use
2161  *    expm1(r+c) = expm1(r) + c + expm1(r)*c
2162  *         ~ expm1(r) + c + r*c
2163  *  Thus c+r*c will be added in as the correction terms for
2164  *  expm1(r+c). Now rearrange the term to avoid optimization
2165  *   screw up:
2166  *            (      2                                    2 )
2167  *            ({  ( r    [ R1 -  (3 - R1*r/2) ]  )  }    r  )
2168  *   expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )
2169  *                  ({  ( 2    [ 6 - r*(3 - R1*r/2) ]  )  }    2  )
2170  *                      (                                             )
2171  *
2172  *       = r - E
2173  *   3. Scale back to obtain expm1(x):
2174  *  From step 1, we have
2175  *     expm1(x) = either 2^k*[expm1(r)+1] - 1
2176  *        = or     2^k*[expm1(r) + (1-2^-k)]
2177  *   4. Implementation notes:
2178  *  (A). To save one multiplication, we scale the coefficient Qi
2179  *       to Qi*2^i, and replace z by (x^2)/2.
2180  *  (B). To achieve maximum accuracy, we compute expm1(x) by
2181  *    (i)   if x < -56*ln2, return -1.0, (raise inexact if x!=inf)
2182  *    (ii)  if k=0, return r-E
2183  *    (iii) if k=-1, return 0.5*(r-E)-0.5
2184  *        (iv)  if k=1 if r < -0.25, return 2*((r+0.5)- E)
2185  *                  else       return  1.0+2.0*(r-E);
2186  *    (v)   if (k<-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1)
2187  *    (vi)  if k <= 20, return 2^k((1-2^-k)-(E-r)), else
2188  *    (vii) return 2^k(1-((E+2^-k)-r))
2189  *
2190  * Special cases:
2191  *  expm1(INF) is INF, expm1(NaN) is NaN;
2192  *  expm1(-INF) is -1, and
2193  *  for finite argument, only expm1(0)=0 is exact.
2194  *
2195  * Accuracy:
2196  *  according to an error analysis, the error is always less than
2197  *  1 ulp (unit in the last place).
2198  *
2199  * Misc. info.
2200  *  For IEEE double
2201  *      if x >  7.09782712893383973096e+02 then expm1(x) overflow
2202  *
2203  * Constants:
2204  * The hexadecimal values are the intended ones for the following
2205  * constants. The decimal values may be used, provided that the
2206  * compiler will convert from decimal to binary accurately enough
2207  * to produce the hexadecimal values shown.
2208  */
expm1(double x)2209 double expm1(double x) {
2210   static const double
2211       one = 1.0,
2212       tiny = 1.0e-300,
2213       o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
2214       ln2_hi = 6.93147180369123816490e-01,      /* 0x3FE62E42, 0xFEE00000 */
2215       ln2_lo = 1.90821492927058770002e-10,      /* 0x3DEA39EF, 0x35793C76 */
2216       invln2 = 1.44269504088896338700e+00,      /* 0x3FF71547, 0x652B82FE */
2217       /* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs =
2218          x*x/2: */
2219       Q1 = -3.33333333333331316428e-02, /* BFA11111 111110F4 */
2220       Q2 = 1.58730158725481460165e-03,  /* 3F5A01A0 19FE5585 */
2221       Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
2222       Q4 = 4.00821782732936239552e-06,  /* 3ED0CFCA 86E65239 */
2223       Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
2224 
2225   static volatile double huge = 1.0e+300;
2226 
2227   double y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
2228   int32_t k, xsb;
2229   uint32_t hx;
2230 
2231   GET_HIGH_WORD(hx, x);
2232   xsb = hx & 0x80000000; /* sign bit of x */
2233   hx &= 0x7FFFFFFF;      /* high word of |x| */
2234 
2235   /* filter out huge and non-finite argument */
2236   if (hx >= 0x4043687A) {   /* if |x|>=56*ln2 */
2237     if (hx >= 0x40862E42) { /* if |x|>=709.78... */
2238       if (hx >= 0x7FF00000) {
2239         uint32_t low;
2240         GET_LOW_WORD(low, x);
2241         if (((hx & 0xFFFFF) | low) != 0)
2242           return x + x; /* NaN */
2243         else
2244           return (xsb == 0) ? x : -1.0; /* exp(+-inf)={inf,-1} */
2245       }
2246       if (x > o_threshold) return huge * huge; /* overflow */
2247     }
2248     if (xsb != 0) {        /* x < -56*ln2, return -1.0 with inexact */
2249       if (x + tiny < 0.0)  /* raise inexact */
2250         return tiny - one; /* return -1 */
2251     }
2252   }
2253 
2254   /* argument reduction */
2255   if (hx > 0x3FD62E42) {   /* if  |x| > 0.5 ln2 */
2256     if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
2257       if (xsb == 0) {
2258         hi = x - ln2_hi;
2259         lo = ln2_lo;
2260         k = 1;
2261       } else {
2262         hi = x + ln2_hi;
2263         lo = -ln2_lo;
2264         k = -1;
2265       }
2266     } else {
2267       k = invln2 * x + ((xsb == 0) ? 0.5 : -0.5);
2268       t = k;
2269       hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
2270       lo = t * ln2_lo;
2271     }
2272     x = hi - lo;
2273     c = (hi - x) - lo;
2274   } else if (hx < 0x3C900000) { /* when |x|<2**-54, return x */
2275     t = huge + x;               /* return x with inexact flags when x!=0 */
2276     return x - (t - (huge + x));
2277   } else {
2278     k = 0;
2279   }
2280 
2281   /* x is now in primary range */
2282   hfx = 0.5 * x;
2283   hxs = x * hfx;
2284   r1 = one + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
2285   t = 3.0 - r1 * hfx;
2286   e = hxs * ((r1 - t) / (6.0 - x * t));
2287   if (k == 0) {
2288     return x - (x * e - hxs); /* c is 0 */
2289   } else {
2290     INSERT_WORDS(
2291         twopk,
2292         0x3FF00000 + static_cast<int32_t>(static_cast<uint32_t>(k) << 20),
2293         0); /* 2^k */
2294     e = (x * (e - c) - c);
2295     e -= hxs;
2296     if (k == -1) return 0.5 * (x - e) - 0.5;
2297     if (k == 1) {
2298       if (x < -0.25)
2299         return -2.0 * (e - (x + 0.5));
2300       else
2301         return one + 2.0 * (x - e);
2302     }
2303     if (k <= -2 || k > 56) { /* suffice to return exp(x)-1 */
2304       y = one - (e - x);
2305       // TODO(mvstanton): is this replacement for the hex float
2306       // sufficient?
2307       // if (k == 1024) y = y*2.0*0x1p1023;
2308       if (k == 1024)
2309         y = y * 2.0 * 8.98846567431158e+307;
2310       else
2311         y = y * twopk;
2312       return y - one;
2313     }
2314     t = one;
2315     if (k < 20) {
2316       SET_HIGH_WORD(t, 0x3FF00000 - (0x200000 >> k)); /* t=1-2^-k */
2317       y = t - (e - x);
2318       y = y * twopk;
2319     } else {
2320       SET_HIGH_WORD(t, ((0x3FF - k) << 20)); /* 2^-k */
2321       y = x - (e + t);
2322       y += one;
2323       y = y * twopk;
2324     }
2325   }
2326   return y;
2327 }
2328 
cbrt(double x)2329 double cbrt(double x) {
2330   static const uint32_t
2331       B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
2332       B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
2333 
2334   /* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
2335   static const double P0 = 1.87595182427177009643, /* 0x3FFE03E6, 0x0F61E692 */
2336       P1 = -1.88497979543377169875,                /* 0xBFFE28E0, 0x92F02420 */
2337       P2 = 1.621429720105354466140,                /* 0x3FF9F160, 0x4A49D6C2 */
2338       P3 = -0.758397934778766047437,               /* 0xBFE844CB, 0xBEE751D9 */
2339       P4 = 0.145996192886612446982;                /* 0x3FC2B000, 0xD4E4EDD7 */
2340 
2341   int32_t hx;
2342   double r, s, t = 0.0, w;
2343   uint32_t sign;
2344   uint32_t high, low;
2345 
2346   EXTRACT_WORDS(hx, low, x);
2347   sign = hx & 0x80000000; /* sign= sign(x) */
2348   hx ^= sign;
2349   if (hx >= 0x7FF00000) return (x + x); /* cbrt(NaN,INF) is itself */
2350 
2351   /*
2352    * Rough cbrt to 5 bits:
2353    *    cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
2354    * where e is integral and >= 0, m is real and in [0, 1), and "/" and
2355    * "%" are integer division and modulus with rounding towards minus
2356    * infinity.  The RHS is always >= the LHS and has a maximum relative
2357    * error of about 1 in 16.  Adding a bias of -0.03306235651 to the
2358    * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
2359    * floating point representation, for finite positive normal values,
2360    * ordinary integer division of the value in bits magically gives
2361    * almost exactly the RHS of the above provided we first subtract the
2362    * exponent bias (1023 for doubles) and later add it back.  We do the
2363    * subtraction virtually to keep e >= 0 so that ordinary integer
2364    * division rounds towards minus infinity; this is also efficient.
2365    */
2366   if (hx < 0x00100000) {             /* zero or subnormal? */
2367     if ((hx | low) == 0) return (x); /* cbrt(0) is itself */
2368     SET_HIGH_WORD(t, 0x43500000);    /* set t= 2**54 */
2369     t *= x;
2370     GET_HIGH_WORD(high, t);
2371     INSERT_WORDS(t, sign | ((high & 0x7FFFFFFF) / 3 + B2), 0);
2372   } else {
2373     INSERT_WORDS(t, sign | (hx / 3 + B1), 0);
2374   }
2375 
2376   /*
2377    * New cbrt to 23 bits:
2378    *    cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
2379    * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
2380    * to within 2**-23.5 when |r - 1| < 1/10.  The rough approximation
2381    * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
2382    * gives us bounds for r = t**3/x.
2383    *
2384    * Try to optimize for parallel evaluation as in k_tanf.c.
2385    */
2386   r = (t * t) * (t / x);
2387   t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));
2388 
2389   /*
2390    * Round t away from zero to 23 bits (sloppily except for ensuring that
2391    * the result is larger in magnitude than cbrt(x) but not much more than
2392    * 2 23-bit ulps larger).  With rounding towards zero, the error bound
2393    * would be ~5/6 instead of ~4/6.  With a maximum error of 2 23-bit ulps
2394    * in the rounded t, the infinite-precision error in the Newton
2395    * approximation barely affects third digit in the final error
2396    * 0.667; the error in the rounded t can be up to about 3 23-bit ulps
2397    * before the final error is larger than 0.667 ulps.
2398    */
2399   uint64_t bits = bit_cast<uint64_t>(t);
2400   bits = (bits + 0x80000000) & 0xFFFFFFFFC0000000ULL;
2401   t = bit_cast<double>(bits);
2402 
2403   /* one step Newton iteration to 53 bits with error < 0.667 ulps */
2404   s = t * t;             /* t*t is exact */
2405   r = x / s;             /* error <= 0.5 ulps; |r| < |t| */
2406   w = t + t;             /* t+t is exact */
2407   r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */
2408   t = t + t * r;         /* error <= 0.5 + 0.5/3 + epsilon */
2409 
2410   return (t);
2411 }
2412 
2413 /* sin(x)
2414  * Return sine function of x.
2415  *
2416  * kernel function:
2417  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
2418  *      __kernel_cos            ... cose function on [-pi/4,pi/4]
2419  *      __ieee754_rem_pio2      ... argument reduction routine
2420  *
2421  * Method.
2422  *      Let S,C and T denote the sin, cos and tan respectively on
2423  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
2424  *      in [-pi/4 , +pi/4], and let n = k mod 4.
2425  *      We have
2426  *
2427  *          n        sin(x)      cos(x)        tan(x)
2428  *     ----------------------------------------------------------
2429  *          0          S           C             T
2430  *          1          C          -S            -1/T
2431  *          2         -S          -C             T
2432  *          3         -C           S            -1/T
2433  *     ----------------------------------------------------------
2434  *
2435  * Special cases:
2436  *      Let trig be any of sin, cos, or tan.
2437  *      trig(+-INF)  is NaN, with signals;
2438  *      trig(NaN)    is that NaN;
2439  *
2440  * Accuracy:
2441  *      TRIG(x) returns trig(x) nearly rounded
2442  */
sin(double x)2443 double sin(double x) {
2444   double y[2], z = 0.0;
2445   int32_t n, ix;
2446 
2447   /* High word of x. */
2448   GET_HIGH_WORD(ix, x);
2449 
2450   /* |x| ~< pi/4 */
2451   ix &= 0x7FFFFFFF;
2452   if (ix <= 0x3FE921FB) {
2453     return __kernel_sin(x, z, 0);
2454   } else if (ix >= 0x7FF00000) {
2455     /* sin(Inf or NaN) is NaN */
2456     return x - x;
2457   } else {
2458     /* argument reduction needed */
2459     n = __ieee754_rem_pio2(x, y);
2460     switch (n & 3) {
2461       case 0:
2462         return __kernel_sin(y[0], y[1], 1);
2463       case 1:
2464         return __kernel_cos(y[0], y[1]);
2465       case 2:
2466         return -__kernel_sin(y[0], y[1], 1);
2467       default:
2468         return -__kernel_cos(y[0], y[1]);
2469     }
2470   }
2471 }
2472 
2473 /* tan(x)
2474  * Return tangent function of x.
2475  *
2476  * kernel function:
2477  *      __kernel_tan            ... tangent function on [-pi/4,pi/4]
2478  *      __ieee754_rem_pio2      ... argument reduction routine
2479  *
2480  * Method.
2481  *      Let S,C and T denote the sin, cos and tan respectively on
2482  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
2483  *      in [-pi/4 , +pi/4], and let n = k mod 4.
2484  *      We have
2485  *
2486  *          n        sin(x)      cos(x)        tan(x)
2487  *     ----------------------------------------------------------
2488  *          0          S           C             T
2489  *          1          C          -S            -1/T
2490  *          2         -S          -C             T
2491  *          3         -C           S            -1/T
2492  *     ----------------------------------------------------------
2493  *
2494  * Special cases:
2495  *      Let trig be any of sin, cos, or tan.
2496  *      trig(+-INF)  is NaN, with signals;
2497  *      trig(NaN)    is that NaN;
2498  *
2499  * Accuracy:
2500  *      TRIG(x) returns trig(x) nearly rounded
2501  */
tan(double x)2502 double tan(double x) {
2503   double y[2], z = 0.0;
2504   int32_t n, ix;
2505 
2506   /* High word of x. */
2507   GET_HIGH_WORD(ix, x);
2508 
2509   /* |x| ~< pi/4 */
2510   ix &= 0x7FFFFFFF;
2511   if (ix <= 0x3FE921FB) {
2512     return __kernel_tan(x, z, 1);
2513   } else if (ix >= 0x7FF00000) {
2514     /* tan(Inf or NaN) is NaN */
2515     return x - x; /* NaN */
2516   } else {
2517     /* argument reduction needed */
2518     n = __ieee754_rem_pio2(x, y);
2519     /* 1 -> n even, -1 -> n odd */
2520     return __kernel_tan(y[0], y[1], 1 - ((n & 1) << 1));
2521   }
2522 }
2523 
2524 /*
2525  * ES6 draft 09-27-13, section 20.2.2.12.
2526  * Math.cosh
2527  * Method :
2528  * mathematically cosh(x) if defined to be (exp(x)+exp(-x))/2
2529  *      1. Replace x by |x| (cosh(x) = cosh(-x)).
2530  *      2.
2531  *                                                      [ exp(x) - 1 ]^2
2532  *          0        <= x <= ln2/2  :  cosh(x) := 1 + -------------------
2533  *                                                         2*exp(x)
2534  *
2535  *                                                 exp(x) + 1/exp(x)
2536  *          ln2/2    <= x <= 22     :  cosh(x) := -------------------
2537  *                                                        2
2538  *          22       <= x <= lnovft :  cosh(x) := exp(x)/2
2539  *          lnovft   <= x <= ln2ovft:  cosh(x) := exp(x/2)/2 * exp(x/2)
2540  *          ln2ovft  <  x           :  cosh(x) := huge*huge (overflow)
2541  *
2542  * Special cases:
2543  *      cosh(x) is |x| if x is +INF, -INF, or NaN.
2544  *      only cosh(0)=1 is exact for finite x.
2545  */
cosh(double x)2546 double cosh(double x) {
2547   static const double KCOSH_OVERFLOW = 710.4758600739439;
2548   static const double one = 1.0, half = 0.5;
2549   static volatile double huge = 1.0e+300;
2550 
2551   int32_t ix;
2552 
2553   /* High word of |x|. */
2554   GET_HIGH_WORD(ix, x);
2555   ix &= 0x7FFFFFFF;
2556 
2557   // |x| in [0,0.5*log2], return 1+expm1(|x|)^2/(2*exp(|x|))
2558   if (ix < 0x3FD62E43) {
2559     double t = expm1(fabs(x));
2560     double w = one + t;
2561     // For |x| < 2^-55, cosh(x) = 1
2562     if (ix < 0x3C800000) return w;
2563     return one + (t * t) / (w + w);
2564   }
2565 
2566   // |x| in [0.5*log2, 22], return (exp(|x|)+1/exp(|x|)/2
2567   if (ix < 0x40360000) {
2568     double t = exp(fabs(x));
2569     return half * t + half / t;
2570   }
2571 
2572   // |x| in [22, log(maxdouble)], return half*exp(|x|)
2573   if (ix < 0x40862E42) return half * exp(fabs(x));
2574 
2575   // |x| in [log(maxdouble), overflowthreshold]
2576   if (fabs(x) <= KCOSH_OVERFLOW) {
2577     double w = exp(half * fabs(x));
2578     double t = half * w;
2579     return t * w;
2580   }
2581 
2582   /* x is INF or NaN */
2583   if (ix >= 0x7FF00000) return x * x;
2584 
2585   // |x| > overflowthreshold.
2586   return huge * huge;
2587 }
2588 
2589 /*
2590  * ES2019 Draft 2019-01-02 12.6.4
2591  * Math.pow & Exponentiation Operator
2592  *
2593  * Return X raised to the Yth power
2594  *
2595  * Method:
2596  *     Let x =  2   * (1+f)
2597  *     1. Compute and return log2(x) in two pieces:
2598  *        log2(x) = w1 + w2,
2599  *        where w1 has 53-24 = 29 bit trailing zeros.
2600  *     2. Perform y*log2(x) = n+y' by simulating muti-precision
2601  *        arithmetic, where |y'|<=0.5.
2602  *     3. Return x**y = 2**n*exp(y'*log2)
2603  *
2604  * Special cases:
2605  *     1.  (anything) ** 0  is 1
2606  *     2.  (anything) ** 1  is itself
2607  *     3.  (anything) ** NAN is NAN
2608  *     4.  NAN ** (anything except 0) is NAN
2609  *     5.  +-(|x| > 1) **  +INF is +INF
2610  *     6.  +-(|x| > 1) **  -INF is +0
2611  *     7.  +-(|x| < 1) **  +INF is +0
2612  *     8.  +-(|x| < 1) **  -INF is +INF
2613  *     9.  +-1         ** +-INF is NAN
2614  *     10. +0 ** (+anything except 0, NAN)               is +0
2615  *     11. -0 ** (+anything except 0, NAN, odd integer)  is +0
2616  *     12. +0 ** (-anything except 0, NAN)               is +INF
2617  *     13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
2618  *     14. -0 ** (odd integer) = -( +0 ** (odd integer) )
2619  *     15. +INF ** (+anything except 0,NAN) is +INF
2620  *     16. +INF ** (-anything except 0,NAN) is +0
2621  *     17. -INF ** (anything)  = -0 ** (-anything)
2622  *     18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
2623  *     19. (-anything except 0 and inf) ** (non-integer) is NAN
2624  *
2625  * Accuracy:
2626  *      pow(x,y) returns x**y nearly rounded. In particular,
2627  *      pow(integer, integer) always returns the correct integer provided it is
2628  *      representable.
2629  *
2630  * Constants:
2631  *     The hexadecimal values are the intended ones for the following
2632  *     constants. The decimal values may be used, provided that the
2633  *     compiler will convert from decimal to binary accurately enough
2634  *     to produce the hexadecimal values shown.
2635  */
2636 
pow(double x,double y)2637 double pow(double x, double y) {
2638   static const double
2639       bp[] = {1.0, 1.5},
2640       dp_h[] = {0.0, 5.84962487220764160156e-01},  // 0x3FE2B803, 0x40000000
2641       dp_l[] = {0.0, 1.35003920212974897128e-08},  // 0x3E4CFDEB, 0x43CFD006
2642       zero = 0.0, one = 1.0, two = 2.0,
2643       two53 = 9007199254740992.0,  // 0x43400000, 0x00000000
2644       huge = 1.0e300, tiny = 1.0e-300,
2645       // poly coefs for (3/2)*(log(x)-2s-2/3*s**3
2646       L1 = 5.99999999999994648725e-01,      // 0x3FE33333, 0x33333303
2647       L2 = 4.28571428578550184252e-01,      // 0x3FDB6DB6, 0xDB6FABFF
2648       L3 = 3.33333329818377432918e-01,      // 0x3FD55555, 0x518F264D
2649       L4 = 2.72728123808534006489e-01,      // 0x3FD17460, 0xA91D4101
2650       L5 = 2.30660745775561754067e-01,      // 0x3FCD864A, 0x93C9DB65
2651       L6 = 2.06975017800338417784e-01,      // 0x3FCA7E28, 0x4A454EEF
2652       P1 = 1.66666666666666019037e-01,      // 0x3FC55555, 0x5555553E
2653       P2 = -2.77777777770155933842e-03,     // 0xBF66C16C, 0x16BEBD93
2654       P3 = 6.61375632143793436117e-05,      // 0x3F11566A, 0xAF25DE2C
2655       P4 = -1.65339022054652515390e-06,     // 0xBEBBBD41, 0xC5D26BF1
2656       P5 = 4.13813679705723846039e-08,      // 0x3E663769, 0x72BEA4D0
2657       lg2 = 6.93147180559945286227e-01,     // 0x3FE62E42, 0xFEFA39EF
2658       lg2_h = 6.93147182464599609375e-01,   // 0x3FE62E43, 0x00000000
2659       lg2_l = -1.90465429995776804525e-09,  // 0xBE205C61, 0x0CA86C39
2660       ovt = 8.0085662595372944372e-0017,    // -(1024-log2(ovfl+.5ulp))
2661       cp = 9.61796693925975554329e-01,      // 0x3FEEC709, 0xDC3A03FD =2/(3ln2)
2662       cp_h = 9.61796700954437255859e-01,    // 0x3FEEC709, 0xE0000000 =(float)cp
2663       cp_l = -7.02846165095275826516e-09,   // 0xBE3E2FE0, 0x145B01F5 =tail cp_h
2664       ivln2 = 1.44269504088896338700e+00,   // 0x3FF71547, 0x652B82FE =1/ln2
2665       ivln2_h =
2666           1.44269502162933349609e+00,  // 0x3FF71547, 0x60000000 =24b 1/ln2
2667       ivln2_l =
2668           1.92596299112661746887e-08;  // 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail
2669 
2670   double z, ax, z_h, z_l, p_h, p_l;
2671   double y1, t1, t2, r, s, t, u, v, w;
2672   int i, j, k, yisint, n;
2673   int hx, hy, ix, iy;
2674   unsigned lx, ly;
2675 
2676   EXTRACT_WORDS(hx, lx, x);
2677   EXTRACT_WORDS(hy, ly, y);
2678   ix = hx & 0x7fffffff;
2679   iy = hy & 0x7fffffff;
2680 
2681   /* y==zero: x**0 = 1 */
2682   if ((iy | ly) == 0) return one;
2683 
2684   /* +-NaN return x+y */
2685   if (ix > 0x7ff00000 || ((ix == 0x7ff00000) && (lx != 0)) || iy > 0x7ff00000 ||
2686       ((iy == 0x7ff00000) && (ly != 0))) {
2687     return x + y;
2688   }
2689 
2690   /* determine if y is an odd int when x < 0
2691    * yisint = 0 ... y is not an integer
2692    * yisint = 1 ... y is an odd int
2693    * yisint = 2 ... y is an even int
2694    */
2695   yisint = 0;
2696   if (hx < 0) {
2697     if (iy >= 0x43400000) {
2698       yisint = 2; /* even integer y */
2699     } else if (iy >= 0x3ff00000) {
2700       k = (iy >> 20) - 0x3ff; /* exponent */
2701       if (k > 20) {
2702         j = ly >> (52 - k);
2703         if ((j << (52 - k)) == static_cast<int>(ly)) yisint = 2 - (j & 1);
2704       } else if (ly == 0) {
2705         j = iy >> (20 - k);
2706         if ((j << (20 - k)) == iy) yisint = 2 - (j & 1);
2707       }
2708     }
2709   }
2710 
2711   /* special value of y */
2712   if (ly == 0) {
2713     if (iy == 0x7ff00000) { /* y is +-inf */
2714       if (((ix - 0x3ff00000) | lx) == 0) {
2715         return y - y;                /* inf**+-1 is NaN */
2716       } else if (ix >= 0x3ff00000) { /* (|x|>1)**+-inf = inf,0 */
2717         return (hy >= 0) ? y : zero;
2718       } else { /* (|x|<1)**-,+inf = inf,0 */
2719         return (hy < 0) ? -y : zero;
2720       }
2721     }
2722     if (iy == 0x3ff00000) { /* y is  +-1 */
2723       if (hy < 0) {
2724         return base::Divide(one, x);
2725       } else {
2726         return x;
2727       }
2728     }
2729     if (hy == 0x40000000) return x * x; /* y is  2 */
2730     if (hy == 0x3fe00000) {             /* y is  0.5 */
2731       if (hx >= 0) {                    /* x >= +0 */
2732         return sqrt(x);
2733       }
2734     }
2735   }
2736 
2737   ax = fabs(x);
2738   /* special value of x */
2739   if (lx == 0) {
2740     if (ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000) {
2741       z = ax;                         /*x is +-0,+-inf,+-1*/
2742       if (hy < 0) z = base::Divide(one, z); /* z = (1/|x|) */
2743       if (hx < 0) {
2744         if (((ix - 0x3ff00000) | yisint) == 0) {
2745           /* (-1)**non-int is NaN */
2746           z = std::numeric_limits<double>::signaling_NaN();
2747         } else if (yisint == 1) {
2748           z = -z; /* (x<0)**odd = -(|x|**odd) */
2749         }
2750       }
2751       return z;
2752     }
2753   }
2754 
2755   n = (hx >> 31) + 1;
2756 
2757   /* (x<0)**(non-int) is NaN */
2758   if ((n | yisint) == 0) {
2759     return std::numeric_limits<double>::signaling_NaN();
2760   }
2761 
2762   s = one; /* s (sign of result -ve**odd) = -1 else = 1 */
2763   if ((n | (yisint - 1)) == 0) s = -one; /* (-ve)**(odd int) */
2764 
2765   /* |y| is huge */
2766   if (iy > 0x41e00000) {   /* if |y| > 2**31 */
2767     if (iy > 0x43f00000) { /* if |y| > 2**64, must o/uflow */
2768       if (ix <= 0x3fefffff) return (hy < 0) ? huge * huge : tiny * tiny;
2769       if (ix >= 0x3ff00000) return (hy > 0) ? huge * huge : tiny * tiny;
2770     }
2771     /* over/underflow if x is not close to one */
2772     if (ix < 0x3fefffff) return (hy < 0) ? s * huge * huge : s * tiny * tiny;
2773     if (ix > 0x3ff00000) return (hy > 0) ? s * huge * huge : s * tiny * tiny;
2774     /* now |1-x| is tiny <= 2**-20, suffice to compute
2775        log(x) by x-x^2/2+x^3/3-x^4/4 */
2776     t = ax - one; /* t has 20 trailing zeros */
2777     w = (t * t) * (0.5 - t * (0.3333333333333333333333 - t * 0.25));
2778     u = ivln2_h * t; /* ivln2_h has 21 sig. bits */
2779     v = t * ivln2_l - w * ivln2;
2780     t1 = u + v;
2781     SET_LOW_WORD(t1, 0);
2782     t2 = v - (t1 - u);
2783   } else {
2784     double ss, s2, s_h, s_l, t_h, t_l;
2785     n = 0;
2786     /* take care subnormal number */
2787     if (ix < 0x00100000) {
2788       ax *= two53;
2789       n -= 53;
2790       GET_HIGH_WORD(ix, ax);
2791     }
2792     n += ((ix) >> 20) - 0x3ff;
2793     j = ix & 0x000fffff;
2794     /* determine interval */
2795     ix = j | 0x3ff00000; /* normalize ix */
2796     if (j <= 0x3988E) {
2797       k = 0; /* |x|<sqrt(3/2) */
2798     } else if (j < 0xBB67A) {
2799       k = 1; /* |x|<sqrt(3)   */
2800     } else {
2801       k = 0;
2802       n += 1;
2803       ix -= 0x00100000;
2804     }
2805     SET_HIGH_WORD(ax, ix);
2806 
2807     /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
2808     u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
2809     v = base::Divide(one, ax + bp[k]);
2810     ss = u * v;
2811     s_h = ss;
2812     SET_LOW_WORD(s_h, 0);
2813     /* t_h=ax+bp[k] High */
2814     t_h = zero;
2815     SET_HIGH_WORD(t_h, ((ix >> 1) | 0x20000000) + 0x00080000 + (k << 18));
2816     t_l = ax - (t_h - bp[k]);
2817     s_l = v * ((u - s_h * t_h) - s_h * t_l);
2818     /* compute log(ax) */
2819     s2 = ss * ss;
2820     r = s2 * s2 *
2821         (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6)))));
2822     r += s_l * (s_h + ss);
2823     s2 = s_h * s_h;
2824     t_h = 3.0 + s2 + r;
2825     SET_LOW_WORD(t_h, 0);
2826     t_l = r - ((t_h - 3.0) - s2);
2827     /* u+v = ss*(1+...) */
2828     u = s_h * t_h;
2829     v = s_l * t_h + t_l * ss;
2830     /* 2/(3log2)*(ss+...) */
2831     p_h = u + v;
2832     SET_LOW_WORD(p_h, 0);
2833     p_l = v - (p_h - u);
2834     z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */
2835     z_l = cp_l * p_h + p_l * cp + dp_l[k];
2836     /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */
2837     t = static_cast<double>(n);
2838     t1 = (((z_h + z_l) + dp_h[k]) + t);
2839     SET_LOW_WORD(t1, 0);
2840     t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
2841   }
2842 
2843   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
2844   y1 = y;
2845   SET_LOW_WORD(y1, 0);
2846   p_l = (y - y1) * t1 + y * t2;
2847   p_h = y1 * t1;
2848   z = p_l + p_h;
2849   EXTRACT_WORDS(j, i, z);
2850   if (j >= 0x40900000) {               /* z >= 1024 */
2851     if (((j - 0x40900000) | i) != 0) { /* if z > 1024 */
2852       return s * huge * huge;          /* overflow */
2853     } else {
2854       if (p_l + ovt > z - p_h) return s * huge * huge; /* overflow */
2855     }
2856   } else if ((j & 0x7fffffff) >= 0x4090cc00) { /* z <= -1075 */
2857     if (((j - 0xc090cc00) | i) != 0) {         /* z < -1075 */
2858       return s * tiny * tiny;                  /* underflow */
2859     } else {
2860       if (p_l <= z - p_h) return s * tiny * tiny; /* underflow */
2861     }
2862   }
2863   /*
2864    * compute 2**(p_h+p_l)
2865    */
2866   i = j & 0x7fffffff;
2867   k = (i >> 20) - 0x3ff;
2868   n = 0;
2869   if (i > 0x3fe00000) { /* if |z| > 0.5, set n = [z+0.5] */
2870     n = j + (0x00100000 >> (k + 1));
2871     k = ((n & 0x7fffffff) >> 20) - 0x3ff; /* new k for n */
2872     t = zero;
2873     SET_HIGH_WORD(t, n & ~(0x000fffff >> k));
2874     n = ((n & 0x000fffff) | 0x00100000) >> (20 - k);
2875     if (j < 0) n = -n;
2876     p_h -= t;
2877   }
2878   t = p_l + p_h;
2879   SET_LOW_WORD(t, 0);
2880   u = t * lg2_h;
2881   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
2882   z = u + v;
2883   w = v - (z - u);
2884   t = z * z;
2885   t1 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
2886   r = base::Divide(z * t1, (t1 - two) - (w + z * w));
2887   z = one - (r - z);
2888   GET_HIGH_WORD(j, z);
2889   j += static_cast<int>(static_cast<uint32_t>(n) << 20);
2890   if ((j >> 20) <= 0) {
2891     z = scalbn(z, n); /* subnormal output */
2892   } else {
2893     int tmp;
2894     GET_HIGH_WORD(tmp, z);
2895     SET_HIGH_WORD(z, tmp + static_cast<int>(static_cast<uint32_t>(n) << 20));
2896   }
2897   return s * z;
2898 }
2899 
2900 /*
2901  * ES6 draft 09-27-13, section 20.2.2.30.
2902  * Math.sinh
2903  * Method :
2904  * mathematically sinh(x) if defined to be (exp(x)-exp(-x))/2
2905  *      1. Replace x by |x| (sinh(-x) = -sinh(x)).
2906  *      2.
2907  *                                                  E + E/(E+1)
2908  *          0        <= x <= 22     :  sinh(x) := --------------, E=expm1(x)
2909  *                                                      2
2910  *
2911  *          22       <= x <= lnovft :  sinh(x) := exp(x)/2
2912  *          lnovft   <= x <= ln2ovft:  sinh(x) := exp(x/2)/2 * exp(x/2)
2913  *          ln2ovft  <  x           :  sinh(x) := x*shuge (overflow)
2914  *
2915  * Special cases:
2916  *      sinh(x) is |x| if x is +Infinity, -Infinity, or NaN.
2917  *      only sinh(0)=0 is exact for finite x.
2918  */
sinh(double x)2919 double sinh(double x) {
2920   static const double KSINH_OVERFLOW = 710.4758600739439,
2921                       TWO_M28 =
2922                           3.725290298461914e-9,  // 2^-28, empty lower half
2923       LOG_MAXD = 709.7822265625;  // 0x40862E42 00000000, empty lower half
2924   static const double shuge = 1.0e307;
2925 
2926   double h = (x < 0) ? -0.5 : 0.5;
2927   // |x| in [0, 22]. return sign(x)*0.5*(E+E/(E+1))
2928   double ax = fabs(x);
2929   if (ax < 22) {
2930     // For |x| < 2^-28, sinh(x) = x
2931     if (ax < TWO_M28) return x;
2932     double t = expm1(ax);
2933     if (ax < 1) {
2934       return h * (2 * t - t * t / (t + 1));
2935     }
2936     return h * (t + t / (t + 1));
2937   }
2938   // |x| in [22, log(maxdouble)], return 0.5 * exp(|x|)
2939   if (ax < LOG_MAXD) return h * exp(ax);
2940   // |x| in [log(maxdouble), overflowthreshold]
2941   // overflowthreshold = 710.4758600739426
2942   if (ax <= KSINH_OVERFLOW) {
2943     double w = exp(0.5 * ax);
2944     double t = h * w;
2945     return t * w;
2946   }
2947   // |x| > overflowthreshold or is NaN.
2948   // Return Infinity of the appropriate sign or NaN.
2949   return x * shuge;
2950 }
2951 
2952 /* Tanh(x)
2953  * Return the Hyperbolic Tangent of x
2954  *
2955  * Method :
2956  *                                 x    -x
2957  *                                e  - e
2958  *  0. tanh(x) is defined to be -----------
2959  *                                 x    -x
2960  *                                e  + e
2961  *  1. reduce x to non-negative by tanh(-x) = -tanh(x).
2962  *  2.  0      <= x <  2**-28 : tanh(x) := x with inexact if x != 0
2963  *                                          -t
2964  *      2**-28 <= x <  1      : tanh(x) := -----; t = expm1(-2x)
2965  *                                         t + 2
2966  *                                               2
2967  *      1      <= x <  22     : tanh(x) := 1 - -----; t = expm1(2x)
2968  *                                             t + 2
2969  *      22     <= x <= INF    : tanh(x) := 1.
2970  *
2971  * Special cases:
2972  *      tanh(NaN) is NaN;
2973  *      only tanh(0)=0 is exact for finite argument.
2974  */
tanh(double x)2975 double tanh(double x) {
2976   static const volatile double tiny = 1.0e-300;
2977   static const double one = 1.0, two = 2.0, huge = 1.0e300;
2978   double t, z;
2979   int32_t jx, ix;
2980 
2981   GET_HIGH_WORD(jx, x);
2982   ix = jx & 0x7FFFFFFF;
2983 
2984   /* x is INF or NaN */
2985   if (ix >= 0x7FF00000) {
2986     if (jx >= 0)
2987       return one / x + one; /* tanh(+-inf)=+-1 */
2988     else
2989       return one / x - one; /* tanh(NaN) = NaN */
2990   }
2991 
2992   /* |x| < 22 */
2993   if (ix < 0x40360000) {            /* |x|<22 */
2994     if (ix < 0x3E300000) {          /* |x|<2**-28 */
2995       if (huge + x > one) return x; /* tanh(tiny) = tiny with inexact */
2996     }
2997     if (ix >= 0x3FF00000) { /* |x|>=1  */
2998       t = expm1(two * fabs(x));
2999       z = one - two / (t + two);
3000     } else {
3001       t = expm1(-two * fabs(x));
3002       z = -t / (t + two);
3003     }
3004     /* |x| >= 22, return +-1 */
3005   } else {
3006     z = one - tiny; /* raise inexact flag */
3007   }
3008   return (jx >= 0) ? z : -z;
3009 }
3010 
3011 #undef EXTRACT_WORDS
3012 #undef GET_HIGH_WORD
3013 #undef GET_LOW_WORD
3014 #undef INSERT_WORDS
3015 #undef SET_HIGH_WORD
3016 #undef SET_LOW_WORD
3017 
3018 }  // namespace ieee754
3019 }  // namespace base
3020 }  // namespace v8
3021