• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).
2 //
3 // ====================================================
4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5 //
6 // Developed at SunSoft, a Sun Microsystems, Inc. business.
7 // Permission to use, copy, modify, and distribute this
8 // software is freely granted, provided that this notice
9 // is preserved.
10 // ====================================================
11 //
12 // The original source code covered by the above license above has been
13 // modified significantly by Google Inc.
14 // Copyright 2016 the V8 project authors. All rights reserved.
15 
16 #include "src/base/ieee754.h"
17 
18 #include <cmath>
19 #include <limits>
20 
21 #include "src/base/build_config.h"
22 #include "src/base/macros.h"
23 
24 namespace v8 {
25 namespace base {
26 namespace ieee754 {
27 
28 namespace {
29 
30 /* Disable "potential divide by 0" warning in Visual Studio compiler. */
31 
32 #if V8_CC_MSVC
33 
34 #pragma warning(disable : 4723)
35 
36 #endif
37 
38 /*
39  * The original fdlibm code used statements like:
40  *  n0 = ((*(int*)&one)>>29)^1;   * index of high word *
41  *  ix0 = *(n0+(int*)&x);     * high word of x *
42  *  ix1 = *((1-n0)+(int*)&x);   * low word of x *
43  * to dig two 32 bit words out of the 64 bit IEEE floating point
44  * value.  That is non-ANSI, and, moreover, the gcc instruction
45  * scheduler gets it wrong.  We instead use the following macros.
46  * Unlike the original code, we determine the endianness at compile
47  * time, not at run time; I don't see much benefit to selecting
48  * endianness at run time.
49  */
50 
51 /*
52  * A union which permits us to convert between a double and two 32 bit
53  * ints.
54  */
55 
56 #if V8_TARGET_LITTLE_ENDIAN
57 
58 typedef union {
59   double value;
60   struct {
61     uint32_t lsw;
62     uint32_t msw;
63   } parts;
64   struct {
65     uint64_t w;
66   } xparts;
67 } ieee_double_shape_type;
68 
69 #else
70 
71 typedef union {
72   double value;
73   struct {
74     uint32_t msw;
75     uint32_t lsw;
76   } parts;
77   struct {
78     uint64_t w;
79   } xparts;
80 } ieee_double_shape_type;
81 
82 #endif
83 
84 /* Get two 32 bit ints from a double.  */
85 
86 #define EXTRACT_WORDS(ix0, ix1, d) \
87   do {                             \
88     ieee_double_shape_type ew_u;   \
89     ew_u.value = (d);              \
90     (ix0) = ew_u.parts.msw;        \
91     (ix1) = ew_u.parts.lsw;        \
92   } while (0)
93 
94 /* Get a 64-bit int from a double. */
95 #define EXTRACT_WORD64(ix, d)    \
96   do {                           \
97     ieee_double_shape_type ew_u; \
98     ew_u.value = (d);            \
99     (ix) = ew_u.xparts.w;        \
100   } while (0)
101 
102 /* Get the more significant 32 bit int from a double.  */
103 
104 #define GET_HIGH_WORD(i, d)      \
105   do {                           \
106     ieee_double_shape_type gh_u; \
107     gh_u.value = (d);            \
108     (i) = gh_u.parts.msw;        \
109   } while (0)
110 
111 /* Get the less significant 32 bit int from a double.  */
112 
113 #define GET_LOW_WORD(i, d)       \
114   do {                           \
115     ieee_double_shape_type gl_u; \
116     gl_u.value = (d);            \
117     (i) = gl_u.parts.lsw;        \
118   } while (0)
119 
120 /* Set a double from two 32 bit ints.  */
121 
122 #define INSERT_WORDS(d, ix0, ix1) \
123   do {                            \
124     ieee_double_shape_type iw_u;  \
125     iw_u.parts.msw = (ix0);       \
126     iw_u.parts.lsw = (ix1);       \
127     (d) = iw_u.value;             \
128   } while (0)
129 
130 /* Set a double from a 64-bit int. */
131 #define INSERT_WORD64(d, ix)     \
132   do {                           \
133     ieee_double_shape_type iw_u; \
134     iw_u.xparts.w = (ix);        \
135     (d) = iw_u.value;            \
136   } while (0)
137 
138 /* Set the more significant 32 bits of a double from an int.  */
139 
140 #define SET_HIGH_WORD(d, v)      \
141   do {                           \
142     ieee_double_shape_type sh_u; \
143     sh_u.value = (d);            \
144     sh_u.parts.msw = (v);        \
145     (d) = sh_u.value;            \
146   } while (0)
147 
148 /* Set the less significant 32 bits of a double from an int.  */
149 
150 #define SET_LOW_WORD(d, v)       \
151   do {                           \
152     ieee_double_shape_type sl_u; \
153     sl_u.value = (d);            \
154     sl_u.parts.lsw = (v);        \
155     (d) = sl_u.value;            \
156   } while (0)
157 
158 /* Support macro. */
159 
160 #define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval))
161 
162 int32_t __ieee754_rem_pio2(double x, double *y) WARN_UNUSED_RESULT;
163 double __kernel_cos(double x, double y) WARN_UNUSED_RESULT;
164 int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
165                       const int32_t *ipio2) WARN_UNUSED_RESULT;
166 double __kernel_sin(double x, double y, int iy) WARN_UNUSED_RESULT;
167 
168 /* __ieee754_rem_pio2(x,y)
169  *
170  * return the remainder of x rem pi/2 in y[0]+y[1]
171  * use __kernel_rem_pio2()
172  */
__ieee754_rem_pio2(double x,double * y)173 int32_t __ieee754_rem_pio2(double x, double *y) {
174   /*
175    * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
176    */
177   static const int32_t two_over_pi[] = {
178       0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C,
179       0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649,
180       0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 0xA73EE8, 0x8235F5, 0x2EBB44,
181       0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, 0x845F8B,
182       0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D,
183       0x367ECF, 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
184       0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330,
185       0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 0x91615E, 0xE61B08,
186       0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA,
187       0x73A8C9, 0x60E27B, 0xC08C6B,
188   };
189 
190   static const int32_t npio2_hw[] = {
191       0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
192       0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
193       0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
194       0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
195       0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
196       0x404858EB, 0x404921FB,
197   };
198 
199   /*
200    * invpio2:  53 bits of 2/pi
201    * pio2_1:   first  33 bit of pi/2
202    * pio2_1t:  pi/2 - pio2_1
203    * pio2_2:   second 33 bit of pi/2
204    * pio2_2t:  pi/2 - (pio2_1+pio2_2)
205    * pio2_3:   third  33 bit of pi/2
206    * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
207    */
208 
209   static const double
210       zero = 0.00000000000000000000e+00,    /* 0x00000000, 0x00000000 */
211       half = 5.00000000000000000000e-01,    /* 0x3FE00000, 0x00000000 */
212       two24 = 1.67772160000000000000e+07,   /* 0x41700000, 0x00000000 */
213       invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
214       pio2_1 = 1.57079632673412561417e+00,  /* 0x3FF921FB, 0x54400000 */
215       pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
216       pio2_2 = 6.07710050630396597660e-11,  /* 0x3DD0B461, 0x1A600000 */
217       pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
218       pio2_3 = 2.02226624871116645580e-21,  /* 0x3BA3198A, 0x2E000000 */
219       pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
220 
221   double z, w, t, r, fn;
222   double tx[3];
223   int32_t e0, i, j, nx, n, ix, hx;
224   uint32_t low;
225 
226   z = 0;
227   GET_HIGH_WORD(hx, x); /* high word of x */
228   ix = hx & 0x7fffffff;
229   if (ix <= 0x3fe921fb) { /* |x| ~<= pi/4 , no need for reduction */
230     y[0] = x;
231     y[1] = 0;
232     return 0;
233   }
234   if (ix < 0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
235     if (hx > 0) {
236       z = x - pio2_1;
237       if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */
238         y[0] = z - pio2_1t;
239         y[1] = (z - y[0]) - pio2_1t;
240       } else { /* near pi/2, use 33+33+53 bit pi */
241         z -= pio2_2;
242         y[0] = z - pio2_2t;
243         y[1] = (z - y[0]) - pio2_2t;
244       }
245       return 1;
246     } else { /* negative x */
247       z = x + pio2_1;
248       if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */
249         y[0] = z + pio2_1t;
250         y[1] = (z - y[0]) + pio2_1t;
251       } else { /* near pi/2, use 33+33+53 bit pi */
252         z += pio2_2;
253         y[0] = z + pio2_2t;
254         y[1] = (z - y[0]) + pio2_2t;
255       }
256       return -1;
257     }
258   }
259   if (ix <= 0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
260     t = fabs(x);
261     n = static_cast<int32_t>(t * invpio2 + half);
262     fn = static_cast<double>(n);
263     r = t - fn * pio2_1;
264     w = fn * pio2_1t; /* 1st round good to 85 bit */
265     if (n < 32 && ix != npio2_hw[n - 1]) {
266       y[0] = r - w; /* quick check no cancellation */
267     } else {
268       uint32_t high;
269       j = ix >> 20;
270       y[0] = r - w;
271       GET_HIGH_WORD(high, y[0]);
272       i = j - ((high >> 20) & 0x7ff);
273       if (i > 16) { /* 2nd iteration needed, good to 118 */
274         t = r;
275         w = fn * pio2_2;
276         r = t - w;
277         w = fn * pio2_2t - ((t - r) - w);
278         y[0] = r - w;
279         GET_HIGH_WORD(high, y[0]);
280         i = j - ((high >> 20) & 0x7ff);
281         if (i > 49) { /* 3rd iteration need, 151 bits acc */
282           t = r;      /* will cover all possible cases */
283           w = fn * pio2_3;
284           r = t - w;
285           w = fn * pio2_3t - ((t - r) - w);
286           y[0] = r - w;
287         }
288       }
289     }
290     y[1] = (r - y[0]) - w;
291     if (hx < 0) {
292       y[0] = -y[0];
293       y[1] = -y[1];
294       return -n;
295     } else {
296       return n;
297     }
298   }
299   /*
300    * all other (large) arguments
301    */
302   if (ix >= 0x7ff00000) { /* x is inf or NaN */
303     y[0] = y[1] = x - x;
304     return 0;
305   }
306   /* set z = scalbn(|x|,ilogb(x)-23) */
307   GET_LOW_WORD(low, x);
308   SET_LOW_WORD(z, low);
309   e0 = (ix >> 20) - 1046; /* e0 = ilogb(z)-23; */
310   SET_HIGH_WORD(z, ix - static_cast<int32_t>(e0 << 20));
311   for (i = 0; i < 2; i++) {
312     tx[i] = static_cast<double>(static_cast<int32_t>(z));
313     z = (z - tx[i]) * two24;
314   }
315   tx[2] = z;
316   nx = 3;
317   while (tx[nx - 1] == zero) nx--; /* skip zero term */
318   n = __kernel_rem_pio2(tx, y, e0, nx, 2, two_over_pi);
319   if (hx < 0) {
320     y[0] = -y[0];
321     y[1] = -y[1];
322     return -n;
323   }
324   return n;
325 }
326 
327 /* __kernel_cos( x,  y )
328  * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
329  * Input x is assumed to be bounded by ~pi/4 in magnitude.
330  * Input y is the tail of x.
331  *
332  * Algorithm
333  *      1. Since cos(-x) = cos(x), we need only to consider positive x.
334  *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
335  *      3. cos(x) is approximated by a polynomial of degree 14 on
336  *         [0,pi/4]
337  *                                       4            14
338  *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
339  *         where the remez error is
340  *
341  *      |              2     4     6     8     10    12     14 |     -58
342  *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
343  *      |                                                      |
344  *
345  *                     4     6     8     10    12     14
346  *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
347  *             cos(x) = 1 - x*x/2 + r
348  *         since cos(x+y) ~ cos(x) - sin(x)*y
349  *                        ~ cos(x) - x*y,
350  *         a correction term is necessary in cos(x) and hence
351  *              cos(x+y) = 1 - (x*x/2 - (r - x*y))
352  *         For better accuracy when x > 0.3, let qx = |x|/4 with
353  *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
354  *         Then
355  *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
356  *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the
357  *         magnitude of the latter is at least a quarter of x*x/2,
358  *         thus, reducing the rounding error in the subtraction.
359  */
__kernel_cos(double x,double y)360 V8_INLINE double __kernel_cos(double x, double y) {
361   static const double
362       one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
363       C1 = 4.16666666666666019037e-02,  /* 0x3FA55555, 0x5555554C */
364       C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
365       C3 = 2.48015872894767294178e-05,  /* 0x3EFA01A0, 0x19CB1590 */
366       C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
367       C5 = 2.08757232129817482790e-09,  /* 0x3E21EE9E, 0xBDB4B1C4 */
368       C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
369 
370   double a, iz, z, r, qx;
371   int32_t ix;
372   GET_HIGH_WORD(ix, x);
373   ix &= 0x7fffffff;                           /* ix = |x|'s high word*/
374   if (ix < 0x3e400000) {                      /* if x < 2**27 */
375     if (static_cast<int>(x) == 0) return one; /* generate inexact */
376   }
377   z = x * x;
378   r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6)))));
379   if (ix < 0x3FD33333) { /* if |x| < 0.3 */
380     return one - (0.5 * z - (z * r - x * y));
381   } else {
382     if (ix > 0x3fe90000) { /* x > 0.78125 */
383       qx = 0.28125;
384     } else {
385       INSERT_WORDS(qx, ix - 0x00200000, 0); /* x/4 */
386     }
387     iz = 0.5 * z - qx;
388     a = one - qx;
389     return a - (iz - (z * r - x * y));
390   }
391 }
392 
393 /* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
394  * double x[],y[]; int e0,nx,prec; int ipio2[];
395  *
396  * __kernel_rem_pio2 return the last three digits of N with
397  *              y = x - N*pi/2
398  * so that |y| < pi/2.
399  *
400  * The method is to compute the integer (mod 8) and fraction parts of
401  * (2/pi)*x without doing the full multiplication. In general we
402  * skip the part of the product that are known to be a huge integer (
403  * more accurately, = 0 mod 8 ). Thus the number of operations are
404  * independent of the exponent of the input.
405  *
406  * (2/pi) is represented by an array of 24-bit integers in ipio2[].
407  *
408  * Input parameters:
409  *      x[]     The input value (must be positive) is broken into nx
410  *              pieces of 24-bit integers in double precision format.
411  *              x[i] will be the i-th 24 bit of x. The scaled exponent
412  *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
413  *              match x's up to 24 bits.
414  *
415  *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
416  *                      e0 = ilogb(z)-23
417  *                      z  = scalbn(z,-e0)
418  *              for i = 0,1,2
419  *                      x[i] = floor(z)
420  *                      z    = (z-x[i])*2**24
421  *
422  *
423  *      y[]     output result in an array of double precision numbers.
424  *              The dimension of y[] is:
425  *                      24-bit  precision       1
426  *                      53-bit  precision       2
427  *                      64-bit  precision       2
428  *                      113-bit precision       3
429  *              The actual value is the sum of them. Thus for 113-bit
430  *              precison, one may have to do something like:
431  *
432  *              long double t,w,r_head, r_tail;
433  *              t = (long double)y[2] + (long double)y[1];
434  *              w = (long double)y[0];
435  *              r_head = t+w;
436  *              r_tail = w - (r_head - t);
437  *
438  *      e0      The exponent of x[0]
439  *
440  *      nx      dimension of x[]
441  *
442  *      prec    an integer indicating the precision:
443  *                      0       24  bits (single)
444  *                      1       53  bits (double)
445  *                      2       64  bits (extended)
446  *                      3       113 bits (quad)
447  *
448  *      ipio2[]
449  *              integer array, contains the (24*i)-th to (24*i+23)-th
450  *              bit of 2/pi after binary point. The corresponding
451  *              floating value is
452  *
453  *                      ipio2[i] * 2^(-24(i+1)).
454  *
455  * External function:
456  *      double scalbn(), floor();
457  *
458  *
459  * Here is the description of some local variables:
460  *
461  *      jk      jk+1 is the initial number of terms of ipio2[] needed
462  *              in the computation. The recommended value is 2,3,4,
463  *              6 for single, double, extended,and quad.
464  *
465  *      jz      local integer variable indicating the number of
466  *              terms of ipio2[] used.
467  *
468  *      jx      nx - 1
469  *
470  *      jv      index for pointing to the suitable ipio2[] for the
471  *              computation. In general, we want
472  *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
473  *              is an integer. Thus
474  *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
475  *              Hence jv = max(0,(e0-3)/24).
476  *
477  *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
478  *
479  *      q[]     double array with integral value, representing the
480  *              24-bits chunk of the product of x and 2/pi.
481  *
482  *      q0      the corresponding exponent of q[0]. Note that the
483  *              exponent for q[i] would be q0-24*i.
484  *
485  *      PIo2[]  double precision array, obtained by cutting pi/2
486  *              into 24 bits chunks.
487  *
488  *      f[]     ipio2[] in floating point
489  *
490  *      iq[]    integer array by breaking up q[] in 24-bits chunk.
491  *
492  *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
493  *
494  *      ih      integer. If >0 it indicates q[] is >= 0.5, hence
495  *              it also indicates the *sign* of the result.
496  *
497  */
__kernel_rem_pio2(double * x,double * y,int e0,int nx,int prec,const int32_t * ipio2)498 int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
499                       const int32_t *ipio2) {
500   /* Constants:
501    * The hexadecimal values are the intended ones for the following
502    * constants. The decimal values may be used, provided that the
503    * compiler will convert from decimal to binary accurately enough
504    * to produce the hexadecimal values shown.
505    */
506   static const int init_jk[] = {2, 3, 4, 6}; /* initial value for jk */
507 
508   static const double PIo2[] = {
509       1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
510       7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
511       5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
512       3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
513       1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
514       1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
515       2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
516       2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
517   };
518 
519   static const double
520       zero = 0.0,
521       one = 1.0,
522       two24 = 1.67772160000000000000e+07,  /* 0x41700000, 0x00000000 */
523       twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
524 
525   int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
526   double z, fw, f[20], fq[20], q[20];
527 
528   /* initialize jk*/
529   jk = init_jk[prec];
530   jp = jk;
531 
532   /* determine jx,jv,q0, note that 3>q0 */
533   jx = nx - 1;
534   jv = (e0 - 3) / 24;
535   if (jv < 0) jv = 0;
536   q0 = e0 - 24 * (jv + 1);
537 
538   /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
539   j = jv - jx;
540   m = jx + jk;
541   for (i = 0; i <= m; i++, j++) {
542     f[i] = (j < 0) ? zero : static_cast<double>(ipio2[j]);
543   }
544 
545   /* compute q[0],q[1],...q[jk] */
546   for (i = 0; i <= jk; i++) {
547     for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
548     q[i] = fw;
549   }
550 
551   jz = jk;
552 recompute:
553   /* distill q[] into iq[] reversingly */
554   for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
555     fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
556     iq[i] = static_cast<int32_t>(z - two24 * fw);
557     z = q[j - 1] + fw;
558   }
559 
560   /* compute n */
561   z = scalbn(z, q0);           /* actual value of z */
562   z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */
563   n = static_cast<int32_t>(z);
564   z -= static_cast<double>(n);
565   ih = 0;
566   if (q0 > 0) { /* need iq[jz-1] to determine n */
567     i = (iq[jz - 1] >> (24 - q0));
568     n += i;
569     iq[jz - 1] -= i << (24 - q0);
570     ih = iq[jz - 1] >> (23 - q0);
571   } else if (q0 == 0) {
572     ih = iq[jz - 1] >> 23;
573   } else if (z >= 0.5) {
574     ih = 2;
575   }
576 
577   if (ih > 0) { /* q > 0.5 */
578     n += 1;
579     carry = 0;
580     for (i = 0; i < jz; i++) { /* compute 1-q */
581       j = iq[i];
582       if (carry == 0) {
583         if (j != 0) {
584           carry = 1;
585           iq[i] = 0x1000000 - j;
586         }
587       } else {
588         iq[i] = 0xffffff - j;
589       }
590     }
591     if (q0 > 0) { /* rare case: chance is 1 in 12 */
592       switch (q0) {
593         case 1:
594           iq[jz - 1] &= 0x7fffff;
595           break;
596         case 2:
597           iq[jz - 1] &= 0x3fffff;
598           break;
599       }
600     }
601     if (ih == 2) {
602       z = one - z;
603       if (carry != 0) z -= scalbn(one, q0);
604     }
605   }
606 
607   /* check if recomputation is needed */
608   if (z == zero) {
609     j = 0;
610     for (i = jz - 1; i >= jk; i--) j |= iq[i];
611     if (j == 0) { /* need recomputation */
612       for (k = 1; jk >= k && iq[jk - k] == 0; k++) {
613         /* k = no. of terms needed */
614       }
615 
616       for (i = jz + 1; i <= jz + k; i++) { /* add q[jz+1] to q[jz+k] */
617         f[jx + i] = ipio2[jv + i];
618         for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
619         q[i] = fw;
620       }
621       jz += k;
622       goto recompute;
623     }
624   }
625 
626   /* chop off zero terms */
627   if (z == 0.0) {
628     jz -= 1;
629     q0 -= 24;
630     while (iq[jz] == 0) {
631       jz--;
632       q0 -= 24;
633     }
634   } else { /* break z into 24-bit if necessary */
635     z = scalbn(z, -q0);
636     if (z >= two24) {
637       fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
638       iq[jz] = z - two24 * fw;
639       jz += 1;
640       q0 += 24;
641       iq[jz] = fw;
642     } else {
643       iq[jz] = z;
644     }
645   }
646 
647   /* convert integer "bit" chunk to floating-point value */
648   fw = scalbn(one, q0);
649   for (i = jz; i >= 0; i--) {
650     q[i] = fw * iq[i];
651     fw *= twon24;
652   }
653 
654   /* compute PIo2[0,...,jp]*q[jz,...,0] */
655   for (i = jz; i >= 0; i--) {
656     for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++) fw += PIo2[k] * q[i + k];
657     fq[jz - i] = fw;
658   }
659 
660   /* compress fq[] into y[] */
661   switch (prec) {
662     case 0:
663       fw = 0.0;
664       for (i = jz; i >= 0; i--) fw += fq[i];
665       y[0] = (ih == 0) ? fw : -fw;
666       break;
667     case 1:
668     case 2:
669       fw = 0.0;
670       for (i = jz; i >= 0; i--) fw += fq[i];
671       y[0] = (ih == 0) ? fw : -fw;
672       fw = fq[0] - fw;
673       for (i = 1; i <= jz; i++) fw += fq[i];
674       y[1] = (ih == 0) ? fw : -fw;
675       break;
676     case 3: /* painful */
677       for (i = jz; i > 0; i--) {
678         fw = fq[i - 1] + fq[i];
679         fq[i] += fq[i - 1] - fw;
680         fq[i - 1] = fw;
681       }
682       for (i = jz; i > 1; i--) {
683         fw = fq[i - 1] + fq[i];
684         fq[i] += fq[i - 1] - fw;
685         fq[i - 1] = fw;
686       }
687       for (fw = 0.0, i = jz; i >= 2; i--) fw += fq[i];
688       if (ih == 0) {
689         y[0] = fq[0];
690         y[1] = fq[1];
691         y[2] = fw;
692       } else {
693         y[0] = -fq[0];
694         y[1] = -fq[1];
695         y[2] = -fw;
696       }
697   }
698   return n & 7;
699 }
700 
701 /* __kernel_sin( x, y, iy)
702  * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
703  * Input x is assumed to be bounded by ~pi/4 in magnitude.
704  * Input y is the tail of x.
705  * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
706  *
707  * Algorithm
708  *      1. Since sin(-x) = -sin(x), we need only to consider positive x.
709  *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
710  *      3. sin(x) is approximated by a polynomial of degree 13 on
711  *         [0,pi/4]
712  *                               3            13
713  *              sin(x) ~ x + S1*x + ... + S6*x
714  *         where
715  *
716  *      |sin(x)         2     4     6     8     10     12  |     -58
717  *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
718  *      |  x                                               |
719  *
720  *      4. sin(x+y) = sin(x) + sin'(x')*y
721  *                  ~ sin(x) + (1-x*x/2)*y
722  *         For better accuracy, let
723  *                   3      2      2      2      2
724  *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
725  *         then                   3    2
726  *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
727  */
__kernel_sin(double x,double y,int iy)728 V8_INLINE double __kernel_sin(double x, double y, int iy) {
729   static const double
730       half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
731       S1 = -1.66666666666666324348e-01,  /* 0xBFC55555, 0x55555549 */
732       S2 = 8.33333333332248946124e-03,   /* 0x3F811111, 0x1110F8A6 */
733       S3 = -1.98412698298579493134e-04,  /* 0xBF2A01A0, 0x19C161D5 */
734       S4 = 2.75573137070700676789e-06,   /* 0x3EC71DE3, 0x57B1FE7D */
735       S5 = -2.50507602534068634195e-08,  /* 0xBE5AE5E6, 0x8A2B9CEB */
736       S6 = 1.58969099521155010221e-10;   /* 0x3DE5D93A, 0x5ACFD57C */
737 
738   double z, r, v;
739   int32_t ix;
740   GET_HIGH_WORD(ix, x);
741   ix &= 0x7fffffff;      /* high word of x */
742   if (ix < 0x3e400000) { /* |x| < 2**-27 */
743     if (static_cast<int>(x) == 0) return x;
744   } /* generate inexact */
745   z = x * x;
746   v = z * x;
747   r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6)));
748   if (iy == 0) {
749     return x + v * (S1 + z * r);
750   } else {
751     return x - ((z * (half * y - v * r) - y) - v * S1);
752   }
753 }
754 
755 /* __kernel_tan( x, y, k )
756  * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
757  * Input x is assumed to be bounded by ~pi/4 in magnitude.
758  * Input y is the tail of x.
759  * Input k indicates whether tan (if k=1) or
760  * -1/tan (if k= -1) is returned.
761  *
762  * Algorithm
763  *      1. Since tan(-x) = -tan(x), we need only to consider positive x.
764  *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
765  *      3. tan(x) is approximated by a odd polynomial of degree 27 on
766  *         [0,0.67434]
767  *                               3             27
768  *              tan(x) ~ x + T1*x + ... + T13*x
769  *         where
770  *
771  *              |tan(x)         2     4            26   |     -59.2
772  *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
773  *              |  x                                    |
774  *
775  *         Note: tan(x+y) = tan(x) + tan'(x)*y
776  *                        ~ tan(x) + (1+x*x)*y
777  *         Therefore, for better accuracy in computing tan(x+y), let
778  *                   3      2      2       2       2
779  *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
780  *         then
781  *                                  3    2
782  *              tan(x+y) = x + (T1*x + (x *(r+y)+y))
783  *
784  *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
785  *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
786  *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
787  */
__kernel_tan(double x,double y,int iy)788 double __kernel_tan(double x, double y, int iy) {
789   static const double xxx[] = {
790       3.33333333333334091986e-01,             /* 3FD55555, 55555563 */
791       1.33333333333201242699e-01,             /* 3FC11111, 1110FE7A */
792       5.39682539762260521377e-02,             /* 3FABA1BA, 1BB341FE */
793       2.18694882948595424599e-02,             /* 3F9664F4, 8406D637 */
794       8.86323982359930005737e-03,             /* 3F8226E3, E96E8493 */
795       3.59207910759131235356e-03,             /* 3F6D6D22, C9560328 */
796       1.45620945432529025516e-03,             /* 3F57DBC8, FEE08315 */
797       5.88041240820264096874e-04,             /* 3F4344D8, F2F26501 */
798       2.46463134818469906812e-04,             /* 3F3026F7, 1A8D1068 */
799       7.81794442939557092300e-05,             /* 3F147E88, A03792A6 */
800       7.14072491382608190305e-05,             /* 3F12B80F, 32F0A7E9 */
801       -1.85586374855275456654e-05,            /* BEF375CB, DB605373 */
802       2.59073051863633712884e-05,             /* 3EFB2A70, 74BF7AD4 */
803       /* one */ 1.00000000000000000000e+00,   /* 3FF00000, 00000000 */
804       /* pio4 */ 7.85398163397448278999e-01,  /* 3FE921FB, 54442D18 */
805       /* pio4lo */ 3.06161699786838301793e-17 /* 3C81A626, 33145C07 */
806   };
807 #define one xxx[13]
808 #define pio4 xxx[14]
809 #define pio4lo xxx[15]
810 #define T xxx
811 
812   double z, r, v, w, s;
813   int32_t ix, hx;
814 
815   GET_HIGH_WORD(hx, x);             /* high word of x */
816   ix = hx & 0x7fffffff;             /* high word of |x| */
817   if (ix < 0x3e300000) {            /* x < 2**-28 */
818     if (static_cast<int>(x) == 0) { /* generate inexact */
819       uint32_t low;
820       GET_LOW_WORD(low, x);
821       if (((ix | low) | (iy + 1)) == 0) {
822         return one / fabs(x);
823       } else {
824         if (iy == 1) {
825           return x;
826         } else { /* compute -1 / (x+y) carefully */
827           double a, t;
828 
829           z = w = x + y;
830           SET_LOW_WORD(z, 0);
831           v = y - (z - x);
832           t = a = -one / w;
833           SET_LOW_WORD(t, 0);
834           s = one + t * z;
835           return t + a * (s + t * v);
836         }
837       }
838     }
839   }
840   if (ix >= 0x3FE59428) { /* |x| >= 0.6744 */
841     if (hx < 0) {
842       x = -x;
843       y = -y;
844     }
845     z = pio4 - x;
846     w = pio4lo - y;
847     x = z + w;
848     y = 0.0;
849   }
850   z = x * x;
851   w = z * z;
852   /*
853    * Break x^5*(T[1]+x^2*T[2]+...) into
854    * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
855    * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
856    */
857   r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11]))));
858   v = z *
859       (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12])))));
860   s = z * x;
861   r = y + z * (s * (r + v) + y);
862   r += T[0] * s;
863   w = x + r;
864   if (ix >= 0x3FE59428) {
865     v = iy;
866     return (1 - ((hx >> 30) & 2)) * (v - 2.0 * (x - (w * w / (w + v) - r)));
867   }
868   if (iy == 1) {
869     return w;
870   } else {
871     /*
872      * if allow error up to 2 ulp, simply return
873      * -1.0 / (x+r) here
874      */
875     /* compute -1.0 / (x+r) accurately */
876     double a, t;
877     z = w;
878     SET_LOW_WORD(z, 0);
879     v = r - (z - x);  /* z+v = r+x */
880     t = a = -1.0 / w; /* a = -1.0/w */
881     SET_LOW_WORD(t, 0);
882     s = 1.0 + t * z;
883     return t + a * (s + t * v);
884   }
885 
886 #undef one
887 #undef pio4
888 #undef pio4lo
889 #undef T
890 }
891 
892 }  // namespace
893 
894 /* atan(x)
895  * Method
896  *   1. Reduce x to positive by atan(x) = -atan(-x).
897  *   2. According to the integer k=4t+0.25 chopped, t=x, the argument
898  *      is further reduced to one of the following intervals and the
899  *      arctangent of t is evaluated by the corresponding formula:
900  *
901  *      [0,7/16]      atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
902  *      [7/16,11/16]  atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) )
903  *      [11/16.19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) )
904  *      [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) )
905  *      [39/16,INF]   atan(x) = atan(INF) + atan( -1/t )
906  *
907  * Constants:
908  * The hexadecimal values are the intended ones for the following
909  * constants. The decimal values may be used, provided that the
910  * compiler will convert from decimal to binary accurately enough
911  * to produce the hexadecimal values shown.
912  */
atan(double x)913 double atan(double x) {
914   static const double atanhi[] = {
915       4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */
916       7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */
917       9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */
918       1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */
919   };
920 
921   static const double atanlo[] = {
922       2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */
923       3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */
924       1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */
925       6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */
926   };
927 
928   static const double aT[] = {
929       3.33333333333329318027e-01,  /* 0x3FD55555, 0x5555550D */
930       -1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */
931       1.42857142725034663711e-01,  /* 0x3FC24924, 0x920083FF */
932       -1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */
933       9.09088713343650656196e-02,  /* 0x3FB745CD, 0xC54C206E */
934       -7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */
935       6.66107313738753120669e-02,  /* 0x3FB10D66, 0xA0D03D51 */
936       -5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */
937       4.97687799461593236017e-02,  /* 0x3FA97B4B, 0x24760DEB */
938       -3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */
939       1.62858201153657823623e-02,  /* 0x3F90AD3A, 0xE322DA11 */
940   };
941 
942   static const double one = 1.0, huge = 1.0e300;
943 
944   double w, s1, s2, z;
945   int32_t ix, hx, id;
946 
947   GET_HIGH_WORD(hx, x);
948   ix = hx & 0x7fffffff;
949   if (ix >= 0x44100000) { /* if |x| >= 2^66 */
950     uint32_t low;
951     GET_LOW_WORD(low, x);
952     if (ix > 0x7ff00000 || (ix == 0x7ff00000 && (low != 0)))
953       return x + x; /* NaN */
954     if (hx > 0)
955       return atanhi[3] + *(volatile double *)&atanlo[3];
956     else
957       return -atanhi[3] - *(volatile double *)&atanlo[3];
958   }
959   if (ix < 0x3fdc0000) {            /* |x| < 0.4375 */
960     if (ix < 0x3e400000) {          /* |x| < 2^-27 */
961       if (huge + x > one) return x; /* raise inexact */
962     }
963     id = -1;
964   } else {
965     x = fabs(x);
966     if (ix < 0x3ff30000) {   /* |x| < 1.1875 */
967       if (ix < 0x3fe60000) { /* 7/16 <=|x|<11/16 */
968         id = 0;
969         x = (2.0 * x - one) / (2.0 + x);
970       } else { /* 11/16<=|x|< 19/16 */
971         id = 1;
972         x = (x - one) / (x + one);
973       }
974     } else {
975       if (ix < 0x40038000) { /* |x| < 2.4375 */
976         id = 2;
977         x = (x - 1.5) / (one + 1.5 * x);
978       } else { /* 2.4375 <= |x| < 2^66 */
979         id = 3;
980         x = -1.0 / x;
981       }
982     }
983   }
984   /* end of argument reduction */
985   z = x * x;
986   w = z * z;
987   /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
988   s1 = z * (aT[0] +
989             w * (aT[2] + w * (aT[4] + w * (aT[6] + w * (aT[8] + w * aT[10])))));
990   s2 = w * (aT[1] + w * (aT[3] + w * (aT[5] + w * (aT[7] + w * aT[9]))));
991   if (id < 0) {
992     return x - x * (s1 + s2);
993   } else {
994     z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x);
995     return (hx < 0) ? -z : z;
996   }
997 }
998 
999 /* atan2(y,x)
1000  * Method :
1001  *  1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
1002  *  2. Reduce x to positive by (if x and y are unexceptional):
1003  *    ARG (x+iy) = arctan(y/x)       ... if x > 0,
1004  *    ARG (x+iy) = pi - arctan[y/(-x)]   ... if x < 0,
1005  *
1006  * Special cases:
1007  *
1008  *  ATAN2((anything), NaN ) is NaN;
1009  *  ATAN2(NAN , (anything) ) is NaN;
1010  *  ATAN2(+-0, +(anything but NaN)) is +-0  ;
1011  *  ATAN2(+-0, -(anything but NaN)) is +-pi ;
1012  *  ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2;
1013  *  ATAN2(+-(anything but INF and NaN), +INF) is +-0 ;
1014  *  ATAN2(+-(anything but INF and NaN), -INF) is +-pi;
1015  *  ATAN2(+-INF,+INF ) is +-pi/4 ;
1016  *  ATAN2(+-INF,-INF ) is +-3pi/4;
1017  *  ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
1018  *
1019  * Constants:
1020  * The hexadecimal values are the intended ones for the following
1021  * constants. The decimal values may be used, provided that the
1022  * compiler will convert from decimal to binary accurately enough
1023  * to produce the hexadecimal values shown.
1024  */
atan2(double y,double x)1025 double atan2(double y, double x) {
1026   static volatile double tiny = 1.0e-300;
1027   static const double
1028       zero = 0.0,
1029       pi_o_4 = 7.8539816339744827900E-01, /* 0x3FE921FB, 0x54442D18 */
1030       pi_o_2 = 1.5707963267948965580E+00, /* 0x3FF921FB, 0x54442D18 */
1031       pi = 3.1415926535897931160E+00;     /* 0x400921FB, 0x54442D18 */
1032   static volatile double pi_lo =
1033       1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
1034 
1035   double z;
1036   int32_t k, m, hx, hy, ix, iy;
1037   uint32_t lx, ly;
1038 
1039   EXTRACT_WORDS(hx, lx, x);
1040   ix = hx & 0x7fffffff;
1041   EXTRACT_WORDS(hy, ly, y);
1042   iy = hy & 0x7fffffff;
1043   if (((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x7ff00000) ||
1044       ((iy | ((ly | -static_cast<int32_t>(ly)) >> 31)) > 0x7ff00000)) {
1045     return x + y; /* x or y is NaN */
1046   }
1047   if (((hx - 0x3ff00000) | lx) == 0) return atan(y); /* x=1.0 */
1048   m = ((hy >> 31) & 1) | ((hx >> 30) & 2);           /* 2*sign(x)+sign(y) */
1049 
1050   /* when y = 0 */
1051   if ((iy | ly) == 0) {
1052     switch (m) {
1053       case 0:
1054       case 1:
1055         return y; /* atan(+-0,+anything)=+-0 */
1056       case 2:
1057         return pi + tiny; /* atan(+0,-anything) = pi */
1058       case 3:
1059         return -pi - tiny; /* atan(-0,-anything) =-pi */
1060     }
1061   }
1062   /* when x = 0 */
1063   if ((ix | lx) == 0) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
1064 
1065   /* when x is INF */
1066   if (ix == 0x7ff00000) {
1067     if (iy == 0x7ff00000) {
1068       switch (m) {
1069         case 0:
1070           return pi_o_4 + tiny; /* atan(+INF,+INF) */
1071         case 1:
1072           return -pi_o_4 - tiny; /* atan(-INF,+INF) */
1073         case 2:
1074           return 3.0 * pi_o_4 + tiny; /*atan(+INF,-INF)*/
1075         case 3:
1076           return -3.0 * pi_o_4 - tiny; /*atan(-INF,-INF)*/
1077       }
1078     } else {
1079       switch (m) {
1080         case 0:
1081           return zero; /* atan(+...,+INF) */
1082         case 1:
1083           return -zero; /* atan(-...,+INF) */
1084         case 2:
1085           return pi + tiny; /* atan(+...,-INF) */
1086         case 3:
1087           return -pi - tiny; /* atan(-...,-INF) */
1088       }
1089     }
1090   }
1091   /* when y is INF */
1092   if (iy == 0x7ff00000) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
1093 
1094   /* compute y/x */
1095   k = (iy - ix) >> 20;
1096   if (k > 60) { /* |y/x| >  2**60 */
1097     z = pi_o_2 + 0.5 * pi_lo;
1098     m &= 1;
1099   } else if (hx < 0 && k < -60) {
1100     z = 0.0; /* 0 > |y|/x > -2**-60 */
1101   } else {
1102     z = atan(fabs(y / x)); /* safe to do y/x */
1103   }
1104   switch (m) {
1105     case 0:
1106       return z; /* atan(+,+) */
1107     case 1:
1108       return -z; /* atan(-,+) */
1109     case 2:
1110       return pi - (z - pi_lo); /* atan(+,-) */
1111     default:                   /* case 3 */
1112       return (z - pi_lo) - pi; /* atan(-,-) */
1113   }
1114 }
1115 
1116 /* cos(x)
1117  * Return cosine function of x.
1118  *
1119  * kernel function:
1120  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
1121  *      __kernel_cos            ... cosine function on [-pi/4,pi/4]
1122  *      __ieee754_rem_pio2      ... argument reduction routine
1123  *
1124  * Method.
1125  *      Let S,C and T denote the sin, cos and tan respectively on
1126  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
1127  *      in [-pi/4 , +pi/4], and let n = k mod 4.
1128  *      We have
1129  *
1130  *          n        sin(x)      cos(x)        tan(x)
1131  *     ----------------------------------------------------------
1132  *          0          S           C             T
1133  *          1          C          -S            -1/T
1134  *          2         -S          -C             T
1135  *          3         -C           S            -1/T
1136  *     ----------------------------------------------------------
1137  *
1138  * Special cases:
1139  *      Let trig be any of sin, cos, or tan.
1140  *      trig(+-INF)  is NaN, with signals;
1141  *      trig(NaN)    is that NaN;
1142  *
1143  * Accuracy:
1144  *      TRIG(x) returns trig(x) nearly rounded
1145  */
cos(double x)1146 double cos(double x) {
1147   double y[2], z = 0.0;
1148   int32_t n, ix;
1149 
1150   /* High word of x. */
1151   GET_HIGH_WORD(ix, x);
1152 
1153   /* |x| ~< pi/4 */
1154   ix &= 0x7fffffff;
1155   if (ix <= 0x3fe921fb) {
1156     return __kernel_cos(x, z);
1157   } else if (ix >= 0x7ff00000) {
1158     /* cos(Inf or NaN) is NaN */
1159     return x - x;
1160   } else {
1161     /* argument reduction needed */
1162     n = __ieee754_rem_pio2(x, y);
1163     switch (n & 3) {
1164       case 0:
1165         return __kernel_cos(y[0], y[1]);
1166       case 1:
1167         return -__kernel_sin(y[0], y[1], 1);
1168       case 2:
1169         return -__kernel_cos(y[0], y[1]);
1170       default:
1171         return __kernel_sin(y[0], y[1], 1);
1172     }
1173   }
1174 }
1175 
1176 /* exp(x)
1177  * Returns the exponential of x.
1178  *
1179  * Method
1180  *   1. Argument reduction:
1181  *      Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
1182  *      Given x, find r and integer k such that
1183  *
1184  *               x = k*ln2 + r,  |r| <= 0.5*ln2.
1185  *
1186  *      Here r will be represented as r = hi-lo for better
1187  *      accuracy.
1188  *
1189  *   2. Approximation of exp(r) by a special rational function on
1190  *      the interval [0,0.34658]:
1191  *      Write
1192  *          R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
1193  *      We use a special Remes algorithm on [0,0.34658] to generate
1194  *      a polynomial of degree 5 to approximate R. The maximum error
1195  *      of this polynomial approximation is bounded by 2**-59. In
1196  *      other words,
1197  *          R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
1198  *      (where z=r*r, and the values of P1 to P5 are listed below)
1199  *      and
1200  *          |                  5          |     -59
1201  *          | 2.0+P1*z+...+P5*z   -  R(z) | <= 2
1202  *          |                             |
1203  *      The computation of exp(r) thus becomes
1204  *                             2*r
1205  *              exp(r) = 1 + -------
1206  *                            R - r
1207  *                                 r*R1(r)
1208  *                     = 1 + r + ----------- (for better accuracy)
1209  *                                2 - R1(r)
1210  *      where
1211  *                               2       4             10
1212  *              R1(r) = r - (P1*r  + P2*r  + ... + P5*r   ).
1213  *
1214  *   3. Scale back to obtain exp(x):
1215  *      From step 1, we have
1216  *         exp(x) = 2^k * exp(r)
1217  *
1218  * Special cases:
1219  *      exp(INF) is INF, exp(NaN) is NaN;
1220  *      exp(-INF) is 0, and
1221  *      for finite argument, only exp(0)=1 is exact.
1222  *
1223  * Accuracy:
1224  *      according to an error analysis, the error is always less than
1225  *      1 ulp (unit in the last place).
1226  *
1227  * Misc. info.
1228  *      For IEEE double
1229  *          if x >  7.09782712893383973096e+02 then exp(x) overflow
1230  *          if x < -7.45133219101941108420e+02 then exp(x) underflow
1231  *
1232  * Constants:
1233  * The hexadecimal values are the intended ones for the following
1234  * constants. The decimal values may be used, provided that the
1235  * compiler will convert from decimal to binary accurately enough
1236  * to produce the hexadecimal values shown.
1237  */
exp(double x)1238 double exp(double x) {
1239   static const double
1240       one = 1.0,
1241       halF[2] = {0.5, -0.5},
1242       o_threshold = 7.09782712893383973096e+02,  /* 0x40862E42, 0xFEFA39EF */
1243       u_threshold = -7.45133219101941108420e+02, /* 0xc0874910, 0xD52D3051 */
1244       ln2HI[2] = {6.93147180369123816490e-01,    /* 0x3fe62e42, 0xfee00000 */
1245                   -6.93147180369123816490e-01},  /* 0xbfe62e42, 0xfee00000 */
1246       ln2LO[2] = {1.90821492927058770002e-10,    /* 0x3dea39ef, 0x35793c76 */
1247                   -1.90821492927058770002e-10},  /* 0xbdea39ef, 0x35793c76 */
1248       invln2 = 1.44269504088896338700e+00,       /* 0x3ff71547, 0x652b82fe */
1249       P1 = 1.66666666666666019037e-01,           /* 0x3FC55555, 0x5555553E */
1250       P2 = -2.77777777770155933842e-03,          /* 0xBF66C16C, 0x16BEBD93 */
1251       P3 = 6.61375632143793436117e-05,           /* 0x3F11566A, 0xAF25DE2C */
1252       P4 = -1.65339022054652515390e-06,          /* 0xBEBBBD41, 0xC5D26BF1 */
1253       P5 = 4.13813679705723846039e-08,           /* 0x3E663769, 0x72BEA4D0 */
1254       E = 2.718281828459045;                     /* 0x4005bf0a, 0x8b145769 */
1255 
1256   static volatile double
1257       huge = 1.0e+300,
1258       twom1000 = 9.33263618503218878990e-302, /* 2**-1000=0x01700000,0*/
1259       two1023 = 8.988465674311579539e307;     /* 0x1p1023 */
1260 
1261   double y, hi = 0.0, lo = 0.0, c, t, twopk;
1262   int32_t k = 0, xsb;
1263   uint32_t hx;
1264 
1265   GET_HIGH_WORD(hx, x);
1266   xsb = (hx >> 31) & 1; /* sign bit of x */
1267   hx &= 0x7fffffff;     /* high word of |x| */
1268 
1269   /* filter out non-finite argument */
1270   if (hx >= 0x40862E42) { /* if |x|>=709.78... */
1271     if (hx >= 0x7ff00000) {
1272       uint32_t lx;
1273       GET_LOW_WORD(lx, x);
1274       if (((hx & 0xfffff) | lx) != 0)
1275         return x + x; /* NaN */
1276       else
1277         return (xsb == 0) ? x : 0.0; /* exp(+-inf)={inf,0} */
1278     }
1279     if (x > o_threshold) return huge * huge;         /* overflow */
1280     if (x < u_threshold) return twom1000 * twom1000; /* underflow */
1281   }
1282 
1283   /* argument reduction */
1284   if (hx > 0x3fd62e42) {   /* if  |x| > 0.5 ln2 */
1285     if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
1286       /* TODO(rtoy): We special case exp(1) here to return the correct
1287        * value of E, as the computation below would get the last bit
1288        * wrong. We should probably fix the algorithm instead.
1289        */
1290       if (x == 1.0) return E;
1291       hi = x - ln2HI[xsb];
1292       lo = ln2LO[xsb];
1293       k = 1 - xsb - xsb;
1294     } else {
1295       k = static_cast<int>(invln2 * x + halF[xsb]);
1296       t = k;
1297       hi = x - t * ln2HI[0]; /* t*ln2HI is exact here */
1298       lo = t * ln2LO[0];
1299     }
1300     STRICT_ASSIGN(double, x, hi - lo);
1301   } else if (hx < 0x3e300000) {         /* when |x|<2**-28 */
1302     if (huge + x > one) return one + x; /* trigger inexact */
1303   } else {
1304     k = 0;
1305   }
1306 
1307   /* x is now in primary range */
1308   t = x * x;
1309   if (k >= -1021) {
1310     INSERT_WORDS(twopk, 0x3ff00000 + (k << 20), 0);
1311   } else {
1312     INSERT_WORDS(twopk, 0x3ff00000 + ((k + 1000) << 20), 0);
1313   }
1314   c = x - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
1315   if (k == 0) {
1316     return one - ((x * c) / (c - 2.0) - x);
1317   } else {
1318     y = one - ((lo - (x * c) / (2.0 - c)) - hi);
1319   }
1320   if (k >= -1021) {
1321     if (k == 1024) return y * 2.0 * two1023;
1322     return y * twopk;
1323   } else {
1324     return y * twopk * twom1000;
1325   }
1326 }
1327 
1328 /*
1329  * Method :
1330  *    1.Reduced x to positive by atanh(-x) = -atanh(x)
1331  *    2.For x>=0.5
1332  *              1              2x                          x
1333  *  atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------)
1334  *              2             1 - x                      1 - x
1335  *
1336  *   For x<0.5
1337  *  atanh(x) = 0.5*log1p(2x+2x*x/(1-x))
1338  *
1339  * Special cases:
1340  *  atanh(x) is NaN if |x| > 1 with signal;
1341  *  atanh(NaN) is that NaN with no signal;
1342  *  atanh(+-1) is +-INF with signal.
1343  *
1344  */
atanh(double x)1345 double atanh(double x) {
1346   static const double one = 1.0, huge = 1e300;
1347   static const double zero = 0.0;
1348 
1349   double t;
1350   int32_t hx, ix;
1351   uint32_t lx;
1352   EXTRACT_WORDS(hx, lx, x);
1353   ix = hx & 0x7fffffff;
1354   if ((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x3ff00000) /* |x|>1 */
1355     return (x - x) / (x - x);
1356   if (ix == 0x3ff00000) return x / zero;
1357   if (ix < 0x3e300000 && (huge + x) > zero) return x; /* x<2**-28 */
1358   SET_HIGH_WORD(x, ix);
1359   if (ix < 0x3fe00000) { /* x < 0.5 */
1360     t = x + x;
1361     t = 0.5 * log1p(t + t * x / (one - x));
1362   } else {
1363     t = 0.5 * log1p((x + x) / (one - x));
1364   }
1365   if (hx >= 0)
1366     return t;
1367   else
1368     return -t;
1369 }
1370 
1371 /* log(x)
1372  * Return the logrithm of x
1373  *
1374  * Method :
1375  *   1. Argument Reduction: find k and f such that
1376  *     x = 2^k * (1+f),
1377  *     where  sqrt(2)/2 < 1+f < sqrt(2) .
1378  *
1379  *   2. Approximation of log(1+f).
1380  *  Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1381  *     = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1382  *         = 2s + s*R
1383  *      We use a special Reme algorithm on [0,0.1716] to generate
1384  *  a polynomial of degree 14 to approximate R The maximum error
1385  *  of this polynomial approximation is bounded by 2**-58.45. In
1386  *  other words,
1387  *            2      4      6      8      10      12      14
1388  *      R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
1389  *    (the values of Lg1 to Lg7 are listed in the program)
1390  *  and
1391  *      |      2          14          |     -58.45
1392  *      | Lg1*s +...+Lg7*s    -  R(z) | <= 2
1393  *      |                             |
1394  *  Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1395  *  In order to guarantee error in log below 1ulp, we compute log
1396  *  by
1397  *    log(1+f) = f - s*(f - R)  (if f is not too large)
1398  *    log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
1399  *
1400  *  3. Finally,  log(x) = k*ln2 + log(1+f).
1401  *          = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1402  *     Here ln2 is split into two floating point number:
1403  *      ln2_hi + ln2_lo,
1404  *     where n*ln2_hi is always exact for |n| < 2000.
1405  *
1406  * Special cases:
1407  *  log(x) is NaN with signal if x < 0 (including -INF) ;
1408  *  log(+INF) is +INF; log(0) is -INF with signal;
1409  *  log(NaN) is that NaN with no signal.
1410  *
1411  * Accuracy:
1412  *  according to an error analysis, the error is always less than
1413  *  1 ulp (unit in the last place).
1414  *
1415  * Constants:
1416  * The hexadecimal values are the intended ones for the following
1417  * constants. The decimal values may be used, provided that the
1418  * compiler will convert from decimal to binary accurately enough
1419  * to produce the hexadecimal values shown.
1420  */
log(double x)1421 double log(double x) {
1422   static const double                      /* -- */
1423       ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
1424       ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
1425       two54 = 1.80143985094819840000e+16,  /* 43500000 00000000 */
1426       Lg1 = 6.666666666666735130e-01,      /* 3FE55555 55555593 */
1427       Lg2 = 3.999999999940941908e-01,      /* 3FD99999 9997FA04 */
1428       Lg3 = 2.857142874366239149e-01,      /* 3FD24924 94229359 */
1429       Lg4 = 2.222219843214978396e-01,      /* 3FCC71C5 1D8E78AF */
1430       Lg5 = 1.818357216161805012e-01,      /* 3FC74664 96CB03DE */
1431       Lg6 = 1.531383769920937332e-01,      /* 3FC39A09 D078C69F */
1432       Lg7 = 1.479819860511658591e-01;      /* 3FC2F112 DF3E5244 */
1433 
1434   static const double zero = 0.0;
1435   static volatile double vzero = 0.0;
1436 
1437   double hfsq, f, s, z, R, w, t1, t2, dk;
1438   int32_t k, hx, i, j;
1439   uint32_t lx;
1440 
1441   EXTRACT_WORDS(hx, lx, x);
1442 
1443   k = 0;
1444   if (hx < 0x00100000) { /* x < 2**-1022  */
1445     if (((hx & 0x7fffffff) | lx) == 0)
1446       return -two54 / vzero;           /* log(+-0)=-inf */
1447     if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
1448     k -= 54;
1449     x *= two54; /* subnormal number, scale up x */
1450     GET_HIGH_WORD(hx, x);
1451   }
1452   if (hx >= 0x7ff00000) return x + x;
1453   k += (hx >> 20) - 1023;
1454   hx &= 0x000fffff;
1455   i = (hx + 0x95f64) & 0x100000;
1456   SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */
1457   k += (i >> 20);
1458   f = x - 1.0;
1459   if ((0x000fffff & (2 + hx)) < 3) { /* -2**-20 <= f < 2**-20 */
1460     if (f == zero) {
1461       if (k == 0) {
1462         return zero;
1463       } else {
1464         dk = static_cast<double>(k);
1465         return dk * ln2_hi + dk * ln2_lo;
1466       }
1467     }
1468     R = f * f * (0.5 - 0.33333333333333333 * f);
1469     if (k == 0) {
1470       return f - R;
1471     } else {
1472       dk = static_cast<double>(k);
1473       return dk * ln2_hi - ((R - dk * ln2_lo) - f);
1474     }
1475   }
1476   s = f / (2.0 + f);
1477   dk = static_cast<double>(k);
1478   z = s * s;
1479   i = hx - 0x6147a;
1480   w = z * z;
1481   j = 0x6b851 - hx;
1482   t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
1483   t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
1484   i |= j;
1485   R = t2 + t1;
1486   if (i > 0) {
1487     hfsq = 0.5 * f * f;
1488     if (k == 0)
1489       return f - (hfsq - s * (hfsq + R));
1490     else
1491       return dk * ln2_hi - ((hfsq - (s * (hfsq + R) + dk * ln2_lo)) - f);
1492   } else {
1493     if (k == 0)
1494       return f - s * (f - R);
1495     else
1496       return dk * ln2_hi - ((s * (f - R) - dk * ln2_lo) - f);
1497   }
1498 }
1499 
1500 /* double log1p(double x)
1501  *
1502  * Method :
1503  *   1. Argument Reduction: find k and f such that
1504  *      1+x = 2^k * (1+f),
1505  *     where  sqrt(2)/2 < 1+f < sqrt(2) .
1506  *
1507  *      Note. If k=0, then f=x is exact. However, if k!=0, then f
1508  *  may not be representable exactly. In that case, a correction
1509  *  term is need. Let u=1+x rounded. Let c = (1+x)-u, then
1510  *  log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
1511  *  and add back the correction term c/u.
1512  *  (Note: when x > 2**53, one can simply return log(x))
1513  *
1514  *   2. Approximation of log1p(f).
1515  *  Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1516  *     = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1517  *         = 2s + s*R
1518  *      We use a special Reme algorithm on [0,0.1716] to generate
1519  *  a polynomial of degree 14 to approximate R The maximum error
1520  *  of this polynomial approximation is bounded by 2**-58.45. In
1521  *  other words,
1522  *            2      4      6      8      10      12      14
1523  *      R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s  +Lp6*s  +Lp7*s
1524  *    (the values of Lp1 to Lp7 are listed in the program)
1525  *  and
1526  *      |      2          14          |     -58.45
1527  *      | Lp1*s +...+Lp7*s    -  R(z) | <= 2
1528  *      |                             |
1529  *  Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1530  *  In order to guarantee error in log below 1ulp, we compute log
1531  *  by
1532  *    log1p(f) = f - (hfsq - s*(hfsq+R)).
1533  *
1534  *  3. Finally, log1p(x) = k*ln2 + log1p(f).
1535  *           = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1536  *     Here ln2 is split into two floating point number:
1537  *      ln2_hi + ln2_lo,
1538  *     where n*ln2_hi is always exact for |n| < 2000.
1539  *
1540  * Special cases:
1541  *  log1p(x) is NaN with signal if x < -1 (including -INF) ;
1542  *  log1p(+INF) is +INF; log1p(-1) is -INF with signal;
1543  *  log1p(NaN) is that NaN with no signal.
1544  *
1545  * Accuracy:
1546  *  according to an error analysis, the error is always less than
1547  *  1 ulp (unit in the last place).
1548  *
1549  * Constants:
1550  * The hexadecimal values are the intended ones for the following
1551  * constants. The decimal values may be used, provided that the
1552  * compiler will convert from decimal to binary accurately enough
1553  * to produce the hexadecimal values shown.
1554  *
1555  * Note: Assuming log() return accurate answer, the following
1556  *   algorithm can be used to compute log1p(x) to within a few ULP:
1557  *
1558  *    u = 1+x;
1559  *    if(u==1.0) return x ; else
1560  *         return log(u)*(x/(u-1.0));
1561  *
1562  *   See HP-15C Advanced Functions Handbook, p.193.
1563  */
log1p(double x)1564 double log1p(double x) {
1565   static const double                      /* -- */
1566       ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
1567       ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
1568       two54 = 1.80143985094819840000e+16,  /* 43500000 00000000 */
1569       Lp1 = 6.666666666666735130e-01,      /* 3FE55555 55555593 */
1570       Lp2 = 3.999999999940941908e-01,      /* 3FD99999 9997FA04 */
1571       Lp3 = 2.857142874366239149e-01,      /* 3FD24924 94229359 */
1572       Lp4 = 2.222219843214978396e-01,      /* 3FCC71C5 1D8E78AF */
1573       Lp5 = 1.818357216161805012e-01,      /* 3FC74664 96CB03DE */
1574       Lp6 = 1.531383769920937332e-01,      /* 3FC39A09 D078C69F */
1575       Lp7 = 1.479819860511658591e-01;      /* 3FC2F112 DF3E5244 */
1576 
1577   static const double zero = 0.0;
1578   static volatile double vzero = 0.0;
1579 
1580   double hfsq, f, c, s, z, R, u;
1581   int32_t k, hx, hu, ax;
1582 
1583   GET_HIGH_WORD(hx, x);
1584   ax = hx & 0x7fffffff;
1585 
1586   k = 1;
1587   if (hx < 0x3FDA827A) {    /* 1+x < sqrt(2)+ */
1588     if (ax >= 0x3ff00000) { /* x <= -1.0 */
1589       if (x == -1.0)
1590         return -two54 / vzero; /* log1p(-1)=+inf */
1591       else
1592         return (x - x) / (x - x); /* log1p(x<-1)=NaN */
1593     }
1594     if (ax < 0x3e200000) {    /* |x| < 2**-29 */
1595       if (two54 + x > zero    /* raise inexact */
1596           && ax < 0x3c900000) /* |x| < 2**-54 */
1597         return x;
1598       else
1599         return x - x * x * 0.5;
1600     }
1601     if (hx > 0 || hx <= static_cast<int32_t>(0xbfd2bec4)) {
1602       k = 0;
1603       f = x;
1604       hu = 1;
1605     } /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
1606   }
1607   if (hx >= 0x7ff00000) return x + x;
1608   if (k != 0) {
1609     if (hx < 0x43400000) {
1610       STRICT_ASSIGN(double, u, 1.0 + x);
1611       GET_HIGH_WORD(hu, u);
1612       k = (hu >> 20) - 1023;
1613       c = (k > 0) ? 1.0 - (u - x) : x - (u - 1.0); /* correction term */
1614       c /= u;
1615     } else {
1616       u = x;
1617       GET_HIGH_WORD(hu, u);
1618       k = (hu >> 20) - 1023;
1619       c = 0;
1620     }
1621     hu &= 0x000fffff;
1622     /*
1623      * The approximation to sqrt(2) used in thresholds is not
1624      * critical.  However, the ones used above must give less
1625      * strict bounds than the one here so that the k==0 case is
1626      * never reached from here, since here we have committed to
1627      * using the correction term but don't use it if k==0.
1628      */
1629     if (hu < 0x6a09e) {                  /* u ~< sqrt(2) */
1630       SET_HIGH_WORD(u, hu | 0x3ff00000); /* normalize u */
1631     } else {
1632       k += 1;
1633       SET_HIGH_WORD(u, hu | 0x3fe00000); /* normalize u/2 */
1634       hu = (0x00100000 - hu) >> 2;
1635     }
1636     f = u - 1.0;
1637   }
1638   hfsq = 0.5 * f * f;
1639   if (hu == 0) { /* |f| < 2**-20 */
1640     if (f == zero) {
1641       if (k == 0) {
1642         return zero;
1643       } else {
1644         c += k * ln2_lo;
1645         return k * ln2_hi + c;
1646       }
1647     }
1648     R = hfsq * (1.0 - 0.66666666666666666 * f);
1649     if (k == 0)
1650       return f - R;
1651     else
1652       return k * ln2_hi - ((R - (k * ln2_lo + c)) - f);
1653   }
1654   s = f / (2.0 + f);
1655   z = s * s;
1656   R = z * (Lp1 +
1657            z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7))))));
1658   if (k == 0)
1659     return f - (hfsq - s * (hfsq + R));
1660   else
1661     return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);
1662 }
1663 
1664 /*
1665  * k_log1p(f):
1666  * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)].
1667  *
1668  * The following describes the overall strategy for computing
1669  * logarithms in base e.  The argument reduction and adding the final
1670  * term of the polynomial are done by the caller for increased accuracy
1671  * when different bases are used.
1672  *
1673  * Method :
1674  *   1. Argument Reduction: find k and f such that
1675  *         x = 2^k * (1+f),
1676  *         where  sqrt(2)/2 < 1+f < sqrt(2) .
1677  *
1678  *   2. Approximation of log(1+f).
1679  *      Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1680  *            = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1681  *            = 2s + s*R
1682  *      We use a special Reme algorithm on [0,0.1716] to generate
1683  *      a polynomial of degree 14 to approximate R The maximum error
1684  *      of this polynomial approximation is bounded by 2**-58.45. In
1685  *      other words,
1686  *          2      4      6      8      10      12      14
1687  *          R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
1688  *      (the values of Lg1 to Lg7 are listed in the program)
1689  *      and
1690  *          |      2          14          |     -58.45
1691  *          | Lg1*s +...+Lg7*s    -  R(z) | <= 2
1692  *          |                             |
1693  *      Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1694  *      In order to guarantee error in log below 1ulp, we compute log
1695  *      by
1696  *          log(1+f) = f - s*(f - R)            (if f is not too large)
1697  *          log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
1698  *
1699  *   3. Finally,  log(x) = k*ln2 + log(1+f).
1700  *          = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1701  *      Here ln2 is split into two floating point number:
1702  *          ln2_hi + ln2_lo,
1703  *      where n*ln2_hi is always exact for |n| < 2000.
1704  *
1705  * Special cases:
1706  *      log(x) is NaN with signal if x < 0 (including -INF) ;
1707  *      log(+INF) is +INF; log(0) is -INF with signal;
1708  *      log(NaN) is that NaN with no signal.
1709  *
1710  * Accuracy:
1711  *      according to an error analysis, the error is always less than
1712  *      1 ulp (unit in the last place).
1713  *
1714  * Constants:
1715  * The hexadecimal values are the intended ones for the following
1716  * constants. The decimal values may be used, provided that the
1717  * compiler will convert from decimal to binary accurately enough
1718  * to produce the hexadecimal values shown.
1719  */
1720 
1721 static const double Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
1722     Lg2 = 3.999999999940941908e-01,                 /* 3FD99999 9997FA04 */
1723     Lg3 = 2.857142874366239149e-01,                 /* 3FD24924 94229359 */
1724     Lg4 = 2.222219843214978396e-01,                 /* 3FCC71C5 1D8E78AF */
1725     Lg5 = 1.818357216161805012e-01,                 /* 3FC74664 96CB03DE */
1726     Lg6 = 1.531383769920937332e-01,                 /* 3FC39A09 D078C69F */
1727     Lg7 = 1.479819860511658591e-01;                 /* 3FC2F112 DF3E5244 */
1728 
1729 /*
1730  * We always inline k_log1p(), since doing so produces a
1731  * substantial performance improvement (~40% on amd64).
1732  */
k_log1p(double f)1733 static inline double k_log1p(double f) {
1734   double hfsq, s, z, R, w, t1, t2;
1735 
1736   s = f / (2.0 + f);
1737   z = s * s;
1738   w = z * z;
1739   t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
1740   t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
1741   R = t2 + t1;
1742   hfsq = 0.5 * f * f;
1743   return s * (hfsq + R);
1744 }
1745 
1746 /*
1747  * Return the base 2 logarithm of x.  See e_log.c and k_log.h for most
1748  * comments.
1749  *
1750  * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel,
1751  * then does the combining and scaling steps
1752  *    log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k
1753  * in not-quite-routine extra precision.
1754  */
log2(double x)1755 double log2(double x) {
1756   static const double
1757       two54 = 1.80143985094819840000e+16,   /* 0x43500000, 0x00000000 */
1758       ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */
1759       ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */
1760 
1761   static const double zero = 0.0;
1762   static volatile double vzero = 0.0;
1763 
1764   double f, hfsq, hi, lo, r, val_hi, val_lo, w, y;
1765   int32_t i, k, hx;
1766   uint32_t lx;
1767 
1768   EXTRACT_WORDS(hx, lx, x);
1769 
1770   k = 0;
1771   if (hx < 0x00100000) { /* x < 2**-1022  */
1772     if (((hx & 0x7fffffff) | lx) == 0)
1773       return -two54 / vzero;           /* log(+-0)=-inf */
1774     if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
1775     k -= 54;
1776     x *= two54; /* subnormal number, scale up x */
1777     GET_HIGH_WORD(hx, x);
1778   }
1779   if (hx >= 0x7ff00000) return x + x;
1780   if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */
1781   k += (hx >> 20) - 1023;
1782   hx &= 0x000fffff;
1783   i = (hx + 0x95f64) & 0x100000;
1784   SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */
1785   k += (i >> 20);
1786   y = static_cast<double>(k);
1787   f = x - 1.0;
1788   hfsq = 0.5 * f * f;
1789   r = k_log1p(f);
1790 
1791   /*
1792    * f-hfsq must (for args near 1) be evaluated in extra precision
1793    * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2).
1794    * This is fairly efficient since f-hfsq only depends on f, so can
1795    * be evaluated in parallel with R.  Not combining hfsq with R also
1796    * keeps R small (though not as small as a true `lo' term would be),
1797    * so that extra precision is not needed for terms involving R.
1798    *
1799    * Compiler bugs involving extra precision used to break Dekker's
1800    * theorem for spitting f-hfsq as hi+lo, unless double_t was used
1801    * or the multi-precision calculations were avoided when double_t
1802    * has extra precision.  These problems are now automatically
1803    * avoided as a side effect of the optimization of combining the
1804    * Dekker splitting step with the clear-low-bits step.
1805    *
1806    * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra
1807    * precision to avoid a very large cancellation when x is very near
1808    * these values.  Unlike the above cancellations, this problem is
1809    * specific to base 2.  It is strange that adding +-1 is so much
1810    * harder than adding +-ln2 or +-log10_2.
1811    *
1812    * This uses Dekker's theorem to normalize y+val_hi, so the
1813    * compiler bugs are back in some configurations, sigh.  And I
1814    * don't want to used double_t to avoid them, since that gives a
1815    * pessimization and the support for avoiding the pessimization
1816    * is not yet available.
1817    *
1818    * The multi-precision calculations for the multiplications are
1819    * routine.
1820    */
1821   hi = f - hfsq;
1822   SET_LOW_WORD(hi, 0);
1823   lo = (f - hi) - hfsq + r;
1824   val_hi = hi * ivln2hi;
1825   val_lo = (lo + hi) * ivln2lo + lo * ivln2hi;
1826 
1827   /* spadd(val_hi, val_lo, y), except for not using double_t: */
1828   w = y + val_hi;
1829   val_lo += (y - w) + val_hi;
1830   val_hi = w;
1831 
1832   return val_lo + val_hi;
1833 }
1834 
1835 /*
1836  * Return the base 10 logarithm of x
1837  *
1838  * Method :
1839  *      Let log10_2hi = leading 40 bits of log10(2) and
1840  *          log10_2lo = log10(2) - log10_2hi,
1841  *          ivln10   = 1/log(10) rounded.
1842  *      Then
1843  *              n = ilogb(x),
1844  *              if(n<0)  n = n+1;
1845  *              x = scalbn(x,-n);
1846  *              log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x))
1847  *
1848  *  Note 1:
1849  *     To guarantee log10(10**n)=n, where 10**n is normal, the rounding
1850  *     mode must set to Round-to-Nearest.
1851  *  Note 2:
1852  *      [1/log(10)] rounded to 53 bits has error .198 ulps;
1853  *      log10 is monotonic at all binary break points.
1854  *
1855  *  Special cases:
1856  *      log10(x) is NaN if x < 0;
1857  *      log10(+INF) is +INF; log10(0) is -INF;
1858  *      log10(NaN) is that NaN;
1859  *      log10(10**N) = N  for N=0,1,...,22.
1860  */
log10(double x)1861 double log10(double x) {
1862   static const double
1863       two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
1864       ivln10 = 4.34294481903251816668e-01,
1865       log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
1866       log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
1867 
1868   static const double zero = 0.0;
1869   static volatile double vzero = 0.0;
1870 
1871   double y;
1872   int32_t i, k, hx;
1873   uint32_t lx;
1874 
1875   EXTRACT_WORDS(hx, lx, x);
1876 
1877   k = 0;
1878   if (hx < 0x00100000) { /* x < 2**-1022  */
1879     if (((hx & 0x7fffffff) | lx) == 0)
1880       return -two54 / vzero;           /* log(+-0)=-inf */
1881     if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
1882     k -= 54;
1883     x *= two54; /* subnormal number, scale up x */
1884     GET_HIGH_WORD(hx, x);
1885     GET_LOW_WORD(lx, x);
1886   }
1887   if (hx >= 0x7ff00000) return x + x;
1888   if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */
1889   k += (hx >> 20) - 1023;
1890 
1891   i = (k & 0x80000000) >> 31;
1892   hx = (hx & 0x000fffff) | ((0x3ff - i) << 20);
1893   y = k + i;
1894   SET_HIGH_WORD(x, hx);
1895   SET_LOW_WORD(x, lx);
1896 
1897   double z = y * log10_2lo + ivln10 * log(x);
1898   return z + y * log10_2hi;
1899 }
1900 
1901 /* expm1(x)
1902  * Returns exp(x)-1, the exponential of x minus 1.
1903  *
1904  * Method
1905  *   1. Argument reduction:
1906  *  Given x, find r and integer k such that
1907  *
1908  *               x = k*ln2 + r,  |r| <= 0.5*ln2 ~ 0.34658
1909  *
1910  *      Here a correction term c will be computed to compensate
1911  *  the error in r when rounded to a floating-point number.
1912  *
1913  *   2. Approximating expm1(r) by a special rational function on
1914  *  the interval [0,0.34658]:
1915  *  Since
1916  *      r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ...
1917  *  we define R1(r*r) by
1918  *      r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r)
1919  *  That is,
1920  *      R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
1921  *         = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
1922  *         = 1 - r^2/60 + r^4/2520 - r^6/100800 + ...
1923  *      We use a special Reme algorithm on [0,0.347] to generate
1924  *   a polynomial of degree 5 in r*r to approximate R1. The
1925  *  maximum error of this polynomial approximation is bounded
1926  *  by 2**-61. In other words,
1927  *      R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
1928  *  where   Q1  =  -1.6666666666666567384E-2,
1929  *     Q2  =   3.9682539681370365873E-4,
1930  *     Q3  =  -9.9206344733435987357E-6,
1931  *     Q4  =   2.5051361420808517002E-7,
1932  *     Q5  =  -6.2843505682382617102E-9;
1933  *    z   =  r*r,
1934  *  with error bounded by
1935  *      |                  5           |     -61
1936  *      | 1.0+Q1*z+...+Q5*z   -  R1(z) | <= 2
1937  *      |                              |
1938  *
1939  *  expm1(r) = exp(r)-1 is then computed by the following
1940  *   specific way which minimize the accumulation rounding error:
1941  *             2     3
1942  *            r     r    [ 3 - (R1 + R1*r/2)  ]
1943  *        expm1(r) = r + --- + --- * [--------------------]
1944  *                  2     2    [ 6 - r*(3 - R1*r/2) ]
1945  *
1946  *  To compensate the error in the argument reduction, we use
1947  *    expm1(r+c) = expm1(r) + c + expm1(r)*c
1948  *         ~ expm1(r) + c + r*c
1949  *  Thus c+r*c will be added in as the correction terms for
1950  *  expm1(r+c). Now rearrange the term to avoid optimization
1951  *   screw up:
1952  *            (      2                                    2 )
1953  *            ({  ( r    [ R1 -  (3 - R1*r/2) ]  )  }    r  )
1954  *   expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )
1955  *                  ({  ( 2    [ 6 - r*(3 - R1*r/2) ]  )  }    2  )
1956  *                      (                                             )
1957  *
1958  *       = r - E
1959  *   3. Scale back to obtain expm1(x):
1960  *  From step 1, we have
1961  *     expm1(x) = either 2^k*[expm1(r)+1] - 1
1962  *        = or     2^k*[expm1(r) + (1-2^-k)]
1963  *   4. Implementation notes:
1964  *  (A). To save one multiplication, we scale the coefficient Qi
1965  *       to Qi*2^i, and replace z by (x^2)/2.
1966  *  (B). To achieve maximum accuracy, we compute expm1(x) by
1967  *    (i)   if x < -56*ln2, return -1.0, (raise inexact if x!=inf)
1968  *    (ii)  if k=0, return r-E
1969  *    (iii) if k=-1, return 0.5*(r-E)-0.5
1970  *        (iv)  if k=1 if r < -0.25, return 2*((r+0.5)- E)
1971  *                  else       return  1.0+2.0*(r-E);
1972  *    (v)   if (k<-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1)
1973  *    (vi)  if k <= 20, return 2^k((1-2^-k)-(E-r)), else
1974  *    (vii) return 2^k(1-((E+2^-k)-r))
1975  *
1976  * Special cases:
1977  *  expm1(INF) is INF, expm1(NaN) is NaN;
1978  *  expm1(-INF) is -1, and
1979  *  for finite argument, only expm1(0)=0 is exact.
1980  *
1981  * Accuracy:
1982  *  according to an error analysis, the error is always less than
1983  *  1 ulp (unit in the last place).
1984  *
1985  * Misc. info.
1986  *  For IEEE double
1987  *      if x >  7.09782712893383973096e+02 then expm1(x) overflow
1988  *
1989  * Constants:
1990  * The hexadecimal values are the intended ones for the following
1991  * constants. The decimal values may be used, provided that the
1992  * compiler will convert from decimal to binary accurately enough
1993  * to produce the hexadecimal values shown.
1994  */
expm1(double x)1995 double expm1(double x) {
1996   static const double
1997       one = 1.0,
1998       tiny = 1.0e-300,
1999       o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
2000       ln2_hi = 6.93147180369123816490e-01,      /* 0x3fe62e42, 0xfee00000 */
2001       ln2_lo = 1.90821492927058770002e-10,      /* 0x3dea39ef, 0x35793c76 */
2002       invln2 = 1.44269504088896338700e+00,      /* 0x3ff71547, 0x652b82fe */
2003       /* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs =
2004          x*x/2: */
2005       Q1 = -3.33333333333331316428e-02, /* BFA11111 111110F4 */
2006       Q2 = 1.58730158725481460165e-03,  /* 3F5A01A0 19FE5585 */
2007       Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
2008       Q4 = 4.00821782732936239552e-06,  /* 3ED0CFCA 86E65239 */
2009       Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
2010 
2011   static volatile double huge = 1.0e+300;
2012 
2013   double y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
2014   int32_t k, xsb;
2015   uint32_t hx;
2016 
2017   GET_HIGH_WORD(hx, x);
2018   xsb = hx & 0x80000000; /* sign bit of x */
2019   hx &= 0x7fffffff;      /* high word of |x| */
2020 
2021   /* filter out huge and non-finite argument */
2022   if (hx >= 0x4043687A) {   /* if |x|>=56*ln2 */
2023     if (hx >= 0x40862E42) { /* if |x|>=709.78... */
2024       if (hx >= 0x7ff00000) {
2025         uint32_t low;
2026         GET_LOW_WORD(low, x);
2027         if (((hx & 0xfffff) | low) != 0)
2028           return x + x; /* NaN */
2029         else
2030           return (xsb == 0) ? x : -1.0; /* exp(+-inf)={inf,-1} */
2031       }
2032       if (x > o_threshold) return huge * huge; /* overflow */
2033     }
2034     if (xsb != 0) {        /* x < -56*ln2, return -1.0 with inexact */
2035       if (x + tiny < 0.0)  /* raise inexact */
2036         return tiny - one; /* return -1 */
2037     }
2038   }
2039 
2040   /* argument reduction */
2041   if (hx > 0x3fd62e42) {   /* if  |x| > 0.5 ln2 */
2042     if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
2043       if (xsb == 0) {
2044         hi = x - ln2_hi;
2045         lo = ln2_lo;
2046         k = 1;
2047       } else {
2048         hi = x + ln2_hi;
2049         lo = -ln2_lo;
2050         k = -1;
2051       }
2052     } else {
2053       k = invln2 * x + ((xsb == 0) ? 0.5 : -0.5);
2054       t = k;
2055       hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
2056       lo = t * ln2_lo;
2057     }
2058     STRICT_ASSIGN(double, x, hi - lo);
2059     c = (hi - x) - lo;
2060   } else if (hx < 0x3c900000) { /* when |x|<2**-54, return x */
2061     t = huge + x;               /* return x with inexact flags when x!=0 */
2062     return x - (t - (huge + x));
2063   } else {
2064     k = 0;
2065   }
2066 
2067   /* x is now in primary range */
2068   hfx = 0.5 * x;
2069   hxs = x * hfx;
2070   r1 = one + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
2071   t = 3.0 - r1 * hfx;
2072   e = hxs * ((r1 - t) / (6.0 - x * t));
2073   if (k == 0) {
2074     return x - (x * e - hxs); /* c is 0 */
2075   } else {
2076     INSERT_WORDS(twopk, 0x3ff00000 + (k << 20), 0); /* 2^k */
2077     e = (x * (e - c) - c);
2078     e -= hxs;
2079     if (k == -1) return 0.5 * (x - e) - 0.5;
2080     if (k == 1) {
2081       if (x < -0.25)
2082         return -2.0 * (e - (x + 0.5));
2083       else
2084         return one + 2.0 * (x - e);
2085     }
2086     if (k <= -2 || k > 56) { /* suffice to return exp(x)-1 */
2087       y = one - (e - x);
2088       // TODO(mvstanton): is this replacement for the hex float
2089       // sufficient?
2090       // if (k == 1024) y = y*2.0*0x1p1023;
2091       if (k == 1024)
2092         y = y * 2.0 * 8.98846567431158e+307;
2093       else
2094         y = y * twopk;
2095       return y - one;
2096     }
2097     t = one;
2098     if (k < 20) {
2099       SET_HIGH_WORD(t, 0x3ff00000 - (0x200000 >> k)); /* t=1-2^-k */
2100       y = t - (e - x);
2101       y = y * twopk;
2102     } else {
2103       SET_HIGH_WORD(t, ((0x3ff - k) << 20)); /* 2^-k */
2104       y = x - (e + t);
2105       y += one;
2106       y = y * twopk;
2107     }
2108   }
2109   return y;
2110 }
2111 
cbrt(double x)2112 double cbrt(double x) {
2113   static const uint32_t
2114       B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
2115       B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
2116 
2117   /* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
2118   static const double P0 = 1.87595182427177009643, /* 0x3ffe03e6, 0x0f61e692 */
2119       P1 = -1.88497979543377169875,                /* 0xbffe28e0, 0x92f02420 */
2120       P2 = 1.621429720105354466140,                /* 0x3ff9f160, 0x4a49d6c2 */
2121       P3 = -0.758397934778766047437,               /* 0xbfe844cb, 0xbee751d9 */
2122       P4 = 0.145996192886612446982;                /* 0x3fc2b000, 0xd4e4edd7 */
2123 
2124   int32_t hx;
2125   union {
2126     double value;
2127     uint64_t bits;
2128   } u;
2129   double r, s, t = 0.0, w;
2130   uint32_t sign;
2131   uint32_t high, low;
2132 
2133   EXTRACT_WORDS(hx, low, x);
2134   sign = hx & 0x80000000; /* sign= sign(x) */
2135   hx ^= sign;
2136   if (hx >= 0x7ff00000) return (x + x); /* cbrt(NaN,INF) is itself */
2137 
2138   /*
2139    * Rough cbrt to 5 bits:
2140    *    cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
2141    * where e is integral and >= 0, m is real and in [0, 1), and "/" and
2142    * "%" are integer division and modulus with rounding towards minus
2143    * infinity.  The RHS is always >= the LHS and has a maximum relative
2144    * error of about 1 in 16.  Adding a bias of -0.03306235651 to the
2145    * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
2146    * floating point representation, for finite positive normal values,
2147    * ordinary integer divison of the value in bits magically gives
2148    * almost exactly the RHS of the above provided we first subtract the
2149    * exponent bias (1023 for doubles) and later add it back.  We do the
2150    * subtraction virtually to keep e >= 0 so that ordinary integer
2151    * division rounds towards minus infinity; this is also efficient.
2152    */
2153   if (hx < 0x00100000) {             /* zero or subnormal? */
2154     if ((hx | low) == 0) return (x); /* cbrt(0) is itself */
2155     SET_HIGH_WORD(t, 0x43500000);    /* set t= 2**54 */
2156     t *= x;
2157     GET_HIGH_WORD(high, t);
2158     INSERT_WORDS(t, sign | ((high & 0x7fffffff) / 3 + B2), 0);
2159   } else {
2160     INSERT_WORDS(t, sign | (hx / 3 + B1), 0);
2161   }
2162 
2163   /*
2164    * New cbrt to 23 bits:
2165    *    cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
2166    * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
2167    * to within 2**-23.5 when |r - 1| < 1/10.  The rough approximation
2168    * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
2169    * gives us bounds for r = t**3/x.
2170    *
2171    * Try to optimize for parallel evaluation as in k_tanf.c.
2172    */
2173   r = (t * t) * (t / x);
2174   t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));
2175 
2176   /*
2177    * Round t away from zero to 23 bits (sloppily except for ensuring that
2178    * the result is larger in magnitude than cbrt(x) but not much more than
2179    * 2 23-bit ulps larger).  With rounding towards zero, the error bound
2180    * would be ~5/6 instead of ~4/6.  With a maximum error of 2 23-bit ulps
2181    * in the rounded t, the infinite-precision error in the Newton
2182    * approximation barely affects third digit in the final error
2183    * 0.667; the error in the rounded t can be up to about 3 23-bit ulps
2184    * before the final error is larger than 0.667 ulps.
2185    */
2186   u.value = t;
2187   u.bits = (u.bits + 0x80000000) & 0xffffffffc0000000ULL;
2188   t = u.value;
2189 
2190   /* one step Newton iteration to 53 bits with error < 0.667 ulps */
2191   s = t * t;             /* t*t is exact */
2192   r = x / s;             /* error <= 0.5 ulps; |r| < |t| */
2193   w = t + t;             /* t+t is exact */
2194   r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */
2195   t = t + t * r;         /* error <= 0.5 + 0.5/3 + epsilon */
2196 
2197   return (t);
2198 }
2199 
2200 /* sin(x)
2201  * Return sine function of x.
2202  *
2203  * kernel function:
2204  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
2205  *      __kernel_cos            ... cose function on [-pi/4,pi/4]
2206  *      __ieee754_rem_pio2      ... argument reduction routine
2207  *
2208  * Method.
2209  *      Let S,C and T denote the sin, cos and tan respectively on
2210  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
2211  *      in [-pi/4 , +pi/4], and let n = k mod 4.
2212  *      We have
2213  *
2214  *          n        sin(x)      cos(x)        tan(x)
2215  *     ----------------------------------------------------------
2216  *          0          S           C             T
2217  *          1          C          -S            -1/T
2218  *          2         -S          -C             T
2219  *          3         -C           S            -1/T
2220  *     ----------------------------------------------------------
2221  *
2222  * Special cases:
2223  *      Let trig be any of sin, cos, or tan.
2224  *      trig(+-INF)  is NaN, with signals;
2225  *      trig(NaN)    is that NaN;
2226  *
2227  * Accuracy:
2228  *      TRIG(x) returns trig(x) nearly rounded
2229  */
sin(double x)2230 double sin(double x) {
2231   double y[2], z = 0.0;
2232   int32_t n, ix;
2233 
2234   /* High word of x. */
2235   GET_HIGH_WORD(ix, x);
2236 
2237   /* |x| ~< pi/4 */
2238   ix &= 0x7fffffff;
2239   if (ix <= 0x3fe921fb) {
2240     return __kernel_sin(x, z, 0);
2241   } else if (ix >= 0x7ff00000) {
2242     /* sin(Inf or NaN) is NaN */
2243     return x - x;
2244   } else {
2245     /* argument reduction needed */
2246     n = __ieee754_rem_pio2(x, y);
2247     switch (n & 3) {
2248       case 0:
2249         return __kernel_sin(y[0], y[1], 1);
2250       case 1:
2251         return __kernel_cos(y[0], y[1]);
2252       case 2:
2253         return -__kernel_sin(y[0], y[1], 1);
2254       default:
2255         return -__kernel_cos(y[0], y[1]);
2256     }
2257   }
2258 }
2259 
2260 /* tan(x)
2261  * Return tangent function of x.
2262  *
2263  * kernel function:
2264  *      __kernel_tan            ... tangent function on [-pi/4,pi/4]
2265  *      __ieee754_rem_pio2      ... argument reduction routine
2266  *
2267  * Method.
2268  *      Let S,C and T denote the sin, cos and tan respectively on
2269  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
2270  *      in [-pi/4 , +pi/4], and let n = k mod 4.
2271  *      We have
2272  *
2273  *          n        sin(x)      cos(x)        tan(x)
2274  *     ----------------------------------------------------------
2275  *          0          S           C             T
2276  *          1          C          -S            -1/T
2277  *          2         -S          -C             T
2278  *          3         -C           S            -1/T
2279  *     ----------------------------------------------------------
2280  *
2281  * Special cases:
2282  *      Let trig be any of sin, cos, or tan.
2283  *      trig(+-INF)  is NaN, with signals;
2284  *      trig(NaN)    is that NaN;
2285  *
2286  * Accuracy:
2287  *      TRIG(x) returns trig(x) nearly rounded
2288  */
tan(double x)2289 double tan(double x) {
2290   double y[2], z = 0.0;
2291   int32_t n, ix;
2292 
2293   /* High word of x. */
2294   GET_HIGH_WORD(ix, x);
2295 
2296   /* |x| ~< pi/4 */
2297   ix &= 0x7fffffff;
2298   if (ix <= 0x3fe921fb) {
2299     return __kernel_tan(x, z, 1);
2300   } else if (ix >= 0x7ff00000) {
2301     /* tan(Inf or NaN) is NaN */
2302     return x - x; /* NaN */
2303   } else {
2304     /* argument reduction needed */
2305     n = __ieee754_rem_pio2(x, y);
2306     /* 1 -> n even, -1 -> n odd */
2307     return __kernel_tan(y[0], y[1], 1 - ((n & 1) << 1));
2308   }
2309 }
2310 
2311 }  // namespace ieee754
2312 }  // namespace base
2313 }  // namespace v8
2314