1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).
2 //
3 // ====================================================
4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5 //
6 // Developed at SunSoft, a Sun Microsystems, Inc. business.
7 // Permission to use, copy, modify, and distribute this
8 // software is freely granted, provided that this notice
9 // is preserved.
10 // ====================================================
11 //
12 // The original source code covered by the above license above has been
13 // modified significantly by Google Inc.
14 // Copyright 2016 the V8 project authors. All rights reserved.
15
16 #include "src/base/ieee754.h"
17
18 #include <cmath>
19 #include <limits>
20
21 #include "src/base/build_config.h"
22 #include "src/base/macros.h"
23
24 namespace v8 {
25 namespace base {
26 namespace ieee754 {
27
28 namespace {
29
30 /* Disable "potential divide by 0" warning in Visual Studio compiler. */
31
32 #if V8_CC_MSVC
33
34 #pragma warning(disable : 4723)
35
36 #endif
37
38 /*
39 * The original fdlibm code used statements like:
40 * n0 = ((*(int*)&one)>>29)^1; * index of high word *
41 * ix0 = *(n0+(int*)&x); * high word of x *
42 * ix1 = *((1-n0)+(int*)&x); * low word of x *
43 * to dig two 32 bit words out of the 64 bit IEEE floating point
44 * value. That is non-ANSI, and, moreover, the gcc instruction
45 * scheduler gets it wrong. We instead use the following macros.
46 * Unlike the original code, we determine the endianness at compile
47 * time, not at run time; I don't see much benefit to selecting
48 * endianness at run time.
49 */
50
51 /*
52 * A union which permits us to convert between a double and two 32 bit
53 * ints.
54 * TODO(jkummerow): This is undefined behavior. Use bit_cast instead.
55 */
56
57 #if V8_TARGET_LITTLE_ENDIAN
58
59 typedef union {
60 double value;
61 struct {
62 uint32_t lsw;
63 uint32_t msw;
64 } parts;
65 struct {
66 uint64_t w;
67 } xparts;
68 } ieee_double_shape_type;
69
70 #else
71
72 typedef union {
73 double value;
74 struct {
75 uint32_t msw;
76 uint32_t lsw;
77 } parts;
78 struct {
79 uint64_t w;
80 } xparts;
81 } ieee_double_shape_type;
82
83 #endif
84
85 /* Get two 32 bit ints from a double. */
86
87 #define EXTRACT_WORDS(ix0, ix1, d) \
88 do { \
89 ieee_double_shape_type ew_u; \
90 ew_u.value = (d); \
91 (ix0) = ew_u.parts.msw; \
92 (ix1) = ew_u.parts.lsw; \
93 } while (0)
94
95 /* Get a 64-bit int from a double. */
96 #define EXTRACT_WORD64(ix, d) \
97 do { \
98 ieee_double_shape_type ew_u; \
99 ew_u.value = (d); \
100 (ix) = ew_u.xparts.w; \
101 } while (0)
102
103 /* Get the more significant 32 bit int from a double. */
104
105 #define GET_HIGH_WORD(i, d) \
106 do { \
107 ieee_double_shape_type gh_u; \
108 gh_u.value = (d); \
109 (i) = gh_u.parts.msw; \
110 } while (0)
111
112 /* Get the less significant 32 bit int from a double. */
113
114 #define GET_LOW_WORD(i, d) \
115 do { \
116 ieee_double_shape_type gl_u; \
117 gl_u.value = (d); \
118 (i) = gl_u.parts.lsw; \
119 } while (0)
120
121 /* Set a double from two 32 bit ints. */
122
123 #define INSERT_WORDS(d, ix0, ix1) \
124 do { \
125 ieee_double_shape_type iw_u; \
126 iw_u.parts.msw = (ix0); \
127 iw_u.parts.lsw = (ix1); \
128 (d) = iw_u.value; \
129 } while (0)
130
131 /* Set a double from a 64-bit int. */
132 #define INSERT_WORD64(d, ix) \
133 do { \
134 ieee_double_shape_type iw_u; \
135 iw_u.xparts.w = (ix); \
136 (d) = iw_u.value; \
137 } while (0)
138
139 /* Set the more significant 32 bits of a double from an int. */
140
141 #define SET_HIGH_WORD(d, v) \
142 do { \
143 ieee_double_shape_type sh_u; \
144 sh_u.value = (d); \
145 sh_u.parts.msw = (v); \
146 (d) = sh_u.value; \
147 } while (0)
148
149 /* Set the less significant 32 bits of a double from an int. */
150
151 #define SET_LOW_WORD(d, v) \
152 do { \
153 ieee_double_shape_type sl_u; \
154 sl_u.value = (d); \
155 sl_u.parts.lsw = (v); \
156 (d) = sl_u.value; \
157 } while (0)
158
159 /* Support macro. */
160
161 #define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval))
162
163 int32_t __ieee754_rem_pio2(double x, double* y) V8_WARN_UNUSED_RESULT;
164 double __kernel_cos(double x, double y) V8_WARN_UNUSED_RESULT;
165 int __kernel_rem_pio2(double* x, double* y, int e0, int nx, int prec,
166 const int32_t* ipio2) V8_WARN_UNUSED_RESULT;
167 double __kernel_sin(double x, double y, int iy) V8_WARN_UNUSED_RESULT;
168
169 /* __ieee754_rem_pio2(x,y)
170 *
171 * return the remainder of x rem pi/2 in y[0]+y[1]
172 * use __kernel_rem_pio2()
173 */
__ieee754_rem_pio2(double x,double * y)174 int32_t __ieee754_rem_pio2(double x, double *y) {
175 /*
176 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
177 */
178 static const int32_t two_over_pi[] = {
179 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C,
180 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649,
181 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 0xA73EE8, 0x8235F5, 0x2EBB44,
182 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, 0x845F8B,
183 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D,
184 0x367ECF, 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
185 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330,
186 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 0x91615E, 0xE61B08,
187 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA,
188 0x73A8C9, 0x60E27B, 0xC08C6B,
189 };
190
191 static const int32_t npio2_hw[] = {
192 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
193 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
194 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
195 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
196 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
197 0x404858EB, 0x404921FB,
198 };
199
200 /*
201 * invpio2: 53 bits of 2/pi
202 * pio2_1: first 33 bit of pi/2
203 * pio2_1t: pi/2 - pio2_1
204 * pio2_2: second 33 bit of pi/2
205 * pio2_2t: pi/2 - (pio2_1+pio2_2)
206 * pio2_3: third 33 bit of pi/2
207 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
208 */
209
210 static const double
211 zero = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
212 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
213 two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
214 invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
215 pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
216 pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
217 pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
218 pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
219 pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
220 pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
221
222 double z, w, t, r, fn;
223 double tx[3];
224 int32_t e0, i, j, nx, n, ix, hx;
225 uint32_t low;
226
227 z = 0;
228 GET_HIGH_WORD(hx, x); /* high word of x */
229 ix = hx & 0x7FFFFFFF;
230 if (ix <= 0x3FE921FB) { /* |x| ~<= pi/4 , no need for reduction */
231 y[0] = x;
232 y[1] = 0;
233 return 0;
234 }
235 if (ix < 0x4002D97C) { /* |x| < 3pi/4, special case with n=+-1 */
236 if (hx > 0) {
237 z = x - pio2_1;
238 if (ix != 0x3FF921FB) { /* 33+53 bit pi is good enough */
239 y[0] = z - pio2_1t;
240 y[1] = (z - y[0]) - pio2_1t;
241 } else { /* near pi/2, use 33+33+53 bit pi */
242 z -= pio2_2;
243 y[0] = z - pio2_2t;
244 y[1] = (z - y[0]) - pio2_2t;
245 }
246 return 1;
247 } else { /* negative x */
248 z = x + pio2_1;
249 if (ix != 0x3FF921FB) { /* 33+53 bit pi is good enough */
250 y[0] = z + pio2_1t;
251 y[1] = (z - y[0]) + pio2_1t;
252 } else { /* near pi/2, use 33+33+53 bit pi */
253 z += pio2_2;
254 y[0] = z + pio2_2t;
255 y[1] = (z - y[0]) + pio2_2t;
256 }
257 return -1;
258 }
259 }
260 if (ix <= 0x413921FB) { /* |x| ~<= 2^19*(pi/2), medium size */
261 t = fabs(x);
262 n = static_cast<int32_t>(t * invpio2 + half);
263 fn = static_cast<double>(n);
264 r = t - fn * pio2_1;
265 w = fn * pio2_1t; /* 1st round good to 85 bit */
266 if (n < 32 && ix != npio2_hw[n - 1]) {
267 y[0] = r - w; /* quick check no cancellation */
268 } else {
269 uint32_t high;
270 j = ix >> 20;
271 y[0] = r - w;
272 GET_HIGH_WORD(high, y[0]);
273 i = j - ((high >> 20) & 0x7FF);
274 if (i > 16) { /* 2nd iteration needed, good to 118 */
275 t = r;
276 w = fn * pio2_2;
277 r = t - w;
278 w = fn * pio2_2t - ((t - r) - w);
279 y[0] = r - w;
280 GET_HIGH_WORD(high, y[0]);
281 i = j - ((high >> 20) & 0x7FF);
282 if (i > 49) { /* 3rd iteration need, 151 bits acc */
283 t = r; /* will cover all possible cases */
284 w = fn * pio2_3;
285 r = t - w;
286 w = fn * pio2_3t - ((t - r) - w);
287 y[0] = r - w;
288 }
289 }
290 }
291 y[1] = (r - y[0]) - w;
292 if (hx < 0) {
293 y[0] = -y[0];
294 y[1] = -y[1];
295 return -n;
296 } else {
297 return n;
298 }
299 }
300 /*
301 * all other (large) arguments
302 */
303 if (ix >= 0x7FF00000) { /* x is inf or NaN */
304 y[0] = y[1] = x - x;
305 return 0;
306 }
307 /* set z = scalbn(|x|,ilogb(x)-23) */
308 GET_LOW_WORD(low, x);
309 SET_LOW_WORD(z, low);
310 e0 = (ix >> 20) - 1046; /* e0 = ilogb(z)-23; */
311 SET_HIGH_WORD(z, ix - static_cast<int32_t>(e0 << 20));
312 for (i = 0; i < 2; i++) {
313 tx[i] = static_cast<double>(static_cast<int32_t>(z));
314 z = (z - tx[i]) * two24;
315 }
316 tx[2] = z;
317 nx = 3;
318 while (tx[nx - 1] == zero) nx--; /* skip zero term */
319 n = __kernel_rem_pio2(tx, y, e0, nx, 2, two_over_pi);
320 if (hx < 0) {
321 y[0] = -y[0];
322 y[1] = -y[1];
323 return -n;
324 }
325 return n;
326 }
327
328 /* __kernel_cos( x, y )
329 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
330 * Input x is assumed to be bounded by ~pi/4 in magnitude.
331 * Input y is the tail of x.
332 *
333 * Algorithm
334 * 1. Since cos(-x) = cos(x), we need only to consider positive x.
335 * 2. if x < 2^-27 (hx<0x3E400000 0), return 1 with inexact if x!=0.
336 * 3. cos(x) is approximated by a polynomial of degree 14 on
337 * [0,pi/4]
338 * 4 14
339 * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
340 * where the remez error is
341 *
342 * | 2 4 6 8 10 12 14 | -58
343 * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
344 * | |
345 *
346 * 4 6 8 10 12 14
347 * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
348 * cos(x) = 1 - x*x/2 + r
349 * since cos(x+y) ~ cos(x) - sin(x)*y
350 * ~ cos(x) - x*y,
351 * a correction term is necessary in cos(x) and hence
352 * cos(x+y) = 1 - (x*x/2 - (r - x*y))
353 * For better accuracy when x > 0.3, let qx = |x|/4 with
354 * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
355 * Then
356 * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
357 * Note that 1-qx and (x*x/2-qx) is EXACT here, and the
358 * magnitude of the latter is at least a quarter of x*x/2,
359 * thus, reducing the rounding error in the subtraction.
360 */
__kernel_cos(double x,double y)361 V8_INLINE double __kernel_cos(double x, double y) {
362 static const double
363 one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
364 C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
365 C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
366 C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
367 C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
368 C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
369 C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
370
371 double a, iz, z, r, qx;
372 int32_t ix;
373 GET_HIGH_WORD(ix, x);
374 ix &= 0x7FFFFFFF; /* ix = |x|'s high word*/
375 if (ix < 0x3E400000) { /* if x < 2**27 */
376 if (static_cast<int>(x) == 0) return one; /* generate inexact */
377 }
378 z = x * x;
379 r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6)))));
380 if (ix < 0x3FD33333) { /* if |x| < 0.3 */
381 return one - (0.5 * z - (z * r - x * y));
382 } else {
383 if (ix > 0x3FE90000) { /* x > 0.78125 */
384 qx = 0.28125;
385 } else {
386 INSERT_WORDS(qx, ix - 0x00200000, 0); /* x/4 */
387 }
388 iz = 0.5 * z - qx;
389 a = one - qx;
390 return a - (iz - (z * r - x * y));
391 }
392 }
393
394 /* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
395 * double x[],y[]; int e0,nx,prec; int ipio2[];
396 *
397 * __kernel_rem_pio2 return the last three digits of N with
398 * y = x - N*pi/2
399 * so that |y| < pi/2.
400 *
401 * The method is to compute the integer (mod 8) and fraction parts of
402 * (2/pi)*x without doing the full multiplication. In general we
403 * skip the part of the product that are known to be a huge integer (
404 * more accurately, = 0 mod 8 ). Thus the number of operations are
405 * independent of the exponent of the input.
406 *
407 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
408 *
409 * Input parameters:
410 * x[] The input value (must be positive) is broken into nx
411 * pieces of 24-bit integers in double precision format.
412 * x[i] will be the i-th 24 bit of x. The scaled exponent
413 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
414 * match x's up to 24 bits.
415 *
416 * Example of breaking a double positive z into x[0]+x[1]+x[2]:
417 * e0 = ilogb(z)-23
418 * z = scalbn(z,-e0)
419 * for i = 0,1,2
420 * x[i] = floor(z)
421 * z = (z-x[i])*2**24
422 *
423 *
424 * y[] output result in an array of double precision numbers.
425 * The dimension of y[] is:
426 * 24-bit precision 1
427 * 53-bit precision 2
428 * 64-bit precision 2
429 * 113-bit precision 3
430 * The actual value is the sum of them. Thus for 113-bit
431 * precison, one may have to do something like:
432 *
433 * long double t,w,r_head, r_tail;
434 * t = (long double)y[2] + (long double)y[1];
435 * w = (long double)y[0];
436 * r_head = t+w;
437 * r_tail = w - (r_head - t);
438 *
439 * e0 The exponent of x[0]
440 *
441 * nx dimension of x[]
442 *
443 * prec an integer indicating the precision:
444 * 0 24 bits (single)
445 * 1 53 bits (double)
446 * 2 64 bits (extended)
447 * 3 113 bits (quad)
448 *
449 * ipio2[]
450 * integer array, contains the (24*i)-th to (24*i+23)-th
451 * bit of 2/pi after binary point. The corresponding
452 * floating value is
453 *
454 * ipio2[i] * 2^(-24(i+1)).
455 *
456 * External function:
457 * double scalbn(), floor();
458 *
459 *
460 * Here is the description of some local variables:
461 *
462 * jk jk+1 is the initial number of terms of ipio2[] needed
463 * in the computation. The recommended value is 2,3,4,
464 * 6 for single, double, extended,and quad.
465 *
466 * jz local integer variable indicating the number of
467 * terms of ipio2[] used.
468 *
469 * jx nx - 1
470 *
471 * jv index for pointing to the suitable ipio2[] for the
472 * computation. In general, we want
473 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
474 * is an integer. Thus
475 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv
476 * Hence jv = max(0,(e0-3)/24).
477 *
478 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
479 *
480 * q[] double array with integral value, representing the
481 * 24-bits chunk of the product of x and 2/pi.
482 *
483 * q0 the corresponding exponent of q[0]. Note that the
484 * exponent for q[i] would be q0-24*i.
485 *
486 * PIo2[] double precision array, obtained by cutting pi/2
487 * into 24 bits chunks.
488 *
489 * f[] ipio2[] in floating point
490 *
491 * iq[] integer array by breaking up q[] in 24-bits chunk.
492 *
493 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
494 *
495 * ih integer. If >0 it indicates q[] is >= 0.5, hence
496 * it also indicates the *sign* of the result.
497 *
498 */
__kernel_rem_pio2(double * x,double * y,int e0,int nx,int prec,const int32_t * ipio2)499 int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
500 const int32_t *ipio2) {
501 /* Constants:
502 * The hexadecimal values are the intended ones for the following
503 * constants. The decimal values may be used, provided that the
504 * compiler will convert from decimal to binary accurately enough
505 * to produce the hexadecimal values shown.
506 */
507 static const int init_jk[] = {2, 3, 4, 6}; /* initial value for jk */
508
509 static const double PIo2[] = {
510 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
511 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
512 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
513 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
514 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
515 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
516 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
517 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
518 };
519
520 static const double
521 zero = 0.0,
522 one = 1.0,
523 two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
524 twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
525
526 int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
527 double z, fw, f[20], fq[20], q[20];
528
529 /* initialize jk*/
530 jk = init_jk[prec];
531 jp = jk;
532
533 /* determine jx,jv,q0, note that 3>q0 */
534 jx = nx - 1;
535 jv = (e0 - 3) / 24;
536 if (jv < 0) jv = 0;
537 q0 = e0 - 24 * (jv + 1);
538
539 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
540 j = jv - jx;
541 m = jx + jk;
542 for (i = 0; i <= m; i++, j++) {
543 f[i] = (j < 0) ? zero : static_cast<double>(ipio2[j]);
544 }
545
546 /* compute q[0],q[1],...q[jk] */
547 for (i = 0; i <= jk; i++) {
548 for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
549 q[i] = fw;
550 }
551
552 jz = jk;
553 recompute:
554 /* distill q[] into iq[] reversingly */
555 for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
556 fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
557 iq[i] = static_cast<int32_t>(z - two24 * fw);
558 z = q[j - 1] + fw;
559 }
560
561 /* compute n */
562 z = scalbn(z, q0); /* actual value of z */
563 z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */
564 n = static_cast<int32_t>(z);
565 z -= static_cast<double>(n);
566 ih = 0;
567 if (q0 > 0) { /* need iq[jz-1] to determine n */
568 i = (iq[jz - 1] >> (24 - q0));
569 n += i;
570 iq[jz - 1] -= i << (24 - q0);
571 ih = iq[jz - 1] >> (23 - q0);
572 } else if (q0 == 0) {
573 ih = iq[jz - 1] >> 23;
574 } else if (z >= 0.5) {
575 ih = 2;
576 }
577
578 if (ih > 0) { /* q > 0.5 */
579 n += 1;
580 carry = 0;
581 for (i = 0; i < jz; i++) { /* compute 1-q */
582 j = iq[i];
583 if (carry == 0) {
584 if (j != 0) {
585 carry = 1;
586 iq[i] = 0x1000000 - j;
587 }
588 } else {
589 iq[i] = 0xFFFFFF - j;
590 }
591 }
592 if (q0 > 0) { /* rare case: chance is 1 in 12 */
593 switch (q0) {
594 case 1:
595 iq[jz - 1] &= 0x7FFFFF;
596 break;
597 case 2:
598 iq[jz - 1] &= 0x3FFFFF;
599 break;
600 }
601 }
602 if (ih == 2) {
603 z = one - z;
604 if (carry != 0) z -= scalbn(one, q0);
605 }
606 }
607
608 /* check if recomputation is needed */
609 if (z == zero) {
610 j = 0;
611 for (i = jz - 1; i >= jk; i--) j |= iq[i];
612 if (j == 0) { /* need recomputation */
613 for (k = 1; jk >= k && iq[jk - k] == 0; k++) {
614 /* k = no. of terms needed */
615 }
616
617 for (i = jz + 1; i <= jz + k; i++) { /* add q[jz+1] to q[jz+k] */
618 f[jx + i] = ipio2[jv + i];
619 for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
620 q[i] = fw;
621 }
622 jz += k;
623 goto recompute;
624 }
625 }
626
627 /* chop off zero terms */
628 if (z == 0.0) {
629 jz -= 1;
630 q0 -= 24;
631 while (iq[jz] == 0) {
632 jz--;
633 q0 -= 24;
634 }
635 } else { /* break z into 24-bit if necessary */
636 z = scalbn(z, -q0);
637 if (z >= two24) {
638 fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
639 iq[jz] = z - two24 * fw;
640 jz += 1;
641 q0 += 24;
642 iq[jz] = fw;
643 } else {
644 iq[jz] = z;
645 }
646 }
647
648 /* convert integer "bit" chunk to floating-point value */
649 fw = scalbn(one, q0);
650 for (i = jz; i >= 0; i--) {
651 q[i] = fw * iq[i];
652 fw *= twon24;
653 }
654
655 /* compute PIo2[0,...,jp]*q[jz,...,0] */
656 for (i = jz; i >= 0; i--) {
657 for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++) fw += PIo2[k] * q[i + k];
658 fq[jz - i] = fw;
659 }
660
661 /* compress fq[] into y[] */
662 switch (prec) {
663 case 0:
664 fw = 0.0;
665 for (i = jz; i >= 0; i--) fw += fq[i];
666 y[0] = (ih == 0) ? fw : -fw;
667 break;
668 case 1:
669 case 2:
670 fw = 0.0;
671 for (i = jz; i >= 0; i--) fw += fq[i];
672 y[0] = (ih == 0) ? fw : -fw;
673 fw = fq[0] - fw;
674 for (i = 1; i <= jz; i++) fw += fq[i];
675 y[1] = (ih == 0) ? fw : -fw;
676 break;
677 case 3: /* painful */
678 for (i = jz; i > 0; i--) {
679 fw = fq[i - 1] + fq[i];
680 fq[i] += fq[i - 1] - fw;
681 fq[i - 1] = fw;
682 }
683 for (i = jz; i > 1; i--) {
684 fw = fq[i - 1] + fq[i];
685 fq[i] += fq[i - 1] - fw;
686 fq[i - 1] = fw;
687 }
688 for (fw = 0.0, i = jz; i >= 2; i--) fw += fq[i];
689 if (ih == 0) {
690 y[0] = fq[0];
691 y[1] = fq[1];
692 y[2] = fw;
693 } else {
694 y[0] = -fq[0];
695 y[1] = -fq[1];
696 y[2] = -fw;
697 }
698 }
699 return n & 7;
700 }
701
702 /* __kernel_sin( x, y, iy)
703 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
704 * Input x is assumed to be bounded by ~pi/4 in magnitude.
705 * Input y is the tail of x.
706 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
707 *
708 * Algorithm
709 * 1. Since sin(-x) = -sin(x), we need only to consider positive x.
710 * 2. if x < 2^-27 (hx<0x3E400000 0), return x with inexact if x!=0.
711 * 3. sin(x) is approximated by a polynomial of degree 13 on
712 * [0,pi/4]
713 * 3 13
714 * sin(x) ~ x + S1*x + ... + S6*x
715 * where
716 *
717 * |sin(x) 2 4 6 8 10 12 | -58
718 * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
719 * | x |
720 *
721 * 4. sin(x+y) = sin(x) + sin'(x')*y
722 * ~ sin(x) + (1-x*x/2)*y
723 * For better accuracy, let
724 * 3 2 2 2 2
725 * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
726 * then 3 2
727 * sin(x) = x + (S1*x + (x *(r-y/2)+y))
728 */
__kernel_sin(double x,double y,int iy)729 V8_INLINE double __kernel_sin(double x, double y, int iy) {
730 static const double
731 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
732 S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
733 S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
734 S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
735 S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
736 S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
737 S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
738
739 double z, r, v;
740 int32_t ix;
741 GET_HIGH_WORD(ix, x);
742 ix &= 0x7FFFFFFF; /* high word of x */
743 if (ix < 0x3E400000) { /* |x| < 2**-27 */
744 if (static_cast<int>(x) == 0) return x;
745 } /* generate inexact */
746 z = x * x;
747 v = z * x;
748 r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6)));
749 if (iy == 0) {
750 return x + v * (S1 + z * r);
751 } else {
752 return x - ((z * (half * y - v * r) - y) - v * S1);
753 }
754 }
755
756 /* __kernel_tan( x, y, k )
757 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
758 * Input x is assumed to be bounded by ~pi/4 in magnitude.
759 * Input y is the tail of x.
760 * Input k indicates whether tan (if k=1) or
761 * -1/tan (if k= -1) is returned.
762 *
763 * Algorithm
764 * 1. Since tan(-x) = -tan(x), we need only to consider positive x.
765 * 2. if x < 2^-28 (hx<0x3E300000 0), return x with inexact if x!=0.
766 * 3. tan(x) is approximated by a odd polynomial of degree 27 on
767 * [0,0.67434]
768 * 3 27
769 * tan(x) ~ x + T1*x + ... + T13*x
770 * where
771 *
772 * |tan(x) 2 4 26 | -59.2
773 * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2
774 * | x |
775 *
776 * Note: tan(x+y) = tan(x) + tan'(x)*y
777 * ~ tan(x) + (1+x*x)*y
778 * Therefore, for better accuracy in computing tan(x+y), let
779 * 3 2 2 2 2
780 * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
781 * then
782 * 3 2
783 * tan(x+y) = x + (T1*x + (x *(r+y)+y))
784 *
785 * 4. For x in [0.67434,pi/4], let y = pi/4 - x, then
786 * tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
787 * = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
788 */
__kernel_tan(double x,double y,int iy)789 double __kernel_tan(double x, double y, int iy) {
790 static const double xxx[] = {
791 3.33333333333334091986e-01, /* 3FD55555, 55555563 */
792 1.33333333333201242699e-01, /* 3FC11111, 1110FE7A */
793 5.39682539762260521377e-02, /* 3FABA1BA, 1BB341FE */
794 2.18694882948595424599e-02, /* 3F9664F4, 8406D637 */
795 8.86323982359930005737e-03, /* 3F8226E3, E96E8493 */
796 3.59207910759131235356e-03, /* 3F6D6D22, C9560328 */
797 1.45620945432529025516e-03, /* 3F57DBC8, FEE08315 */
798 5.88041240820264096874e-04, /* 3F4344D8, F2F26501 */
799 2.46463134818469906812e-04, /* 3F3026F7, 1A8D1068 */
800 7.81794442939557092300e-05, /* 3F147E88, A03792A6 */
801 7.14072491382608190305e-05, /* 3F12B80F, 32F0A7E9 */
802 -1.85586374855275456654e-05, /* BEF375CB, DB605373 */
803 2.59073051863633712884e-05, /* 3EFB2A70, 74BF7AD4 */
804 /* one */ 1.00000000000000000000e+00, /* 3FF00000, 00000000 */
805 /* pio4 */ 7.85398163397448278999e-01, /* 3FE921FB, 54442D18 */
806 /* pio4lo */ 3.06161699786838301793e-17 /* 3C81A626, 33145C07 */
807 };
808 #define one xxx[13]
809 #define pio4 xxx[14]
810 #define pio4lo xxx[15]
811 #define T xxx
812
813 double z, r, v, w, s;
814 int32_t ix, hx;
815
816 GET_HIGH_WORD(hx, x); /* high word of x */
817 ix = hx & 0x7FFFFFFF; /* high word of |x| */
818 if (ix < 0x3E300000) { /* x < 2**-28 */
819 if (static_cast<int>(x) == 0) { /* generate inexact */
820 uint32_t low;
821 GET_LOW_WORD(low, x);
822 if (((ix | low) | (iy + 1)) == 0) {
823 return one / fabs(x);
824 } else {
825 if (iy == 1) {
826 return x;
827 } else { /* compute -1 / (x+y) carefully */
828 double a, t;
829
830 z = w = x + y;
831 SET_LOW_WORD(z, 0);
832 v = y - (z - x);
833 t = a = -one / w;
834 SET_LOW_WORD(t, 0);
835 s = one + t * z;
836 return t + a * (s + t * v);
837 }
838 }
839 }
840 }
841 if (ix >= 0x3FE59428) { /* |x| >= 0.6744 */
842 if (hx < 0) {
843 x = -x;
844 y = -y;
845 }
846 z = pio4 - x;
847 w = pio4lo - y;
848 x = z + w;
849 y = 0.0;
850 }
851 z = x * x;
852 w = z * z;
853 /*
854 * Break x^5*(T[1]+x^2*T[2]+...) into
855 * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
856 * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
857 */
858 r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11]))));
859 v = z *
860 (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12])))));
861 s = z * x;
862 r = y + z * (s * (r + v) + y);
863 r += T[0] * s;
864 w = x + r;
865 if (ix >= 0x3FE59428) {
866 v = iy;
867 return (1 - ((hx >> 30) & 2)) * (v - 2.0 * (x - (w * w / (w + v) - r)));
868 }
869 if (iy == 1) {
870 return w;
871 } else {
872 /*
873 * if allow error up to 2 ulp, simply return
874 * -1.0 / (x+r) here
875 */
876 /* compute -1.0 / (x+r) accurately */
877 double a, t;
878 z = w;
879 SET_LOW_WORD(z, 0);
880 v = r - (z - x); /* z+v = r+x */
881 t = a = -1.0 / w; /* a = -1.0/w */
882 SET_LOW_WORD(t, 0);
883 s = 1.0 + t * z;
884 return t + a * (s + t * v);
885 }
886
887 #undef one
888 #undef pio4
889 #undef pio4lo
890 #undef T
891 }
892
893 } // namespace
894
895 /* acos(x)
896 * Method :
897 * acos(x) = pi/2 - asin(x)
898 * acos(-x) = pi/2 + asin(x)
899 * For |x|<=0.5
900 * acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c)
901 * For x>0.5
902 * acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2)))
903 * = 2asin(sqrt((1-x)/2))
904 * = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z)
905 * = 2f + (2c + 2s*z*R(z))
906 * where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term
907 * for f so that f+c ~ sqrt(z).
908 * For x<-0.5
909 * acos(x) = pi - 2asin(sqrt((1-|x|)/2))
910 * = pi - 0.5*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z)
911 *
912 * Special cases:
913 * if x is NaN, return x itself;
914 * if |x|>1, return NaN with invalid signal.
915 *
916 * Function needed: sqrt
917 */
acos(double x)918 double acos(double x) {
919 static const double
920 one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
921 pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
922 pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
923 pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
924 pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
925 pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
926 pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
927 pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
928 pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
929 pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */
930 qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
931 qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
932 qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
933 qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
934
935 double z, p, q, r, w, s, c, df;
936 int32_t hx, ix;
937 GET_HIGH_WORD(hx, x);
938 ix = hx & 0x7FFFFFFF;
939 if (ix >= 0x3FF00000) { /* |x| >= 1 */
940 uint32_t lx;
941 GET_LOW_WORD(lx, x);
942 if (((ix - 0x3FF00000) | lx) == 0) { /* |x|==1 */
943 if (hx > 0)
944 return 0.0; /* acos(1) = 0 */
945 else
946 return pi + 2.0 * pio2_lo; /* acos(-1)= pi */
947 }
948 return (x - x) / (x - x); /* acos(|x|>1) is NaN */
949 }
950 if (ix < 0x3FE00000) { /* |x| < 0.5 */
951 if (ix <= 0x3C600000) return pio2_hi + pio2_lo; /*if|x|<2**-57*/
952 z = x * x;
953 p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
954 q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
955 r = p / q;
956 return pio2_hi - (x - (pio2_lo - x * r));
957 } else if (hx < 0) { /* x < -0.5 */
958 z = (one + x) * 0.5;
959 p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
960 q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
961 s = sqrt(z);
962 r = p / q;
963 w = r * s - pio2_lo;
964 return pi - 2.0 * (s + w);
965 } else { /* x > 0.5 */
966 z = (one - x) * 0.5;
967 s = sqrt(z);
968 df = s;
969 SET_LOW_WORD(df, 0);
970 c = (z - df * df) / (s + df);
971 p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
972 q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
973 r = p / q;
974 w = r * s + c;
975 return 2.0 * (df + w);
976 }
977 }
978
979 /* acosh(x)
980 * Method :
981 * Based on
982 * acosh(x) = log [ x + sqrt(x*x-1) ]
983 * we have
984 * acosh(x) := log(x)+ln2, if x is large; else
985 * acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x>2; else
986 * acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t=x-1.
987 *
988 * Special cases:
989 * acosh(x) is NaN with signal if x<1.
990 * acosh(NaN) is NaN without signal.
991 */
acosh(double x)992 double acosh(double x) {
993 static const double
994 one = 1.0,
995 ln2 = 6.93147180559945286227e-01; /* 0x3FE62E42, 0xFEFA39EF */
996 double t;
997 int32_t hx;
998 uint32_t lx;
999 EXTRACT_WORDS(hx, lx, x);
1000 if (hx < 0x3FF00000) { /* x < 1 */
1001 return (x - x) / (x - x);
1002 } else if (hx >= 0x41B00000) { /* x > 2**28 */
1003 if (hx >= 0x7FF00000) { /* x is inf of NaN */
1004 return x + x;
1005 } else {
1006 return log(x) + ln2; /* acosh(huge)=log(2x) */
1007 }
1008 } else if (((hx - 0x3FF00000) | lx) == 0) {
1009 return 0.0; /* acosh(1) = 0 */
1010 } else if (hx > 0x40000000) { /* 2**28 > x > 2 */
1011 t = x * x;
1012 return log(2.0 * x - one / (x + sqrt(t - one)));
1013 } else { /* 1<x<2 */
1014 t = x - one;
1015 return log1p(t + sqrt(2.0 * t + t * t));
1016 }
1017 }
1018
1019 /* asin(x)
1020 * Method :
1021 * Since asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ...
1022 * we approximate asin(x) on [0,0.5] by
1023 * asin(x) = x + x*x^2*R(x^2)
1024 * where
1025 * R(x^2) is a rational approximation of (asin(x)-x)/x^3
1026 * and its remez error is bounded by
1027 * |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75)
1028 *
1029 * For x in [0.5,1]
1030 * asin(x) = pi/2-2*asin(sqrt((1-x)/2))
1031 * Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2;
1032 * then for x>0.98
1033 * asin(x) = pi/2 - 2*(s+s*z*R(z))
1034 * = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo)
1035 * For x<=0.98, let pio4_hi = pio2_hi/2, then
1036 * f = hi part of s;
1037 * c = sqrt(z) - f = (z-f*f)/(s+f) ...f+c=sqrt(z)
1038 * and
1039 * asin(x) = pi/2 - 2*(s+s*z*R(z))
1040 * = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo)
1041 * = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c))
1042 *
1043 * Special cases:
1044 * if x is NaN, return x itself;
1045 * if |x|>1, return NaN with invalid signal.
1046 */
asin(double x)1047 double asin(double x) {
1048 static const double
1049 one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
1050 huge = 1.000e+300,
1051 pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
1052 pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
1053 pio4_hi = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
1054 /* coefficient for R(x^2) */
1055 pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
1056 pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
1057 pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
1058 pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
1059 pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
1060 pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */
1061 qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
1062 qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
1063 qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
1064 qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
1065
1066 double t, w, p, q, c, r, s;
1067 int32_t hx, ix;
1068
1069 t = 0;
1070 GET_HIGH_WORD(hx, x);
1071 ix = hx & 0x7FFFFFFF;
1072 if (ix >= 0x3FF00000) { /* |x|>= 1 */
1073 uint32_t lx;
1074 GET_LOW_WORD(lx, x);
1075 if (((ix - 0x3FF00000) | lx) == 0) /* asin(1)=+-pi/2 with inexact */
1076 return x * pio2_hi + x * pio2_lo;
1077 return (x - x) / (x - x); /* asin(|x|>1) is NaN */
1078 } else if (ix < 0x3FE00000) { /* |x|<0.5 */
1079 if (ix < 0x3E400000) { /* if |x| < 2**-27 */
1080 if (huge + x > one) return x; /* return x with inexact if x!=0*/
1081 } else {
1082 t = x * x;
1083 }
1084 p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + t * (pS4 + t * pS5)))));
1085 q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4)));
1086 w = p / q;
1087 return x + x * w;
1088 }
1089 /* 1> |x|>= 0.5 */
1090 w = one - fabs(x);
1091 t = w * 0.5;
1092 p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + t * (pS4 + t * pS5)))));
1093 q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4)));
1094 s = sqrt(t);
1095 if (ix >= 0x3FEF3333) { /* if |x| > 0.975 */
1096 w = p / q;
1097 t = pio2_hi - (2.0 * (s + s * w) - pio2_lo);
1098 } else {
1099 w = s;
1100 SET_LOW_WORD(w, 0);
1101 c = (t - w * w) / (s + w);
1102 r = p / q;
1103 p = 2.0 * s * r - (pio2_lo - 2.0 * c);
1104 q = pio4_hi - 2.0 * w;
1105 t = pio4_hi - (p - q);
1106 }
1107 if (hx > 0)
1108 return t;
1109 else
1110 return -t;
1111 }
1112 /* asinh(x)
1113 * Method :
1114 * Based on
1115 * asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ]
1116 * we have
1117 * asinh(x) := x if 1+x*x=1,
1118 * := sign(x)*(log(x)+ln2)) for large |x|, else
1119 * := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x|>2, else
1120 * := sign(x)*log1p(|x| + x^2/(1 + sqrt(1+x^2)))
1121 */
asinh(double x)1122 double asinh(double x) {
1123 static const double
1124 one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
1125 ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
1126 huge = 1.00000000000000000000e+300;
1127
1128 double t, w;
1129 int32_t hx, ix;
1130 GET_HIGH_WORD(hx, x);
1131 ix = hx & 0x7FFFFFFF;
1132 if (ix >= 0x7FF00000) return x + x; /* x is inf or NaN */
1133 if (ix < 0x3E300000) { /* |x|<2**-28 */
1134 if (huge + x > one) return x; /* return x inexact except 0 */
1135 }
1136 if (ix > 0x41B00000) { /* |x| > 2**28 */
1137 w = log(fabs(x)) + ln2;
1138 } else if (ix > 0x40000000) { /* 2**28 > |x| > 2.0 */
1139 t = fabs(x);
1140 w = log(2.0 * t + one / (sqrt(x * x + one) + t));
1141 } else { /* 2.0 > |x| > 2**-28 */
1142 t = x * x;
1143 w = log1p(fabs(x) + t / (one + sqrt(one + t)));
1144 }
1145 if (hx > 0) {
1146 return w;
1147 } else {
1148 return -w;
1149 }
1150 }
1151
1152 /* atan(x)
1153 * Method
1154 * 1. Reduce x to positive by atan(x) = -atan(-x).
1155 * 2. According to the integer k=4t+0.25 chopped, t=x, the argument
1156 * is further reduced to one of the following intervals and the
1157 * arctangent of t is evaluated by the corresponding formula:
1158 *
1159 * [0,7/16] atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
1160 * [7/16,11/16] atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) )
1161 * [11/16.19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) )
1162 * [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) )
1163 * [39/16,INF] atan(x) = atan(INF) + atan( -1/t )
1164 *
1165 * Constants:
1166 * The hexadecimal values are the intended ones for the following
1167 * constants. The decimal values may be used, provided that the
1168 * compiler will convert from decimal to binary accurately enough
1169 * to produce the hexadecimal values shown.
1170 */
atan(double x)1171 double atan(double x) {
1172 static const double atanhi[] = {
1173 4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */
1174 7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */
1175 9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */
1176 1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */
1177 };
1178
1179 static const double atanlo[] = {
1180 2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */
1181 3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */
1182 1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */
1183 6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */
1184 };
1185
1186 static const double aT[] = {
1187 3.33333333333329318027e-01, /* 0x3FD55555, 0x5555550D */
1188 -1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */
1189 1.42857142725034663711e-01, /* 0x3FC24924, 0x920083FF */
1190 -1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */
1191 9.09088713343650656196e-02, /* 0x3FB745CD, 0xC54C206E */
1192 -7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */
1193 6.66107313738753120669e-02, /* 0x3FB10D66, 0xA0D03D51 */
1194 -5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */
1195 4.97687799461593236017e-02, /* 0x3FA97B4B, 0x24760DEB */
1196 -3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */
1197 1.62858201153657823623e-02, /* 0x3F90AD3A, 0xE322DA11 */
1198 };
1199
1200 static const double one = 1.0, huge = 1.0e300;
1201
1202 double w, s1, s2, z;
1203 int32_t ix, hx, id;
1204
1205 GET_HIGH_WORD(hx, x);
1206 ix = hx & 0x7FFFFFFF;
1207 if (ix >= 0x44100000) { /* if |x| >= 2^66 */
1208 uint32_t low;
1209 GET_LOW_WORD(low, x);
1210 if (ix > 0x7FF00000 || (ix == 0x7FF00000 && (low != 0)))
1211 return x + x; /* NaN */
1212 if (hx > 0)
1213 return atanhi[3] + *(volatile double *)&atanlo[3];
1214 else
1215 return -atanhi[3] - *(volatile double *)&atanlo[3];
1216 }
1217 if (ix < 0x3FDC0000) { /* |x| < 0.4375 */
1218 if (ix < 0x3E400000) { /* |x| < 2^-27 */
1219 if (huge + x > one) return x; /* raise inexact */
1220 }
1221 id = -1;
1222 } else {
1223 x = fabs(x);
1224 if (ix < 0x3FF30000) { /* |x| < 1.1875 */
1225 if (ix < 0x3FE60000) { /* 7/16 <=|x|<11/16 */
1226 id = 0;
1227 x = (2.0 * x - one) / (2.0 + x);
1228 } else { /* 11/16<=|x|< 19/16 */
1229 id = 1;
1230 x = (x - one) / (x + one);
1231 }
1232 } else {
1233 if (ix < 0x40038000) { /* |x| < 2.4375 */
1234 id = 2;
1235 x = (x - 1.5) / (one + 1.5 * x);
1236 } else { /* 2.4375 <= |x| < 2^66 */
1237 id = 3;
1238 x = -1.0 / x;
1239 }
1240 }
1241 }
1242 /* end of argument reduction */
1243 z = x * x;
1244 w = z * z;
1245 /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
1246 s1 = z * (aT[0] +
1247 w * (aT[2] + w * (aT[4] + w * (aT[6] + w * (aT[8] + w * aT[10])))));
1248 s2 = w * (aT[1] + w * (aT[3] + w * (aT[5] + w * (aT[7] + w * aT[9]))));
1249 if (id < 0) {
1250 return x - x * (s1 + s2);
1251 } else {
1252 z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x);
1253 return (hx < 0) ? -z : z;
1254 }
1255 }
1256
1257 /* atan2(y,x)
1258 * Method :
1259 * 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
1260 * 2. Reduce x to positive by (if x and y are unexceptional):
1261 * ARG (x+iy) = arctan(y/x) ... if x > 0,
1262 * ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0,
1263 *
1264 * Special cases:
1265 *
1266 * ATAN2((anything), NaN ) is NaN;
1267 * ATAN2(NAN , (anything) ) is NaN;
1268 * ATAN2(+-0, +(anything but NaN)) is +-0 ;
1269 * ATAN2(+-0, -(anything but NaN)) is +-pi ;
1270 * ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2;
1271 * ATAN2(+-(anything but INF and NaN), +INF) is +-0 ;
1272 * ATAN2(+-(anything but INF and NaN), -INF) is +-pi;
1273 * ATAN2(+-INF,+INF ) is +-pi/4 ;
1274 * ATAN2(+-INF,-INF ) is +-3pi/4;
1275 * ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
1276 *
1277 * Constants:
1278 * The hexadecimal values are the intended ones for the following
1279 * constants. The decimal values may be used, provided that the
1280 * compiler will convert from decimal to binary accurately enough
1281 * to produce the hexadecimal values shown.
1282 */
atan2(double y,double x)1283 double atan2(double y, double x) {
1284 static volatile double tiny = 1.0e-300;
1285 static const double
1286 zero = 0.0,
1287 pi_o_4 = 7.8539816339744827900E-01, /* 0x3FE921FB, 0x54442D18 */
1288 pi_o_2 = 1.5707963267948965580E+00, /* 0x3FF921FB, 0x54442D18 */
1289 pi = 3.1415926535897931160E+00; /* 0x400921FB, 0x54442D18 */
1290 static volatile double pi_lo =
1291 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
1292
1293 double z;
1294 int32_t k, m, hx, hy, ix, iy;
1295 uint32_t lx, ly;
1296
1297 EXTRACT_WORDS(hx, lx, x);
1298 ix = hx & 0x7FFFFFFF;
1299 EXTRACT_WORDS(hy, ly, y);
1300 iy = hy & 0x7FFFFFFF;
1301 if (((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x7FF00000) ||
1302 ((iy | ((ly | -static_cast<int32_t>(ly)) >> 31)) > 0x7FF00000)) {
1303 return x + y; /* x or y is NaN */
1304 }
1305 if (((hx - 0x3FF00000) | lx) == 0) return atan(y); /* x=1.0 */
1306 m = ((hy >> 31) & 1) | ((hx >> 30) & 2); /* 2*sign(x)+sign(y) */
1307
1308 /* when y = 0 */
1309 if ((iy | ly) == 0) {
1310 switch (m) {
1311 case 0:
1312 case 1:
1313 return y; /* atan(+-0,+anything)=+-0 */
1314 case 2:
1315 return pi + tiny; /* atan(+0,-anything) = pi */
1316 case 3:
1317 return -pi - tiny; /* atan(-0,-anything) =-pi */
1318 }
1319 }
1320 /* when x = 0 */
1321 if ((ix | lx) == 0) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
1322
1323 /* when x is INF */
1324 if (ix == 0x7FF00000) {
1325 if (iy == 0x7FF00000) {
1326 switch (m) {
1327 case 0:
1328 return pi_o_4 + tiny; /* atan(+INF,+INF) */
1329 case 1:
1330 return -pi_o_4 - tiny; /* atan(-INF,+INF) */
1331 case 2:
1332 return 3.0 * pi_o_4 + tiny; /*atan(+INF,-INF)*/
1333 case 3:
1334 return -3.0 * pi_o_4 - tiny; /*atan(-INF,-INF)*/
1335 }
1336 } else {
1337 switch (m) {
1338 case 0:
1339 return zero; /* atan(+...,+INF) */
1340 case 1:
1341 return -zero; /* atan(-...,+INF) */
1342 case 2:
1343 return pi + tiny; /* atan(+...,-INF) */
1344 case 3:
1345 return -pi - tiny; /* atan(-...,-INF) */
1346 }
1347 }
1348 }
1349 /* when y is INF */
1350 if (iy == 0x7FF00000) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
1351
1352 /* compute y/x */
1353 k = (iy - ix) >> 20;
1354 if (k > 60) { /* |y/x| > 2**60 */
1355 z = pi_o_2 + 0.5 * pi_lo;
1356 m &= 1;
1357 } else if (hx < 0 && k < -60) {
1358 z = 0.0; /* 0 > |y|/x > -2**-60 */
1359 } else {
1360 z = atan(fabs(y / x)); /* safe to do y/x */
1361 }
1362 switch (m) {
1363 case 0:
1364 return z; /* atan(+,+) */
1365 case 1:
1366 return -z; /* atan(-,+) */
1367 case 2:
1368 return pi - (z - pi_lo); /* atan(+,-) */
1369 default: /* case 3 */
1370 return (z - pi_lo) - pi; /* atan(-,-) */
1371 }
1372 }
1373
1374 /* cos(x)
1375 * Return cosine function of x.
1376 *
1377 * kernel function:
1378 * __kernel_sin ... sine function on [-pi/4,pi/4]
1379 * __kernel_cos ... cosine function on [-pi/4,pi/4]
1380 * __ieee754_rem_pio2 ... argument reduction routine
1381 *
1382 * Method.
1383 * Let S,C and T denote the sin, cos and tan respectively on
1384 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
1385 * in [-pi/4 , +pi/4], and let n = k mod 4.
1386 * We have
1387 *
1388 * n sin(x) cos(x) tan(x)
1389 * ----------------------------------------------------------
1390 * 0 S C T
1391 * 1 C -S -1/T
1392 * 2 -S -C T
1393 * 3 -C S -1/T
1394 * ----------------------------------------------------------
1395 *
1396 * Special cases:
1397 * Let trig be any of sin, cos, or tan.
1398 * trig(+-INF) is NaN, with signals;
1399 * trig(NaN) is that NaN;
1400 *
1401 * Accuracy:
1402 * TRIG(x) returns trig(x) nearly rounded
1403 */
cos(double x)1404 double cos(double x) {
1405 double y[2], z = 0.0;
1406 int32_t n, ix;
1407
1408 /* High word of x. */
1409 GET_HIGH_WORD(ix, x);
1410
1411 /* |x| ~< pi/4 */
1412 ix &= 0x7FFFFFFF;
1413 if (ix <= 0x3FE921FB) {
1414 return __kernel_cos(x, z);
1415 } else if (ix >= 0x7FF00000) {
1416 /* cos(Inf or NaN) is NaN */
1417 return x - x;
1418 } else {
1419 /* argument reduction needed */
1420 n = __ieee754_rem_pio2(x, y);
1421 switch (n & 3) {
1422 case 0:
1423 return __kernel_cos(y[0], y[1]);
1424 case 1:
1425 return -__kernel_sin(y[0], y[1], 1);
1426 case 2:
1427 return -__kernel_cos(y[0], y[1]);
1428 default:
1429 return __kernel_sin(y[0], y[1], 1);
1430 }
1431 }
1432 }
1433
1434 /* exp(x)
1435 * Returns the exponential of x.
1436 *
1437 * Method
1438 * 1. Argument reduction:
1439 * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
1440 * Given x, find r and integer k such that
1441 *
1442 * x = k*ln2 + r, |r| <= 0.5*ln2.
1443 *
1444 * Here r will be represented as r = hi-lo for better
1445 * accuracy.
1446 *
1447 * 2. Approximation of exp(r) by a special rational function on
1448 * the interval [0,0.34658]:
1449 * Write
1450 * R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
1451 * We use a special Remes algorithm on [0,0.34658] to generate
1452 * a polynomial of degree 5 to approximate R. The maximum error
1453 * of this polynomial approximation is bounded by 2**-59. In
1454 * other words,
1455 * R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
1456 * (where z=r*r, and the values of P1 to P5 are listed below)
1457 * and
1458 * | 5 | -59
1459 * | 2.0+P1*z+...+P5*z - R(z) | <= 2
1460 * | |
1461 * The computation of exp(r) thus becomes
1462 * 2*r
1463 * exp(r) = 1 + -------
1464 * R - r
1465 * r*R1(r)
1466 * = 1 + r + ----------- (for better accuracy)
1467 * 2 - R1(r)
1468 * where
1469 * 2 4 10
1470 * R1(r) = r - (P1*r + P2*r + ... + P5*r ).
1471 *
1472 * 3. Scale back to obtain exp(x):
1473 * From step 1, we have
1474 * exp(x) = 2^k * exp(r)
1475 *
1476 * Special cases:
1477 * exp(INF) is INF, exp(NaN) is NaN;
1478 * exp(-INF) is 0, and
1479 * for finite argument, only exp(0)=1 is exact.
1480 *
1481 * Accuracy:
1482 * according to an error analysis, the error is always less than
1483 * 1 ulp (unit in the last place).
1484 *
1485 * Misc. info.
1486 * For IEEE double
1487 * if x > 7.09782712893383973096e+02 then exp(x) overflow
1488 * if x < -7.45133219101941108420e+02 then exp(x) underflow
1489 *
1490 * Constants:
1491 * The hexadecimal values are the intended ones for the following
1492 * constants. The decimal values may be used, provided that the
1493 * compiler will convert from decimal to binary accurately enough
1494 * to produce the hexadecimal values shown.
1495 */
exp(double x)1496 double exp(double x) {
1497 static const double
1498 one = 1.0,
1499 halF[2] = {0.5, -0.5},
1500 o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
1501 u_threshold = -7.45133219101941108420e+02, /* 0xC0874910, 0xD52D3051 */
1502 ln2HI[2] = {6.93147180369123816490e-01, /* 0x3FE62E42, 0xFEE00000 */
1503 -6.93147180369123816490e-01}, /* 0xBFE62E42, 0xFEE00000 */
1504 ln2LO[2] = {1.90821492927058770002e-10, /* 0x3DEA39EF, 0x35793C76 */
1505 -1.90821492927058770002e-10}, /* 0xBDEA39EF, 0x35793C76 */
1506 invln2 = 1.44269504088896338700e+00, /* 0x3FF71547, 0x652B82FE */
1507 P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */
1508 P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */
1509 P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */
1510 P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */
1511 P5 = 4.13813679705723846039e-08, /* 0x3E663769, 0x72BEA4D0 */
1512 E = 2.718281828459045; /* 0x4005BF0A, 0x8B145769 */
1513
1514 static volatile double
1515 huge = 1.0e+300,
1516 twom1000 = 9.33263618503218878990e-302, /* 2**-1000=0x01700000,0*/
1517 two1023 = 8.988465674311579539e307; /* 0x1p1023 */
1518
1519 double y, hi = 0.0, lo = 0.0, c, t, twopk;
1520 int32_t k = 0, xsb;
1521 uint32_t hx;
1522
1523 GET_HIGH_WORD(hx, x);
1524 xsb = (hx >> 31) & 1; /* sign bit of x */
1525 hx &= 0x7FFFFFFF; /* high word of |x| */
1526
1527 /* filter out non-finite argument */
1528 if (hx >= 0x40862E42) { /* if |x|>=709.78... */
1529 if (hx >= 0x7FF00000) {
1530 uint32_t lx;
1531 GET_LOW_WORD(lx, x);
1532 if (((hx & 0xFFFFF) | lx) != 0)
1533 return x + x; /* NaN */
1534 else
1535 return (xsb == 0) ? x : 0.0; /* exp(+-inf)={inf,0} */
1536 }
1537 if (x > o_threshold) return huge * huge; /* overflow */
1538 if (x < u_threshold) return twom1000 * twom1000; /* underflow */
1539 }
1540
1541 /* argument reduction */
1542 if (hx > 0x3FD62E42) { /* if |x| > 0.5 ln2 */
1543 if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
1544 /* TODO(rtoy): We special case exp(1) here to return the correct
1545 * value of E, as the computation below would get the last bit
1546 * wrong. We should probably fix the algorithm instead.
1547 */
1548 if (x == 1.0) return E;
1549 hi = x - ln2HI[xsb];
1550 lo = ln2LO[xsb];
1551 k = 1 - xsb - xsb;
1552 } else {
1553 k = static_cast<int>(invln2 * x + halF[xsb]);
1554 t = k;
1555 hi = x - t * ln2HI[0]; /* t*ln2HI is exact here */
1556 lo = t * ln2LO[0];
1557 }
1558 STRICT_ASSIGN(double, x, hi - lo);
1559 } else if (hx < 0x3E300000) { /* when |x|<2**-28 */
1560 if (huge + x > one) return one + x; /* trigger inexact */
1561 } else {
1562 k = 0;
1563 }
1564
1565 /* x is now in primary range */
1566 t = x * x;
1567 if (k >= -1021) {
1568 INSERT_WORDS(twopk, 0x3FF00000 + (k << 20), 0);
1569 } else {
1570 INSERT_WORDS(twopk, 0x3FF00000 + ((k + 1000) << 20), 0);
1571 }
1572 c = x - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
1573 if (k == 0) {
1574 return one - ((x * c) / (c - 2.0) - x);
1575 } else {
1576 y = one - ((lo - (x * c) / (2.0 - c)) - hi);
1577 }
1578 if (k >= -1021) {
1579 if (k == 1024) return y * 2.0 * two1023;
1580 return y * twopk;
1581 } else {
1582 return y * twopk * twom1000;
1583 }
1584 }
1585
1586 /*
1587 * Method :
1588 * 1.Reduced x to positive by atanh(-x) = -atanh(x)
1589 * 2.For x>=0.5
1590 * 1 2x x
1591 * atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------)
1592 * 2 1 - x 1 - x
1593 *
1594 * For x<0.5
1595 * atanh(x) = 0.5*log1p(2x+2x*x/(1-x))
1596 *
1597 * Special cases:
1598 * atanh(x) is NaN if |x| > 1 with signal;
1599 * atanh(NaN) is that NaN with no signal;
1600 * atanh(+-1) is +-INF with signal.
1601 *
1602 */
atanh(double x)1603 double atanh(double x) {
1604 static const double one = 1.0, huge = 1e300;
1605 static const double zero = 0.0;
1606
1607 double t;
1608 int32_t hx, ix;
1609 uint32_t lx;
1610 EXTRACT_WORDS(hx, lx, x);
1611 ix = hx & 0x7FFFFFFF;
1612 if ((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x3FF00000) /* |x|>1 */
1613 return (x - x) / (x - x);
1614 if (ix == 0x3FF00000) return x / zero;
1615 if (ix < 0x3E300000 && (huge + x) > zero) return x; /* x<2**-28 */
1616 SET_HIGH_WORD(x, ix);
1617 if (ix < 0x3FE00000) { /* x < 0.5 */
1618 t = x + x;
1619 t = 0.5 * log1p(t + t * x / (one - x));
1620 } else {
1621 t = 0.5 * log1p((x + x) / (one - x));
1622 }
1623 if (hx >= 0)
1624 return t;
1625 else
1626 return -t;
1627 }
1628
1629 /* log(x)
1630 * Return the logrithm of x
1631 *
1632 * Method :
1633 * 1. Argument Reduction: find k and f such that
1634 * x = 2^k * (1+f),
1635 * where sqrt(2)/2 < 1+f < sqrt(2) .
1636 *
1637 * 2. Approximation of log(1+f).
1638 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1639 * = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1640 * = 2s + s*R
1641 * We use a special Reme algorithm on [0,0.1716] to generate
1642 * a polynomial of degree 14 to approximate R The maximum error
1643 * of this polynomial approximation is bounded by 2**-58.45. In
1644 * other words,
1645 * 2 4 6 8 10 12 14
1646 * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s
1647 * (the values of Lg1 to Lg7 are listed in the program)
1648 * and
1649 * | 2 14 | -58.45
1650 * | Lg1*s +...+Lg7*s - R(z) | <= 2
1651 * | |
1652 * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1653 * In order to guarantee error in log below 1ulp, we compute log
1654 * by
1655 * log(1+f) = f - s*(f - R) (if f is not too large)
1656 * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
1657 *
1658 * 3. Finally, log(x) = k*ln2 + log(1+f).
1659 * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1660 * Here ln2 is split into two floating point number:
1661 * ln2_hi + ln2_lo,
1662 * where n*ln2_hi is always exact for |n| < 2000.
1663 *
1664 * Special cases:
1665 * log(x) is NaN with signal if x < 0 (including -INF) ;
1666 * log(+INF) is +INF; log(0) is -INF with signal;
1667 * log(NaN) is that NaN with no signal.
1668 *
1669 * Accuracy:
1670 * according to an error analysis, the error is always less than
1671 * 1 ulp (unit in the last place).
1672 *
1673 * Constants:
1674 * The hexadecimal values are the intended ones for the following
1675 * constants. The decimal values may be used, provided that the
1676 * compiler will convert from decimal to binary accurately enough
1677 * to produce the hexadecimal values shown.
1678 */
log(double x)1679 double log(double x) {
1680 static const double /* -- */
1681 ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
1682 ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
1683 two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
1684 Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
1685 Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
1686 Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
1687 Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
1688 Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
1689 Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
1690 Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
1691
1692 static const double zero = 0.0;
1693 static volatile double vzero = 0.0;
1694
1695 double hfsq, f, s, z, R, w, t1, t2, dk;
1696 int32_t k, hx, i, j;
1697 uint32_t lx;
1698
1699 EXTRACT_WORDS(hx, lx, x);
1700
1701 k = 0;
1702 if (hx < 0x00100000) { /* x < 2**-1022 */
1703 if (((hx & 0x7FFFFFFF) | lx) == 0)
1704 return -two54 / vzero; /* log(+-0)=-inf */
1705 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
1706 k -= 54;
1707 x *= two54; /* subnormal number, scale up x */
1708 GET_HIGH_WORD(hx, x);
1709 }
1710 if (hx >= 0x7FF00000) return x + x;
1711 k += (hx >> 20) - 1023;
1712 hx &= 0x000FFFFF;
1713 i = (hx + 0x95F64) & 0x100000;
1714 SET_HIGH_WORD(x, hx | (i ^ 0x3FF00000)); /* normalize x or x/2 */
1715 k += (i >> 20);
1716 f = x - 1.0;
1717 if ((0x000FFFFF & (2 + hx)) < 3) { /* -2**-20 <= f < 2**-20 */
1718 if (f == zero) {
1719 if (k == 0) {
1720 return zero;
1721 } else {
1722 dk = static_cast<double>(k);
1723 return dk * ln2_hi + dk * ln2_lo;
1724 }
1725 }
1726 R = f * f * (0.5 - 0.33333333333333333 * f);
1727 if (k == 0) {
1728 return f - R;
1729 } else {
1730 dk = static_cast<double>(k);
1731 return dk * ln2_hi - ((R - dk * ln2_lo) - f);
1732 }
1733 }
1734 s = f / (2.0 + f);
1735 dk = static_cast<double>(k);
1736 z = s * s;
1737 i = hx - 0x6147A;
1738 w = z * z;
1739 j = 0x6B851 - hx;
1740 t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
1741 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
1742 i |= j;
1743 R = t2 + t1;
1744 if (i > 0) {
1745 hfsq = 0.5 * f * f;
1746 if (k == 0)
1747 return f - (hfsq - s * (hfsq + R));
1748 else
1749 return dk * ln2_hi - ((hfsq - (s * (hfsq + R) + dk * ln2_lo)) - f);
1750 } else {
1751 if (k == 0)
1752 return f - s * (f - R);
1753 else
1754 return dk * ln2_hi - ((s * (f - R) - dk * ln2_lo) - f);
1755 }
1756 }
1757
1758 /* double log1p(double x)
1759 *
1760 * Method :
1761 * 1. Argument Reduction: find k and f such that
1762 * 1+x = 2^k * (1+f),
1763 * where sqrt(2)/2 < 1+f < sqrt(2) .
1764 *
1765 * Note. If k=0, then f=x is exact. However, if k!=0, then f
1766 * may not be representable exactly. In that case, a correction
1767 * term is need. Let u=1+x rounded. Let c = (1+x)-u, then
1768 * log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
1769 * and add back the correction term c/u.
1770 * (Note: when x > 2**53, one can simply return log(x))
1771 *
1772 * 2. Approximation of log1p(f).
1773 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1774 * = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1775 * = 2s + s*R
1776 * We use a special Reme algorithm on [0,0.1716] to generate
1777 * a polynomial of degree 14 to approximate R The maximum error
1778 * of this polynomial approximation is bounded by 2**-58.45. In
1779 * other words,
1780 * 2 4 6 8 10 12 14
1781 * R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s +Lp6*s +Lp7*s
1782 * (the values of Lp1 to Lp7 are listed in the program)
1783 * and
1784 * | 2 14 | -58.45
1785 * | Lp1*s +...+Lp7*s - R(z) | <= 2
1786 * | |
1787 * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1788 * In order to guarantee error in log below 1ulp, we compute log
1789 * by
1790 * log1p(f) = f - (hfsq - s*(hfsq+R)).
1791 *
1792 * 3. Finally, log1p(x) = k*ln2 + log1p(f).
1793 * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1794 * Here ln2 is split into two floating point number:
1795 * ln2_hi + ln2_lo,
1796 * where n*ln2_hi is always exact for |n| < 2000.
1797 *
1798 * Special cases:
1799 * log1p(x) is NaN with signal if x < -1 (including -INF) ;
1800 * log1p(+INF) is +INF; log1p(-1) is -INF with signal;
1801 * log1p(NaN) is that NaN with no signal.
1802 *
1803 * Accuracy:
1804 * according to an error analysis, the error is always less than
1805 * 1 ulp (unit in the last place).
1806 *
1807 * Constants:
1808 * The hexadecimal values are the intended ones for the following
1809 * constants. The decimal values may be used, provided that the
1810 * compiler will convert from decimal to binary accurately enough
1811 * to produce the hexadecimal values shown.
1812 *
1813 * Note: Assuming log() return accurate answer, the following
1814 * algorithm can be used to compute log1p(x) to within a few ULP:
1815 *
1816 * u = 1+x;
1817 * if(u==1.0) return x ; else
1818 * return log(u)*(x/(u-1.0));
1819 *
1820 * See HP-15C Advanced Functions Handbook, p.193.
1821 */
log1p(double x)1822 double log1p(double x) {
1823 static const double /* -- */
1824 ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
1825 ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
1826 two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
1827 Lp1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
1828 Lp2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
1829 Lp3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
1830 Lp4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
1831 Lp5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
1832 Lp6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
1833 Lp7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
1834
1835 static const double zero = 0.0;
1836 static volatile double vzero = 0.0;
1837
1838 double hfsq, f, c, s, z, R, u;
1839 int32_t k, hx, hu, ax;
1840
1841 GET_HIGH_WORD(hx, x);
1842 ax = hx & 0x7FFFFFFF;
1843
1844 k = 1;
1845 if (hx < 0x3FDA827A) { /* 1+x < sqrt(2)+ */
1846 if (ax >= 0x3FF00000) { /* x <= -1.0 */
1847 if (x == -1.0)
1848 return -two54 / vzero; /* log1p(-1)=+inf */
1849 else
1850 return (x - x) / (x - x); /* log1p(x<-1)=NaN */
1851 }
1852 if (ax < 0x3E200000) { /* |x| < 2**-29 */
1853 if (two54 + x > zero /* raise inexact */
1854 && ax < 0x3C900000) /* |x| < 2**-54 */
1855 return x;
1856 else
1857 return x - x * x * 0.5;
1858 }
1859 if (hx > 0 || hx <= static_cast<int32_t>(0xBFD2BEC4)) {
1860 k = 0;
1861 f = x;
1862 hu = 1;
1863 } /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
1864 }
1865 if (hx >= 0x7FF00000) return x + x;
1866 if (k != 0) {
1867 if (hx < 0x43400000) {
1868 STRICT_ASSIGN(double, u, 1.0 + x);
1869 GET_HIGH_WORD(hu, u);
1870 k = (hu >> 20) - 1023;
1871 c = (k > 0) ? 1.0 - (u - x) : x - (u - 1.0); /* correction term */
1872 c /= u;
1873 } else {
1874 u = x;
1875 GET_HIGH_WORD(hu, u);
1876 k = (hu >> 20) - 1023;
1877 c = 0;
1878 }
1879 hu &= 0x000FFFFF;
1880 /*
1881 * The approximation to sqrt(2) used in thresholds is not
1882 * critical. However, the ones used above must give less
1883 * strict bounds than the one here so that the k==0 case is
1884 * never reached from here, since here we have committed to
1885 * using the correction term but don't use it if k==0.
1886 */
1887 if (hu < 0x6A09E) { /* u ~< sqrt(2) */
1888 SET_HIGH_WORD(u, hu | 0x3FF00000); /* normalize u */
1889 } else {
1890 k += 1;
1891 SET_HIGH_WORD(u, hu | 0x3FE00000); /* normalize u/2 */
1892 hu = (0x00100000 - hu) >> 2;
1893 }
1894 f = u - 1.0;
1895 }
1896 hfsq = 0.5 * f * f;
1897 if (hu == 0) { /* |f| < 2**-20 */
1898 if (f == zero) {
1899 if (k == 0) {
1900 return zero;
1901 } else {
1902 c += k * ln2_lo;
1903 return k * ln2_hi + c;
1904 }
1905 }
1906 R = hfsq * (1.0 - 0.66666666666666666 * f);
1907 if (k == 0)
1908 return f - R;
1909 else
1910 return k * ln2_hi - ((R - (k * ln2_lo + c)) - f);
1911 }
1912 s = f / (2.0 + f);
1913 z = s * s;
1914 R = z * (Lp1 +
1915 z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7))))));
1916 if (k == 0)
1917 return f - (hfsq - s * (hfsq + R));
1918 else
1919 return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);
1920 }
1921
1922 /*
1923 * k_log1p(f):
1924 * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)].
1925 *
1926 * The following describes the overall strategy for computing
1927 * logarithms in base e. The argument reduction and adding the final
1928 * term of the polynomial are done by the caller for increased accuracy
1929 * when different bases are used.
1930 *
1931 * Method :
1932 * 1. Argument Reduction: find k and f such that
1933 * x = 2^k * (1+f),
1934 * where sqrt(2)/2 < 1+f < sqrt(2) .
1935 *
1936 * 2. Approximation of log(1+f).
1937 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1938 * = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1939 * = 2s + s*R
1940 * We use a special Reme algorithm on [0,0.1716] to generate
1941 * a polynomial of degree 14 to approximate R The maximum error
1942 * of this polynomial approximation is bounded by 2**-58.45. In
1943 * other words,
1944 * 2 4 6 8 10 12 14
1945 * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s
1946 * (the values of Lg1 to Lg7 are listed in the program)
1947 * and
1948 * | 2 14 | -58.45
1949 * | Lg1*s +...+Lg7*s - R(z) | <= 2
1950 * | |
1951 * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1952 * In order to guarantee error in log below 1ulp, we compute log
1953 * by
1954 * log(1+f) = f - s*(f - R) (if f is not too large)
1955 * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
1956 *
1957 * 3. Finally, log(x) = k*ln2 + log(1+f).
1958 * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1959 * Here ln2 is split into two floating point number:
1960 * ln2_hi + ln2_lo,
1961 * where n*ln2_hi is always exact for |n| < 2000.
1962 *
1963 * Special cases:
1964 * log(x) is NaN with signal if x < 0 (including -INF) ;
1965 * log(+INF) is +INF; log(0) is -INF with signal;
1966 * log(NaN) is that NaN with no signal.
1967 *
1968 * Accuracy:
1969 * according to an error analysis, the error is always less than
1970 * 1 ulp (unit in the last place).
1971 *
1972 * Constants:
1973 * The hexadecimal values are the intended ones for the following
1974 * constants. The decimal values may be used, provided that the
1975 * compiler will convert from decimal to binary accurately enough
1976 * to produce the hexadecimal values shown.
1977 */
1978
1979 static const double Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
1980 Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
1981 Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
1982 Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
1983 Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
1984 Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
1985 Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
1986
1987 /*
1988 * We always inline k_log1p(), since doing so produces a
1989 * substantial performance improvement (~40% on amd64).
1990 */
k_log1p(double f)1991 static inline double k_log1p(double f) {
1992 double hfsq, s, z, R, w, t1, t2;
1993
1994 s = f / (2.0 + f);
1995 z = s * s;
1996 w = z * z;
1997 t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
1998 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
1999 R = t2 + t1;
2000 hfsq = 0.5 * f * f;
2001 return s * (hfsq + R);
2002 }
2003
2004 /*
2005 * Return the base 2 logarithm of x. See e_log.c and k_log.h for most
2006 * comments.
2007 *
2008 * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel,
2009 * then does the combining and scaling steps
2010 * log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k
2011 * in not-quite-routine extra precision.
2012 */
log2(double x)2013 double log2(double x) {
2014 static const double
2015 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
2016 ivln2hi = 1.44269504072144627571e+00, /* 0x3FF71547, 0x65200000 */
2017 ivln2lo = 1.67517131648865118353e-10; /* 0x3DE705FC, 0x2EEFA200 */
2018
2019 static const double zero = 0.0;
2020 static volatile double vzero = 0.0;
2021
2022 double f, hfsq, hi, lo, r, val_hi, val_lo, w, y;
2023 int32_t i, k, hx;
2024 uint32_t lx;
2025
2026 EXTRACT_WORDS(hx, lx, x);
2027
2028 k = 0;
2029 if (hx < 0x00100000) { /* x < 2**-1022 */
2030 if (((hx & 0x7FFFFFFF) | lx) == 0)
2031 return -two54 / vzero; /* log(+-0)=-inf */
2032 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
2033 k -= 54;
2034 x *= two54; /* subnormal number, scale up x */
2035 GET_HIGH_WORD(hx, x);
2036 }
2037 if (hx >= 0x7FF00000) return x + x;
2038 if (hx == 0x3FF00000 && lx == 0) return zero; /* log(1) = +0 */
2039 k += (hx >> 20) - 1023;
2040 hx &= 0x000FFFFF;
2041 i = (hx + 0x95F64) & 0x100000;
2042 SET_HIGH_WORD(x, hx | (i ^ 0x3FF00000)); /* normalize x or x/2 */
2043 k += (i >> 20);
2044 y = static_cast<double>(k);
2045 f = x - 1.0;
2046 hfsq = 0.5 * f * f;
2047 r = k_log1p(f);
2048
2049 /*
2050 * f-hfsq must (for args near 1) be evaluated in extra precision
2051 * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2).
2052 * This is fairly efficient since f-hfsq only depends on f, so can
2053 * be evaluated in parallel with R. Not combining hfsq with R also
2054 * keeps R small (though not as small as a true `lo' term would be),
2055 * so that extra precision is not needed for terms involving R.
2056 *
2057 * Compiler bugs involving extra precision used to break Dekker's
2058 * theorem for spitting f-hfsq as hi+lo, unless double_t was used
2059 * or the multi-precision calculations were avoided when double_t
2060 * has extra precision. These problems are now automatically
2061 * avoided as a side effect of the optimization of combining the
2062 * Dekker splitting step with the clear-low-bits step.
2063 *
2064 * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra
2065 * precision to avoid a very large cancellation when x is very near
2066 * these values. Unlike the above cancellations, this problem is
2067 * specific to base 2. It is strange that adding +-1 is so much
2068 * harder than adding +-ln2 or +-log10_2.
2069 *
2070 * This uses Dekker's theorem to normalize y+val_hi, so the
2071 * compiler bugs are back in some configurations, sigh. And I
2072 * don't want to used double_t to avoid them, since that gives a
2073 * pessimization and the support for avoiding the pessimization
2074 * is not yet available.
2075 *
2076 * The multi-precision calculations for the multiplications are
2077 * routine.
2078 */
2079 hi = f - hfsq;
2080 SET_LOW_WORD(hi, 0);
2081 lo = (f - hi) - hfsq + r;
2082 val_hi = hi * ivln2hi;
2083 val_lo = (lo + hi) * ivln2lo + lo * ivln2hi;
2084
2085 /* spadd(val_hi, val_lo, y), except for not using double_t: */
2086 w = y + val_hi;
2087 val_lo += (y - w) + val_hi;
2088 val_hi = w;
2089
2090 return val_lo + val_hi;
2091 }
2092
2093 /*
2094 * Return the base 10 logarithm of x
2095 *
2096 * Method :
2097 * Let log10_2hi = leading 40 bits of log10(2) and
2098 * log10_2lo = log10(2) - log10_2hi,
2099 * ivln10 = 1/log(10) rounded.
2100 * Then
2101 * n = ilogb(x),
2102 * if(n<0) n = n+1;
2103 * x = scalbn(x,-n);
2104 * log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x))
2105 *
2106 * Note 1:
2107 * To guarantee log10(10**n)=n, where 10**n is normal, the rounding
2108 * mode must set to Round-to-Nearest.
2109 * Note 2:
2110 * [1/log(10)] rounded to 53 bits has error .198 ulps;
2111 * log10 is monotonic at all binary break points.
2112 *
2113 * Special cases:
2114 * log10(x) is NaN if x < 0;
2115 * log10(+INF) is +INF; log10(0) is -INF;
2116 * log10(NaN) is that NaN;
2117 * log10(10**N) = N for N=0,1,...,22.
2118 */
log10(double x)2119 double log10(double x) {
2120 static const double
2121 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
2122 ivln10 = 4.34294481903251816668e-01,
2123 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
2124 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
2125
2126 static const double zero = 0.0;
2127 static volatile double vzero = 0.0;
2128
2129 double y;
2130 int32_t i, k, hx;
2131 uint32_t lx;
2132
2133 EXTRACT_WORDS(hx, lx, x);
2134
2135 k = 0;
2136 if (hx < 0x00100000) { /* x < 2**-1022 */
2137 if (((hx & 0x7FFFFFFF) | lx) == 0)
2138 return -two54 / vzero; /* log(+-0)=-inf */
2139 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
2140 k -= 54;
2141 x *= two54; /* subnormal number, scale up x */
2142 GET_HIGH_WORD(hx, x);
2143 GET_LOW_WORD(lx, x);
2144 }
2145 if (hx >= 0x7FF00000) return x + x;
2146 if (hx == 0x3FF00000 && lx == 0) return zero; /* log(1) = +0 */
2147 k += (hx >> 20) - 1023;
2148
2149 i = (k & 0x80000000) >> 31;
2150 hx = (hx & 0x000FFFFF) | ((0x3FF - i) << 20);
2151 y = k + i;
2152 SET_HIGH_WORD(x, hx);
2153 SET_LOW_WORD(x, lx);
2154
2155 double z = y * log10_2lo + ivln10 * log(x);
2156 return z + y * log10_2hi;
2157 }
2158
2159 /* expm1(x)
2160 * Returns exp(x)-1, the exponential of x minus 1.
2161 *
2162 * Method
2163 * 1. Argument reduction:
2164 * Given x, find r and integer k such that
2165 *
2166 * x = k*ln2 + r, |r| <= 0.5*ln2 ~ 0.34658
2167 *
2168 * Here a correction term c will be computed to compensate
2169 * the error in r when rounded to a floating-point number.
2170 *
2171 * 2. Approximating expm1(r) by a special rational function on
2172 * the interval [0,0.34658]:
2173 * Since
2174 * r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ...
2175 * we define R1(r*r) by
2176 * r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r)
2177 * That is,
2178 * R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
2179 * = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
2180 * = 1 - r^2/60 + r^4/2520 - r^6/100800 + ...
2181 * We use a special Reme algorithm on [0,0.347] to generate
2182 * a polynomial of degree 5 in r*r to approximate R1. The
2183 * maximum error of this polynomial approximation is bounded
2184 * by 2**-61. In other words,
2185 * R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
2186 * where Q1 = -1.6666666666666567384E-2,
2187 * Q2 = 3.9682539681370365873E-4,
2188 * Q3 = -9.9206344733435987357E-6,
2189 * Q4 = 2.5051361420808517002E-7,
2190 * Q5 = -6.2843505682382617102E-9;
2191 * z = r*r,
2192 * with error bounded by
2193 * | 5 | -61
2194 * | 1.0+Q1*z+...+Q5*z - R1(z) | <= 2
2195 * | |
2196 *
2197 * expm1(r) = exp(r)-1 is then computed by the following
2198 * specific way which minimize the accumulation rounding error:
2199 * 2 3
2200 * r r [ 3 - (R1 + R1*r/2) ]
2201 * expm1(r) = r + --- + --- * [--------------------]
2202 * 2 2 [ 6 - r*(3 - R1*r/2) ]
2203 *
2204 * To compensate the error in the argument reduction, we use
2205 * expm1(r+c) = expm1(r) + c + expm1(r)*c
2206 * ~ expm1(r) + c + r*c
2207 * Thus c+r*c will be added in as the correction terms for
2208 * expm1(r+c). Now rearrange the term to avoid optimization
2209 * screw up:
2210 * ( 2 2 )
2211 * ({ ( r [ R1 - (3 - R1*r/2) ] ) } r )
2212 * expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )
2213 * ({ ( 2 [ 6 - r*(3 - R1*r/2) ] ) } 2 )
2214 * ( )
2215 *
2216 * = r - E
2217 * 3. Scale back to obtain expm1(x):
2218 * From step 1, we have
2219 * expm1(x) = either 2^k*[expm1(r)+1] - 1
2220 * = or 2^k*[expm1(r) + (1-2^-k)]
2221 * 4. Implementation notes:
2222 * (A). To save one multiplication, we scale the coefficient Qi
2223 * to Qi*2^i, and replace z by (x^2)/2.
2224 * (B). To achieve maximum accuracy, we compute expm1(x) by
2225 * (i) if x < -56*ln2, return -1.0, (raise inexact if x!=inf)
2226 * (ii) if k=0, return r-E
2227 * (iii) if k=-1, return 0.5*(r-E)-0.5
2228 * (iv) if k=1 if r < -0.25, return 2*((r+0.5)- E)
2229 * else return 1.0+2.0*(r-E);
2230 * (v) if (k<-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1)
2231 * (vi) if k <= 20, return 2^k((1-2^-k)-(E-r)), else
2232 * (vii) return 2^k(1-((E+2^-k)-r))
2233 *
2234 * Special cases:
2235 * expm1(INF) is INF, expm1(NaN) is NaN;
2236 * expm1(-INF) is -1, and
2237 * for finite argument, only expm1(0)=0 is exact.
2238 *
2239 * Accuracy:
2240 * according to an error analysis, the error is always less than
2241 * 1 ulp (unit in the last place).
2242 *
2243 * Misc. info.
2244 * For IEEE double
2245 * if x > 7.09782712893383973096e+02 then expm1(x) overflow
2246 *
2247 * Constants:
2248 * The hexadecimal values are the intended ones for the following
2249 * constants. The decimal values may be used, provided that the
2250 * compiler will convert from decimal to binary accurately enough
2251 * to produce the hexadecimal values shown.
2252 */
expm1(double x)2253 double expm1(double x) {
2254 static const double
2255 one = 1.0,
2256 tiny = 1.0e-300,
2257 o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
2258 ln2_hi = 6.93147180369123816490e-01, /* 0x3FE62E42, 0xFEE00000 */
2259 ln2_lo = 1.90821492927058770002e-10, /* 0x3DEA39EF, 0x35793C76 */
2260 invln2 = 1.44269504088896338700e+00, /* 0x3FF71547, 0x652B82FE */
2261 /* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs =
2262 x*x/2: */
2263 Q1 = -3.33333333333331316428e-02, /* BFA11111 111110F4 */
2264 Q2 = 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */
2265 Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
2266 Q4 = 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */
2267 Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
2268
2269 static volatile double huge = 1.0e+300;
2270
2271 double y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
2272 int32_t k, xsb;
2273 uint32_t hx;
2274
2275 GET_HIGH_WORD(hx, x);
2276 xsb = hx & 0x80000000; /* sign bit of x */
2277 hx &= 0x7FFFFFFF; /* high word of |x| */
2278
2279 /* filter out huge and non-finite argument */
2280 if (hx >= 0x4043687A) { /* if |x|>=56*ln2 */
2281 if (hx >= 0x40862E42) { /* if |x|>=709.78... */
2282 if (hx >= 0x7FF00000) {
2283 uint32_t low;
2284 GET_LOW_WORD(low, x);
2285 if (((hx & 0xFFFFF) | low) != 0)
2286 return x + x; /* NaN */
2287 else
2288 return (xsb == 0) ? x : -1.0; /* exp(+-inf)={inf,-1} */
2289 }
2290 if (x > o_threshold) return huge * huge; /* overflow */
2291 }
2292 if (xsb != 0) { /* x < -56*ln2, return -1.0 with inexact */
2293 if (x + tiny < 0.0) /* raise inexact */
2294 return tiny - one; /* return -1 */
2295 }
2296 }
2297
2298 /* argument reduction */
2299 if (hx > 0x3FD62E42) { /* if |x| > 0.5 ln2 */
2300 if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
2301 if (xsb == 0) {
2302 hi = x - ln2_hi;
2303 lo = ln2_lo;
2304 k = 1;
2305 } else {
2306 hi = x + ln2_hi;
2307 lo = -ln2_lo;
2308 k = -1;
2309 }
2310 } else {
2311 k = invln2 * x + ((xsb == 0) ? 0.5 : -0.5);
2312 t = k;
2313 hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
2314 lo = t * ln2_lo;
2315 }
2316 STRICT_ASSIGN(double, x, hi - lo);
2317 c = (hi - x) - lo;
2318 } else if (hx < 0x3C900000) { /* when |x|<2**-54, return x */
2319 t = huge + x; /* return x with inexact flags when x!=0 */
2320 return x - (t - (huge + x));
2321 } else {
2322 k = 0;
2323 }
2324
2325 /* x is now in primary range */
2326 hfx = 0.5 * x;
2327 hxs = x * hfx;
2328 r1 = one + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
2329 t = 3.0 - r1 * hfx;
2330 e = hxs * ((r1 - t) / (6.0 - x * t));
2331 if (k == 0) {
2332 return x - (x * e - hxs); /* c is 0 */
2333 } else {
2334 INSERT_WORDS(twopk, 0x3FF00000 + (k << 20), 0); /* 2^k */
2335 e = (x * (e - c) - c);
2336 e -= hxs;
2337 if (k == -1) return 0.5 * (x - e) - 0.5;
2338 if (k == 1) {
2339 if (x < -0.25)
2340 return -2.0 * (e - (x + 0.5));
2341 else
2342 return one + 2.0 * (x - e);
2343 }
2344 if (k <= -2 || k > 56) { /* suffice to return exp(x)-1 */
2345 y = one - (e - x);
2346 // TODO(mvstanton): is this replacement for the hex float
2347 // sufficient?
2348 // if (k == 1024) y = y*2.0*0x1p1023;
2349 if (k == 1024)
2350 y = y * 2.0 * 8.98846567431158e+307;
2351 else
2352 y = y * twopk;
2353 return y - one;
2354 }
2355 t = one;
2356 if (k < 20) {
2357 SET_HIGH_WORD(t, 0x3FF00000 - (0x200000 >> k)); /* t=1-2^-k */
2358 y = t - (e - x);
2359 y = y * twopk;
2360 } else {
2361 SET_HIGH_WORD(t, ((0x3FF - k) << 20)); /* 2^-k */
2362 y = x - (e + t);
2363 y += one;
2364 y = y * twopk;
2365 }
2366 }
2367 return y;
2368 }
2369
cbrt(double x)2370 double cbrt(double x) {
2371 static const uint32_t
2372 B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
2373 B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
2374
2375 /* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
2376 static const double P0 = 1.87595182427177009643, /* 0x3FFE03E6, 0x0F61E692 */
2377 P1 = -1.88497979543377169875, /* 0xBFFE28E0, 0x92F02420 */
2378 P2 = 1.621429720105354466140, /* 0x3FF9F160, 0x4A49D6C2 */
2379 P3 = -0.758397934778766047437, /* 0xBFE844CB, 0xBEE751D9 */
2380 P4 = 0.145996192886612446982; /* 0x3FC2B000, 0xD4E4EDD7 */
2381
2382 int32_t hx;
2383 union {
2384 double value;
2385 uint64_t bits;
2386 } u;
2387 double r, s, t = 0.0, w;
2388 uint32_t sign;
2389 uint32_t high, low;
2390
2391 EXTRACT_WORDS(hx, low, x);
2392 sign = hx & 0x80000000; /* sign= sign(x) */
2393 hx ^= sign;
2394 if (hx >= 0x7FF00000) return (x + x); /* cbrt(NaN,INF) is itself */
2395
2396 /*
2397 * Rough cbrt to 5 bits:
2398 * cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
2399 * where e is integral and >= 0, m is real and in [0, 1), and "/" and
2400 * "%" are integer division and modulus with rounding towards minus
2401 * infinity. The RHS is always >= the LHS and has a maximum relative
2402 * error of about 1 in 16. Adding a bias of -0.03306235651 to the
2403 * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
2404 * floating point representation, for finite positive normal values,
2405 * ordinary integer division of the value in bits magically gives
2406 * almost exactly the RHS of the above provided we first subtract the
2407 * exponent bias (1023 for doubles) and later add it back. We do the
2408 * subtraction virtually to keep e >= 0 so that ordinary integer
2409 * division rounds towards minus infinity; this is also efficient.
2410 */
2411 if (hx < 0x00100000) { /* zero or subnormal? */
2412 if ((hx | low) == 0) return (x); /* cbrt(0) is itself */
2413 SET_HIGH_WORD(t, 0x43500000); /* set t= 2**54 */
2414 t *= x;
2415 GET_HIGH_WORD(high, t);
2416 INSERT_WORDS(t, sign | ((high & 0x7FFFFFFF) / 3 + B2), 0);
2417 } else {
2418 INSERT_WORDS(t, sign | (hx / 3 + B1), 0);
2419 }
2420
2421 /*
2422 * New cbrt to 23 bits:
2423 * cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
2424 * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
2425 * to within 2**-23.5 when |r - 1| < 1/10. The rough approximation
2426 * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
2427 * gives us bounds for r = t**3/x.
2428 *
2429 * Try to optimize for parallel evaluation as in k_tanf.c.
2430 */
2431 r = (t * t) * (t / x);
2432 t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));
2433
2434 /*
2435 * Round t away from zero to 23 bits (sloppily except for ensuring that
2436 * the result is larger in magnitude than cbrt(x) but not much more than
2437 * 2 23-bit ulps larger). With rounding towards zero, the error bound
2438 * would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps
2439 * in the rounded t, the infinite-precision error in the Newton
2440 * approximation barely affects third digit in the final error
2441 * 0.667; the error in the rounded t can be up to about 3 23-bit ulps
2442 * before the final error is larger than 0.667 ulps.
2443 */
2444 u.value = t;
2445 u.bits = (u.bits + 0x80000000) & 0xFFFFFFFFC0000000ULL;
2446 t = u.value;
2447
2448 /* one step Newton iteration to 53 bits with error < 0.667 ulps */
2449 s = t * t; /* t*t is exact */
2450 r = x / s; /* error <= 0.5 ulps; |r| < |t| */
2451 w = t + t; /* t+t is exact */
2452 r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */
2453 t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */
2454
2455 return (t);
2456 }
2457
2458 /* sin(x)
2459 * Return sine function of x.
2460 *
2461 * kernel function:
2462 * __kernel_sin ... sine function on [-pi/4,pi/4]
2463 * __kernel_cos ... cose function on [-pi/4,pi/4]
2464 * __ieee754_rem_pio2 ... argument reduction routine
2465 *
2466 * Method.
2467 * Let S,C and T denote the sin, cos and tan respectively on
2468 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
2469 * in [-pi/4 , +pi/4], and let n = k mod 4.
2470 * We have
2471 *
2472 * n sin(x) cos(x) tan(x)
2473 * ----------------------------------------------------------
2474 * 0 S C T
2475 * 1 C -S -1/T
2476 * 2 -S -C T
2477 * 3 -C S -1/T
2478 * ----------------------------------------------------------
2479 *
2480 * Special cases:
2481 * Let trig be any of sin, cos, or tan.
2482 * trig(+-INF) is NaN, with signals;
2483 * trig(NaN) is that NaN;
2484 *
2485 * Accuracy:
2486 * TRIG(x) returns trig(x) nearly rounded
2487 */
sin(double x)2488 double sin(double x) {
2489 double y[2], z = 0.0;
2490 int32_t n, ix;
2491
2492 /* High word of x. */
2493 GET_HIGH_WORD(ix, x);
2494
2495 /* |x| ~< pi/4 */
2496 ix &= 0x7FFFFFFF;
2497 if (ix <= 0x3FE921FB) {
2498 return __kernel_sin(x, z, 0);
2499 } else if (ix >= 0x7FF00000) {
2500 /* sin(Inf or NaN) is NaN */
2501 return x - x;
2502 } else {
2503 /* argument reduction needed */
2504 n = __ieee754_rem_pio2(x, y);
2505 switch (n & 3) {
2506 case 0:
2507 return __kernel_sin(y[0], y[1], 1);
2508 case 1:
2509 return __kernel_cos(y[0], y[1]);
2510 case 2:
2511 return -__kernel_sin(y[0], y[1], 1);
2512 default:
2513 return -__kernel_cos(y[0], y[1]);
2514 }
2515 }
2516 }
2517
2518 /* tan(x)
2519 * Return tangent function of x.
2520 *
2521 * kernel function:
2522 * __kernel_tan ... tangent function on [-pi/4,pi/4]
2523 * __ieee754_rem_pio2 ... argument reduction routine
2524 *
2525 * Method.
2526 * Let S,C and T denote the sin, cos and tan respectively on
2527 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
2528 * in [-pi/4 , +pi/4], and let n = k mod 4.
2529 * We have
2530 *
2531 * n sin(x) cos(x) tan(x)
2532 * ----------------------------------------------------------
2533 * 0 S C T
2534 * 1 C -S -1/T
2535 * 2 -S -C T
2536 * 3 -C S -1/T
2537 * ----------------------------------------------------------
2538 *
2539 * Special cases:
2540 * Let trig be any of sin, cos, or tan.
2541 * trig(+-INF) is NaN, with signals;
2542 * trig(NaN) is that NaN;
2543 *
2544 * Accuracy:
2545 * TRIG(x) returns trig(x) nearly rounded
2546 */
tan(double x)2547 double tan(double x) {
2548 double y[2], z = 0.0;
2549 int32_t n, ix;
2550
2551 /* High word of x. */
2552 GET_HIGH_WORD(ix, x);
2553
2554 /* |x| ~< pi/4 */
2555 ix &= 0x7FFFFFFF;
2556 if (ix <= 0x3FE921FB) {
2557 return __kernel_tan(x, z, 1);
2558 } else if (ix >= 0x7FF00000) {
2559 /* tan(Inf or NaN) is NaN */
2560 return x - x; /* NaN */
2561 } else {
2562 /* argument reduction needed */
2563 n = __ieee754_rem_pio2(x, y);
2564 /* 1 -> n even, -1 -> n odd */
2565 return __kernel_tan(y[0], y[1], 1 - ((n & 1) << 1));
2566 }
2567 }
2568
2569 /*
2570 * ES6 draft 09-27-13, section 20.2.2.12.
2571 * Math.cosh
2572 * Method :
2573 * mathematically cosh(x) if defined to be (exp(x)+exp(-x))/2
2574 * 1. Replace x by |x| (cosh(x) = cosh(-x)).
2575 * 2.
2576 * [ exp(x) - 1 ]^2
2577 * 0 <= x <= ln2/2 : cosh(x) := 1 + -------------------
2578 * 2*exp(x)
2579 *
2580 * exp(x) + 1/exp(x)
2581 * ln2/2 <= x <= 22 : cosh(x) := -------------------
2582 * 2
2583 * 22 <= x <= lnovft : cosh(x) := exp(x)/2
2584 * lnovft <= x <= ln2ovft: cosh(x) := exp(x/2)/2 * exp(x/2)
2585 * ln2ovft < x : cosh(x) := huge*huge (overflow)
2586 *
2587 * Special cases:
2588 * cosh(x) is |x| if x is +INF, -INF, or NaN.
2589 * only cosh(0)=1 is exact for finite x.
2590 */
cosh(double x)2591 double cosh(double x) {
2592 static const double KCOSH_OVERFLOW = 710.4758600739439;
2593 static const double one = 1.0, half = 0.5;
2594 static volatile double huge = 1.0e+300;
2595
2596 int32_t ix;
2597
2598 /* High word of |x|. */
2599 GET_HIGH_WORD(ix, x);
2600 ix &= 0x7FFFFFFF;
2601
2602 // |x| in [0,0.5*log2], return 1+expm1(|x|)^2/(2*exp(|x|))
2603 if (ix < 0x3FD62E43) {
2604 double t = expm1(fabs(x));
2605 double w = one + t;
2606 // For |x| < 2^-55, cosh(x) = 1
2607 if (ix < 0x3C800000) return w;
2608 return one + (t * t) / (w + w);
2609 }
2610
2611 // |x| in [0.5*log2, 22], return (exp(|x|)+1/exp(|x|)/2
2612 if (ix < 0x40360000) {
2613 double t = exp(fabs(x));
2614 return half * t + half / t;
2615 }
2616
2617 // |x| in [22, log(maxdouble)], return half*exp(|x|)
2618 if (ix < 0x40862E42) return half * exp(fabs(x));
2619
2620 // |x| in [log(maxdouble), overflowthreshold]
2621 if (fabs(x) <= KCOSH_OVERFLOW) {
2622 double w = exp(half * fabs(x));
2623 double t = half * w;
2624 return t * w;
2625 }
2626
2627 /* x is INF or NaN */
2628 if (ix >= 0x7FF00000) return x * x;
2629
2630 // |x| > overflowthreshold.
2631 return huge * huge;
2632 }
2633
2634 /*
2635 * ES6 draft 09-27-13, section 20.2.2.30.
2636 * Math.sinh
2637 * Method :
2638 * mathematically sinh(x) if defined to be (exp(x)-exp(-x))/2
2639 * 1. Replace x by |x| (sinh(-x) = -sinh(x)).
2640 * 2.
2641 * E + E/(E+1)
2642 * 0 <= x <= 22 : sinh(x) := --------------, E=expm1(x)
2643 * 2
2644 *
2645 * 22 <= x <= lnovft : sinh(x) := exp(x)/2
2646 * lnovft <= x <= ln2ovft: sinh(x) := exp(x/2)/2 * exp(x/2)
2647 * ln2ovft < x : sinh(x) := x*shuge (overflow)
2648 *
2649 * Special cases:
2650 * sinh(x) is |x| if x is +Infinity, -Infinity, or NaN.
2651 * only sinh(0)=0 is exact for finite x.
2652 */
sinh(double x)2653 double sinh(double x) {
2654 static const double KSINH_OVERFLOW = 710.4758600739439,
2655 TWO_M28 =
2656 3.725290298461914e-9, // 2^-28, empty lower half
2657 LOG_MAXD = 709.7822265625; // 0x40862E42 00000000, empty lower half
2658 static const double shuge = 1.0e307;
2659
2660 double h = (x < 0) ? -0.5 : 0.5;
2661 // |x| in [0, 22]. return sign(x)*0.5*(E+E/(E+1))
2662 double ax = fabs(x);
2663 if (ax < 22) {
2664 // For |x| < 2^-28, sinh(x) = x
2665 if (ax < TWO_M28) return x;
2666 double t = expm1(ax);
2667 if (ax < 1) {
2668 return h * (2 * t - t * t / (t + 1));
2669 }
2670 return h * (t + t / (t + 1));
2671 }
2672 // |x| in [22, log(maxdouble)], return 0.5 * exp(|x|)
2673 if (ax < LOG_MAXD) return h * exp(ax);
2674 // |x| in [log(maxdouble), overflowthreshold]
2675 // overflowthreshold = 710.4758600739426
2676 if (ax <= KSINH_OVERFLOW) {
2677 double w = exp(0.5 * ax);
2678 double t = h * w;
2679 return t * w;
2680 }
2681 // |x| > overflowthreshold or is NaN.
2682 // Return Infinity of the appropriate sign or NaN.
2683 return x * shuge;
2684 }
2685
2686 /* Tanh(x)
2687 * Return the Hyperbolic Tangent of x
2688 *
2689 * Method :
2690 * x -x
2691 * e - e
2692 * 0. tanh(x) is defined to be -----------
2693 * x -x
2694 * e + e
2695 * 1. reduce x to non-negative by tanh(-x) = -tanh(x).
2696 * 2. 0 <= x < 2**-28 : tanh(x) := x with inexact if x != 0
2697 * -t
2698 * 2**-28 <= x < 1 : tanh(x) := -----; t = expm1(-2x)
2699 * t + 2
2700 * 2
2701 * 1 <= x < 22 : tanh(x) := 1 - -----; t = expm1(2x)
2702 * t + 2
2703 * 22 <= x <= INF : tanh(x) := 1.
2704 *
2705 * Special cases:
2706 * tanh(NaN) is NaN;
2707 * only tanh(0)=0 is exact for finite argument.
2708 */
tanh(double x)2709 double tanh(double x) {
2710 static const volatile double tiny = 1.0e-300;
2711 static const double one = 1.0, two = 2.0, huge = 1.0e300;
2712 double t, z;
2713 int32_t jx, ix;
2714
2715 GET_HIGH_WORD(jx, x);
2716 ix = jx & 0x7FFFFFFF;
2717
2718 /* x is INF or NaN */
2719 if (ix >= 0x7FF00000) {
2720 if (jx >= 0)
2721 return one / x + one; /* tanh(+-inf)=+-1 */
2722 else
2723 return one / x - one; /* tanh(NaN) = NaN */
2724 }
2725
2726 /* |x| < 22 */
2727 if (ix < 0x40360000) { /* |x|<22 */
2728 if (ix < 0x3E300000) { /* |x|<2**-28 */
2729 if (huge + x > one) return x; /* tanh(tiny) = tiny with inexact */
2730 }
2731 if (ix >= 0x3FF00000) { /* |x|>=1 */
2732 t = expm1(two * fabs(x));
2733 z = one - two / (t + two);
2734 } else {
2735 t = expm1(-two * fabs(x));
2736 z = -t / (t + two);
2737 }
2738 /* |x| >= 22, return +-1 */
2739 } else {
2740 z = one - tiny; /* raise inexact flag */
2741 }
2742 return (jx >= 0) ? z : -z;
2743 }
2744
2745 } // namespace ieee754
2746 } // namespace base
2747 } // namespace v8
2748