1 /* Copyright JS Foundation and other contributors, http://js.foundation
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 *
15 * This file is based on work under the following copyright and permission
16 * notice:
17 *
18 * Copyright (C) 1993, 2004 by Sun Microsystems, Inc. All rights reserved.
19 *
20 * Developed at SunSoft, a Sun Microsystems, Inc. business.
21 * Permission to use, copy, modify, and distribute this
22 * software is freely granted, provided that this notice
23 * is preserved.
24 *
25 * @(#)k_rem_pio2.c 1.3 95/01/18
26 * @(#)e_rem_pio2.c 1.4 95/01/18
27 * @(#)k_sin.c 1.3 95/01/18
28 * @(#)k_cos.c 1.3 95/01/18
29 * @(#)k_tan.c 1.5 04/04/22
30 * @(#)s_sin.c 1.3 95/01/18
31 * @(#)s_cos.c 1.3 95/01/18
32 * @(#)s_tan.c 1.3 95/01/18
33 */
34
35 #include "jerry-libm-internal.h"
36
37 #define zero 0.00000000000000000000e+00 /* 0x00000000, 0x00000000 */
38 #define half 5.00000000000000000000e-01 /* 0x3FE00000, 0x00000000 */
39 #define one 1.00000000000000000000e+00 /* 0x3FF00000, 0x00000000 */
40 #define two24 1.67772160000000000000e+07 /* 0x41700000, 0x00000000 */
41 #define twon24 5.96046447753906250000e-08 /* 0x3E700000, 0x00000000 */
42
43 /* __kernel_rem_pio2(x,y,e0,nx,prec)
44 * double x[],y[]; int e0,nx,prec;
45 *
46 * __kernel_rem_pio2 return the last three digits of N with
47 * y = x - N*pi/2
48 * so that |y| < pi/2.
49 *
50 * The method is to compute the integer (mod 8) and fraction parts of
51 * (2/pi)*x without doing the full multiplication. In general we
52 * skip the part of the product that are known to be a huge integer (
53 * more accurately, = 0 mod 8 ). Thus the number of operations are
54 * independent of the exponent of the input.
55 *
56 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
57 *
58 * Input parameters:
59 * x[] The input value (must be positive) is broken into nx
60 * pieces of 24-bit integers in double precision format.
61 * x[i] will be the i-th 24 bit of x. The scaled exponent
62 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
63 * match x's up to 24 bits.
64 *
65 * Example of breaking a double positive z into x[0]+x[1]+x[2]:
66 * e0 = ilogb(z)-23
67 * z = scalbn(z,-e0)
68 * for i = 0,1,2
69 * x[i] = floor(z)
70 * z = (z-x[i])*2**24
71 *
72 * y[] ouput result in an array of double precision numbers.
73 * The dimension of y[] is:
74 * 24-bit precision 1
75 * 53-bit precision 2
76 * 64-bit precision 2
77 * 113-bit precision 3
78 * The actual value is the sum of them. Thus for 113-bit
79 * precison, one may have to do something like:
80 *
81 * long double t,w,r_head, r_tail;
82 * t = (long double)y[2] + (long double)y[1];
83 * w = (long double)y[0];
84 * r_head = t+w;
85 * r_tail = w - (r_head - t);
86 *
87 * e0 The exponent of x[0]
88 *
89 * nx dimension of x[]
90 *
91 * prec an integer indicating the precision:
92 * 0 24 bits (single)
93 * 1 53 bits (double)
94 * 2 64 bits (extended)
95 * 3 113 bits (quad)
96 *
97 * External function:
98 * double scalbn(), floor();
99 *
100 * Here is the description of some local variables:
101 *
102 * ipio2[] integer array, contains the (24*i)-th to (24*i+23)-th
103 * bit of 2/pi after binary point. The corresponding
104 * floating value is
105 *
106 * ipio2[i] * 2^(-24(i+1)).
107 *
108 * jk jk+1 is the initial number of terms of ipio2[] needed
109 * in the computation. The recommended value is 2,3,4,
110 * 6 for single, double, extended,and quad.
111 *
112 * jz local integer variable indicating the number of
113 * terms of ipio2[] used.
114 *
115 * jx nx - 1
116 *
117 * jv index for pointing to the suitable ipio2[] for the
118 * computation. In general, we want
119 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
120 * is an integer. Thus
121 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv
122 * Hence jv = max(0,(e0-3)/24).
123 *
124 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
125 *
126 * q[] double array with integral value, representing the
127 * 24-bits chunk of the product of x and 2/pi.
128 *
129 * q0 the corresponding exponent of q[0]. Note that the
130 * exponent for q[i] would be q0-24*i.
131 *
132 * PIo2[] double precision array, obtained by cutting pi/2
133 * into 24 bits chunks.
134 *
135 * f[] ipio2[] in floating point
136 *
137 * iq[] integer array by breaking up q[] in 24-bits chunk.
138 *
139 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
140 *
141 * ih integer. If >0 it indicates q[] is >= 0.5, hence
142 * it also indicates the *sign* of the result.
143 */
144
145 /*
146 * Constants:
147 * The hexadecimal values are the intended ones for the following
148 * constants. The decimal values may be used, provided that the
149 * compiler will convert from decimal to binary accurately enough
150 * to produce the hexadecimal values shown.
151 */
152
153 /* initial value for jk */
154 static const int init_jk[] =
155 {
156 2, 3, 4, 6
157 };
158
159 static const double PIo2[] =
160 {
161 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
162 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
163 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
164 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
165 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
166 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
167 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
168 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
169 };
170
171 /*
172 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
173 */
174 static const int ipio2[] =
175 {
176 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
177 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
178 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
179 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
180 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
181 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
182 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
183 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
184 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
185 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
186 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
187 };
188
189 static int
__kernel_rem_pio2(double * x,double * y,int e0,int nx,int prec)190 __kernel_rem_pio2 (double *x, double *y, int e0, int nx, int prec)
191 {
192 int jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
193 double z, fw, f[20], fq[20], q[20];
194
195 /* initialize jk */
196 jk = init_jk[prec];
197 jp = jk;
198
199 /* determine jx, jv, q0, note that 3 > q0 */
200 jx = nx - 1;
201 jv = (e0 - 3) / 24;
202 if (jv < 0)
203 {
204 jv = 0;
205 }
206 q0 = e0 - 24 * (jv + 1);
207
208 /* set up f[0] to f[jx + jk] where f[jx + jk] = ipio2[jv + jk] */
209 j = jv - jx;
210 m = jx + jk;
211 for (i = 0; i <= m; i++, j++)
212 {
213 f[i] = (j < 0) ? zero : (double) ipio2[j];
214 }
215
216 /* compute q[0], q[1], ... q[jk] */
217 for (i = 0; i <= jk; i++)
218 {
219 for (j = 0, fw = 0.0; j <= jx; j++)
220 {
221 fw += x[j] * f[jx + i - j];
222 }
223 q[i] = fw;
224 }
225
226 jz = jk;
227 recompute:
228 /* distill q[] into iq[] reversingly */
229 for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--)
230 {
231 fw = (double) ((int) (twon24 * z));
232 iq[i] = (int) (z - two24 * fw);
233 z = q[j - 1] + fw;
234 }
235
236 /* compute n */
237 z = scalbn (z, q0); /* actual value of z */
238 z -= 8.0 * floor (z * 0.125); /* trim off integer >= 8 */
239 n = (int) z;
240 z -= (double) n;
241 ih = 0;
242 if (q0 > 0) /* need iq[jz - 1] to determine n */
243 {
244 i = (iq[jz - 1] >> (24 - q0));
245 n += i;
246 iq[jz - 1] -= i << (24 - q0);
247 ih = iq[jz - 1] >> (23 - q0);
248 }
249 else if (q0 == 0)
250 {
251 ih = iq[jz - 1] >> 23;
252 }
253 else if (z >= 0.5)
254 {
255 ih = 2;
256 }
257
258 if (ih > 0) /* q > 0.5 */
259 {
260 n += 1;
261 carry = 0;
262 for (i = 0; i < jz; i++) /* compute 1 - q */
263 {
264 j = iq[i];
265 if (carry == 0)
266 {
267 if (j != 0)
268 {
269 carry = 1;
270 iq[i] = 0x1000000 - j;
271 }
272 }
273 else
274 {
275 iq[i] = 0xffffff - j;
276 }
277 }
278 if (q0 > 0) /* rare case: chance is 1 in 12 */
279 {
280 switch (q0)
281 {
282 case 1:
283 {
284 iq[jz - 1] &= 0x7fffff;
285 break;
286 }
287 case 2:
288 {
289 iq[jz - 1] &= 0x3fffff;
290 break;
291 }
292 }
293 }
294 if (ih == 2)
295 {
296 z = one - z;
297 if (carry != 0)
298 {
299 z -= scalbn (one, q0);
300 }
301 }
302 }
303
304 /* check if recomputation is needed */
305 if (z == zero)
306 {
307 j = 0;
308 for (i = jz - 1; i >= jk; i--)
309 {
310 j |= iq[i];
311 }
312 if (j == 0) /* need recomputation */
313 {
314 for (k = 1; iq[jk - k] == 0; k++) /* k = no. of terms needed */
315 {
316 }
317
318 for (i = jz + 1; i <= jz + k; i++) /* add q[jz + 1] to q[jz + k] */
319 {
320 f[jx + i] = (double) ipio2[jv + i];
321 for (j = 0, fw = 0.0; j <= jx; j++)
322 {
323 fw += x[j] * f[jx + i - j];
324 }
325 q[i] = fw;
326 }
327 jz += k;
328 goto recompute;
329 }
330 }
331
332 /* chop off zero terms */
333 if (z == 0.0)
334 {
335 jz -= 1;
336 q0 -= 24;
337 while (iq[jz] == 0)
338 {
339 jz--;
340 q0 -= 24;
341 }
342 }
343 else
344 { /* break z into 24-bit if necessary */
345 z = scalbn (z, -q0);
346 if (z >= two24)
347 {
348 fw = (double) ((int) (twon24 * z));
349 iq[jz] = (int) (z - two24 * fw);
350 jz += 1;
351 q0 += 24;
352 iq[jz] = (int) fw;
353 }
354 else
355 {
356 iq[jz] = (int) z;
357 }
358 }
359
360 /* convert integer "bit" chunk to floating-point value */
361 fw = scalbn (one, q0);
362 for (i = jz; i >= 0; i--)
363 {
364 q[i] = fw * (double) iq[i];
365 fw *= twon24;
366 }
367
368 /* compute PIo2[0, ..., jp] * q[jz, ..., 0] */
369 for (i = jz; i >= 0; i--)
370 {
371 for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
372 {
373 fw += PIo2[k] * q[i + k];
374 }
375 fq[jz - i] = fw;
376 }
377
378 /* compress fq[] into y[] */
379 switch (prec)
380 {
381 case 0:
382 {
383 fw = 0.0;
384 for (i = jz; i >= 0; i--)
385 {
386 fw += fq[i];
387 }
388 y[0] = (ih == 0) ? fw : -fw;
389 break;
390 }
391 case 1:
392 case 2:
393 {
394 fw = 0.0;
395 for (i = jz; i >= 0; i--)
396 {
397 fw += fq[i];
398 }
399 y[0] = (ih == 0) ? fw : -fw;
400 fw = fq[0] - fw;
401 for (i = 1; i <= jz; i++)
402 {
403 fw += fq[i];
404 }
405 y[1] = (ih == 0) ? fw : -fw;
406 break;
407 }
408 case 3: /* painful */
409 {
410 for (i = jz; i > 0; i--)
411 {
412 fw = fq[i - 1] + fq[i];
413 fq[i] += fq[i - 1] - fw;
414 fq[i - 1] = fw;
415 }
416 for (i = jz; i > 1; i--)
417 {
418 fw = fq[i - 1] + fq[i];
419 fq[i] += fq[i - 1] - fw;
420 fq[i - 1] = fw;
421 }
422 for (fw = 0.0, i = jz; i >= 2; i--)
423 {
424 fw += fq[i];
425 }
426 if (ih == 0)
427 {
428 y[0] = fq[0];
429 y[1] = fq[1];
430 y[2] = fw;
431 }
432 else
433 {
434 y[0] = -fq[0];
435 y[1] = -fq[1];
436 y[2] = -fw;
437 }
438 }
439 }
440 return n & 7;
441 } /* __kernel_rem_pio2 */
442
443 /* __ieee754_rem_pio2(x,y)
444 * return the remainder of x rem pi/2 in y[0]+y[1]
445 * use __kernel_rem_pio2()
446 */
447
448 static const int npio2_hw[] =
449 {
450 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
451 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
452 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
453 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
454 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
455 0x404858EB, 0x404921FB,
456 };
457
458 /*
459 * invpio2: 53 bits of 2/pi
460 * pio2_1: first 33 bit of pi/2
461 * pio2_1t: pi/2 - pio2_1
462 * pio2_2: second 33 bit of pi/2
463 * pio2_2t: pi/2 - (pio2_1 + pio2_2)
464 * pio2_3: third 33 bit of pi/2
465 * pio2_3t: pi/2 - (pio2_1 + pio2_2 + pio2_3)
466 */
467 #define invpio2 6.36619772367581382433e-01 /* 0x3FE45F30, 0x6DC9C883 */
468 #define pio2_1 1.57079632673412561417e+00 /* 0x3FF921FB, 0x54400000 */
469 #define pio2_1t 6.07710050650619224932e-11 /* 0x3DD0B461, 0x1A626331 */
470 #define pio2_2 6.07710050630396597660e-11 /* 0x3DD0B461, 0x1A600000 */
471 #define pio2_2t 2.02226624879595063154e-21 /* 0x3BA3198A, 0x2E037073 */
472 #define pio2_3 2.02226624871116645580e-21 /* 0x3BA3198A, 0x2E000000 */
473 #define pio2_3t 8.47842766036889956997e-32 /* 0x397B839A, 0x252049C1 */
474
475 static int
__ieee754_rem_pio2(double x,double * y)476 __ieee754_rem_pio2 (double x, double *y)
477 {
478 double_accessor z;
479 double w, t, r, fn;
480 double tx[3];
481 int e0, i, j, nx, n, ix, hx;
482
483 hx = __HI (x); /* high word of x */
484 ix = hx & 0x7fffffff;
485 if (ix <= 0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
486 {
487 y[0] = x;
488 y[1] = 0;
489 return 0;
490 }
491 if (ix < 0x4002d97c) /* |x| < 3pi/4, special case with n = +-1 */
492 {
493 if (hx > 0)
494 {
495 z.dbl = x - pio2_1;
496 if (ix != 0x3ff921fb) /* 33 + 53 bit pi is good enough */
497 {
498 y[0] = z.dbl - pio2_1t;
499 y[1] = (z.dbl - y[0]) - pio2_1t;
500 }
501 else /* near pi/2, use 33 + 33 + 53 bit pi */
502 {
503 z.dbl -= pio2_2;
504 y[0] = z.dbl - pio2_2t;
505 y[1] = (z.dbl - y[0]) - pio2_2t;
506 }
507 return 1;
508 }
509 else /* negative x */
510 {
511 z.dbl = x + pio2_1;
512 if (ix != 0x3ff921fb) /* 33 + 53 bit pi is good enough */
513 {
514 y[0] = z.dbl + pio2_1t;
515 y[1] = (z.dbl - y[0]) + pio2_1t;
516 }
517 else /* near pi/2, use 33 + 33 + 53 bit pi */
518 {
519 z.dbl += pio2_2;
520 y[0] = z.dbl + pio2_2t;
521 y[1] = (z.dbl - y[0]) + pio2_2t;
522 }
523 return -1;
524 }
525 }
526 if (ix <= 0x413921fb) /* |x| ~<= 2^19 * (pi/2), medium size */
527 {
528 t = fabs (x);
529 n = (int) (t * invpio2 + half);
530 fn = (double) n;
531 r = t - fn * pio2_1;
532 w = fn * pio2_1t; /* 1st round good to 85 bit */
533 if (n < 32 && ix != npio2_hw[n - 1])
534 {
535 y[0] = r - w; /* quick check no cancellation */
536 }
537 else
538 {
539 j = ix >> 20;
540 y[0] = r - w;
541 i = j - (((__HI (y[0])) >> 20) & 0x7ff);
542 if (i > 16) /* 2nd iteration needed, good to 118 */
543 {
544 t = r;
545 w = fn * pio2_2;
546 r = t - w;
547 w = fn * pio2_2t - ((t - r) - w);
548 y[0] = r - w;
549 i = j - (((__HI (y[0])) >> 20) & 0x7ff);
550 if (i > 49) /* 3rd iteration need, 151 bits acc, will cover all possible cases */
551 {
552 t = r;
553 w = fn * pio2_3;
554 r = t - w;
555 w = fn * pio2_3t - ((t - r) - w);
556 y[0] = r - w;
557 }
558 }
559 }
560 y[1] = (r - y[0]) - w;
561 if (hx < 0)
562 {
563 y[0] = -y[0];
564 y[1] = -y[1];
565 return -n;
566 }
567 else
568 {
569 return n;
570 }
571 }
572 /*
573 * all other (large) arguments
574 */
575 if (ix >= 0x7ff00000) /* x is inf or NaN */
576 {
577 y[0] = y[1] = x - x;
578 return 0;
579 }
580 /* set z = scalbn(|x|, ilogb(x) - 23) */
581 z.as_int.lo = __LO (x);
582 e0 = (ix >> 20) - 1046; /* e0 = ilogb(z) - 23; */
583 z.as_int.hi = ix - (e0 << 20);
584 for (i = 0; i < 2; i++)
585 {
586 tx[i] = (double) ((int) (z.dbl));
587 z.dbl = (z.dbl - tx[i]) * two24;
588 }
589 tx[2] = z.dbl;
590 nx = 3;
591 while (tx[nx - 1] == zero) /* skip zero term */
592 {
593 nx--;
594 }
595 n = __kernel_rem_pio2 (tx, y, e0, nx, 2);
596 if (hx < 0)
597 {
598 y[0] = -y[0];
599 y[1] = -y[1];
600 return -n;
601 }
602 return n;
603 } /* __ieee754_rem_pio2 */
604
605 /* __kernel_sin( x, y, iy)
606 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
607 * Input x is assumed to be bounded by ~pi/4 in magnitude.
608 * Input y is the tail of x.
609 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
610 *
611 * Algorithm
612 * 1. Since sin(-x) = -sin(x), we need only to consider positive x.
613 * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
614 * 3. sin(x) is approximated by a polynomial of degree 13 on
615 * [0,pi/4]
616 * 3 13
617 * sin(x) ~ x + S1*x + ... + S6*x
618 * where
619 *
620 * |sin(x) 2 4 6 8 10 12 | -58
621 * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
622 * | x |
623 *
624 * 4. sin(x+y) = sin(x) + sin'(x')*y
625 * ~ sin(x) + (1-x*x/2)*y
626 * For better accuracy, let
627 * 3 2 2 2 2
628 * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
629 * then 3 2
630 * sin(x) = x + (S1*x + (x *(r-y/2)+y))
631 */
632
633 #define S1 -1.66666666666666324348e-01 /* 0xBFC55555, 0x55555549 */
634 #define S2 8.33333333332248946124e-03 /* 0x3F811111, 0x1110F8A6 */
635 #define S3 -1.98412698298579493134e-04 /* 0xBF2A01A0, 0x19C161D5 */
636 #define S4 2.75573137070700676789e-06 /* 0x3EC71DE3, 0x57B1FE7D */
637 #define S5 -2.50507602534068634195e-08 /* 0xBE5AE5E6, 0x8A2B9CEB */
638 #define S6 1.58969099521155010221e-10 /* 0x3DE5D93A, 0x5ACFD57C */
639
640 static double
__kernel_sin(double x,double y,int iy)641 __kernel_sin (double x, double y, int iy)
642 {
643 double z, r, v;
644 int ix;
645
646 ix = __HI (x) & 0x7fffffff; /* high word of x */
647 if (ix < 0x3e400000) /* |x| < 2**-27 */
648 {
649 if ((int) x == 0)
650 {
651 return x; /* generate inexact */
652 }
653 }
654 z = x * x;
655 v = z * x;
656 r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6)));
657 if (iy == 0)
658 {
659 return x + v * (S1 + z * r);
660 }
661 else
662 {
663 return x - ((z * (half * y - v * r) - y) - v * S1);
664 }
665 } /* __kernel_sin */
666
667 /*
668 * __kernel_cos( x, y )
669 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
670 * Input x is assumed to be bounded by ~pi/4 in magnitude.
671 * Input y is the tail of x.
672 *
673 * Algorithm
674 * 1. Since cos(-x) = cos(x), we need only to consider positive x.
675 * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
676 * 3. cos(x) is approximated by a polynomial of degree 14 on
677 * [0,pi/4]
678 * 4 14
679 * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
680 * where the remez error is
681 *
682 * | 2 4 6 8 10 12 14 | -58
683 * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
684 * | |
685 *
686 * 4 6 8 10 12 14
687 * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
688 * cos(x) = 1 - x*x/2 + r
689 * since cos(x+y) ~ cos(x) - sin(x)*y
690 * ~ cos(x) - x*y,
691 * a correction term is necessary in cos(x) and hence
692 * cos(x+y) = 1 - (x*x/2 - (r - x*y))
693 * For better accuracy when x > 0.3, let qx = |x|/4 with
694 * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
695 * Then
696 * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
697 * Note that 1-qx and (x*x/2-qx) is EXACT here, and the
698 * magnitude of the latter is at least a quarter of x*x/2,
699 * thus, reducing the rounding error in the subtraction.
700 */
701
702 #define C1 4.16666666666666019037e-02 /* 0x3FA55555, 0x5555554C */
703 #define C2 -1.38888888888741095749e-03 /* 0xBF56C16C, 0x16C15177 */
704 #define C3 2.48015872894767294178e-05 /* 0x3EFA01A0, 0x19CB1590 */
705 #define C4 -2.75573143513906633035e-07 /* 0xBE927E4F, 0x809C52AD */
706 #define C5 2.08757232129817482790e-09 /* 0x3E21EE9E, 0xBDB4B1C4 */
707 #define C6 -1.13596475577881948265e-11 /* 0xBDA8FAE9, 0xBE8838D4 */
708
709 static double
__kernel_cos(double x,double y)710 __kernel_cos (double x, double y)
711 {
712 double a, hz, z, r;
713 int ix;
714
715 ix = __HI (x) & 0x7fffffff; /* ix = |x|'s high word */
716 if (ix < 0x3e400000) /* if x < 2**27 */
717 {
718 if (((int) x) == 0)
719 {
720 return one; /* generate inexact */
721 }
722 }
723 z = x * x;
724 r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6)))));
725 if (ix < 0x3FD33333) /* if |x| < 0.3 */
726 {
727 return one - (0.5 * z - (z * r - x * y));
728 }
729 else
730 {
731 double_accessor qx;
732 if (ix > 0x3fe90000) /* x > 0.78125 */
733 {
734 qx.dbl = 0.28125;
735 }
736 else
737 {
738 qx.as_int.hi = ix - 0x00200000; /* x / 4 */
739 qx.as_int.lo = 0;
740 }
741 hz = 0.5 * z - qx.dbl;
742 a = one - qx.dbl;
743 return a - (hz - (z * r - x * y));
744 }
745 } /* __kernel_cos */
746
747 /* __kernel_tan( x, y, k )
748 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
749 * Input x is assumed to be bounded by ~pi/4 in magnitude.
750 * Input y is the tail of x.
751 * Input k indicates whether tan (if k = 1) or -1/tan (if k = -1) is returned.
752 *
753 * Algorithm
754 * 1. Since tan(-x) = -tan(x), we need only to consider positive x.
755 * 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
756 * 3. tan(x) is approximated by a odd polynomial of degree 27 on
757 * [0,0.67434]
758 * 3 27
759 * tan(x) ~ x + T1*x + ... + T13*x
760 * where
761 *
762 * |tan(x) 2 4 26 | -59.2
763 * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2
764 * | x |
765 *
766 * Note: tan(x+y) = tan(x) + tan'(x)*y
767 * ~ tan(x) + (1+x*x)*y
768 * Therefore, for better accuracy in computing tan(x+y), let
769 * 3 2 2 2 2
770 * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
771 * then
772 * 3 2
773 * tan(x+y) = x + (T1*x + (x *(r+y)+y))
774 *
775 * 4. For x in [0.67434,pi/4], let y = pi/4 - x, then
776 * tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
777 * = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
778 */
779
780 #define T0 3.33333333333334091986e-01 /* 3FD55555, 55555563 */
781 #define T1 1.33333333333201242699e-01 /* 3FC11111, 1110FE7A */
782 #define T2 5.39682539762260521377e-02 /* 3FABA1BA, 1BB341FE */
783 #define T3 2.18694882948595424599e-02 /* 3F9664F4, 8406D637 */
784 #define T4 8.86323982359930005737e-03 /* 3F8226E3, E96E8493 */
785 #define T5 3.59207910759131235356e-03 /* 3F6D6D22, C9560328 */
786 #define T6 1.45620945432529025516e-03 /* 3F57DBC8, FEE08315 */
787 #define T7 5.88041240820264096874e-04 /* 3F4344D8, F2F26501 */
788 #define T8 2.46463134818469906812e-04 /* 3F3026F7, 1A8D1068 */
789 #define T9 7.81794442939557092300e-05 /* 3F147E88, A03792A6 */
790 #define T10 7.14072491382608190305e-05 /* 3F12B80F, 32F0A7E9 */
791 #define T11 -1.85586374855275456654e-05 /* BEF375CB, DB605373 */
792 #define T12 2.59073051863633712884e-05 /* 3EFB2A70, 74BF7AD4 */
793 #define pio4 7.85398163397448278999e-01 /* 3FE921FB, 54442D18 */
794 #define pio4lo 3.06161699786838301793e-17 /* 3C81A626, 33145C07 */
795
796 static double
__kernel_tan(double x,double y,int iy)797 __kernel_tan (double x, double y, int iy)
798 {
799 double_accessor z;
800 double r, v, w, s;
801 int ix, hx;
802
803 hx = __HI (x); /* high word of x */
804 ix = hx & 0x7fffffff; /* high word of |x| */
805 if (ix < 0x3e300000) /* x < 2**-28 */
806 {
807 if ((int) x == 0) /* generate inexact */
808 {
809 if (((ix | __LO (x)) | (iy + 1)) == 0)
810 {
811 return one / fabs (x);
812 }
813 else
814 {
815 if (iy == 1)
816 {
817 return x;
818 }
819 else /* compute -1 / (x + y) carefully */
820 {
821 double a;
822 double_accessor t;
823
824 z.dbl = w = x + y;
825 z.as_int.lo = 0;
826 v = y - (z.dbl - x);
827 t.dbl = a = -one / w;
828 t.as_int.lo = 0;
829 s = one + t.dbl * z.dbl;
830 return t.dbl + a * (s + t.dbl * v);
831 }
832 }
833 }
834 }
835 if (ix >= 0x3FE59428) /* |x| >= 0.6744 */
836 {
837 if (hx < 0)
838 {
839 x = -x;
840 y = -y;
841 }
842 z.dbl = pio4 - x;
843 w = pio4lo - y;
844 x = z.dbl + w;
845 y = 0.0;
846 }
847 z.dbl = x * x;
848 w = z.dbl * z.dbl;
849 /*
850 * Break x^5 * (T[1] + x^2 * T[2] + ...) into
851 * x^5 (T[1] + x^4 * T[3] + ... + x^20 * T[11]) +
852 * x^5 (x^2 * (T[2] + x^4 * T[4] + ... + x^22 * [T12]))
853 */
854 r = T1 + w * (T3 + w * (T5 + w * (T7 + w * (T9 + w * T11))));
855 v = z.dbl * (T2 + w * (T4 + w * (T6 + w * (T8 + w * (T10 + w * T12)))));
856 s = z.dbl * x;
857 r = y + z.dbl * (s * (r + v) + y);
858 r += T0 * s;
859 w = x + r;
860 if (ix >= 0x3FE59428)
861 {
862 v = (double) iy;
863 return (double) (1 - ((hx >> 30) & 2)) * (v - 2.0 * (x - (w * w / (w + v) - r)));
864 }
865 if (iy == 1)
866 {
867 return w;
868 }
869 else
870 {
871 /*
872 * if allow error up to 2 ulp, simply return
873 * -1.0 / (x + r) here
874 */
875 /* compute -1.0 / (x + r) accurately */
876 double a;
877 double_accessor t;
878
879 z.dbl = w;
880 z.as_int.lo = 0;
881 v = r - (z.dbl - x); /* z + v = r + x */
882 t.dbl = a = -1.0 / w; /* a = -1.0 / w */
883 t.as_int.lo = 0;
884 s = 1.0 + t.dbl * z.dbl;
885 return t.dbl + a * (s + t.dbl * v);
886 }
887 } /* __kernel_tan */
888
889 /* Method:
890 * Let S,C and T denote the sin, cos and tan respectively on
891 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
892 * in [-pi/4 , +pi/4], and let n = k mod 4.
893 * We have
894 *
895 * n sin(x) cos(x) tan(x)
896 * ----------------------------------------------------------
897 * 0 S C T
898 * 1 C -S -1/T
899 * 2 -S -C T
900 * 3 -C S -1/T
901 * ----------------------------------------------------------
902 *
903 * Special cases:
904 * Let trig be any of sin, cos, or tan.
905 * trig(+-INF) is NaN, with signals;
906 * trig(NaN) is that NaN;
907 *
908 * Accuracy:
909 * TRIG(x) returns trig(x) nearly rounded
910 */
911
912 /* sin(x)
913 * Return sine function of x.
914 *
915 * kernel function:
916 * __kernel_sin ... sine function on [-pi/4,pi/4]
917 * __kernel_cos ... cose function on [-pi/4,pi/4]
918 * __ieee754_rem_pio2 ... argument reduction routine
919 */
920 double
sin(double x)921 sin (double x)
922 {
923 double y[2], z = 0.0;
924 int n, ix;
925
926 /* High word of x. */
927 ix = __HI (x);
928
929 /* |x| ~< pi/4 */
930 ix &= 0x7fffffff;
931 if (ix <= 0x3fe921fb)
932 {
933 return __kernel_sin (x, z, 0);
934 }
935
936 /* sin(Inf or NaN) is NaN */
937 else if (ix >= 0x7ff00000)
938 {
939 return x - x;
940 }
941
942 /* argument reduction needed */
943 else
944 {
945 n = __ieee754_rem_pio2 (x, y);
946 switch (n & 3)
947 {
948 case 0:
949 {
950 return __kernel_sin (y[0], y[1], 1);
951 }
952 case 1:
953 {
954 return __kernel_cos (y[0], y[1]);
955 }
956 case 2:
957 {
958 return -__kernel_sin (y[0], y[1], 1);
959 }
960 default:
961 {
962 return -__kernel_cos (y[0], y[1]);
963 }
964 }
965 }
966 } /* sin */
967
968 /* cos(x)
969 * Return cosine function of x.
970 *
971 * kernel function:
972 * __kernel_sin ... sine function on [-pi/4,pi/4]
973 * __kernel_cos ... cosine function on [-pi/4,pi/4]
974 * __ieee754_rem_pio2 ... argument reduction routine
975 */
976
977 double
cos(double x)978 cos (double x)
979 {
980 double y[2], z = 0.0;
981 int n, ix;
982
983 /* High word of x. */
984 ix = __HI (x);
985
986 /* |x| ~< pi/4 */
987 ix &= 0x7fffffff;
988 if (ix <= 0x3fe921fb)
989 {
990 return __kernel_cos (x, z);
991 }
992
993 /* cos(Inf or NaN) is NaN */
994 else if (ix >= 0x7ff00000)
995 {
996 return x - x;
997 }
998
999 /* argument reduction needed */
1000 else
1001 {
1002 n = __ieee754_rem_pio2 (x, y);
1003 switch (n & 3)
1004 {
1005 case 0:
1006 {
1007 return __kernel_cos (y[0], y[1]);
1008 }
1009 case 1:
1010 {
1011 return -__kernel_sin (y[0], y[1], 1);
1012 }
1013 case 2:
1014 {
1015 return -__kernel_cos (y[0], y[1]);
1016 }
1017 default:
1018 {
1019 return __kernel_sin (y[0], y[1], 1);
1020 }
1021 }
1022 }
1023 } /* cos */
1024
1025 /* tan(x)
1026 * Return tangent function of x.
1027 *
1028 * kernel function:
1029 * __kernel_tan ... tangent function on [-pi/4,pi/4]
1030 * __ieee754_rem_pio2 ... argument reduction routine
1031 */
1032
1033 double
tan(double x)1034 tan (double x)
1035 {
1036 double y[2], z = 0.0;
1037 int n, ix;
1038
1039 /* High word of x. */
1040 ix = __HI (x);
1041
1042 /* |x| ~< pi/4 */
1043 ix &= 0x7fffffff;
1044 if (ix <= 0x3fe921fb)
1045 {
1046 return __kernel_tan (x, z, 1);
1047 }
1048
1049 /* tan(Inf or NaN) is NaN */
1050 else if (ix >= 0x7ff00000)
1051 {
1052 return x - x; /* NaN */
1053 }
1054
1055 /* argument reduction needed */
1056 else
1057 {
1058 n = __ieee754_rem_pio2 (x, y);
1059 return __kernel_tan (y[0], y[1], 1 - ((n & 1) << 1)); /* 1 -- n even, -1 -- n odd */
1060 }
1061 } /* tan */
1062
1063 #undef zero
1064 #undef half
1065 #undef one
1066 #undef two24
1067 #undef twon24
1068 #undef invpio2
1069 #undef pio2_1
1070 #undef pio2_1t
1071 #undef pio2_2
1072 #undef pio2_2t
1073 #undef pio2_3
1074 #undef pio2_3t
1075 #undef S1
1076 #undef S2
1077 #undef S3
1078 #undef S4
1079 #undef S5
1080 #undef S6
1081 #undef C1
1082 #undef C2
1083 #undef C3
1084 #undef C4
1085 #undef C5
1086 #undef C6
1087 #undef T0
1088 #undef T1
1089 #undef T2
1090 #undef T3
1091 #undef T4
1092 #undef T5
1093 #undef T6
1094 #undef T7
1095 #undef T8
1096 #undef T9
1097 #undef T10
1098 #undef T11
1099 #undef T12
1100 #undef pio4
1101 #undef pio4lo
1102