• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright JS Foundation and other contributors, http://js.foundation
2  *
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  *
15  * This file is based on work under the following copyright and permission
16  * notice:
17  *
18  *     Copyright (C) 1993, 2004 by Sun Microsystems, Inc. All rights reserved.
19  *
20  *     Developed at SunSoft, a Sun Microsystems, Inc. business.
21  *     Permission to use, copy, modify, and distribute this
22  *     software is freely granted, provided that this notice
23  *     is preserved.
24  *
25  *     @(#)k_rem_pio2.c 1.3 95/01/18
26  *     @(#)e_rem_pio2.c 1.4 95/01/18
27  *     @(#)k_sin.c 1.3 95/01/18
28  *     @(#)k_cos.c 1.3 95/01/18
29  *     @(#)k_tan.c 1.5 04/04/22
30  *     @(#)s_sin.c 1.3 95/01/18
31  *     @(#)s_cos.c 1.3 95/01/18
32  *     @(#)s_tan.c 1.3 95/01/18
33  */
34 
35 #include "jerry-libm-internal.h"
36 
37 #define zero    0.00000000000000000000e+00 /* 0x00000000, 0x00000000 */
38 #define half    5.00000000000000000000e-01 /* 0x3FE00000, 0x00000000 */
39 #define one     1.00000000000000000000e+00 /* 0x3FF00000, 0x00000000 */
40 #define two24   1.67772160000000000000e+07 /* 0x41700000, 0x00000000 */
41 #define twon24  5.96046447753906250000e-08 /* 0x3E700000, 0x00000000 */
42 
43 /* __kernel_rem_pio2(x,y,e0,nx,prec)
44  * double x[],y[]; int e0,nx,prec;
45  *
46  * __kernel_rem_pio2 return the last three digits of N with
47  *              y = x - N*pi/2
48  * so that |y| < pi/2.
49  *
50  * The method is to compute the integer (mod 8) and fraction parts of
51  * (2/pi)*x without doing the full multiplication. In general we
52  * skip the part of the product that are known to be a huge integer (
53  * more accurately, = 0 mod 8 ). Thus the number of operations are
54  * independent of the exponent of the input.
55  *
56  * (2/pi) is represented by an array of 24-bit integers in ipio2[].
57  *
58  * Input parameters:
59  *      x[]     The input value (must be positive) is broken into nx
60  *              pieces of 24-bit integers in double precision format.
61  *              x[i] will be the i-th 24 bit of x. The scaled exponent
62  *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
63  *              match x's up to 24 bits.
64  *
65  *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
66  *                      e0 = ilogb(z)-23
67  *                      z  = scalbn(z,-e0)
68  *              for i = 0,1,2
69  *                      x[i] = floor(z)
70  *                      z    = (z-x[i])*2**24
71  *
72  *      y[]     ouput result in an array of double precision numbers.
73  *              The dimension of y[] is:
74  *                      24-bit  precision       1
75  *                      53-bit  precision       2
76  *                      64-bit  precision       2
77  *                      113-bit precision       3
78  *              The actual value is the sum of them. Thus for 113-bit
79  *              precison, one may have to do something like:
80  *
81  *              long double t,w,r_head, r_tail;
82  *              t = (long double)y[2] + (long double)y[1];
83  *              w = (long double)y[0];
84  *              r_head = t+w;
85  *              r_tail = w - (r_head - t);
86  *
87  *      e0      The exponent of x[0]
88  *
89  *      nx      dimension of x[]
90  *
91  *      prec    an integer indicating the precision:
92  *                      0       24  bits (single)
93  *                      1       53  bits (double)
94  *                      2       64  bits (extended)
95  *                      3       113 bits (quad)
96  *
97  * External function:
98  *      double scalbn(), floor();
99  *
100  * Here is the description of some local variables:
101  *
102  *      ipio2[] integer array, contains the (24*i)-th to (24*i+23)-th
103  *              bit of 2/pi after binary point. The corresponding
104  *              floating value is
105  *
106  *                      ipio2[i] * 2^(-24(i+1)).
107  *
108  *      jk      jk+1 is the initial number of terms of ipio2[] needed
109  *              in the computation. The recommended value is 2,3,4,
110  *              6 for single, double, extended,and quad.
111  *
112  *      jz      local integer variable indicating the number of
113  *              terms of ipio2[] used.
114  *
115  *      jx      nx - 1
116  *
117  *      jv      index for pointing to the suitable ipio2[] for the
118  *              computation. In general, we want
119  *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
120  *              is an integer. Thus
121  *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
122  *              Hence jv = max(0,(e0-3)/24).
123  *
124  *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
125  *
126  *      q[]     double array with integral value, representing the
127  *              24-bits chunk of the product of x and 2/pi.
128  *
129  *      q0      the corresponding exponent of q[0]. Note that the
130  *              exponent for q[i] would be q0-24*i.
131  *
132  *      PIo2[]  double precision array, obtained by cutting pi/2
133  *              into 24 bits chunks.
134  *
135  *      f[]     ipio2[] in floating point
136  *
137  *      iq[]    integer array by breaking up q[] in 24-bits chunk.
138  *
139  *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
140  *
141  *      ih      integer. If >0 it indicates q[] is >= 0.5, hence
142  *              it also indicates the *sign* of the result.
143  */
144 
145 /*
146  * Constants:
147  * The hexadecimal values are the intended ones for the following
148  * constants. The decimal values may be used, provided that the
149  * compiler will convert from decimal to binary accurately enough
150  * to produce the hexadecimal values shown.
151  */
152 
153 /* initial value for jk */
154 static const int init_jk[] =
155 {
156   2, 3, 4, 6
157 };
158 
159 static const double PIo2[] =
160 {
161   1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
162   7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
163   5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
164   3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
165   1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
166   1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
167   2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
168   2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
169 };
170 
171 /*
172  * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
173  */
174 static const int ipio2[] =
175 {
176   0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
177   0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
178   0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
179   0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
180   0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
181   0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
182   0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
183   0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
184   0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
185   0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
186   0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
187 };
188 
189 static int
__kernel_rem_pio2(double * x,double * y,int e0,int nx,int prec)190 __kernel_rem_pio2 (double *x, double *y, int e0, int nx, int prec)
191 {
192   int jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
193   double z, fw, f[20], fq[20], q[20];
194 
195   /* initialize jk */
196   jk = init_jk[prec];
197   jp = jk;
198 
199   /* determine jx, jv, q0, note that 3 > q0 */
200   jx = nx - 1;
201   jv = (e0 - 3) / 24;
202   if (jv < 0)
203   {
204     jv = 0;
205   }
206   q0 = e0 - 24 * (jv + 1);
207 
208   /* set up f[0] to f[jx + jk] where f[jx + jk] = ipio2[jv + jk] */
209   j = jv - jx;
210   m = jx + jk;
211   for (i = 0; i <= m; i++, j++)
212   {
213     f[i] = (j < 0) ? zero : (double) ipio2[j];
214   }
215 
216   /* compute q[0], q[1], ... q[jk] */
217   for (i = 0; i <= jk; i++)
218   {
219     for (j = 0, fw = 0.0; j <= jx; j++)
220     {
221       fw += x[j] * f[jx + i - j];
222     }
223     q[i] = fw;
224   }
225 
226   jz = jk;
227 recompute:
228   /* distill q[] into iq[] reversingly */
229   for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--)
230   {
231     fw = (double) ((int) (twon24 * z));
232     iq[i] = (int) (z - two24 * fw);
233     z = q[j - 1] + fw;
234   }
235 
236   /* compute n */
237   z = scalbn (z, q0); /* actual value of z */
238   z -= 8.0 * floor (z * 0.125); /* trim off integer >= 8 */
239   n = (int) z;
240   z -= (double) n;
241   ih = 0;
242   if (q0 > 0) /* need iq[jz - 1] to determine n */
243   {
244     i = (iq[jz - 1] >> (24 - q0));
245     n += i;
246     iq[jz - 1] -= i << (24 - q0);
247     ih = iq[jz - 1] >> (23 - q0);
248   }
249   else if (q0 == 0)
250   {
251     ih = iq[jz - 1] >> 23;
252   }
253   else if (z >= 0.5)
254   {
255     ih = 2;
256   }
257 
258   if (ih > 0) /* q > 0.5 */
259   {
260     n += 1;
261     carry = 0;
262     for (i = 0; i < jz; i++) /* compute 1 - q */
263     {
264       j = iq[i];
265       if (carry == 0)
266       {
267         if (j != 0)
268         {
269           carry = 1;
270           iq[i] = 0x1000000 - j;
271         }
272       }
273       else
274       {
275         iq[i] = 0xffffff - j;
276       }
277     }
278     if (q0 > 0) /* rare case: chance is 1 in 12 */
279     {
280       switch (q0)
281       {
282         case 1:
283         {
284           iq[jz - 1] &= 0x7fffff;
285           break;
286         }
287         case 2:
288         {
289           iq[jz - 1] &= 0x3fffff;
290           break;
291         }
292       }
293     }
294     if (ih == 2)
295     {
296       z = one - z;
297       if (carry != 0)
298       {
299         z -= scalbn (one, q0);
300       }
301     }
302   }
303 
304   /* check if recomputation is needed */
305   if (z == zero)
306   {
307     j = 0;
308     for (i = jz - 1; i >= jk; i--)
309     {
310       j |= iq[i];
311     }
312     if (j == 0) /* need recomputation */
313     {
314       for (k = 1; iq[jk - k] == 0; k++) /* k = no. of terms needed */
315       {
316       }
317 
318       for (i = jz + 1; i <= jz + k; i++) /* add q[jz + 1] to q[jz + k] */
319       {
320         f[jx + i] = (double) ipio2[jv + i];
321         for (j = 0, fw = 0.0; j <= jx; j++)
322         {
323           fw += x[j] * f[jx + i - j];
324         }
325         q[i] = fw;
326       }
327       jz += k;
328       goto recompute;
329     }
330   }
331 
332   /* chop off zero terms */
333   if (z == 0.0)
334   {
335     jz -= 1;
336     q0 -= 24;
337     while (iq[jz] == 0)
338     {
339       jz--;
340       q0 -= 24;
341     }
342   }
343   else
344   { /* break z into 24-bit if necessary */
345     z = scalbn (z, -q0);
346     if (z >= two24)
347     {
348       fw = (double) ((int) (twon24 * z));
349       iq[jz] = (int) (z - two24 * fw);
350       jz += 1;
351       q0 += 24;
352       iq[jz] = (int) fw;
353     }
354     else
355     {
356       iq[jz] = (int) z;
357     }
358   }
359 
360   /* convert integer "bit" chunk to floating-point value */
361   fw = scalbn (one, q0);
362   for (i = jz; i >= 0; i--)
363   {
364     q[i] = fw * (double) iq[i];
365     fw *= twon24;
366   }
367 
368   /* compute PIo2[0, ..., jp] * q[jz, ..., 0] */
369   for (i = jz; i >= 0; i--)
370   {
371     for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
372     {
373       fw += PIo2[k] * q[i + k];
374     }
375     fq[jz - i] = fw;
376   }
377 
378   /* compress fq[] into y[] */
379   switch (prec)
380   {
381     case 0:
382     {
383       fw = 0.0;
384       for (i = jz; i >= 0; i--)
385       {
386         fw += fq[i];
387       }
388       y[0] = (ih == 0) ? fw : -fw;
389       break;
390     }
391     case 1:
392     case 2:
393     {
394       fw = 0.0;
395       for (i = jz; i >= 0; i--)
396       {
397         fw += fq[i];
398       }
399       y[0] = (ih == 0) ? fw : -fw;
400       fw = fq[0] - fw;
401       for (i = 1; i <= jz; i++)
402       {
403         fw += fq[i];
404       }
405       y[1] = (ih == 0) ? fw : -fw;
406       break;
407     }
408     case 3: /* painful */
409     {
410       for (i = jz; i > 0; i--)
411       {
412         fw = fq[i - 1] + fq[i];
413         fq[i] += fq[i - 1] - fw;
414         fq[i - 1] = fw;
415       }
416       for (i = jz; i > 1; i--)
417       {
418         fw = fq[i - 1] + fq[i];
419         fq[i] += fq[i - 1] - fw;
420         fq[i - 1] = fw;
421       }
422       for (fw = 0.0, i = jz; i >= 2; i--)
423       {
424         fw += fq[i];
425       }
426       if (ih == 0)
427       {
428         y[0] = fq[0];
429         y[1] = fq[1];
430         y[2] = fw;
431       }
432       else
433       {
434         y[0] = -fq[0];
435         y[1] = -fq[1];
436         y[2] = -fw;
437       }
438     }
439   }
440   return n & 7;
441 } /* __kernel_rem_pio2 */
442 
443 /* __ieee754_rem_pio2(x,y)
444  * return the remainder of x rem pi/2 in y[0]+y[1]
445  * use __kernel_rem_pio2()
446  */
447 
448 static const int npio2_hw[] =
449 {
450   0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
451   0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
452   0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
453   0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
454   0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
455   0x404858EB, 0x404921FB,
456 };
457 
458 /*
459  * invpio2:  53 bits of 2/pi
460  * pio2_1:   first  33 bit of pi/2
461  * pio2_1t:  pi/2 - pio2_1
462  * pio2_2:   second 33 bit of pi/2
463  * pio2_2t:  pi/2 - (pio2_1 + pio2_2)
464  * pio2_3:   third  33 bit of pi/2
465  * pio2_3t:  pi/2 - (pio2_1 + pio2_2 + pio2_3)
466  */
467 #define invpio2 6.36619772367581382433e-01 /* 0x3FE45F30, 0x6DC9C883 */
468 #define pio2_1  1.57079632673412561417e+00 /* 0x3FF921FB, 0x54400000 */
469 #define pio2_1t 6.07710050650619224932e-11 /* 0x3DD0B461, 0x1A626331 */
470 #define pio2_2  6.07710050630396597660e-11 /* 0x3DD0B461, 0x1A600000 */
471 #define pio2_2t 2.02226624879595063154e-21 /* 0x3BA3198A, 0x2E037073 */
472 #define pio2_3  2.02226624871116645580e-21 /* 0x3BA3198A, 0x2E000000 */
473 #define pio2_3t 8.47842766036889956997e-32 /* 0x397B839A, 0x252049C1 */
474 
475 static int
__ieee754_rem_pio2(double x,double * y)476 __ieee754_rem_pio2 (double x, double *y)
477 {
478   double_accessor z;
479   double w, t, r, fn;
480   double tx[3];
481   int e0, i, j, nx, n, ix, hx;
482 
483   hx = __HI (x); /* high word of x */
484   ix = hx & 0x7fffffff;
485   if (ix <= 0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
486   {
487     y[0] = x;
488     y[1] = 0;
489     return 0;
490   }
491   if (ix < 0x4002d97c) /* |x| < 3pi/4, special case with n = +-1 */
492   {
493     if (hx > 0)
494     {
495       z.dbl = x - pio2_1;
496       if (ix != 0x3ff921fb) /* 33 + 53 bit pi is good enough */
497       {
498         y[0] = z.dbl - pio2_1t;
499         y[1] = (z.dbl - y[0]) - pio2_1t;
500       }
501       else /* near pi/2, use 33 + 33 + 53 bit pi */
502       {
503         z.dbl -= pio2_2;
504         y[0] = z.dbl - pio2_2t;
505         y[1] = (z.dbl - y[0]) - pio2_2t;
506       }
507       return 1;
508     }
509     else /* negative x */
510     {
511       z.dbl = x + pio2_1;
512       if (ix != 0x3ff921fb) /* 33 + 53 bit pi is good enough */
513       {
514         y[0] = z.dbl + pio2_1t;
515         y[1] = (z.dbl - y[0]) + pio2_1t;
516       }
517       else /* near pi/2, use 33 + 33 + 53 bit pi */
518       {
519         z.dbl += pio2_2;
520         y[0] = z.dbl + pio2_2t;
521         y[1] = (z.dbl - y[0]) + pio2_2t;
522       }
523       return -1;
524     }
525   }
526   if (ix <= 0x413921fb) /* |x| ~<= 2^19 * (pi/2), medium size */
527   {
528     t = fabs (x);
529     n = (int) (t * invpio2 + half);
530     fn = (double) n;
531     r = t - fn * pio2_1;
532     w = fn * pio2_1t; /* 1st round good to 85 bit */
533     if (n < 32 && ix != npio2_hw[n - 1])
534     {
535       y[0] = r - w; /* quick check no cancellation */
536     }
537     else
538     {
539       j = ix >> 20;
540       y[0] = r - w;
541       i = j - (((__HI (y[0])) >> 20) & 0x7ff);
542       if (i > 16) /* 2nd iteration needed, good to 118 */
543       {
544         t = r;
545         w = fn * pio2_2;
546         r = t - w;
547         w = fn * pio2_2t - ((t - r) - w);
548         y[0] = r - w;
549         i = j - (((__HI (y[0])) >> 20) & 0x7ff);
550         if (i > 49) /* 3rd iteration need, 151 bits acc, will cover all possible cases */
551         {
552           t = r;
553           w = fn * pio2_3;
554           r = t - w;
555           w = fn * pio2_3t - ((t - r) - w);
556           y[0] = r - w;
557         }
558       }
559     }
560     y[1] = (r - y[0]) - w;
561     if (hx < 0)
562     {
563       y[0] = -y[0];
564       y[1] = -y[1];
565       return -n;
566     }
567     else
568     {
569       return n;
570     }
571   }
572   /*
573    * all other (large) arguments
574    */
575   if (ix >= 0x7ff00000) /* x is inf or NaN */
576   {
577     y[0] = y[1] = x - x;
578     return 0;
579   }
580   /* set z = scalbn(|x|, ilogb(x) - 23) */
581   z.as_int.lo = __LO (x);
582   e0 = (ix >> 20) - 1046; /* e0 = ilogb(z) - 23; */
583   z.as_int.hi = ix - (e0 << 20);
584   for (i = 0; i < 2; i++)
585   {
586     tx[i] = (double) ((int) (z.dbl));
587     z.dbl = (z.dbl - tx[i]) * two24;
588   }
589   tx[2] = z.dbl;
590   nx = 3;
591   while (tx[nx - 1] == zero) /* skip zero term */
592   {
593     nx--;
594   }
595   n = __kernel_rem_pio2 (tx, y, e0, nx, 2);
596   if (hx < 0)
597   {
598     y[0] = -y[0];
599     y[1] = -y[1];
600     return -n;
601   }
602   return n;
603 } /* __ieee754_rem_pio2 */
604 
605 /* __kernel_sin( x, y, iy)
606  * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
607  * Input x is assumed to be bounded by ~pi/4 in magnitude.
608  * Input y is the tail of x.
609  * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
610  *
611  * Algorithm
612  *      1. Since sin(-x) = -sin(x), we need only to consider positive x.
613  *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
614  *      3. sin(x) is approximated by a polynomial of degree 13 on
615  *         [0,pi/4]
616  *                               3            13
617  *              sin(x) ~ x + S1*x + ... + S6*x
618  *         where
619  *
620  *      |sin(x)         2     4     6     8     10     12  |     -58
621  *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
622  *      |  x                                               |
623  *
624  *      4. sin(x+y) = sin(x) + sin'(x')*y
625  *                  ~ sin(x) + (1-x*x/2)*y
626  *         For better accuracy, let
627  *                   3      2      2      2      2
628  *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
629  *         then                   3    2
630  *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
631  */
632 
633 #define S1   -1.66666666666666324348e-01 /* 0xBFC55555, 0x55555549 */
634 #define S2    8.33333333332248946124e-03 /* 0x3F811111, 0x1110F8A6 */
635 #define S3   -1.98412698298579493134e-04 /* 0xBF2A01A0, 0x19C161D5 */
636 #define S4    2.75573137070700676789e-06 /* 0x3EC71DE3, 0x57B1FE7D */
637 #define S5   -2.50507602534068634195e-08 /* 0xBE5AE5E6, 0x8A2B9CEB */
638 #define S6    1.58969099521155010221e-10 /* 0x3DE5D93A, 0x5ACFD57C */
639 
640 static double
__kernel_sin(double x,double y,int iy)641 __kernel_sin (double x, double y, int iy)
642 {
643   double z, r, v;
644   int ix;
645 
646   ix = __HI (x) & 0x7fffffff; /* high word of x */
647   if (ix < 0x3e400000) /* |x| < 2**-27 */
648   {
649     if ((int) x == 0)
650     {
651       return x; /* generate inexact */
652     }
653   }
654   z = x * x;
655   v = z * x;
656   r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6)));
657   if (iy == 0)
658   {
659     return x + v * (S1 + z * r);
660   }
661   else
662   {
663     return x - ((z * (half * y - v * r) - y) - v * S1);
664   }
665 } /* __kernel_sin */
666 
667 /*
668  * __kernel_cos( x,  y )
669  * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
670  * Input x is assumed to be bounded by ~pi/4 in magnitude.
671  * Input y is the tail of x.
672  *
673  * Algorithm
674  *      1. Since cos(-x) = cos(x), we need only to consider positive x.
675  *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
676  *      3. cos(x) is approximated by a polynomial of degree 14 on
677  *         [0,pi/4]
678  *                                       4            14
679  *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
680  *         where the remez error is
681  *
682  *      |              2     4     6     8     10    12     14 |     -58
683  *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
684  *      |                                                      |
685  *
686  *                     4     6     8     10    12     14
687  *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
688  *             cos(x) = 1 - x*x/2 + r
689  *         since cos(x+y) ~ cos(x) - sin(x)*y
690  *                        ~ cos(x) - x*y,
691  *         a correction term is necessary in cos(x) and hence
692  *              cos(x+y) = 1 - (x*x/2 - (r - x*y))
693  *         For better accuracy when x > 0.3, let qx = |x|/4 with
694  *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
695  *         Then
696  *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
697  *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the
698  *         magnitude of the latter is at least a quarter of x*x/2,
699  *         thus, reducing the rounding error in the subtraction.
700  */
701 
702 #define C1   4.16666666666666019037e-02 /* 0x3FA55555, 0x5555554C */
703 #define C2  -1.38888888888741095749e-03 /* 0xBF56C16C, 0x16C15177 */
704 #define C3   2.48015872894767294178e-05 /* 0x3EFA01A0, 0x19CB1590 */
705 #define C4  -2.75573143513906633035e-07 /* 0xBE927E4F, 0x809C52AD */
706 #define C5   2.08757232129817482790e-09 /* 0x3E21EE9E, 0xBDB4B1C4 */
707 #define C6  -1.13596475577881948265e-11 /* 0xBDA8FAE9, 0xBE8838D4 */
708 
709 static double
__kernel_cos(double x,double y)710 __kernel_cos (double x, double y)
711 {
712   double a, hz, z, r;
713   int ix;
714 
715   ix = __HI (x) & 0x7fffffff; /* ix = |x|'s high word */
716   if (ix < 0x3e400000) /* if x < 2**27 */
717   {
718     if (((int) x) == 0)
719     {
720       return one; /* generate inexact */
721     }
722   }
723   z = x * x;
724   r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6)))));
725   if (ix < 0x3FD33333) /* if |x| < 0.3 */
726   {
727     return one - (0.5 * z - (z * r - x * y));
728   }
729   else
730   {
731     double_accessor qx;
732     if (ix > 0x3fe90000) /* x > 0.78125 */
733     {
734       qx.dbl = 0.28125;
735     }
736     else
737     {
738       qx.as_int.hi = ix - 0x00200000; /* x / 4 */
739       qx.as_int.lo = 0;
740     }
741     hz = 0.5 * z - qx.dbl;
742     a = one - qx.dbl;
743     return a - (hz - (z * r - x * y));
744   }
745 } /* __kernel_cos */
746 
747 /* __kernel_tan( x, y, k )
748  * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
749  * Input x is assumed to be bounded by ~pi/4 in magnitude.
750  * Input y is the tail of x.
751  * Input k indicates whether tan (if k = 1) or -1/tan (if k = -1) is returned.
752  *
753  * Algorithm
754  *      1. Since tan(-x) = -tan(x), we need only to consider positive x.
755  *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
756  *      3. tan(x) is approximated by a odd polynomial of degree 27 on
757  *         [0,0.67434]
758  *                               3             27
759  *              tan(x) ~ x + T1*x + ... + T13*x
760  *         where
761  *
762  *              |tan(x)         2     4            26   |     -59.2
763  *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
764  *              |  x                                    |
765  *
766  *         Note: tan(x+y) = tan(x) + tan'(x)*y
767  *                        ~ tan(x) + (1+x*x)*y
768  *         Therefore, for better accuracy in computing tan(x+y), let
769  *                   3      2      2       2       2
770  *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
771  *         then
772  *                                  3    2
773  *              tan(x+y) = x + (T1*x + (x *(r+y)+y))
774  *
775  *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
776  *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
777  *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
778  */
779 
780 #define T0      3.33333333333334091986e-01 /* 3FD55555, 55555563 */
781 #define T1      1.33333333333201242699e-01 /* 3FC11111, 1110FE7A */
782 #define T2      5.39682539762260521377e-02 /* 3FABA1BA, 1BB341FE */
783 #define T3      2.18694882948595424599e-02 /* 3F9664F4, 8406D637 */
784 #define T4      8.86323982359930005737e-03 /* 3F8226E3, E96E8493 */
785 #define T5      3.59207910759131235356e-03 /* 3F6D6D22, C9560328 */
786 #define T6      1.45620945432529025516e-03 /* 3F57DBC8, FEE08315 */
787 #define T7      5.88041240820264096874e-04 /* 3F4344D8, F2F26501 */
788 #define T8      2.46463134818469906812e-04 /* 3F3026F7, 1A8D1068 */
789 #define T9      7.81794442939557092300e-05 /* 3F147E88, A03792A6 */
790 #define T10     7.14072491382608190305e-05 /* 3F12B80F, 32F0A7E9 */
791 #define T11    -1.85586374855275456654e-05 /* BEF375CB, DB605373 */
792 #define T12     2.59073051863633712884e-05 /* 3EFB2A70, 74BF7AD4 */
793 #define pio4    7.85398163397448278999e-01 /* 3FE921FB, 54442D18 */
794 #define pio4lo  3.06161699786838301793e-17 /* 3C81A626, 33145C07 */
795 
796 static double
__kernel_tan(double x,double y,int iy)797 __kernel_tan (double x, double y, int iy)
798 {
799   double_accessor z;
800   double r, v, w, s;
801   int ix, hx;
802 
803   hx = __HI (x); /* high word of x */
804   ix = hx & 0x7fffffff; /* high word of |x| */
805   if (ix < 0x3e300000) /* x < 2**-28 */
806   {
807     if ((int) x == 0) /* generate inexact */
808     {
809       if (((ix | __LO (x)) | (iy + 1)) == 0)
810       {
811         return one / fabs (x);
812       }
813       else
814       {
815         if (iy == 1)
816         {
817           return x;
818         }
819         else /* compute -1 / (x + y) carefully */
820         {
821           double a;
822           double_accessor t;
823 
824           z.dbl = w = x + y;
825           z.as_int.lo = 0;
826           v = y - (z.dbl - x);
827           t.dbl = a = -one / w;
828           t.as_int.lo = 0;
829           s = one + t.dbl * z.dbl;
830           return t.dbl + a * (s + t.dbl * v);
831         }
832       }
833     }
834   }
835   if (ix >= 0x3FE59428) /* |x| >= 0.6744 */
836   {
837     if (hx < 0)
838     {
839       x = -x;
840       y = -y;
841     }
842     z.dbl = pio4 - x;
843     w = pio4lo - y;
844     x = z.dbl + w;
845     y = 0.0;
846   }
847   z.dbl = x * x;
848   w = z.dbl * z.dbl;
849   /*
850    * Break x^5 * (T[1] + x^2 * T[2] + ...) into
851    * x^5 (T[1] + x^4 * T[3] + ... + x^20 * T[11]) +
852    * x^5 (x^2 * (T[2] + x^4 * T[4] + ... + x^22 * [T12]))
853    */
854   r = T1 + w * (T3 + w * (T5 + w * (T7 + w * (T9 + w * T11))));
855   v = z.dbl * (T2 + w * (T4 + w * (T6 + w * (T8 + w * (T10 + w * T12)))));
856   s = z.dbl * x;
857   r = y + z.dbl * (s * (r + v) + y);
858   r += T0 * s;
859   w = x + r;
860   if (ix >= 0x3FE59428)
861   {
862     v = (double) iy;
863     return (double) (1 - ((hx >> 30) & 2)) * (v - 2.0 * (x - (w * w / (w + v) - r)));
864   }
865   if (iy == 1)
866   {
867     return w;
868   }
869   else
870   {
871     /*
872      * if allow error up to 2 ulp, simply return
873      * -1.0 / (x + r) here
874      */
875     /* compute -1.0 / (x + r) accurately */
876     double a;
877     double_accessor t;
878 
879     z.dbl = w;
880     z.as_int.lo = 0;
881     v = r - (z.dbl - x); /* z + v = r + x */
882     t.dbl = a = -1.0 / w; /* a = -1.0 / w */
883     t.as_int.lo = 0;
884     s = 1.0 + t.dbl * z.dbl;
885     return t.dbl + a * (s + t.dbl * v);
886   }
887 } /* __kernel_tan */
888 
889 /* Method:
890  *      Let S,C and T denote the sin, cos and tan respectively on
891  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
892  *      in [-pi/4 , +pi/4], and let n = k mod 4.
893  *      We have
894  *
895  *          n        sin(x)      cos(x)        tan(x)
896  *     ----------------------------------------------------------
897  *          0          S           C             T
898  *          1          C          -S            -1/T
899  *          2         -S          -C             T
900  *          3         -C           S            -1/T
901  *     ----------------------------------------------------------
902  *
903  * Special cases:
904  *      Let trig be any of sin, cos, or tan.
905  *      trig(+-INF)  is NaN, with signals;
906  *      trig(NaN)    is that NaN;
907  *
908  * Accuracy:
909  *      TRIG(x) returns trig(x) nearly rounded
910  */
911 
912 /* sin(x)
913  * Return sine function of x.
914  *
915  * kernel function:
916  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
917  *      __kernel_cos            ... cose function on [-pi/4,pi/4]
918  *      __ieee754_rem_pio2      ... argument reduction routine
919  */
920 double
sin(double x)921 sin (double x)
922 {
923   double y[2], z = 0.0;
924   int n, ix;
925 
926   /* High word of x. */
927   ix = __HI (x);
928 
929   /* |x| ~< pi/4 */
930   ix &= 0x7fffffff;
931   if (ix <= 0x3fe921fb)
932   {
933     return __kernel_sin (x, z, 0);
934   }
935 
936   /* sin(Inf or NaN) is NaN */
937   else if (ix >= 0x7ff00000)
938   {
939     return x - x;
940   }
941 
942   /* argument reduction needed */
943   else
944   {
945     n = __ieee754_rem_pio2 (x, y);
946     switch (n & 3)
947     {
948       case 0:
949       {
950         return __kernel_sin (y[0], y[1], 1);
951       }
952       case 1:
953       {
954         return __kernel_cos (y[0], y[1]);
955       }
956       case 2:
957       {
958         return -__kernel_sin (y[0], y[1], 1);
959       }
960       default:
961       {
962         return -__kernel_cos (y[0], y[1]);
963       }
964     }
965   }
966 } /* sin */
967 
968 /* cos(x)
969  * Return cosine function of x.
970  *
971  * kernel function:
972  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
973  *      __kernel_cos            ... cosine function on [-pi/4,pi/4]
974  *      __ieee754_rem_pio2      ... argument reduction routine
975  */
976 
977 double
cos(double x)978 cos (double x)
979 {
980   double y[2], z = 0.0;
981   int n, ix;
982 
983   /* High word of x. */
984   ix = __HI (x);
985 
986   /* |x| ~< pi/4 */
987   ix &= 0x7fffffff;
988   if (ix <= 0x3fe921fb)
989   {
990     return __kernel_cos (x, z);
991   }
992 
993   /* cos(Inf or NaN) is NaN */
994   else if (ix >= 0x7ff00000)
995   {
996     return x - x;
997   }
998 
999   /* argument reduction needed */
1000   else
1001   {
1002     n = __ieee754_rem_pio2 (x, y);
1003     switch (n & 3)
1004     {
1005       case 0:
1006       {
1007         return __kernel_cos (y[0], y[1]);
1008       }
1009       case 1:
1010       {
1011         return -__kernel_sin (y[0], y[1], 1);
1012       }
1013       case 2:
1014       {
1015         return -__kernel_cos (y[0], y[1]);
1016       }
1017       default:
1018       {
1019         return __kernel_sin (y[0], y[1], 1);
1020       }
1021     }
1022   }
1023 } /* cos */
1024 
1025 /* tan(x)
1026  * Return tangent function of x.
1027  *
1028  * kernel function:
1029  *      __kernel_tan            ... tangent function on [-pi/4,pi/4]
1030  *      __ieee754_rem_pio2      ... argument reduction routine
1031  */
1032 
1033 double
tan(double x)1034 tan (double x)
1035 {
1036   double y[2], z = 0.0;
1037   int n, ix;
1038 
1039   /* High word of x. */
1040   ix = __HI (x);
1041 
1042   /* |x| ~< pi/4 */
1043   ix &= 0x7fffffff;
1044   if (ix <= 0x3fe921fb)
1045   {
1046     return __kernel_tan (x, z, 1);
1047   }
1048 
1049   /* tan(Inf or NaN) is NaN */
1050   else if (ix >= 0x7ff00000)
1051   {
1052     return x - x; /* NaN */
1053   }
1054 
1055   /* argument reduction needed */
1056   else
1057   {
1058     n = __ieee754_rem_pio2 (x, y);
1059     return __kernel_tan (y[0], y[1], 1 - ((n & 1) << 1)); /*   1 -- n even, -1 -- n odd */
1060   }
1061 } /* tan */
1062 
1063 #undef zero
1064 #undef half
1065 #undef one
1066 #undef two24
1067 #undef twon24
1068 #undef invpio2
1069 #undef pio2_1
1070 #undef pio2_1t
1071 #undef pio2_2
1072 #undef pio2_2t
1073 #undef pio2_3
1074 #undef pio2_3t
1075 #undef S1
1076 #undef S2
1077 #undef S3
1078 #undef S4
1079 #undef S5
1080 #undef S6
1081 #undef C1
1082 #undef C2
1083 #undef C3
1084 #undef C4
1085 #undef C5
1086 #undef C6
1087 #undef T0
1088 #undef T1
1089 #undef T2
1090 #undef T3
1091 #undef T4
1092 #undef T5
1093 #undef T6
1094 #undef T7
1095 #undef T8
1096 #undef T9
1097 #undef T10
1098 #undef T11
1099 #undef T12
1100 #undef pio4
1101 #undef pio4lo
1102