/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/*
 * from: @(#)fdlibm.h 5.1 93/09/24
 */

#ifndef _MATH_PRIVATE_H_
#define _MATH_PRIVATE_H_

#include <sys/types.h>
#include <endian.h>
#include <stdint.h>	/* fixed-width uint32_t/uint64_t/int64_t are used below */

/*
 * The original fdlibm code used statements like:
 *	n0 = ((*(int*)&one)>>29)^1;		* index of high word *
 *	ix0 = *(n0+(int*)&x);			* high word of x *
 *	ix1 = *((1-n0)+(int*)&x);		* low word of x *
 * to dig two 32 bit words out of the 64 bit IEEE floating point
 * value. That is non-ANSI, and, moreover, the gcc instruction
 * scheduler gets it wrong. We instead use the following macros.
 * Unlike the original code, we determine the endianness at compile
 * time, not at run time; I don't see much benefit to selecting
 * endianness at run time.
 */

/*
 * A union which permits us to convert between a double and two 32 bit
 * ints.
 */

#ifdef __arm__
#if defined(__VFP_FP__) || defined(__ARM_EABI__)
#define IEEE_WORD_ORDER BYTE_ORDER
#else
#define IEEE_WORD_ORDER BIG_ENDIAN
#endif
#else /* __arm__ */
#define IEEE_WORD_ORDER BYTE_ORDER
#endif

typedef uint32_t u_int32_t;
typedef uint64_t u_int64_t;
typedef signed int int32_t;
typedef signed short int16_t;
typedef double __double_t;
typedef float __float_t;
/* A union which permits us to convert between a long double and
   four 32 bit ints. */

#if IEEE_WORD_ORDER == BIG_ENDIAN

typedef union
{
  long double value;
  struct {
    u_int32_t mswhi;
    u_int32_t mswlo;
    u_int32_t lswhi;
    u_int32_t lswlo;
  } parts32;
  struct {
    u_int64_t msw;
    u_int64_t lsw;
  } parts64;
} ieee_quad_shape_type;

#endif

#if IEEE_WORD_ORDER == LITTLE_ENDIAN

typedef union
{
  long double value;
  struct {
    u_int32_t lswlo;
    u_int32_t lswhi;
    u_int32_t mswlo;
    u_int32_t mswhi;
  } parts32;
  struct {
    u_int64_t lsw;
    u_int64_t msw;
  } parts64;
} ieee_quad_shape_type;

#endif

#if IEEE_WORD_ORDER == BIG_ENDIAN

typedef union
{
  double value;
  struct
  {
    u_int32_t msw;
    u_int32_t lsw;
  } parts;
  struct
  {
    u_int64_t w;
  } xparts;
} ieee_double_shape_type;

#endif

#if IEEE_WORD_ORDER == LITTLE_ENDIAN

typedef union
{
  double value;
  struct
  {
    u_int32_t lsw;
    u_int32_t msw;
  } parts;
  struct
  {
    u_int64_t w;
  } xparts;
} ieee_double_shape_type;

#endif

/* Get two 32 bit ints from a double. */

#define EXTRACT_WORDS(ix0,ix1,d) \
do { \
  ieee_double_shape_type ew_u; \
  ew_u.value = (d); \
  (ix0) = ew_u.parts.msw; \
  (ix1) = ew_u.parts.lsw; \
} while (0)

/* Get a 64-bit int from a double. */
#define EXTRACT_WORD64(ix,d) \
do { \
  ieee_double_shape_type ew_u; \
  ew_u.value = (d); \
  (ix) = ew_u.xparts.w; \
} while (0)

/* Get the more significant 32 bit int from a double. */

#define GET_HIGH_WORD(i,d) \
do { \
  ieee_double_shape_type gh_u; \
  gh_u.value = (d); \
  (i) = gh_u.parts.msw; \
} while (0)

/* Get the less significant 32 bit int from a double. */

#define GET_LOW_WORD(i,d) \
do { \
  ieee_double_shape_type gl_u; \
  gl_u.value = (d); \
  (i) = gl_u.parts.lsw; \
} while (0)

/* Set a double from two 32 bit ints. */

#define INSERT_WORDS(d,ix0,ix1) \
do { \
  ieee_double_shape_type iw_u; \
  iw_u.parts.msw = (ix0); \
  iw_u.parts.lsw = (ix1); \
  (d) = iw_u.value; \
} while (0)

/* Set a double from a 64-bit int. */
#define INSERT_WORD64(d,ix) \
do { \
  ieee_double_shape_type iw_u; \
  iw_u.xparts.w = (ix); \
  (d) = iw_u.value; \
} while (0)

/* Set the more significant 32 bits of a double from an int. */

#define SET_HIGH_WORD(d,v) \
do { \
  ieee_double_shape_type sh_u; \
  sh_u.value = (d); \
  sh_u.parts.msw = (v); \
  (d) = sh_u.value; \
} while (0)

/* Set the less significant 32 bits of a double from an int. */

#define SET_LOW_WORD(d,v) \
do { \
  ieee_double_shape_type sl_u; \
  sl_u.value = (d); \
  sl_u.parts.lsw = (v); \
  (d) = sl_u.value; \
} while (0)
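
/*
 * Example (illustrative sketch, not part of this header): fabs() can be
 * built from these macros by clearing the sign bit, which is the top bit
 * of the high word; the low word is left untouched.
 *
 *	u_int32_t high;
 *
 *	GET_HIGH_WORD(high, x);
 *	SET_HIGH_WORD(x, high & 0x7fffffff);
 */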

/*
 * A union which permits us to convert between a float and a 32 bit
 * int.
 */

typedef union
{
  float value;
  /* FIXME: Assumes 32 bit int. */
  unsigned int word;
} ieee_float_shape_type;

/* Get a 32 bit int from a float. */

#define GET_FLOAT_WORD(i,d) \
do { \
  ieee_float_shape_type gf_u; \
  gf_u.value = (d); \
  (i) = gf_u.word; \
} while (0)

/* Set a float from a 32 bit int. */

#define SET_FLOAT_WORD(d,i) \
do { \
  ieee_float_shape_type sf_u; \
  sf_u.word = (i); \
  (d) = sf_u.value; \
} while (0)
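
/*
 * Example (illustrative sketch): the usual fdlibm idiom for classifying a
 * float argument 'f' masks off the sign and compares the exponent field:
 *
 *	int32_t hx;
 *
 *	GET_FLOAT_WORD(hx, f);
 *	if ((hx & 0x7fffffff) >= 0x7f800000)
 *		return (f + f);		* f is Inf or NaN *
 */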

/*
 * Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long
 * double.
 */

#define EXTRACT_LDBL80_WORDS(ix0,ix1,d) \
do { \
	union IEEEl2bits ew_u; \
	ew_u.e = (d); \
	(ix0) = ew_u.xbits.expsign; \
	(ix1) = ew_u.xbits.man; \
} while (0)

/*
 * Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit
 * long double.
 */

#define EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d) \
do { \
	union IEEEl2bits ew_u; \
	ew_u.e = (d); \
	(ix0) = ew_u.xbits.expsign; \
	(ix1) = ew_u.xbits.manh; \
	(ix2) = ew_u.xbits.manl; \
} while (0)

/* Get expsign as a 16 bit int from a long double. */

#define GET_LDBL_EXPSIGN(i,d) \
do { \
	union IEEEl2bits ge_u; \
	ge_u.e = (d); \
	(i) = ge_u.xbits.expsign; \
} while (0)

/*
 * Set an 80 bit long double from a 16 bit int expsign and a 64 bit int
 * mantissa.
 */

#define INSERT_LDBL80_WORDS(d,ix0,ix1) \
do { \
	union IEEEl2bits iw_u; \
	iw_u.xbits.expsign = (ix0); \
	iw_u.xbits.man = (ix1); \
	(d) = iw_u.e; \
} while (0)

/*
 * Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints
 * comprising the mantissa.
 */

#define INSERT_LDBL128_WORDS(d,ix0,ix1,ix2) \
do { \
	union IEEEl2bits iw_u; \
	iw_u.xbits.expsign = (ix0); \
	iw_u.xbits.manh = (ix1); \
	iw_u.xbits.manl = (ix2); \
	(d) = iw_u.e; \
} while (0)

/* Set expsign of a long double from a 16 bit int. */

#define SET_LDBL_EXPSIGN(d,v) \
do { \
	union IEEEl2bits se_u; \
	se_u.e = (d); \
	se_u.xbits.expsign = (v); \
	(d) = se_u.e; \
} while (0)

#ifdef __i386__
/* Long double constants are broken on i386. */
#define LD80C(m, ex, v) { \
	.xbits.man = __CONCAT(m, ULL), \
	.xbits.expsign = (0x3fff + (ex)) | ((v) < 0 ? 0x8000 : 0), \
}
#else
/* The above works on non-i386 too, but we use this to check v. */
#define LD80C(m, ex, v) { .e = (v), }
#endif

#ifdef FLT_EVAL_METHOD
/*
 * Attempt to get strict C99 semantics for assignment with non-C99 compilers.
 */
#if FLT_EVAL_METHOD == 0 || __GNUC__ == 0
#define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval))
#else
#define STRICT_ASSIGN(type, lval, rval) do { \
	volatile type __lval; \
\
	if (sizeof(type) >= sizeof(long double)) \
		(lval) = (rval); \
	else { \
		__lval = (rval); \
		(lval) = __lval; \
	} \
} while (0)
#endif
#endif /* FLT_EVAL_METHOD */
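
/*
 * Illustrative sketch (assumes a broken compiler and FLT_EVAL_METHOD == 2,
 * e.g. i387): a plain "float r = x*x + y;" may keep the result in an
 * extended-precision register, so later uses of r can see extra precision.
 * STRICT_ASSIGN() forces the store to float width:
 *
 *	float r;
 *
 *	STRICT_ASSIGN(float, r, x*x + y);
 */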

/* Support switching the mode to FP_PE if necessary. */
#if defined(__i386__) && !defined(NO_FPSETPREC)
#define ENTERI() ENTERIT(long double)
#define ENTERIT(returntype) \
	returntype __retval; \
	fp_prec_t __oprec; \
\
	if ((__oprec = fpgetprec()) != FP_PE) \
		fpsetprec(FP_PE)
#define RETURNI(x) do { \
	__retval = (x); \
	if (__oprec != FP_PE) \
		fpsetprec(__oprec); \
	RETURNF(__retval); \
} while (0)
#define ENTERV() \
	fp_prec_t __oprec; \
\
	if ((__oprec = fpgetprec()) != FP_PE) \
		fpsetprec(FP_PE)
#define RETURNV() do { \
	if (__oprec != FP_PE) \
		fpsetprec(__oprec); \
	return; \
} while (0)
#else
#define ENTERI()
#define ENTERIT(x)
#define RETURNI(x) RETURNF(x)
#define ENTERV()
#define RETURNV() return
#endif

/* Default return statement if hack*_t() is not used. */
#define RETURNF(v) return (v)
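
/*
 * Usage sketch (hypothetical function, for illustration only): a long
 * double function brackets its body with ENTERI()/RETURNI() so that on
 * i386 the x87 precision is raised to FP_PE for the intermediate work and
 * the caller's precision is restored before returning:
 *
 *	long double
 *	hypothetical_funcl(long double x)
 *	{
 *		long double r;
 *
 *		ENTERI();
 *		r = x + 1;		* placeholder for the real work *
 *		RETURNI(r);
 *	}
 */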

/*
 * 2sum gives the same result as 2sumF without requiring |a| >= |b| or
 * a == 0, but is slower.
 */
#define _2sum(a, b) do { \
	__typeof(a) __s, __w; \
\
	__w = (a) + (b); \
	__s = __w - (a); \
	(b) = ((a) - (__w - __s)) + ((b) - __s); \
	(a) = __w; \
} while (0)

/*
 * 2sumF algorithm.
 *
 * "Normalize" the terms in the infinite-precision expression a + b for
 * the sum of 2 floating point values so that b is as small as possible
 * relative to 'a'. (The resulting 'a' is the value of the expression in
 * the same precision as 'a' and the resulting b is the rounding error.)
 * |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and
 * exponent overflow or underflow must not occur. This uses a Theorem of
 * Dekker (1971). See Knuth (1981) 4.2.2 Theorem C. The name "TwoSum"
 * is apparently due to Shewchuk (1997).
 *
 * For this to always work, assignment of a + b to 'a' must not retain any
 * extra precision in a + b. This is required by C standards but broken
 * in many compilers. The brokenness cannot be worked around using
 * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this
 * algorithm would be destroyed by non-null strict assignments. (The
 * compilers are correct to be broken -- the efficiency of all floating
 * point code calculations would be destroyed similarly if they forced the
 * conversions.)
 *
 * Fortunately, a case that works well can usually be arranged by building
 * any extra precision into the type of 'a' -- 'a' should have type float_t,
 * double_t or long double. b's type should be no larger than 'a's type.
 * Callers should use these types with scopes as large as possible, to
 * reduce their own extra-precision and efficiency problems. In
 * particular, they shouldn't convert back and forth just to call here.
 */
#ifdef DEBUG
#define _2sumF(a, b) do { \
	__typeof(a) __w; \
	volatile __typeof(a) __ia, __ib, __r, __vw; \
\
	__ia = (a); \
	__ib = (b); \
	assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib)); \
\
	__w = (a) + (b); \
	(b) = ((a) - __w) + (b); \
	(a) = __w; \
\
	/* The next 2 assertions are weak if (a) is already long double. */ \
	assert((long double)__ia + __ib == (long double)(a) + (b)); \
	__vw = __ia + __ib; \
	__r = __ia - __vw; \
	__r += __ib; \
	assert(__vw == (a) && __r == (b)); \
} while (0)
#else /* !DEBUG */
#define _2sumF(a, b) do { \
	__typeof(a) __w; \
\
	__w = (a) + (b); \
	(b) = ((a) - __w) + (b); \
	(a) = __w; \
} while (0)
#endif /* DEBUG */
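
/*
 * Worked example (illustrative only, double precision): with a = 1 and
 * b = 0x1.8p-53, the rounded sum is __w = 1 + 0x1p-52, and _2sumF() leaves
 * a = 1 + 0x1p-52, b = -0x1p-54. a + b is still exactly the original sum,
 * but a now holds the correctly rounded result and b the rounding error.
 */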

/*
 * Set x += c, where x is represented in extra precision as a + b.
 * x must be sufficiently normalized and sufficiently larger than c,
 * and the result is then sufficiently normalized.
 *
 * The details of ordering are that |a| must be >= |c| (so that (a, c)
 * can be normalized without extra work to swap 'a' with c). The details of
 * the normalization are that b must be small relative to the normalized 'a'.
 * Normalization of (a, c) makes the normalized c tiny relative to the
 * normalized a, so b remains small relative to 'a' in the result. However,
 * b need not ever be tiny relative to 'a'. For example, b might be about
 * 2**20 times smaller than 'a' to give about 20 extra bits of precision.
 * That is usually enough, and adding c (which by normalization is about
 * 2**53 times smaller than a) cannot change b significantly. However,
 * cancellation of 'a' with c in normalization of (a, c) may reduce 'a'
 * significantly relative to b. The caller must ensure that significant
 * cancellation doesn't occur, either by having c of the same sign as 'a',
 * or by having |c| a few percent smaller than |a|. Pre-normalization of
 * (a, b) may help.
 *
 * This is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2
 * exercise 19). We gain considerable efficiency by requiring the terms to
 * be sufficiently normalized and sufficiently increasing.
 */
#define _3sumF(a, b, c) do { \
	__typeof(a) __tmp; \
\
	__tmp = (c); \
	_2sumF(__tmp, (a)); \
	(b) += (a); \
	(a) = __tmp; \
} while (0)

/*
 * Common routine to process the arguments to nan(), nanf(), and nanl().
 */
void _scan_nan(uint32_t *__words, int __num_words, const char *__s);

/*
 * Mix 0, 1 or 2 NaNs. First add 0 to each arg. This normally just turns
 * signaling NaNs into quiet NaNs by setting a quiet bit. We do this
 * because we want to never return a signaling NaN, and also because we
 * don't want the quiet bit to affect the result. Then mix the converted
 * args using the specified operation.
 *
 * When one arg is NaN, the result is typically that arg quieted. When both
 * args are NaNs, the result is typically the quietening of the arg whose
 * mantissa is largest after quietening. When neither arg is NaN, the
 * result may be NaN because it is indeterminate, or finite for subsequent
 * construction of a NaN as the indeterminate 0.0L/0.0L.
 *
 * Technical complications: the result in bits after rounding to the final
 * precision might depend on the runtime precision and/or on compiler
 * optimizations, especially when different register sets are used for
 * different precisions. Try to make the result not depend on at least the
 * runtime precision by always doing the main mixing step in long double
 * precision. Try to reduce dependencies on optimizations by adding
 * the 0's in different precisions (unless everything is in long double
 * precision).
 */
#define nan_mix(x, y) (nan_mix_op((x), (y), +))
#define nan_mix_op(x, y, op) (((x) + 0.0L) op ((y) + 0))
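
/*
 * Illustrative sketch: a two-argument function typically routes its NaN
 * handling through nan_mix() so that a signaling NaN argument is never
 * returned unquieted, e.g. (hypothetical):
 *
 *	if (isnan(x) || isnan(y))
 *		return (nan_mix(x, y));
 */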

#ifdef _COMPLEX_H

/*
 * C99 specifies that complex numbers have the same representation as
 * an array of two elements, where the first element is the real part
 * and the second element is the imaginary part.
 */
typedef union {
	float complex f;
	float a[2];
} float_complex;
typedef union {
	double complex f;
	double a[2];
} double_complex;
typedef union {
	long double complex f;
	long double a[2];
} long_double_complex;
#define REALPART(z) ((z).a[0])
#define IMAGPART(z) ((z).a[1])

/*
 * Inline functions that can be used to construct complex values.
 *
 * The C99 standard intends x+I*y to be used for this, but x+I*y is
 * currently unusable in general since gcc introduces many overflow,
 * underflow, sign and efficiency bugs by rewriting I*y as
 * (0.0+I)*(y+0.0*I) and laboriously computing the full complex product.
 * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted
 * to -0.0+I*0.0.
 *
 * The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL()
 * to construct complex values. Compilers that conform to the C99
 * standard require the following functions to avoid the above issues.
 */

#ifndef CMPLXF
static __inline float complex
CMPLXF(float x, float y)
{
	float_complex z;

	REALPART(z) = x;
	IMAGPART(z) = y;
	return (z.f);
}
#endif

#ifndef CMPLX
static __inline double complex
CMPLX(double x, double y)
{
	double_complex z;

	REALPART(z) = x;
	IMAGPART(z) = y;
	return (z.f);
}
#endif

#ifndef CMPLXL
static __inline long double complex
CMPLXL(long double x, long double y)
{
	long_double_complex z;

	REALPART(z) = x;
	IMAGPART(z) = y;
	return (z.f);
}
#endif
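
/*
 * Illustrative sketch (assumes <math.h> for INFINITY): a value with an
 * infinite imaginary part must be built with CMPLX() rather than x + I*y,
 * since the latter may be evaluated as a full complex multiplication and
 * corrupt Inf to NaN:
 *
 *	double complex z = CMPLX(0.0, INFINITY);	* 0 + Inf*I *
 */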

#endif /* _COMPLEX_H */

/*
 * The rnint() family rounds to the nearest integer for a restricted range
 * of args (up to about 2**MANT_DIG). We assume that the current
 * rounding mode is FE_TONEAREST so that this can be done efficiently.
 * Extra precision causes more problems in practice, and we only centralize
 * this here to reduce those problems, and have not solved the efficiency
 * problems. The exp2() family uses a more delicate version of this that
 * requires extracting bits from the intermediate value, so it is not
 * centralized here and should copy any solution of the efficiency problems.
 */

static inline double
rnint(__double_t x)
{
	/*
	 * This casts to double to kill any extra precision. This depends
	 * on the cast being applied to a double_t to avoid compiler bugs
	 * (this is a cleaner version of STRICT_ASSIGN()). This is
	 * inefficient if there actually is extra precision, but is hard
	 * to improve on. We use double_t in the API to minimise conversions
	 * for just calling here. Note that we cannot easily change the
	 * magic number to the one that works directly with double_t, since
	 * the rounding precision is variable at runtime on x86 so the
	 * magic number would need to be variable. Assuming that the
	 * rounding precision is always the default is too fragile. This
	 * and many other complications will move when the default is
	 * changed to FP_PE.
	 */
	return ((double)(x + 0x1.8p52) - 0x1.8p52);
}

static inline float
rnintf(__float_t x)
{
	/*
	 * As for rnint(), except we could just call that to handle the
	 * extra precision case, usually without losing efficiency.
	 */
	return ((float)(x + 0x1.8p23F) - 0x1.8p23F);
}
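
/*
 * Worked example (FE_TONEAREST assumed): for x = 2.5, x + 0x1.8p52 is
 * 0x1.8p52 + 2.5, which rounds to 0x1.8p52 + 2 (ties to even); subtracting
 * 0x1.8p52 gives 2.0. For x = 3.5 the same steps give 4.0.
 */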

#ifdef LDBL_MANT_DIG
/*
 * The complications for extra precision are smaller for rnintl() since it
 * can safely assume that the rounding precision has been increased from
 * its default to FP_PE on x86. We don't exploit that here to get small
 * optimizations from limiting the range to double. We just need it for
 * the magic number to work with long doubles. ld128 callers should use
 * rnint() instead of this if possible. ld80 callers should prefer
 * rnintl() since for amd64 this avoids swapping the register set, while
 * for i386 it makes no difference (assuming FP_PE), and for other arches
 * it makes little difference.
 */
static inline long double
rnintl(long double x)
{
	return (x + __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2 -
	    __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2);
}
#endif /* LDBL_MANT_DIG */

/*
 * irint() and i64rint() give the same result as casting to their integer
 * return type provided their arg is a floating point integer. They can
 * sometimes be more efficient because no rounding is required.
 */
#if defined(__amd64__) || defined(__i386__)
#define irint(x) \
    (sizeof(x) == sizeof(float) && \
	sizeof(__float_t) == sizeof(long double) ? irintf(x) : \
	sizeof(x) == sizeof(double) && \
	sizeof(__double_t) == sizeof(long double) ? irintd(x) : \
	sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x))
#else
#define irint(x) ((int)(x))
#endif

#define i64rint(x) ((int64_t)(x)) /* only needed for ld128 so not opt. */

#if defined(__i386__)
static __inline int
irintf(float x)
{
	int n;

	__asm("fistl %0" : "=m" (n) : "t" (x));
	return (n);
}

static __inline int
irintd(double x)
{
	int n;

	__asm("fistl %0" : "=m" (n) : "t" (x));
	return (n);
}
#endif

#if defined(__amd64__) || defined(__i386__)
static __inline int
irintl(long double x)
{
	int n;

	__asm("fistl %0" : "=m" (n) : "t" (x));
	return (n);
}
#endif

/*
 * The following are fast floor macros for 0 <= |x| < 0x1p(N-1), where
 * N is the precision of the type of x. These macros are used in the
 * half-cycle trigonometric functions (e.g., sinpi(x)).
 */
#define FFLOORF(x, j0, ix) do { \
	(j0) = (((ix) >> 23) & 0xff) - 0x7f; \
	(ix) &= ~(0x007fffff >> (j0)); \
	SET_FLOAT_WORD((x), (ix)); \
} while (0)

#define FFLOOR(x, j0, ix, lx) do { \
	(j0) = (((ix) >> 20) & 0x7ff) - 0x3ff; \
	if ((j0) < 20) { \
		(ix) &= ~(0x000fffff >> (j0)); \
		(lx) = 0; \
	} else { \
		(lx) &= ~((uint32_t)0xffffffff >> ((j0) - 20)); \
	} \
	INSERT_WORDS((x), (ix), (lx)); \
} while (0)
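
/*
 * Worked example (illustrative, double precision): for x = 3.75 the biased
 * exponent gives j0 = 1, so FFLOOR() keeps only the top j0 bits of the
 * mantissa in the high word, clears the rest and the whole low word, and
 * INSERT_WORDS() rebuilds x as 3.0.
 */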

#define FFLOORL80(x, j0, ix, lx) do { \
	(j0) = (ix) - 0x3fff + 1; \
	if ((j0) < 32) { \
		(lx) = ((lx) >> 32) << 32; \
		(lx) &= ~((((lx) << 32)-1) >> (j0)); \
	} else { \
		uint64_t _m; \
		_m = (uint64_t)-1 >> (j0); \
		if ((lx) & _m) (lx) &= ~_m; \
	} \
	INSERT_LDBL80_WORDS((x), (ix), (lx)); \
} while (0)

#define FFLOORL128(x, ai, ar) do { \
	union IEEEl2bits u; \
	uint64_t m; \
	int e; \
	u.e = (x); \
	e = u.bits.exp - 16383; \
	if (e < 48) { \
		m = ((1llu << 49) - 1) >> (e + 1); \
		u.bits.manh &= ~m; \
		u.bits.manl = 0; \
	} else { \
		m = (uint64_t)-1 >> (e - 48); \
		u.bits.manl &= ~m; \
	} \
	(ai) = u.e; \
	(ar) = (x) - (ai); \
} while (0)

#ifdef DEBUG
#if defined(__amd64__) || defined(__i386__)
#define breakpoint() asm("int $3")
#else
#include <signal.h>

#define breakpoint() raise(SIGTRAP)
#endif
#endif

#ifdef STRUCT_RETURN
#define RETURNSP(rp) do { \
	if (!(rp)->lo_set) \
		RETURNF((rp)->hi); \
	RETURNF((rp)->hi + (rp)->lo); \
} while (0)
#define RETURNSPI(rp) do { \
	if (!(rp)->lo_set) \
		RETURNI((rp)->hi); \
	RETURNI((rp)->hi + (rp)->lo); \
} while (0)
#endif

#define SUM2P(x, y) ({ \
	const __typeof (x) __x = (x); \
	const __typeof (y) __y = (y); \
	__x + __y; \
})

/* fdlibm kernel function */
int __kernel_rem_pio2(double*,double*,int,int,int);

/* double precision kernel functions */
#ifndef INLINE_REM_PIO2
int __ieee754_rem_pio2(double,double*);
#endif
double __kernel_sin(double,double,int);
double __kernel_cos(double,double);
double __kernel_tan(double,double,int);
double __ldexp_exp(double,int);
#ifdef _COMPLEX_H
double complex __ldexp_cexp(double complex,int);
#endif

/* float precision kernel functions */
#ifndef INLINE_REM_PIO2F
int __ieee754_rem_pio2f(float,double*);
#endif
#ifndef INLINE_KERNEL_SINDF
float __kernel_sindf(double);
#endif
#ifndef INLINE_KERNEL_COSDF
float __kernel_cosdf(double);
#endif
#ifndef INLINE_KERNEL_TANDF
float __kernel_tandf(double,int);
#endif
float __ldexp_expf(float,int);
#ifdef _COMPLEX_H
float complex __ldexp_cexpf(float complex,int);
#endif

/* long double precision kernel functions */
long double __kernel_sinl(long double, long double, int);
long double __kernel_cosl(long double, long double);
long double __kernel_tanl(long double, long double, int);

#endif /* !_MATH_PRIVATE_H_ */