/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/*
 * from: @(#)fdlibm.h 5.1 93/09/24
 * $FreeBSD$
 */

#ifndef _MATH_PRIVATE_H_
#define	_MATH_PRIVATE_H_

#include <sys/types.h>
#include <endian.h>

/*
 * The original fdlibm code used statements like:
 *	n0 = ((*(int*)&one)>>29)^1;		* index of high word *
 *	ix0 = *(n0+(int*)&x);			* high word of x *
 *	ix1 = *((1-n0)+(int*)&x);		* low word of x *
 * to dig two 32 bit words out of the 64 bit IEEE floating point
 * value.  That is non-ANSI, and, moreover, the gcc instruction
 * scheduler gets it wrong.  We instead use the following macros.
 * Unlike the original code, we determine the endianness at compile
 * time, not at run time; I don't see much benefit to selecting
 * endianness at run time.
 */

/*
 * A union which permits us to convert between a double and two 32 bit
 * ints.
 */

#ifdef __arm__
#if defined(__VFP_FP__) || defined(__ARM_EABI__)
#define	IEEE_WORD_ORDER	BYTE_ORDER
#else
#define	IEEE_WORD_ORDER	BIG_ENDIAN
#endif
#else /* __arm__ */
#define	IEEE_WORD_ORDER	BYTE_ORDER
#endif

typedef unsigned int u_int32_t;
typedef unsigned long u_int64_t;
typedef signed int int32_t;
typedef signed short int16_t;
typedef double __double_t;
typedef float __float_t;

/* A union which permits us to convert between a long double and
   four 32 bit ints.  */

#if IEEE_WORD_ORDER == BIG_ENDIAN

typedef union
{
  long double value;
  struct {
    u_int32_t mswhi;
    u_int32_t mswlo;
    u_int32_t lswhi;
    u_int32_t lswlo;
  } parts32;
  struct {
    u_int64_t msw;
    u_int64_t lsw;
  } parts64;
} ieee_quad_shape_type;

#endif

#if IEEE_WORD_ORDER == LITTLE_ENDIAN

typedef union
{
  long double value;
  struct {
    u_int32_t lswlo;
    u_int32_t lswhi;
    u_int32_t mswlo;
    u_int32_t mswhi;
  } parts32;
  struct {
    u_int64_t lsw;
    u_int64_t msw;
  } parts64;
} ieee_quad_shape_type;

#endif

#if IEEE_WORD_ORDER == BIG_ENDIAN

typedef union
{
  double value;
  struct
  {
    u_int32_t msw;
    u_int32_t lsw;
  } parts;
  struct
  {
    u_int64_t w;
  } xparts;
} ieee_double_shape_type;

#endif

#if IEEE_WORD_ORDER == LITTLE_ENDIAN

typedef union
{
  double value;
  struct
  {
    u_int32_t lsw;
    u_int32_t msw;
  } parts;
  struct
  {
    u_int64_t w;
  } xparts;
} ieee_double_shape_type;

#endif

/* Get two 32 bit ints from a double.  */

#define EXTRACT_WORDS(ix0,ix1,d)				\
do {								\
  ieee_double_shape_type ew_u;					\
  ew_u.value = (d);						\
  (ix0) = ew_u.parts.msw;					\
  (ix1) = ew_u.parts.lsw;					\
} while (0)

/* Get a 64-bit int from a double. */
#define EXTRACT_WORD64(ix,d)					\
do {								\
  ieee_double_shape_type ew_u;					\
  ew_u.value = (d);						\
  (ix) = ew_u.xparts.w;						\
} while (0)

/* Get the more significant 32 bit int from a double.  */

#define GET_HIGH_WORD(i,d)					\
do {								\
  ieee_double_shape_type gh_u;					\
  gh_u.value = (d);						\
  (i) = gh_u.parts.msw;						\
} while (0)

/* Get the less significant 32 bit int from a double.  */

#define GET_LOW_WORD(i,d)					\
do {								\
  ieee_double_shape_type gl_u;					\
  gl_u.value = (d);						\
  (i) = gl_u.parts.lsw;						\
} while (0)

/* Set a double from two 32 bit ints.  */

#define INSERT_WORDS(d,ix0,ix1)					\
do {								\
  ieee_double_shape_type iw_u;					\
  iw_u.parts.msw = (ix0);					\
  iw_u.parts.lsw = (ix1);					\
  (d) = iw_u.value;						\
} while (0)

/* Set a double from a 64-bit int. */
#define INSERT_WORD64(d,ix)					\
do {								\
  ieee_double_shape_type iw_u;					\
  iw_u.xparts.w = (ix);						\
  (d) = iw_u.value;						\
} while (0)

/* Set the more significant 32 bits of a double from an int.  */

#define SET_HIGH_WORD(d,v)					\
do {								\
  ieee_double_shape_type sh_u;					\
  sh_u.value = (d);						\
  sh_u.parts.msw = (v);						\
  (d) = sh_u.value;						\
} while (0)

/* Set the less significant 32 bits of a double from an int.  */

#define SET_LOW_WORD(d,v)					\
do {								\
  ieee_double_shape_type sl_u;					\
  sl_u.value = (d);						\
  sl_u.parts.lsw = (v);						\
  (d) = sl_u.value;						\
} while (0)
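
/*
 * Illustrative sketch (not compiled): a typical use of the word-access
 * macros above.  Clearing the sign bit in the high word gives fabs()
 * without any floating point operations.  The function name is an
 * example only.
 */
#if 0
static double
example_fabs(double x)
{
	u_int32_t hi, lo;

	EXTRACT_WORDS(hi, lo, x);	/* hi: sign, exponent, top of mantissa */
	hi &= 0x7fffffff;		/* clear the sign bit */
	INSERT_WORDS(x, hi, lo);	/* rebuild the double from its words */
	return (x);
}
#endif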

/*
 * A union which permits us to convert between a float and a 32 bit
 * int.
 */

typedef union
{
  float value;
  /* FIXME: Assumes 32 bit int.  */
  unsigned int word;
} ieee_float_shape_type;

/* Get a 32 bit int from a float.  */

#define GET_FLOAT_WORD(i,d)					\
do {								\
  ieee_float_shape_type gf_u;					\
  gf_u.value = (d);						\
  (i) = gf_u.word;						\
} while (0)

/* Set a float from a 32 bit int.  */

#define SET_FLOAT_WORD(d,i)					\
do {								\
  ieee_float_shape_type sf_u;					\
  sf_u.word = (i);						\
  (d) = sf_u.value;						\
} while (0)
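
/*
 * Illustrative sketch (not compiled): GET_FLOAT_WORD() and
 * SET_FLOAT_WORD() operate on the raw IEEE-754 bits of a float.  The
 * function name is an example only.
 */
#if 0
static int
example_signbitf(float x)
{
	u_int32_t w;

	GET_FLOAT_WORD(w, x);		/* w holds the bit pattern of x */
	return ((w >> 31) != 0);	/* bit 31 is the sign bit */
}
#endif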

/*
 * Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long
 * double.
 */

#define	EXTRACT_LDBL80_WORDS(ix0,ix1,d)				\
do {								\
  union IEEEl2bits ew_u;					\
  ew_u.e = (d);							\
  (ix0) = ew_u.xbits.expsign;					\
  (ix1) = ew_u.xbits.man;					\
} while (0)

/*
 * Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit
 * long double.
 */

#define	EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d)			\
do {								\
  union IEEEl2bits ew_u;					\
  ew_u.e = (d);							\
  (ix0) = ew_u.xbits.expsign;					\
  (ix1) = ew_u.xbits.manh;					\
  (ix2) = ew_u.xbits.manl;					\
} while (0)

/* Get expsign as a 16 bit int from a long double.  */

#define	GET_LDBL_EXPSIGN(i,d)					\
do {								\
  union IEEEl2bits ge_u;					\
  ge_u.e = (d);							\
  (i) = ge_u.xbits.expsign;					\
} while (0)

/*
 * Set an 80 bit long double from a 16 bit int expsign and a 64 bit int
 * mantissa.
 */

#define	INSERT_LDBL80_WORDS(d,ix0,ix1)				\
do {								\
  union IEEEl2bits iw_u;					\
  iw_u.xbits.expsign = (ix0);					\
  iw_u.xbits.man = (ix1);					\
  (d) = iw_u.e;							\
} while (0)

/*
 * Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints
 * comprising the mantissa.
 */

#define	INSERT_LDBL128_WORDS(d,ix0,ix1,ix2)			\
do {								\
  union IEEEl2bits iw_u;					\
  iw_u.xbits.expsign = (ix0);					\
  iw_u.xbits.manh = (ix1);					\
  iw_u.xbits.manl = (ix2);					\
  (d) = iw_u.e;							\
} while (0)

/* Set expsign of a long double from a 16 bit int.  */

#define	SET_LDBL_EXPSIGN(d,v)					\
do {								\
  union IEEEl2bits se_u;					\
  se_u.e = (d);							\
  se_u.xbits.expsign = (v);					\
  (d) = se_u.e;							\
} while (0)
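
/*
 * Illustrative sketch (not compiled): the expsign word packs the sign bit
 * and the 15 bit biased exponent of a long double, so the sign can be
 * flipped without touching the mantissa.  union IEEEl2bits is assumed to
 * come from the per-architecture fpmath.h; the function name is an
 * example only.
 */
#if 0
static long double
example_negl(long double x)
{
	uint16_t expsign;

	GET_LDBL_EXPSIGN(expsign, x);
	SET_LDBL_EXPSIGN(x, expsign ^ 0x8000);	/* flip the sign bit */
	return (x);
}
#endif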

#ifdef __i386__
/* Long double constants are broken on i386. */
#define	LD80C(m, ex, v) {						\
	.xbits.man = __CONCAT(m, ULL),					\
	.xbits.expsign = (0x3fff + (ex)) | ((v) < 0 ? 0x8000 : 0),	\
}
#else
/* The above works on non-i386 too, but we use this to check v. */
#define	LD80C(m, ex, v)	{ .e = (v), }
#endif

#ifdef FLT_EVAL_METHOD
/*
 * Attempt to get strict C99 semantics for assignment with non-C99 compilers.
 */
#if FLT_EVAL_METHOD == 0 || __GNUC__ == 0
#define	STRICT_ASSIGN(type, lval, rval)	((lval) = (rval))
#else
#define	STRICT_ASSIGN(type, lval, rval) do {	\
	volatile type __lval;			\
						\
	if (sizeof(type) >= sizeof(long double))	\
		(lval) = (rval);		\
	else {					\
		__lval = (rval);		\
		(lval) = __lval;		\
	}					\
} while (0)
#endif
#endif /* FLT_EVAL_METHOD */
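
/*
 * Illustrative sketch (not compiled): STRICT_ASSIGN() is used where an
 * intermediate result must be narrowed to its nominal type even when
 * FLT_EVAL_METHOD != 0 keeps expressions in extra precision (classic
 * x87).  The function name is an example only.
 */
#if 0
static float
example_rounded_sum(float x, float y)
{
	float r;

	STRICT_ASSIGN(float, r, x + y);	/* force r to true float precision */
	return (r);
}
#endif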

/* Support switching the mode to FP_PE if necessary. */
#if defined(__i386__) && !defined(NO_FPSETPREC)
#define	ENTERI() ENTERIT(long double)
#define	ENTERIT(returntype)			\
	returntype __retval;			\
	fp_prec_t __oprec;			\
						\
	if ((__oprec = fpgetprec()) != FP_PE)	\
		fpsetprec(FP_PE)
#define	RETURNI(x) do {				\
	__retval = (x);				\
	if (__oprec != FP_PE)			\
		fpsetprec(__oprec);		\
	RETURNF(__retval);			\
} while (0)
#define	ENTERV()				\
	fp_prec_t __oprec;			\
						\
	if ((__oprec = fpgetprec()) != FP_PE)	\
		fpsetprec(FP_PE)
#define	RETURNV() do {				\
	if (__oprec != FP_PE)			\
		fpsetprec(__oprec);		\
	return;					\
} while (0)
#else
#define	ENTERI()
#define	ENTERIT(x)
#define	RETURNI(x)	RETURNF(x)
#define	ENTERV()
#define	RETURNV()	return
#endif

/* Default return statement if hack*_t() is not used. */
#define	RETURNF(v)	return (v)

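/*
 * Illustrative sketch (not compiled): a long double function typically
 * brackets its body with ENTERI()/RETURNI() so that, on i386, the x87
 * precision is raised to FP_PE for the computation and restored before
 * returning.  The function name is an example only.
 */
#if 0
static long double
example_funcl(long double x)
{
	ENTERI();			/* no-op except on i386 */
	RETURNI(x + 1);			/* restores the old precision */
}
#endif
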
/*
 * 2sum gives the same result as 2sumF without requiring |a| >= |b| or
 * a == 0, but is slower.
 */
#define	_2sum(a, b) do {	\
	__typeof(a) __s, __w;	\
				\
	__w = (a) + (b);	\
	__s = __w - (a);	\
	(b) = ((a) - (__w - __s)) + ((b) - __s); \
	(a) = __w;		\
} while (0)

/*
 * 2sumF algorithm.
 *
 * "Normalize" the terms in the infinite-precision expression a + b for
 * the sum of 2 floating point values so that b is as small as possible
 * relative to 'a'.  (The resulting 'a' is the value of the expression in
 * the same precision as 'a' and the resulting b is the rounding error.)
 * |a| must be >= |b| or 'a' must be 0, b's type must be no larger than
 * 'a's type, and exponent overflow or underflow must not occur.  This
 * uses a Theorem of Dekker (1971).  See Knuth (1981) 4.2.2 Theorem C.
 * The name "TwoSum" is apparently due to Shewchuk (1997).
 *
 * For this to always work, assignment of a + b to 'a' must not retain any
 * extra precision in a + b.  This is required by C standards but broken
 * in many compilers.  The brokenness cannot be worked around using
 * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this
 * algorithm would be destroyed by non-null strict assignments.  (The
 * compilers are correct to be broken -- the efficiency of all floating
 * point calculations would be destroyed similarly if they forced the
 * conversions.)
 *
 * Fortunately, a case that works well can usually be arranged by building
 * any extra precision into the type of 'a' -- 'a' should have type float_t,
 * double_t or long double.  b's type should be no larger than 'a's type.
 * Callers should use these types with scopes as large as possible, to
 * reduce their own extra-precision and efficiency problems.  In
 * particular, they shouldn't convert back and forth just to call here.
 */
#ifdef DEBUG
#define	_2sumF(a, b) do {				\
	__typeof(a) __w;				\
	volatile __typeof(a) __ia, __ib, __r, __vw;	\
							\
	__ia = (a);					\
	__ib = (b);					\
	assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib));	\
							\
	__w = (a) + (b);				\
	(b) = ((a) - __w) + (b);			\
	(a) = __w;					\
							\
	/* The next 2 assertions are weak if (a) is already long double. */ \
	assert((long double)__ia + __ib == (long double)(a) + (b));	\
	__vw = __ia + __ib;				\
	__r = __ia - __vw;				\
	__r += __ib;					\
	assert(__vw == (a) && __r == (b));		\
} while (0)
#else /* !DEBUG */
#define	_2sumF(a, b) do {	\
	__typeof(a) __w;	\
				\
	__w = (a) + (b);	\
	(b) = ((a) - __w) + (b); \
	(a) = __w;		\
} while (0)
#endif /* DEBUG */
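
/*
 * Illustrative sketch (not compiled): splitting an exact sum into a
 * rounded head and a rounding-error tail with _2sumF().  Names are
 * examples only; the caller must ensure |big| >= |small| (or big == 0).
 */
#if 0
static double
example_split_sum(double big, double small)
{
	__double_t hi, lo;	/* see the type advice in the comment above */

	hi = big;
	lo = small;
	_2sumF(hi, lo);		/* now hi + lo == big + small exactly */
	return (hi);		/* lo still holds the discarded low bits */
}
#endif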

/*
 * Set x += c, where x is represented in extra precision as a + b.
 * x must be sufficiently normalized and sufficiently larger than c,
 * and the result is then sufficiently normalized.
 *
 * The details of ordering are that |a| must be >= |c| (so that (a, c)
 * can be normalized without extra work to swap 'a' with c).  The details of
 * the normalization are that b must be small relative to the normalized 'a'.
 * Normalization of (a, c) makes the normalized c tiny relative to the
 * normalized a, so b remains small relative to 'a' in the result.  However,
 * b need not ever be tiny relative to 'a'.  For example, b might be about
 * 2**20 times smaller than 'a' to give about 20 extra bits of precision.
 * That is usually enough, and adding c (which by normalization is about
 * 2**53 times smaller than a) cannot change b significantly.  However,
 * cancellation of 'a' with c in normalization of (a, c) may reduce 'a'
 * significantly relative to b.  The caller must ensure that significant
 * cancellation doesn't occur, either by having c of the same sign as 'a',
 * or by having |c| a few percent smaller than |a|.  Pre-normalization of
 * (a, b) may help.
 *
 * This is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2
 * exercise 19).  We gain considerable efficiency by requiring the terms to
 * be sufficiently normalized and sufficiently increasing.
 */
#define	_3sumF(a, b, c) do {	\
	__typeof(a) __tmp;	\
				\
	__tmp = (c);		\
	_2sumF(__tmp, (a));	\
	(b) += (a);		\
	(a) = __tmp;		\
} while (0)
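
/*
 * Illustrative sketch (not compiled): folding a new term into a value
 * kept in extra precision as a + b.  The constants are examples only and
 * satisfy the ordering and normalization requirements described above.
 */
#if 0
static double
example_3sum(void)
{
	__double_t a = 0x1p30, b = 0x1p-30, c = 1;

	_3sumF(a, b, c);	/* (a, b) now holds 0x1p30 + 1 + 0x1p-30 */
	return (a + b);
}
#endif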

/*
 * Common routine to process the arguments to nan(), nanf(), and nanl().
 */
void _scan_nan(uint32_t *__words, int __num_words, const char *__s);

/*
 * Mix 0, 1 or 2 NaNs.  First add 0 to each arg.  This normally just turns
 * signaling NaNs into quiet NaNs by setting a quiet bit.  We do this
 * because we want to never return a signaling NaN, and also because we
 * don't want the quiet bit to affect the result.  Then mix the converted
 * args using the specified operation.
 *
 * When one arg is NaN, the result is typically that arg quieted.  When both
 * args are NaNs, the result is typically the quietening of the arg whose
 * mantissa is largest after quietening.  When neither arg is NaN, the
 * result may be NaN because it is indeterminate, or finite for subsequent
 * construction of a NaN as the indeterminate 0.0L/0.0L.
 *
 * Technical complications: the result in bits after rounding to the final
 * precision might depend on the runtime precision and/or on compiler
 * optimizations, especially when different register sets are used for
 * different precisions.  Try to make the result not depend on at least the
 * runtime precision by always doing the main mixing step in long double
 * precision.  Try to reduce dependencies on optimizations by adding the
 * 0's in different precisions (unless everything is in long double
 * precision).
 */
#define	nan_mix(x, y)		(nan_mix_op((x), (y), +))
#define	nan_mix_op(x, y, op)	(((x) + 0.0L) op ((y) + 0))
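
/*
 * Illustrative sketch (not compiled): how a two-argument function
 * typically propagates NaNs with nan_mix().  The function name is an
 * example only.
 */
#if 0
static double
example_two_arg_nan_path(double x, double y)
{
	if (isnan(x) || isnan(y))
		return (nan_mix(x, y));	/* a quiet NaN derived from the args */
	/* ... normal computation ... */
	return (x + y);
}
#endif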

#ifdef _COMPLEX_H

/*
 * C99 specifies that complex numbers have the same representation as
 * an array of two elements, where the first element is the real part
 * and the second element is the imaginary part.
 */
typedef union {
	float complex f;
	float a[2];
} float_complex;
typedef union {
	double complex f;
	double a[2];
} double_complex;
typedef union {
	long double complex f;
	long double a[2];
} long_double_complex;
#define	REALPART(z)	((z).a[0])
#define	IMAGPART(z)	((z).a[1])

/*
 * Inline functions that can be used to construct complex values.
 *
 * The C99 standard intends x+I*y to be used for this, but x+I*y is
 * currently unusable in general since gcc introduces many overflow,
 * underflow, sign and efficiency bugs by rewriting I*y as
 * (0.0+I)*(y+0.0*I) and laboriously computing the full complex product.
 * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted
 * to -0.0+I*0.0.
 *
 * The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL()
 * to construct complex values.  Compilers that conform to the C99
 * standard require the following functions to avoid the above issues.
 */

#ifndef CMPLXF
static __inline float complex
CMPLXF(float x, float y)
{
	float_complex z;

	REALPART(z) = x;
	IMAGPART(z) = y;
	return (z.f);
}
#endif

#ifndef CMPLX
static __inline double complex
CMPLX(double x, double y)
{
	double_complex z;

	REALPART(z) = x;
	IMAGPART(z) = y;
	return (z.f);
}
#endif

#ifndef CMPLXL
static __inline long double complex
CMPLXL(long double x, long double y)
{
	long_double_complex z;

	REALPART(z) = x;
	IMAGPART(z) = y;
	return (z.f);
}
#endif
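
/*
 * Illustrative sketch (not compiled): constructing a value whose
 * imaginary part is exactly +Inf, which x + I*y would corrupt as
 * described above.  The function name is an example only.
 */
#if 0
static double complex
example_imag_inf(double x)
{
	return (CMPLX(x, INFINITY));	/* real part x, imaginary part +Inf */
}
#endif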

#endif /* _COMPLEX_H */

/*
 * The rnint() family rounds to the nearest integer for a restricted range
 * of args (up to about 2**MANT_DIG).  We assume that the current
 * rounding mode is FE_TONEAREST so that this can be done efficiently.
 * Extra precision causes more problems in practice, and we only centralize
 * this here to reduce those problems, and have not solved the efficiency
 * problems.  The exp2() family uses a more delicate version of this that
 * requires extracting bits from the intermediate value, so it is not
 * centralized here and should copy any solution of the efficiency problems.
 */

static inline double
rnint(__double_t x)
{
	/*
	 * This casts to double to kill any extra precision.  This depends
	 * on the cast being applied to a double_t to avoid compiler bugs
	 * (this is a cleaner version of STRICT_ASSIGN()).  This is
	 * inefficient if there actually is extra precision, but is hard
	 * to improve on.  We use double_t in the API to minimise conversions
	 * for just calling here.  Note that we cannot easily change the
	 * magic number to the one that works directly with double_t, since
	 * the rounding precision is variable at runtime on x86 so the
	 * magic number would need to be variable.  Assuming that the
	 * rounding precision is always the default is too fragile.  This
	 * and many other complications will move when the default is
	 * changed to FP_PE.
	 */
	return ((double)(x + 0x1.8p52) - 0x1.8p52);
}
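
/*
 * Illustrative note: 0x1.8p52 is 1.5 * 2**52, so for x in the restricted
 * range described above, x + 0x1.8p52 lies in the binade [2**52, 2**53)
 * where the unit in the last place is 1; under FE_TONEAREST the addition
 * therefore rounds x to the nearest integer (ties to even), and
 * subtracting the constant recovers that integer exactly.  The factor of
 * 1.5 keeps the sum in that binade for negative x too.  For example,
 * rnint(2.5) evaluates to 2, matching rint(2.5).
 */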

static inline float
rnintf(__float_t x)
{
	/*
	 * As for rnint(), except we could just call that to handle the
	 * extra precision case, usually without losing efficiency.
	 */
	return ((float)(x + 0x1.8p23F) - 0x1.8p23F);
}

#ifdef LDBL_MANT_DIG
/*
 * The complications for extra precision are smaller for rnintl() since it
 * can safely assume that the rounding precision has been increased from
 * its default to FP_PE on x86.  We don't exploit that here to get small
 * optimizations from limiting the range to double.  We just need it for
 * the magic number to work with long doubles.  ld128 callers should use
 * rnint() instead of this if possible.  ld80 callers should prefer
 * rnintl() since for amd64 this avoids swapping the register set, while
 * for i386 it makes no difference (assuming FP_PE), and for other arches
 * it makes little difference.
 */
static inline long double
rnintl(long double x)
{
	return (x + __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2 -
	    __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2);
}
#endif /* LDBL_MANT_DIG */

/*
 * irint() and i64rint() give the same result as casting to their integer
 * return type provided their arg is a floating point integer.  They can
 * sometimes be more efficient because no rounding is required.
 */
#if defined(amd64) || defined(__i386__)
#define	irint(x)						\
    (sizeof(x) == sizeof(float) &&				\
    sizeof(__float_t) == sizeof(long double) ? irintf(x) :	\
    sizeof(x) == sizeof(double) &&				\
    sizeof(__double_t) == sizeof(long double) ? irintd(x) :	\
    sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x))
#else
#define	irint(x)	((int)(x))
#endif

#define	i64rint(x)	((int64_t)(x))	/* only needed for ld128 so not opt. */
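
/*
 * Illustrative sketch (not compiled): the usual pattern is to round with
 * rnint() first, so the value passed to irint() is already integral and
 * the conversion is exact.  The function name is an example only.
 */
#if 0
static int
example_round_to_int(double x)
{
	double r;

	r = rnint((__double_t)x);	/* nearest integer, still a double */
	return (irint(r));		/* exact, since r is integral */
}
#endif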

#if defined(__i386__)
static __inline int
irintf(float x)
{
	int n;

	__asm("fistl %0" : "=m" (n) : "t" (x));
	return (n);
}

static __inline int
irintd(double x)
{
	int n;

	__asm("fistl %0" : "=m" (n) : "t" (x));
	return (n);
}
#endif

#if defined(__amd64__) || defined(__i386__)
static __inline int
irintl(long double x)
{
	int n;

	__asm("fistl %0" : "=m" (n) : "t" (x));
	return (n);
}
#endif

#ifdef DEBUG
#if defined(__amd64__) || defined(__i386__)
#define	breakpoint()	asm("int $3")
#else
#include <signal.h>

#define	breakpoint()	raise(SIGTRAP)
#endif
#endif

/* Write a pari script to test things externally. */
#ifdef DOPRINT
#include <stdio.h>

#ifndef DOPRINT_SWIZZLE
#define	DOPRINT_SWIZZLE		0
#endif

#ifdef DOPRINT_LD80

#define	DOPRINT_START(xp) do {						\
	uint64_t __lx;							\
	uint16_t __hx;							\
									\
	/* Hack to give more-problematic args. */			\
	EXTRACT_LDBL80_WORDS(__hx, __lx, *xp);				\
	__lx ^= DOPRINT_SWIZZLE;					\
	INSERT_LDBL80_WORDS(*xp, __hx, __lx);				\
	printf("x = %.21Lg; ", (long double)*xp);			\
} while (0)
#define	DOPRINT_END1(v)							\
	printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v))
#define	DOPRINT_END2(hi, lo)						\
	printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n",		\
	    (long double)(hi), (long double)(lo))

#elif defined(DOPRINT_D64)

#define	DOPRINT_START(xp) do {						\
	uint32_t __hx, __lx;						\
									\
	EXTRACT_WORDS(__hx, __lx, *xp);					\
	__lx ^= DOPRINT_SWIZZLE;					\
	INSERT_WORDS(*xp, __hx, __lx);					\
	printf("x = %.21Lg; ", (long double)*xp);			\
} while (0)
#define	DOPRINT_END1(v)							\
	printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v))
#define	DOPRINT_END2(hi, lo)						\
	printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n",		\
	    (long double)(hi), (long double)(lo))

#elif defined(DOPRINT_F32)

#define	DOPRINT_START(xp) do {						\
	uint32_t __hx;							\
									\
	GET_FLOAT_WORD(__hx, *xp);					\
	__hx ^= DOPRINT_SWIZZLE;					\
	SET_FLOAT_WORD(*xp, __hx);					\
	printf("x = %.21Lg; ", (long double)*xp);			\
} while (0)
#define	DOPRINT_END1(v)							\
	printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v))
#define	DOPRINT_END2(hi, lo)						\
	printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n",		\
	    (long double)(hi), (long double)(lo))

#else /* !DOPRINT_LD80 && !DOPRINT_D64 (LD128 only) */

#ifndef DOPRINT_SWIZZLE_HIGH
#define	DOPRINT_SWIZZLE_HIGH	0
#endif

#define	DOPRINT_START(xp) do {						\
	uint64_t __lx, __llx;						\
	uint16_t __hx;							\
									\
	EXTRACT_LDBL128_WORDS(__hx, __lx, __llx, *xp);			\
	__llx ^= DOPRINT_SWIZZLE;					\
	__lx ^= DOPRINT_SWIZZLE_HIGH;					\
	INSERT_LDBL128_WORDS(*xp, __hx, __lx, __llx);			\
	printf("x = %.36Lg; ", (long double)*xp);			\
} while (0)
#define	DOPRINT_END1(v)							\
	printf("y = %.36Lg; z = 0; show(x, y, z);\n", (long double)(v))
#define	DOPRINT_END2(hi, lo)						\
	printf("y = %.36Lg; z = %.36Lg; show(x, y, z);\n",		\
	    (long double)(hi), (long double)(lo))

#endif /* DOPRINT_LD80 */

#else /* !DOPRINT */
#define	DOPRINT_START(xp)
#define	DOPRINT_END1(v)
#define	DOPRINT_END2(hi, lo)
#endif /* DOPRINT */

#define	RETURNP(x) do {			\
	DOPRINT_END1(x);		\
	RETURNF(x);			\
} while (0)
#define	RETURNPI(x) do {		\
	DOPRINT_END1(x);		\
	RETURNI(x);			\
} while (0)
#define	RETURN2P(x, y) do {		\
	DOPRINT_END2((x), (y));		\
	RETURNF((x) + (y));		\
} while (0)
#define	RETURN2PI(x, y) do {		\
	DOPRINT_END2((x), (y));		\
	RETURNI((x) + (y));		\
} while (0)
#ifdef STRUCT_RETURN
#define	RETURNSP(rp) do {		\
	if (!(rp)->lo_set)		\
		RETURNP((rp)->hi);	\
	RETURN2P((rp)->hi, (rp)->lo);	\
} while (0)
#define	RETURNSPI(rp) do {		\
	if (!(rp)->lo_set)		\
		RETURNPI((rp)->hi);	\
	RETURN2PI((rp)->hi, (rp)->lo);	\
} while (0)
#endif
#define	SUM2P(x, y) ({			\
	const __typeof (x) __x = (x);	\
	const __typeof (y) __y = (y);	\
					\
	DOPRINT_END2(__x, __y);		\
	__x + __y;			\
})

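/*
 * Illustrative sketch (not compiled): a function instrumented for DOPRINT
 * wraps its argument with DOPRINT_START() and returns through RETURNP(),
 * so that building with -DDOPRINT prints one pari "show" line per call.
 * The function name is an example only.
 */
#if 0
static double
example_instrumented(double x)
{
	DOPRINT_START(&x);	/* prints "x = ..." (and may swizzle x) */
	RETURNP(x * x);		/* prints "y = ...; z = 0; show(x, y, z);" */
}
#endif
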
/*
 * ieee style elementary functions
 *
 * We rename functions here to improve other sources' diffability
 * against fdlibm.
 */
#define	__ieee754_sqrt	sqrt
#define	__ieee754_acos	acos
#define	__ieee754_acosh	acosh
#define	__ieee754_log	log
#define	__ieee754_log2	log2
#define	__ieee754_atanh	atanh
#define	__ieee754_asin	asin
#define	__ieee754_atan2	atan2
#define	__ieee754_exp	exp
#define	__ieee754_cosh	cosh
#define	__ieee754_fmod	fmod
#define	__ieee754_pow	pow
#define	__ieee754_lgamma lgamma
#define	__ieee754_gamma	gamma
#define	__ieee754_lgamma_r lgamma_r
#define	__ieee754_gamma_r gamma_r
#define	__ieee754_log10	log10
#define	__ieee754_sinh	sinh
#define	__ieee754_hypot	hypot
#define	__ieee754_j0	j0
#define	__ieee754_j1	j1
#define	__ieee754_y0	y0
#define	__ieee754_y1	y1
#define	__ieee754_jn	jn
#define	__ieee754_yn	yn
#define	__ieee754_remainder remainder
#define	__ieee754_scalb	scalb
#define	__ieee754_sqrtf	sqrtf
#define	__ieee754_acosf	acosf
#define	__ieee754_acoshf acoshf
#define	__ieee754_logf	logf
#define	__ieee754_atanhf atanhf
#define	__ieee754_asinf	asinf
#define	__ieee754_atan2f atan2f
#define	__ieee754_expf	expf
#define	__ieee754_coshf	coshf
#define	__ieee754_fmodf	fmodf
#define	__ieee754_powf	powf
#define	__ieee754_lgammaf lgammaf
#define	__ieee754_gammaf gammaf
#define	__ieee754_lgammaf_r lgammaf_r
#define	__ieee754_gammaf_r gammaf_r
#define	__ieee754_log10f log10f
#define	__ieee754_log2f log2f
#define	__ieee754_sinhf	sinhf
#define	__ieee754_hypotf hypotf
#define	__ieee754_j0f	j0f
#define	__ieee754_j1f	j1f
#define	__ieee754_y0f	y0f
#define	__ieee754_y1f	y1f
#define	__ieee754_jnf	jnf
#define	__ieee754_ynf	ynf
#define	__ieee754_remainderf remainderf
#define	__ieee754_scalbf scalbf

/* fdlibm kernel function */
int	__kernel_rem_pio2(double*,double*,int,int,int);

/* double precision kernel functions */
#ifndef INLINE_REM_PIO2
int	__ieee754_rem_pio2(double,double*);
#endif
double	__kernel_sin(double,double,int);
double	__kernel_cos(double,double);
double	__kernel_tan(double,double,int);
double	__ldexp_exp(double,int);
#ifdef _COMPLEX_H
double complex __ldexp_cexp(double complex,int);
#endif

/* float precision kernel functions */
#ifndef INLINE_REM_PIO2F
int	__ieee754_rem_pio2f(float,double*);
#endif
#ifndef INLINE_KERNEL_SINDF
float	__kernel_sindf(double);
#endif
#ifndef INLINE_KERNEL_COSDF
float	__kernel_cosdf(double);
#endif
#ifndef INLINE_KERNEL_TANDF
float	__kernel_tandf(double,int);
#endif
float	__ldexp_expf(float,int);
#ifdef _COMPLEX_H
float complex __ldexp_cexpf(float complex,int);
#endif

/* long double precision kernel functions */
long double __kernel_sinl(long double, long double, int);
long double __kernel_cosl(long double, long double);
long double __kernel_tanl(long double, long double, int);

#endif /* !_MATH_PRIVATE_H_ */