1 //! A small number of math routines for floats and doubles.
2 //!
3 //! These are adapted from libm, a port of musl libc's libm to Rust.
4 //! libm can be found online [here](https://github.com/rust-lang/libm),
5 //! and is similarly licensed under an Apache2.0/MIT license
6
7 #![cfg(all(not(feature = "std"), feature = "compact"))]
8 #![doc(hidden)]
9
10 /* origin: FreeBSD /usr/src/lib/msun/src/e_powf.c */
11 /*
12 * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
13 */
14 /*
15 * ====================================================
16 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
17 *
18 * Developed at SunPro, a Sun Microsystems, Inc. business.
19 * Permission to use, copy, modify, and distribute this
20 * software is freely granted, provided that this notice
21 * is preserved.
22 * ====================================================
23 */
24
25 /// # Safety
26 ///
27 /// Safe if `index < array.len()`.
28 macro_rules! i {
29 ($array:ident, $index:expr) => {
30 // SAFETY: safe if `index < array.len()`.
31 unsafe { *$array.get_unchecked($index) }
32 };
33 }
34
powf(x: f32, y: f32) -> f3235 pub fn powf(x: f32, y: f32) -> f32 {
36 const BP: [f32; 2] = [1.0, 1.5];
37 const DP_H: [f32; 2] = [0.0, 5.84960938e-01]; /* 0x3f15c000 */
38 const DP_L: [f32; 2] = [0.0, 1.56322085e-06]; /* 0x35d1cfdc */
39 const TWO24: f32 = 16777216.0; /* 0x4b800000 */
40 const HUGE: f32 = 1.0e30;
41 const TINY: f32 = 1.0e-30;
42 const L1: f32 = 6.0000002384e-01; /* 0x3f19999a */
43 const L2: f32 = 4.2857143283e-01; /* 0x3edb6db7 */
44 const L3: f32 = 3.3333334327e-01; /* 0x3eaaaaab */
45 const L4: f32 = 2.7272811532e-01; /* 0x3e8ba305 */
46 const L5: f32 = 2.3066075146e-01; /* 0x3e6c3255 */
47 const L6: f32 = 2.0697501302e-01; /* 0x3e53f142 */
48 const P1: f32 = 1.6666667163e-01; /* 0x3e2aaaab */
49 const P2: f32 = -2.7777778450e-03; /* 0xbb360b61 */
50 const P3: f32 = 6.6137559770e-05; /* 0x388ab355 */
51 const P4: f32 = -1.6533901999e-06; /* 0xb5ddea0e */
52 const P5: f32 = 4.1381369442e-08; /* 0x3331bb4c */
53 const LG2: f32 = 6.9314718246e-01; /* 0x3f317218 */
54 const LG2_H: f32 = 6.93145752e-01; /* 0x3f317200 */
55 const LG2_L: f32 = 1.42860654e-06; /* 0x35bfbe8c */
56 const OVT: f32 = 4.2995665694e-08; /* -(128-log2(ovfl+.5ulp)) */
57 const CP: f32 = 9.6179670095e-01; /* 0x3f76384f =2/(3ln2) */
58 const CP_H: f32 = 9.6191406250e-01; /* 0x3f764000 =12b cp */
59 const CP_L: f32 = -1.1736857402e-04; /* 0xb8f623c6 =tail of cp_h */
60 const IVLN2: f32 = 1.4426950216e+00;
61 const IVLN2_H: f32 = 1.4426879883e+00;
62 const IVLN2_L: f32 = 7.0526075433e-06;
63
64 let mut z: f32;
65 let mut ax: f32;
66 let z_h: f32;
67 let z_l: f32;
68 let mut p_h: f32;
69 let mut p_l: f32;
70 let y1: f32;
71 let mut t1: f32;
72 let t2: f32;
73 let mut r: f32;
74 let s: f32;
75 let mut sn: f32;
76 let mut t: f32;
77 let mut u: f32;
78 let mut v: f32;
79 let mut w: f32;
80 let i: i32;
81 let mut j: i32;
82 let mut k: i32;
83 let mut yisint: i32;
84 let mut n: i32;
85 let hx: i32;
86 let hy: i32;
87 let mut ix: i32;
88 let iy: i32;
89 let mut is: i32;
90
91 hx = x.to_bits() as i32;
92 hy = y.to_bits() as i32;
93
94 ix = hx & 0x7fffffff;
95 iy = hy & 0x7fffffff;
96
97 /* x**0 = 1, even if x is NaN */
98 if iy == 0 {
99 return 1.0;
100 }
101
102 /* 1**y = 1, even if y is NaN */
103 if hx == 0x3f800000 {
104 return 1.0;
105 }
106
107 /* NaN if either arg is NaN */
108 if ix > 0x7f800000 || iy > 0x7f800000 {
109 return x + y;
110 }
111
112 /* determine if y is an odd int when x < 0
113 * yisint = 0 ... y is not an integer
114 * yisint = 1 ... y is an odd int
115 * yisint = 2 ... y is an even int
116 */
117 yisint = 0;
118 if hx < 0 {
119 if iy >= 0x4b800000 {
120 yisint = 2; /* even integer y */
121 } else if iy >= 0x3f800000 {
122 k = (iy >> 23) - 0x7f; /* exponent */
123 j = iy >> (23 - k);
124 if (j << (23 - k)) == iy {
125 yisint = 2 - (j & 1);
126 }
127 }
128 }
129
130 /* special value of y */
131 if iy == 0x7f800000 {
132 /* y is +-inf */
133 if ix == 0x3f800000 {
134 /* (-1)**+-inf is 1 */
135 return 1.0;
136 } else if ix > 0x3f800000 {
137 /* (|x|>1)**+-inf = inf,0 */
138 return if hy >= 0 {
139 y
140 } else {
141 0.0
142 };
143 } else {
144 /* (|x|<1)**+-inf = 0,inf */
145 return if hy >= 0 {
146 0.0
147 } else {
148 -y
149 };
150 }
151 }
152 if iy == 0x3f800000 {
153 /* y is +-1 */
154 return if hy >= 0 {
155 x
156 } else {
157 1.0 / x
158 };
159 }
160
161 if hy == 0x40000000 {
162 /* y is 2 */
163 return x * x;
164 }
165
166 if hy == 0x3f000000
167 /* y is 0.5 */
168 && hx >= 0
169 {
170 /* x >= +0 */
171 return sqrtf(x);
172 }
173
174 ax = fabsf(x);
175 /* special value of x */
176 if ix == 0x7f800000 || ix == 0 || ix == 0x3f800000 {
177 /* x is +-0,+-inf,+-1 */
178 z = ax;
179 if hy < 0 {
180 /* z = (1/|x|) */
181 z = 1.0 / z;
182 }
183
184 if hx < 0 {
185 if ((ix - 0x3f800000) | yisint) == 0 {
186 z = (z - z) / (z - z); /* (-1)**non-int is NaN */
187 } else if yisint == 1 {
188 z = -z; /* (x<0)**odd = -(|x|**odd) */
189 }
190 }
191 return z;
192 }
193
194 sn = 1.0; /* sign of result */
195 if hx < 0 {
196 if yisint == 0 {
197 /* (x<0)**(non-int) is NaN */
198 return (x - x) / (x - x);
199 }
200
201 if yisint == 1 {
202 /* (x<0)**(odd int) */
203 sn = -1.0;
204 }
205 }
206
207 /* |y| is HUGE */
208 if iy > 0x4d000000 {
209 /* if |y| > 2**27 */
210 /* over/underflow if x is not close to one */
211 if ix < 0x3f7ffff8 {
212 return if hy < 0 {
213 sn * HUGE * HUGE
214 } else {
215 sn * TINY * TINY
216 };
217 }
218
219 if ix > 0x3f800007 {
220 return if hy > 0 {
221 sn * HUGE * HUGE
222 } else {
223 sn * TINY * TINY
224 };
225 }
226
227 /* now |1-x| is TINY <= 2**-20, suffice to compute
228 log(x) by x-x^2/2+x^3/3-x^4/4 */
229 t = ax - 1.; /* t has 20 trailing zeros */
230 w = (t * t) * (0.5 - t * (0.333333333333 - t * 0.25));
231 u = IVLN2_H * t; /* IVLN2_H has 16 sig. bits */
232 v = t * IVLN2_L - w * IVLN2;
233 t1 = u + v;
234 is = t1.to_bits() as i32;
235 t1 = f32::from_bits(is as u32 & 0xfffff000);
236 t2 = v - (t1 - u);
237 } else {
238 let mut s2: f32;
239 let mut s_h: f32;
240 let s_l: f32;
241 let mut t_h: f32;
242 let mut t_l: f32;
243
244 n = 0;
245 /* take care subnormal number */
246 if ix < 0x00800000 {
247 ax *= TWO24;
248 n -= 24;
249 ix = ax.to_bits() as i32;
250 }
251 n += ((ix) >> 23) - 0x7f;
252 j = ix & 0x007fffff;
253 /* determine interval */
254 ix = j | 0x3f800000; /* normalize ix */
255 if j <= 0x1cc471 {
256 /* |x|<sqrt(3/2) */
257 k = 0;
258 } else if j < 0x5db3d7 {
259 /* |x|<sqrt(3) */
260 k = 1;
261 } else {
262 k = 0;
263 n += 1;
264 ix -= 0x00800000;
265 }
266 ax = f32::from_bits(ix as u32);
267
268 /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
269 u = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */
270 v = 1.0 / (ax + i!(BP, k as usize));
271 s = u * v;
272 s_h = s;
273 is = s_h.to_bits() as i32;
274 s_h = f32::from_bits(is as u32 & 0xfffff000);
275 /* t_h=ax+bp[k] High */
276 is = (((ix as u32 >> 1) & 0xfffff000) | 0x20000000) as i32;
277 t_h = f32::from_bits(is as u32 + 0x00400000 + ((k as u32) << 21));
278 t_l = ax - (t_h - i!(BP, k as usize));
279 s_l = v * ((u - s_h * t_h) - s_h * t_l);
280 /* compute log(ax) */
281 s2 = s * s;
282 r = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6)))));
283 r += s_l * (s_h + s);
284 s2 = s_h * s_h;
285 t_h = 3.0 + s2 + r;
286 is = t_h.to_bits() as i32;
287 t_h = f32::from_bits(is as u32 & 0xfffff000);
288 t_l = r - ((t_h - 3.0) - s2);
289 /* u+v = s*(1+...) */
290 u = s_h * t_h;
291 v = s_l * t_h + t_l * s;
292 /* 2/(3log2)*(s+...) */
293 p_h = u + v;
294 is = p_h.to_bits() as i32;
295 p_h = f32::from_bits(is as u32 & 0xfffff000);
296 p_l = v - (p_h - u);
297 z_h = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */
298 z_l = CP_L * p_h + p_l * CP + i!(DP_L, k as usize);
299 /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
300 t = n as f32;
301 t1 = ((z_h + z_l) + i!(DP_H, k as usize)) + t;
302 is = t1.to_bits() as i32;
303 t1 = f32::from_bits(is as u32 & 0xfffff000);
304 t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h);
305 };
306
307 /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
308 is = y.to_bits() as i32;
309 y1 = f32::from_bits(is as u32 & 0xfffff000);
310 p_l = (y - y1) * t1 + y * t2;
311 p_h = y1 * t1;
312 z = p_l + p_h;
313 j = z.to_bits() as i32;
314 if j > 0x43000000 {
315 /* if z > 128 */
316 return sn * HUGE * HUGE; /* overflow */
317 } else if j == 0x43000000 {
318 /* if z == 128 */
319 if p_l + OVT > z - p_h {
320 return sn * HUGE * HUGE; /* overflow */
321 }
322 } else if (j & 0x7fffffff) > 0x43160000 {
323 /* z < -150 */
324 // FIXME: check should be (uint32_t)j > 0xc3160000
325 return sn * TINY * TINY; /* underflow */
326 } else if j as u32 == 0xc3160000
327 /* z == -150 */
328 && p_l <= z - p_h
329 {
330 return sn * TINY * TINY; /* underflow */
331 }
332
333 /*
334 * compute 2**(p_h+p_l)
335 */
336 i = j & 0x7fffffff;
337 k = (i >> 23) - 0x7f;
338 n = 0;
339 if i > 0x3f000000 {
340 /* if |z| > 0.5, set n = [z+0.5] */
341 n = j + (0x00800000 >> (k + 1));
342 k = ((n & 0x7fffffff) >> 23) - 0x7f; /* new k for n */
343 t = f32::from_bits(n as u32 & !(0x007fffff >> k));
344 n = ((n & 0x007fffff) | 0x00800000) >> (23 - k);
345 if j < 0 {
346 n = -n;
347 }
348 p_h -= t;
349 }
350 t = p_l + p_h;
351 is = t.to_bits() as i32;
352 t = f32::from_bits(is as u32 & 0xffff8000);
353 u = t * LG2_H;
354 v = (p_l - (t - p_h)) * LG2 + t * LG2_L;
355 z = u + v;
356 w = v - (z - u);
357 t = z * z;
358 t1 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
359 r = (z * t1) / (t1 - 2.0) - (w + z * w);
360 z = 1.0 - (r - z);
361 j = z.to_bits() as i32;
362 j += n << 23;
363 if (j >> 23) <= 0 {
364 /* subnormal output */
365 z = scalbnf(z, n);
366 } else {
367 z = f32::from_bits(j as u32);
368 }
369 sn * z
370 }
371
372 /* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */
373 /*
374 * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
375 */
376 /*
377 * ====================================================
378 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
379 *
380 * Developed at SunPro, a Sun Microsystems, Inc. business.
381 * Permission to use, copy, modify, and distribute this
382 * software is freely granted, provided that this notice
383 * is preserved.
384 * ====================================================
385 */
386
sqrtf(x: f32) -> f32387 pub fn sqrtf(x: f32) -> f32 {
388 #[cfg(target_feature = "sse")]
389 {
390 // Note: This path is unlikely since LLVM will usually have already
391 // optimized sqrt calls into hardware instructions if sse is available,
392 // but if someone does end up here they'll apprected the speed increase.
393 #[cfg(target_arch = "x86")]
394 use core::arch::x86::*;
395 #[cfg(target_arch = "x86_64")]
396 use core::arch::x86_64::*;
397 // SAFETY: safe, since `_mm_set_ss` takes a 32-bit float, and returns
398 // a 128-bit type with the lowest 32-bits as `x`, `_mm_sqrt_ss` calculates
399 // the sqrt of this 128-bit vector, and `_mm_cvtss_f32` extracts the lower
400 // 32-bits as a 32-bit float.
401 unsafe {
402 let m = _mm_set_ss(x);
403 let m_sqrt = _mm_sqrt_ss(m);
404 _mm_cvtss_f32(m_sqrt)
405 }
406 }
407 #[cfg(not(target_feature = "sse"))]
408 {
409 const TINY: f32 = 1.0e-30;
410
411 let mut z: f32;
412 let sign: i32 = 0x80000000u32 as i32;
413 let mut ix: i32;
414 let mut s: i32;
415 let mut q: i32;
416 let mut m: i32;
417 let mut t: i32;
418 let mut i: i32;
419 let mut r: u32;
420
421 ix = x.to_bits() as i32;
422
423 /* take care of Inf and NaN */
424 if (ix as u32 & 0x7f800000) == 0x7f800000 {
425 return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
426 }
427
428 /* take care of zero */
429 if ix <= 0 {
430 if (ix & !sign) == 0 {
431 return x; /* sqrt(+-0) = +-0 */
432 }
433 if ix < 0 {
434 return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
435 }
436 }
437
438 /* normalize x */
439 m = ix >> 23;
440 if m == 0 {
441 /* subnormal x */
442 i = 0;
443 while ix & 0x00800000 == 0 {
444 ix <<= 1;
445 i = i + 1;
446 }
447 m -= i - 1;
448 }
449 m -= 127; /* unbias exponent */
450 ix = (ix & 0x007fffff) | 0x00800000;
451 if m & 1 == 1 {
452 /* odd m, double x to make it even */
453 ix += ix;
454 }
455 m >>= 1; /* m = [m/2] */
456
457 /* generate sqrt(x) bit by bit */
458 ix += ix;
459 q = 0;
460 s = 0;
461 r = 0x01000000; /* r = moving bit from right to left */
462
463 while r != 0 {
464 t = s + r as i32;
465 if t <= ix {
466 s = t + r as i32;
467 ix -= t;
468 q += r as i32;
469 }
470 ix += ix;
471 r >>= 1;
472 }
473
474 /* use floating add to find out rounding direction */
475 if ix != 0 {
476 z = 1.0 - TINY; /* raise inexact flag */
477 if z >= 1.0 {
478 z = 1.0 + TINY;
479 if z > 1.0 {
480 q += 2;
481 } else {
482 q += q & 1;
483 }
484 }
485 }
486
487 ix = (q >> 1) + 0x3f000000;
488 ix += m << 23;
489 f32::from_bits(ix as u32)
490 }
491 }
492
493 /// Absolute value (magnitude) (f32)
494 /// Calculates the absolute value (magnitude) of the argument `x`,
495 /// by direct manipulation of the bit representation of `x`.
fabsf(x: f32) -> f32496 pub fn fabsf(x: f32) -> f32 {
497 f32::from_bits(x.to_bits() & 0x7fffffff)
498 }
499
scalbnf(mut x: f32, mut n: i32) -> f32500 pub fn scalbnf(mut x: f32, mut n: i32) -> f32 {
501 let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127
502 let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126
503 let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24
504
505 if n > 127 {
506 x *= x1p127;
507 n -= 127;
508 if n > 127 {
509 x *= x1p127;
510 n -= 127;
511 if n > 127 {
512 n = 127;
513 }
514 }
515 } else if n < -126 {
516 x *= x1p_126 * x1p24;
517 n += 126 - 24;
518 if n < -126 {
519 x *= x1p_126 * x1p24;
520 n += 126 - 24;
521 if n < -126 {
522 n = -126;
523 }
524 }
525 }
526 x * f32::from_bits(((0x7f + n) as u32) << 23)
527 }
528
529 /* origin: FreeBSD /usr/src/lib/msun/src/e_pow.c */
530 /*
531 * ====================================================
532 * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved.
533 *
534 * Permission to use, copy, modify, and distribute this
535 * software is freely granted, provided that this notice
536 * is preserved.
537 * ====================================================
538 */
539
540 // pow(x,y) return x**y
541 //
542 // n
543 // Method: Let x = 2 * (1+f)
544 // 1. Compute and return log2(x) in two pieces:
545 // log2(x) = w1 + w2,
546 // where w1 has 53-24 = 29 bit trailing zeros.
547 // 2. Perform y*log2(x) = n+y' by simulating muti-precision
548 // arithmetic, where |y'|<=0.5.
549 // 3. Return x**y = 2**n*exp(y'*log2)
550 //
551 // Special cases:
552 // 1. (anything) ** 0 is 1
553 // 2. 1 ** (anything) is 1
554 // 3. (anything except 1) ** NAN is NAN
555 // 4. NAN ** (anything except 0) is NAN
556 // 5. +-(|x| > 1) ** +INF is +INF
557 // 6. +-(|x| > 1) ** -INF is +0
558 // 7. +-(|x| < 1) ** +INF is +0
559 // 8. +-(|x| < 1) ** -INF is +INF
560 // 9. -1 ** +-INF is 1
561 // 10. +0 ** (+anything except 0, NAN) is +0
562 // 11. -0 ** (+anything except 0, NAN, odd integer) is +0
563 // 12. +0 ** (-anything except 0, NAN) is +INF, raise divbyzero
564 // 13. -0 ** (-anything except 0, NAN, odd integer) is +INF, raise divbyzero
565 // 14. -0 ** (+odd integer) is -0
566 // 15. -0 ** (-odd integer) is -INF, raise divbyzero
567 // 16. +INF ** (+anything except 0,NAN) is +INF
568 // 17. +INF ** (-anything except 0,NAN) is +0
569 // 18. -INF ** (+odd integer) is -INF
570 // 19. -INF ** (anything) = -0 ** (-anything), (anything except odd integer)
571 // 20. (anything) ** 1 is (anything)
572 // 21. (anything) ** -1 is 1/(anything)
573 // 22. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
574 // 23. (-anything except 0 and inf) ** (non-integer) is NAN
575 //
576 // Accuracy:
577 // pow(x,y) returns x**y nearly rounded. In particular
578 // pow(integer,integer)
579 // always returns the correct integer provided it is
580 // representable.
581 //
582 // Constants :
583 // The hexadecimal values are the intended ones for the following
584 // constants. The decimal values may be used, provided that the
585 // compiler will convert from decimal to binary accurately enough
586 // to produce the hexadecimal values shown.
587
powd(x: f64, y: f64) -> f64588 pub fn powd(x: f64, y: f64) -> f64 {
589 const BP: [f64; 2] = [1.0, 1.5];
590 const DP_H: [f64; 2] = [0.0, 5.84962487220764160156e-01]; /* 0x3fe2b803_40000000 */
591 const DP_L: [f64; 2] = [0.0, 1.35003920212974897128e-08]; /* 0x3E4CFDEB, 0x43CFD006 */
592 const TWO53: f64 = 9007199254740992.0; /* 0x43400000_00000000 */
593 const HUGE: f64 = 1.0e300;
594 const TINY: f64 = 1.0e-300;
595
596 // poly coefs for (3/2)*(log(x)-2s-2/3*s**3:
597 const L1: f64 = 5.99999999999994648725e-01; /* 0x3fe33333_33333303 */
598 const L2: f64 = 4.28571428578550184252e-01; /* 0x3fdb6db6_db6fabff */
599 const L3: f64 = 3.33333329818377432918e-01; /* 0x3fd55555_518f264d */
600 const L4: f64 = 2.72728123808534006489e-01; /* 0x3fd17460_a91d4101 */
601 const L5: f64 = 2.30660745775561754067e-01; /* 0x3fcd864a_93c9db65 */
602 const L6: f64 = 2.06975017800338417784e-01; /* 0x3fca7e28_4a454eef */
603 const P1: f64 = 1.66666666666666019037e-01; /* 0x3fc55555_5555553e */
604 const P2: f64 = -2.77777777770155933842e-03; /* 0xbf66c16c_16bebd93 */
605 const P3: f64 = 6.61375632143793436117e-05; /* 0x3f11566a_af25de2c */
606 const P4: f64 = -1.65339022054652515390e-06; /* 0xbebbbd41_c5d26bf1 */
607 const P5: f64 = 4.13813679705723846039e-08; /* 0x3e663769_72bea4d0 */
608 const LG2: f64 = 6.93147180559945286227e-01; /* 0x3fe62e42_fefa39ef */
609 const LG2_H: f64 = 6.93147182464599609375e-01; /* 0x3fe62e43_00000000 */
610 const LG2_L: f64 = -1.90465429995776804525e-09; /* 0xbe205c61_0ca86c39 */
611 const OVT: f64 = 8.0085662595372944372e-017; /* -(1024-log2(ovfl+.5ulp)) */
612 const CP: f64 = 9.61796693925975554329e-01; /* 0x3feec709_dc3a03fd =2/(3ln2) */
613 const CP_H: f64 = 9.61796700954437255859e-01; /* 0x3feec709_e0000000 =(float)cp */
614 const CP_L: f64 = -7.02846165095275826516e-09; /* 0xbe3e2fe0_145b01f5 =tail of cp_h*/
615 const IVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547_652b82fe =1/ln2 */
616 const IVLN2_H: f64 = 1.44269502162933349609e+00; /* 0x3ff71547_60000000 =24b 1/ln2*/
617 const IVLN2_L: f64 = 1.92596299112661746887e-08; /* 0x3e54ae0b_f85ddf44 =1/ln2 tail*/
618
619 let t1: f64;
620 let t2: f64;
621
622 let (hx, lx): (i32, u32) = ((x.to_bits() >> 32) as i32, x.to_bits() as u32);
623 let (hy, ly): (i32, u32) = ((y.to_bits() >> 32) as i32, y.to_bits() as u32);
624
625 let mut ix: i32 = (hx & 0x7fffffff) as i32;
626 let iy: i32 = (hy & 0x7fffffff) as i32;
627
628 /* x**0 = 1, even if x is NaN */
629 if ((iy as u32) | ly) == 0 {
630 return 1.0;
631 }
632
633 /* 1**y = 1, even if y is NaN */
634 if hx == 0x3ff00000 && lx == 0 {
635 return 1.0;
636 }
637
638 /* NaN if either arg is NaN */
639 if ix > 0x7ff00000
640 || (ix == 0x7ff00000 && lx != 0)
641 || iy > 0x7ff00000
642 || (iy == 0x7ff00000 && ly != 0)
643 {
644 return x + y;
645 }
646
647 /* determine if y is an odd int when x < 0
648 * yisint = 0 ... y is not an integer
649 * yisint = 1 ... y is an odd int
650 * yisint = 2 ... y is an even int
651 */
652 let mut yisint: i32 = 0;
653 let mut k: i32;
654 let mut j: i32;
655 if hx < 0 {
656 if iy >= 0x43400000 {
657 yisint = 2; /* even integer y */
658 } else if iy >= 0x3ff00000 {
659 k = (iy >> 20) - 0x3ff; /* exponent */
660
661 if k > 20 {
662 j = (ly >> (52 - k)) as i32;
663
664 if (j << (52 - k)) == (ly as i32) {
665 yisint = 2 - (j & 1);
666 }
667 } else if ly == 0 {
668 j = iy >> (20 - k);
669
670 if (j << (20 - k)) == iy {
671 yisint = 2 - (j & 1);
672 }
673 }
674 }
675 }
676
677 if ly == 0 {
678 /* special value of y */
679 if iy == 0x7ff00000 {
680 /* y is +-inf */
681
682 return if ((ix - 0x3ff00000) | (lx as i32)) == 0 {
683 /* (-1)**+-inf is 1 */
684 1.0
685 } else if ix >= 0x3ff00000 {
686 /* (|x|>1)**+-inf = inf,0 */
687 if hy >= 0 {
688 y
689 } else {
690 0.0
691 }
692 } else {
693 /* (|x|<1)**+-inf = 0,inf */
694 if hy >= 0 {
695 0.0
696 } else {
697 -y
698 }
699 };
700 }
701
702 if iy == 0x3ff00000 {
703 /* y is +-1 */
704 return if hy >= 0 {
705 x
706 } else {
707 1.0 / x
708 };
709 }
710
711 if hy == 0x40000000 {
712 /* y is 2 */
713 return x * x;
714 }
715
716 if hy == 0x3fe00000 {
717 /* y is 0.5 */
718 if hx >= 0 {
719 /* x >= +0 */
720 return sqrtd(x);
721 }
722 }
723 }
724
725 let mut ax: f64 = fabsd(x);
726 if lx == 0 {
727 /* special value of x */
728 if ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000 {
729 /* x is +-0,+-inf,+-1 */
730 let mut z: f64 = ax;
731
732 if hy < 0 {
733 /* z = (1/|x|) */
734 z = 1.0 / z;
735 }
736
737 if hx < 0 {
738 if ((ix - 0x3ff00000) | yisint) == 0 {
739 z = (z - z) / (z - z); /* (-1)**non-int is NaN */
740 } else if yisint == 1 {
741 z = -z; /* (x<0)**odd = -(|x|**odd) */
742 }
743 }
744
745 return z;
746 }
747 }
748
749 let mut s: f64 = 1.0; /* sign of result */
750 if hx < 0 {
751 if yisint == 0 {
752 /* (x<0)**(non-int) is NaN */
753 return (x - x) / (x - x);
754 }
755
756 if yisint == 1 {
757 /* (x<0)**(odd int) */
758 s = -1.0;
759 }
760 }
761
762 /* |y| is HUGE */
763 if iy > 0x41e00000 {
764 /* if |y| > 2**31 */
765 if iy > 0x43f00000 {
766 /* if |y| > 2**64, must o/uflow */
767 if ix <= 0x3fefffff {
768 return if hy < 0 {
769 HUGE * HUGE
770 } else {
771 TINY * TINY
772 };
773 }
774
775 if ix >= 0x3ff00000 {
776 return if hy > 0 {
777 HUGE * HUGE
778 } else {
779 TINY * TINY
780 };
781 }
782 }
783
784 /* over/underflow if x is not close to one */
785 if ix < 0x3fefffff {
786 return if hy < 0 {
787 s * HUGE * HUGE
788 } else {
789 s * TINY * TINY
790 };
791 }
792 if ix > 0x3ff00000 {
793 return if hy > 0 {
794 s * HUGE * HUGE
795 } else {
796 s * TINY * TINY
797 };
798 }
799
800 /* now |1-x| is TINY <= 2**-20, suffice to compute
801 log(x) by x-x^2/2+x^3/3-x^4/4 */
802 let t: f64 = ax - 1.0; /* t has 20 trailing zeros */
803 let w: f64 = (t * t) * (0.5 - t * (0.3333333333333333333333 - t * 0.25));
804 let u: f64 = IVLN2_H * t; /* ivln2_h has 21 sig. bits */
805 let v: f64 = t * IVLN2_L - w * IVLN2;
806 t1 = with_set_low_word(u + v, 0);
807 t2 = v - (t1 - u);
808 } else {
809 // double ss,s2,s_h,s_l,t_h,t_l;
810 let mut n: i32 = 0;
811
812 if ix < 0x00100000 {
813 /* take care subnormal number */
814 ax *= TWO53;
815 n -= 53;
816 ix = get_high_word(ax) as i32;
817 }
818
819 n += (ix >> 20) - 0x3ff;
820 j = ix & 0x000fffff;
821
822 /* determine interval */
823 let k: i32;
824 ix = j | 0x3ff00000; /* normalize ix */
825 if j <= 0x3988E {
826 /* |x|<sqrt(3/2) */
827 k = 0;
828 } else if j < 0xBB67A {
829 /* |x|<sqrt(3) */
830 k = 1;
831 } else {
832 k = 0;
833 n += 1;
834 ix -= 0x00100000;
835 }
836 ax = with_set_high_word(ax, ix as u32);
837
838 /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
839 let u: f64 = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */
840 let v: f64 = 1.0 / (ax + i!(BP, k as usize));
841 let ss: f64 = u * v;
842 let s_h = with_set_low_word(ss, 0);
843
844 /* t_h=ax+bp[k] High */
845 let t_h: f64 = with_set_high_word(
846 0.0,
847 ((ix as u32 >> 1) | 0x20000000) + 0x00080000 + ((k as u32) << 18),
848 );
849 let t_l: f64 = ax - (t_h - i!(BP, k as usize));
850 let s_l: f64 = v * ((u - s_h * t_h) - s_h * t_l);
851
852 /* compute log(ax) */
853 let s2: f64 = ss * ss;
854 let mut r: f64 = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6)))));
855 r += s_l * (s_h + ss);
856 let s2: f64 = s_h * s_h;
857 let t_h: f64 = with_set_low_word(3.0 + s2 + r, 0);
858 let t_l: f64 = r - ((t_h - 3.0) - s2);
859
860 /* u+v = ss*(1+...) */
861 let u: f64 = s_h * t_h;
862 let v: f64 = s_l * t_h + t_l * ss;
863
864 /* 2/(3log2)*(ss+...) */
865 let p_h: f64 = with_set_low_word(u + v, 0);
866 let p_l = v - (p_h - u);
867 let z_h: f64 = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */
868 let z_l: f64 = CP_L * p_h + p_l * CP + i!(DP_L, k as usize);
869
870 /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */
871 let t: f64 = n as f64;
872 t1 = with_set_low_word(((z_h + z_l) + i!(DP_H, k as usize)) + t, 0);
873 t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h);
874 }
875
876 /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
877 let y1: f64 = with_set_low_word(y, 0);
878 let p_l: f64 = (y - y1) * t1 + y * t2;
879 let mut p_h: f64 = y1 * t1;
880 let z: f64 = p_l + p_h;
881 let mut j: i32 = (z.to_bits() >> 32) as i32;
882 let i: i32 = z.to_bits() as i32;
883 // let (j, i): (i32, i32) = ((z.to_bits() >> 32) as i32, z.to_bits() as i32);
884
885 if j >= 0x40900000 {
886 /* z >= 1024 */
887 if (j - 0x40900000) | i != 0 {
888 /* if z > 1024 */
889 return s * HUGE * HUGE; /* overflow */
890 }
891
892 if p_l + OVT > z - p_h {
893 return s * HUGE * HUGE; /* overflow */
894 }
895 } else if (j & 0x7fffffff) >= 0x4090cc00 {
896 /* z <= -1075 */
897 // FIXME: instead of abs(j) use unsigned j
898
899 if (((j as u32) - 0xc090cc00) | (i as u32)) != 0 {
900 /* z < -1075 */
901 return s * TINY * TINY; /* underflow */
902 }
903
904 if p_l <= z - p_h {
905 return s * TINY * TINY; /* underflow */
906 }
907 }
908
909 /* compute 2**(p_h+p_l) */
910 let i: i32 = j & (0x7fffffff as i32);
911 k = (i >> 20) - 0x3ff;
912 let mut n: i32 = 0;
913
914 if i > 0x3fe00000 {
915 /* if |z| > 0.5, set n = [z+0.5] */
916 n = j + (0x00100000 >> (k + 1));
917 k = ((n & 0x7fffffff) >> 20) - 0x3ff; /* new k for n */
918 let t: f64 = with_set_high_word(0.0, (n & !(0x000fffff >> k)) as u32);
919 n = ((n & 0x000fffff) | 0x00100000) >> (20 - k);
920 if j < 0 {
921 n = -n;
922 }
923 p_h -= t;
924 }
925
926 let t: f64 = with_set_low_word(p_l + p_h, 0);
927 let u: f64 = t * LG2_H;
928 let v: f64 = (p_l - (t - p_h)) * LG2 + t * LG2_L;
929 let mut z: f64 = u + v;
930 let w: f64 = v - (z - u);
931 let t: f64 = z * z;
932 let t1: f64 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
933 let r: f64 = (z * t1) / (t1 - 2.0) - (w + z * w);
934 z = 1.0 - (r - z);
935 j = get_high_word(z) as i32;
936 j += n << 20;
937
938 if (j >> 20) <= 0 {
939 /* subnormal output */
940 z = scalbnd(z, n);
941 } else {
942 z = with_set_high_word(z, j as u32);
943 }
944
945 s * z
946 }
947
948 /// Absolute value (magnitude) (f64)
949 /// Calculates the absolute value (magnitude) of the argument `x`,
950 /// by direct manipulation of the bit representation of `x`.
fabsd(x: f64) -> f64951 pub fn fabsd(x: f64) -> f64 {
952 f64::from_bits(x.to_bits() & (u64::MAX / 2))
953 }
954
scalbnd(x: f64, mut n: i32) -> f64955 pub fn scalbnd(x: f64, mut n: i32) -> f64 {
956 let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023
957 let x1p53 = f64::from_bits(0x4340000000000000); // 0x1p53 === 2 ^ 53
958 let x1p_1022 = f64::from_bits(0x0010000000000000); // 0x1p-1022 === 2 ^ (-1022)
959
960 let mut y = x;
961
962 if n > 1023 {
963 y *= x1p1023;
964 n -= 1023;
965 if n > 1023 {
966 y *= x1p1023;
967 n -= 1023;
968 if n > 1023 {
969 n = 1023;
970 }
971 }
972 } else if n < -1022 {
973 /* make sure final n < -53 to avoid double
974 rounding in the subnormal range */
975 y *= x1p_1022 * x1p53;
976 n += 1022 - 53;
977 if n < -1022 {
978 y *= x1p_1022 * x1p53;
979 n += 1022 - 53;
980 if n < -1022 {
981 n = -1022;
982 }
983 }
984 }
985 y * f64::from_bits(((0x3ff + n) as u64) << 52)
986 }
987
988 /* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */
989 /*
990 * ====================================================
991 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
992 *
993 * Developed at SunSoft, a Sun Microsystems, Inc. business.
994 * Permission to use, copy, modify, and distribute this
995 * software is freely granted, provided that this notice
996 * is preserved.
997 * ====================================================
998 */
999 /* sqrt(x)
1000 * Return correctly rounded sqrt.
1001 * ------------------------------------------
1002 * | Use the hardware sqrt if you have one |
1003 * ------------------------------------------
1004 * Method:
1005 * Bit by bit method using integer arithmetic. (Slow, but portable)
1006 * 1. Normalization
1007 * Scale x to y in [1,4) with even powers of 2:
1008 * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then
1009 * sqrt(x) = 2^k * sqrt(y)
1010 * 2. Bit by bit computation
1011 * Let q = sqrt(y) truncated to i bit after binary point (q = 1),
1012 * i 0
1013 * i+1 2
1014 * s = 2*q , and y = 2 * ( y - q ). (1)
1015 * i i i i
1016 *
1017 * To compute q from q , one checks whether
1018 * i+1 i
1019 *
1020 * -(i+1) 2
1021 * (q + 2 ) <= y. (2)
1022 * i
1023 * -(i+1)
1024 * If (2) is false, then q = q ; otherwise q = q + 2 .
1025 * i+1 i i+1 i
1026 *
1027 * With some algebraic manipulation, it is not difficult to see
1028 * that (2) is equivalent to
1029 * -(i+1)
1030 * s + 2 <= y (3)
1031 * i i
1032 *
1033 * The advantage of (3) is that s and y can be computed by
1034 * i i
1035 * the following recurrence formula:
1036 * if (3) is false
1037 *
1038 * s = s , y = y ; (4)
1039 * i+1 i i+1 i
1040 *
1041 * otherwise,
1042 * -i -(i+1)
1043 * s = s + 2 , y = y - s - 2 (5)
1044 * i+1 i i+1 i i
1045 *
1046 * One may easily use induction to prove (4) and (5).
1047 * Note. Since the left hand side of (3) contain only i+2 bits,
1048 * it does not necessary to do a full (53-bit) comparison
1049 * in (3).
1050 * 3. Final rounding
1051 * After generating the 53 bits result, we compute one more bit.
1052 * Together with the remainder, we can decide whether the
1053 * result is exact, bigger than 1/2ulp, or less than 1/2ulp
1054 * (it will never equal to 1/2ulp).
1055 * The rounding mode can be detected by checking whether
1056 * huge + tiny is equal to huge, and whether huge - tiny is
1057 * equal to huge for some floating point number "huge" and "tiny".
1058 *
1059 * Special cases:
1060 * sqrt(+-0) = +-0 ... exact
1061 * sqrt(inf) = inf
1062 * sqrt(-ve) = NaN ... with invalid signal
1063 * sqrt(NaN) = NaN ... with invalid signal for signaling NaN
1064 */
1065
sqrtd(x: f64) -> f641066 pub fn sqrtd(x: f64) -> f64 {
1067 #[cfg(target_feature = "sse2")]
1068 {
1069 // Note: This path is unlikely since LLVM will usually have already
1070 // optimized sqrt calls into hardware instructions if sse2 is available,
1071 // but if someone does end up here they'll apprected the speed increase.
1072 #[cfg(target_arch = "x86")]
1073 use core::arch::x86::*;
1074 #[cfg(target_arch = "x86_64")]
1075 use core::arch::x86_64::*;
1076 // SAFETY: safe, since `_mm_set_sd` takes a 64-bit float, and returns
1077 // a 128-bit type with the lowest 64-bits as `x`, `_mm_sqrt_ss` calculates
1078 // the sqrt of this 128-bit vector, and `_mm_cvtss_f64` extracts the lower
1079 // 64-bits as a 64-bit float.
1080 unsafe {
1081 let m = _mm_set_sd(x);
1082 let m_sqrt = _mm_sqrt_pd(m);
1083 _mm_cvtsd_f64(m_sqrt)
1084 }
1085 }
1086 #[cfg(not(target_feature = "sse2"))]
1087 {
1088 use core::num::Wrapping;
1089
1090 const TINY: f64 = 1.0e-300;
1091
1092 let mut z: f64;
1093 let sign: Wrapping<u32> = Wrapping(0x80000000);
1094 let mut ix0: i32;
1095 let mut s0: i32;
1096 let mut q: i32;
1097 let mut m: i32;
1098 let mut t: i32;
1099 let mut i: i32;
1100 let mut r: Wrapping<u32>;
1101 let mut t1: Wrapping<u32>;
1102 let mut s1: Wrapping<u32>;
1103 let mut ix1: Wrapping<u32>;
1104 let mut q1: Wrapping<u32>;
1105
1106 ix0 = (x.to_bits() >> 32) as i32;
1107 ix1 = Wrapping(x.to_bits() as u32);
1108
1109 /* take care of Inf and NaN */
1110 if (ix0 & 0x7ff00000) == 0x7ff00000 {
1111 return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
1112 }
1113 /* take care of zero */
1114 if ix0 <= 0 {
1115 if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 {
1116 return x; /* sqrt(+-0) = +-0 */
1117 }
1118 if ix0 < 0 {
1119 return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
1120 }
1121 }
1122 /* normalize x */
1123 m = ix0 >> 20;
1124 if m == 0 {
1125 /* subnormal x */
1126 while ix0 == 0 {
1127 m -= 21;
1128 ix0 |= (ix1 >> 11).0 as i32;
1129 ix1 <<= 21;
1130 }
1131 i = 0;
1132 while (ix0 & 0x00100000) == 0 {
1133 i += 1;
1134 ix0 <<= 1;
1135 }
1136 m -= i - 1;
1137 ix0 |= (ix1 >> (32 - i) as usize).0 as i32;
1138 ix1 = ix1 << i as usize;
1139 }
1140 m -= 1023; /* unbias exponent */
1141 ix0 = (ix0 & 0x000fffff) | 0x00100000;
1142 if (m & 1) == 1 {
1143 /* odd m, double x to make it even */
1144 ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
1145 ix1 += ix1;
1146 }
1147 m >>= 1; /* m = [m/2] */
1148
1149 /* generate sqrt(x) bit by bit */
1150 ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
1151 ix1 += ix1;
1152 q = 0; /* [q,q1] = sqrt(x) */
1153 q1 = Wrapping(0);
1154 s0 = 0;
1155 s1 = Wrapping(0);
1156 r = Wrapping(0x00200000); /* r = moving bit from right to left */
1157
1158 while r != Wrapping(0) {
1159 t = s0 + r.0 as i32;
1160 if t <= ix0 {
1161 s0 = t + r.0 as i32;
1162 ix0 -= t;
1163 q += r.0 as i32;
1164 }
1165 ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
1166 ix1 += ix1;
1167 r >>= 1;
1168 }
1169
1170 r = sign;
1171 while r != Wrapping(0) {
1172 t1 = s1 + r;
1173 t = s0;
1174 if t < ix0 || (t == ix0 && t1 <= ix1) {
1175 s1 = t1 + r;
1176 if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) {
1177 s0 += 1;
1178 }
1179 ix0 -= t;
1180 if ix1 < t1 {
1181 ix0 -= 1;
1182 }
1183 ix1 -= t1;
1184 q1 += r;
1185 }
1186 ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
1187 ix1 += ix1;
1188 r >>= 1;
1189 }
1190
1191 /* use floating add to find out rounding direction */
1192 if (ix0 as u32 | ix1.0) != 0 {
1193 z = 1.0 - TINY; /* raise inexact flag */
1194 if z >= 1.0 {
1195 z = 1.0 + TINY;
1196 if q1.0 == 0xffffffff {
1197 q1 = Wrapping(0);
1198 q += 1;
1199 } else if z > 1.0 {
1200 if q1.0 == 0xfffffffe {
1201 q += 1;
1202 }
1203 q1 += Wrapping(2);
1204 } else {
1205 q1 += q1 & Wrapping(1);
1206 }
1207 }
1208 }
1209 ix0 = (q >> 1) + 0x3fe00000;
1210 ix1 = q1 >> 1;
1211 if (q & 1) == 1 {
1212 ix1 |= sign;
1213 }
1214 ix0 += m << 20;
1215 f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64)
1216 }
1217 }
1218
1219 #[inline]
get_high_word(x: f64) -> u321220 fn get_high_word(x: f64) -> u32 {
1221 (x.to_bits() >> 32) as u32
1222 }
1223
1224 #[inline]
with_set_high_word(f: f64, hi: u32) -> f641225 fn with_set_high_word(f: f64, hi: u32) -> f64 {
1226 let mut tmp = f.to_bits();
1227 tmp &= 0x00000000_ffffffff;
1228 tmp |= (hi as u64) << 32;
1229 f64::from_bits(tmp)
1230 }
1231
1232 #[inline]
with_set_low_word(f: f64, lo: u32) -> f641233 fn with_set_low_word(f: f64, lo: u32) -> f64 {
1234 let mut tmp = f.to_bits();
1235 tmp &= 0xffffffff_00000000;
1236 tmp |= lo as u64;
1237 f64::from_bits(tmp)
1238 }
1239