• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Defines rounding schemes for floating-point numbers.
2 
3 #![doc(hidden)]
4 
5 use crate::extended_float::ExtendedFloat;
6 use crate::mask::{lower_n_halfway, lower_n_mask};
7 use crate::num::Float;
8 
9 // ROUNDING
10 // --------
11 
12 /// Round an extended-precision float to the nearest machine float.
13 ///
14 /// Shifts the significant digits into place, adjusts the exponent,
15 /// so it can be easily converted to a native float.
16 #[cfg_attr(not(feature = "compact"), inline)]
round<F, Cb>(fp: &mut ExtendedFloat, cb: Cb) where F: Float, Cb: Fn(&mut ExtendedFloat, i32),17 pub fn round<F, Cb>(fp: &mut ExtendedFloat, cb: Cb)
18 where
19     F: Float,
20     Cb: Fn(&mut ExtendedFloat, i32),
21 {
22     let fp_inf = ExtendedFloat {
23         mant: 0,
24         exp: F::INFINITE_POWER,
25     };
26 
27     // Calculate our shift in significant digits.
28     let mantissa_shift = 64 - F::MANTISSA_SIZE - 1;
29 
30     // Check for a denormal float, if after the shift the exponent is negative.
31     if -fp.exp >= mantissa_shift {
32         // Have a denormal float that isn't a literal 0.
33         // The extra 1 is to adjust for the denormal float, which is
34         // `1 - F::EXPONENT_BIAS`. This works as before, because our
35         // old logic rounded to `F::DENORMAL_EXPONENT` (now 1), and then
36         // checked if `exp == F::DENORMAL_EXPONENT` and no hidden mask
37         // bit was set. Here, we handle that here, rather than later.
38         //
39         // This might round-down to 0, but shift will be at **max** 65,
40         // for halfway cases rounding towards 0.
41         let shift = -fp.exp + 1;
42         debug_assert!(shift <= 65);
43         cb(fp, shift.min(64));
44         // Check for round-up: if rounding-nearest carried us to the hidden bit.
45         fp.exp = (fp.mant >= F::HIDDEN_BIT_MASK) as i32;
46         return;
47     }
48 
49     // The float is normal, round to the hidden bit.
50     cb(fp, mantissa_shift);
51 
52     // Check if we carried, and if so, shift the bit to the hidden bit.
53     let carry_mask = F::CARRY_MASK;
54     if fp.mant & carry_mask == carry_mask {
55         fp.mant >>= 1;
56         fp.exp += 1;
57     }
58 
59     // Handle if we carried and check for overflow again.
60     if fp.exp >= F::INFINITE_POWER {
61         // Exponent is above largest normal value, must be infinite.
62         *fp = fp_inf;
63         return;
64     }
65 
66     // Remove the hidden bit.
67     fp.mant &= F::MANTISSA_MASK;
68 }
69 
70 /// Shift right N-bytes and round towards a direction.
71 ///
72 /// Callback should take the following parameters:
73 ///     1. is_odd
74 ///     1. is_halfway
75 ///     1. is_above
76 #[cfg_attr(not(feature = "compact"), inline)]
round_nearest_tie_even<Cb>(fp: &mut ExtendedFloat, shift: i32, cb: Cb) where Cb: Fn(bool, bool, bool) -> bool,77 pub fn round_nearest_tie_even<Cb>(fp: &mut ExtendedFloat, shift: i32, cb: Cb)
78 where
79     // is_odd, is_halfway, is_above
80     Cb: Fn(bool, bool, bool) -> bool,
81 {
82     // Ensure we've already handled denormal values that underflow.
83     debug_assert!(shift <= 64);
84 
85     // Extract the truncated bits using mask.
86     // Calculate if the value of the truncated bits are either above
87     // the mid-way point, or equal to it.
88     //
89     // For example, for 4 truncated bytes, the mask would be 0b1111
90     // and the midway point would be 0b1000.
91     let mask = lower_n_mask(shift as u64);
92     let halfway = lower_n_halfway(shift as u64);
93     let truncated_bits = fp.mant & mask;
94     let is_above = truncated_bits > halfway;
95     let is_halfway = truncated_bits == halfway;
96 
97     // Bit shift so the leading bit is in the hidden bit.
98     // This optimixes pretty well:
99     //  ```text
100     //   mov     ecx, esi
101     //   shr     rdi, cl
102     //   xor     eax, eax
103     //   cmp     esi, 64
104     //   cmovne  rax, rdi
105     //   ret
106     //  ```
107     fp.mant = match shift == 64 {
108         true => 0,
109         false => fp.mant >> shift,
110     };
111     fp.exp += shift;
112 
113     // Extract the last bit after shifting (and determine if it is odd).
114     let is_odd = fp.mant & 1 == 1;
115 
116     // Calculate if we need to roundup.
117     // We need to roundup if we are above halfway, or if we are odd
118     // and at half-way (need to tie-to-even). Avoid the branch here.
119     fp.mant += cb(is_odd, is_halfway, is_above) as u64;
120 }
121 
122 /// Round our significant digits into place, truncating them.
123 #[cfg_attr(not(feature = "compact"), inline)]
round_down(fp: &mut ExtendedFloat, shift: i32)124 pub fn round_down(fp: &mut ExtendedFloat, shift: i32) {
125     // Might have a shift greater than 64 if we have an error.
126     fp.mant = match shift == 64 {
127         true => 0,
128         false => fp.mant >> shift,
129     };
130     fp.exp += shift;
131 }
132