• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Implementation of the Eisel-Lemire algorithm.
2 //!
3 //! This is adapted from [fast-float-rust](https://github.com/aldanor/fast-float-rust),
4 //! a port of [fast_float](https://github.com/fastfloat/fast_float) to Rust.
5 
6 #![cfg(not(feature = "compact"))]
7 #![doc(hidden)]
8 
9 use crate::extended_float::ExtendedFloat;
10 use crate::num::Float;
11 use crate::number::Number;
12 use crate::table::{LARGEST_POWER_OF_FIVE, POWER_OF_FIVE_128, SMALLEST_POWER_OF_FIVE};
13 
14 /// Ensure truncation of digits doesn't affect our computation, by doing 2 passes.
15 #[inline]
lemire<F: Float>(num: &Number) -> ExtendedFloat16 pub fn lemire<F: Float>(num: &Number) -> ExtendedFloat {
17     // If significant digits were truncated, then we can have rounding error
18     // only if `mantissa + 1` produces a different result. We also avoid
19     // redundantly using the Eisel-Lemire algorithm if it was unable to
20     // correctly round on the first pass.
21     let mut fp = compute_float::<F>(num.exponent, num.mantissa);
22     if num.many_digits && fp.exp >= 0 && fp != compute_float::<F>(num.exponent, num.mantissa + 1) {
23         // Need to re-calculate, since the previous values are rounded
24         // when the slow path algorithm expects a normalized extended float.
25         fp = compute_error::<F>(num.exponent, num.mantissa);
26     }
27     fp
28 }
29 
30 /// Compute a float using an extended-precision representation.
31 ///
32 /// Fast conversion of a the significant digits and decimal exponent
33 /// a float to a extended representation with a binary float. This
34 /// algorithm will accurately parse the vast majority of cases,
35 /// and uses a 128-bit representation (with a fallback 192-bit
36 /// representation).
37 ///
38 /// This algorithm scales the exponent by the decimal exponent
39 /// using pre-computed powers-of-5, and calculates if the
40 /// representation can be unambiguously rounded to the nearest
41 /// machine float. Near-halfway cases are not handled here,
42 /// and are represented by a negative, biased binary exponent.
43 ///
44 /// The algorithm is described in detail in "Daniel Lemire, Number Parsing
45 /// at a Gigabyte per Second" in section 5, "Fast Algorithm", and
46 /// section 6, "Exact Numbers And Ties", available online:
47 /// <https://arxiv.org/abs/2101.11408.pdf>.
compute_float<F: Float>(q: i32, mut w: u64) -> ExtendedFloat48 pub fn compute_float<F: Float>(q: i32, mut w: u64) -> ExtendedFloat {
49     let fp_zero = ExtendedFloat {
50         mant: 0,
51         exp: 0,
52     };
53     let fp_inf = ExtendedFloat {
54         mant: 0,
55         exp: F::INFINITE_POWER,
56     };
57 
58     // Short-circuit if the value can only be a literal 0 or infinity.
59     if w == 0 || q < F::SMALLEST_POWER_OF_TEN {
60         return fp_zero;
61     } else if q > F::LARGEST_POWER_OF_TEN {
62         return fp_inf;
63     }
64     // Normalize our significant digits, so the most-significant bit is set.
65     let lz = w.leading_zeros() as i32;
66     w <<= lz;
67     let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_SIZE as usize + 3);
68     if lo == 0xFFFF_FFFF_FFFF_FFFF {
69         // If we have failed to approximate w x 5^-q with our 128-bit value.
70         // Since the addition of 1 could lead to an overflow which could then
71         // round up over the half-way point, this can lead to improper rounding
72         // of a float.
73         //
74         // However, this can only occur if q ∈ [-27, 55]. The upper bound of q
75         // is 55 because 5^55 < 2^128, however, this can only happen if 5^q > 2^64,
76         // since otherwise the product can be represented in 64-bits, producing
77         // an exact result. For negative exponents, rounding-to-even can
78         // only occur if 5^-q < 2^64.
79         //
80         // For detailed explanations of rounding for negative exponents, see
81         // <https://arxiv.org/pdf/2101.11408.pdf#section.9.1>. For detailed
82         // explanations of rounding for positive exponents, see
83         // <https://arxiv.org/pdf/2101.11408.pdf#section.8>.
84         let inside_safe_exponent = (q >= -27) && (q <= 55);
85         if !inside_safe_exponent {
86             return compute_error_scaled::<F>(q, hi, lz);
87         }
88     }
89     let upperbit = (hi >> 63) as i32;
90     let mut mantissa = hi >> (upperbit + 64 - F::MANTISSA_SIZE - 3);
91     let mut power2 = power(q) + upperbit - lz - F::MINIMUM_EXPONENT;
92     if power2 <= 0 {
93         if -power2 + 1 >= 64 {
94             // Have more than 64 bits below the minimum exponent, must be 0.
95             return fp_zero;
96         }
97         // Have a subnormal value.
98         mantissa >>= -power2 + 1;
99         mantissa += mantissa & 1;
100         mantissa >>= 1;
101         power2 = (mantissa >= (1_u64 << F::MANTISSA_SIZE)) as i32;
102         return ExtendedFloat {
103             mant: mantissa,
104             exp: power2,
105         };
106     }
107     // Need to handle rounding ties. Normally, we need to round up,
108     // but if we fall right in between and and we have an even basis, we
109     // need to round down.
110     //
111     // This will only occur if:
112     //  1. The lower 64 bits of the 128-bit representation is 0.
113     //      IE, 5^q fits in single 64-bit word.
114     //  2. The least-significant bit prior to truncated mantissa is odd.
115     //  3. All the bits truncated when shifting to mantissa bits + 1 are 0.
116     //
117     // Or, we may fall between two floats: we are exactly halfway.
118     if lo <= 1
119         && q >= F::MIN_EXPONENT_ROUND_TO_EVEN
120         && q <= F::MAX_EXPONENT_ROUND_TO_EVEN
121         && mantissa & 3 == 1
122         && (mantissa << (upperbit + 64 - F::MANTISSA_SIZE - 3)) == hi
123     {
124         // Zero the lowest bit, so we don't round up.
125         mantissa &= !1_u64;
126     }
127     // Round-to-even, then shift the significant digits into place.
128     mantissa += mantissa & 1;
129     mantissa >>= 1;
130     if mantissa >= (2_u64 << F::MANTISSA_SIZE) {
131         // Rounding up overflowed, so the carry bit is set. Set the
132         // mantissa to 1 (only the implicit, hidden bit is set) and
133         // increase the exponent.
134         mantissa = 1_u64 << F::MANTISSA_SIZE;
135         power2 += 1;
136     }
137     // Zero out the hidden bit.
138     mantissa &= !(1_u64 << F::MANTISSA_SIZE);
139     if power2 >= F::INFINITE_POWER {
140         // Exponent is above largest normal value, must be infinite.
141         return fp_inf;
142     }
143     ExtendedFloat {
144         mant: mantissa,
145         exp: power2,
146     }
147 }
148 
149 /// Fallback algorithm to calculate the non-rounded representation.
150 /// This calculates the extended representation, and then normalizes
151 /// the resulting representation, so the high bit is set.
152 #[inline]
compute_error<F: Float>(q: i32, mut w: u64) -> ExtendedFloat153 pub fn compute_error<F: Float>(q: i32, mut w: u64) -> ExtendedFloat {
154     let lz = w.leading_zeros() as i32;
155     w <<= lz;
156     let hi = compute_product_approx(q, w, F::MANTISSA_SIZE as usize + 3).1;
157     compute_error_scaled::<F>(q, hi, lz)
158 }
159 
160 /// Compute the error from a mantissa scaled to the exponent.
161 #[inline]
compute_error_scaled<F: Float>(q: i32, mut w: u64, lz: i32) -> ExtendedFloat162 pub fn compute_error_scaled<F: Float>(q: i32, mut w: u64, lz: i32) -> ExtendedFloat {
163     // Want to normalize the float, but this is faster than ctlz on most architectures.
164     let hilz = (w >> 63) as i32 ^ 1;
165     w <<= hilz;
166     let power2 = power(q as i32) + F::EXPONENT_BIAS - hilz - lz - 62;
167 
168     ExtendedFloat {
169         mant: w,
170         exp: power2 + F::INVALID_FP,
171     }
172 }
173 
174 /// Calculate a base 2 exponent from a decimal exponent.
175 /// This uses a pre-computed integer approximation for
176 /// log2(10), where 217706 / 2^16 is accurate for the
177 /// entire range of non-finite decimal exponents.
178 #[inline]
power(q: i32) -> i32179 fn power(q: i32) -> i32 {
180     (q.wrapping_mul(152_170 + 65536) >> 16) + 63
181 }
182 
183 #[inline]
full_multiplication(a: u64, b: u64) -> (u64, u64)184 fn full_multiplication(a: u64, b: u64) -> (u64, u64) {
185     let r = (a as u128) * (b as u128);
186     (r as u64, (r >> 64) as u64)
187 }
188 
189 // This will compute or rather approximate w * 5**q and return a pair of 64-bit words
190 // approximating the result, with the "high" part corresponding to the most significant
191 // bits and the low part corresponding to the least significant bits.
compute_product_approx(q: i32, w: u64, precision: usize) -> (u64, u64)192 fn compute_product_approx(q: i32, w: u64, precision: usize) -> (u64, u64) {
193     debug_assert!(q >= SMALLEST_POWER_OF_FIVE);
194     debug_assert!(q <= LARGEST_POWER_OF_FIVE);
195     debug_assert!(precision <= 64);
196 
197     let mask = if precision < 64 {
198         0xFFFF_FFFF_FFFF_FFFF_u64 >> precision
199     } else {
200         0xFFFF_FFFF_FFFF_FFFF_u64
201     };
202 
203     // 5^q < 2^64, then the multiplication always provides an exact value.
204     // That means whenever we need to round ties to even, we always have
205     // an exact value.
206     let index = (q - SMALLEST_POWER_OF_FIVE) as usize;
207     let (lo5, hi5) = POWER_OF_FIVE_128[index];
208     // Only need one multiplication as long as there is 1 zero but
209     // in the explicit mantissa bits, +1 for the hidden bit, +1 to
210     // determine the rounding direction, +1 for if the computed
211     // product has a leading zero.
212     let (mut first_lo, mut first_hi) = full_multiplication(w, lo5);
213     if first_hi & mask == mask {
214         // Need to do a second multiplication to get better precision
215         // for the lower product. This will always be exact
216         // where q is < 55, since 5^55 < 2^128. If this wraps,
217         // then we need to need to round up the hi product.
218         let (_, second_hi) = full_multiplication(w, hi5);
219         first_lo = first_lo.wrapping_add(second_hi);
220         if second_hi > first_lo {
221             first_hi += 1;
222         }
223     }
224     (first_lo, first_hi)
225 }
226