• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Adapted from https://github.com/Alexhuszagh/rust-lexical.
2 
3 // FLOAT TYPE
4 
5 use super::num::*;
6 use super::rounding::*;
7 use super::shift::*;
8 
9 /// Extended precision floating-point type.
10 ///
11 /// Private implementation, exposed only for testing purposes.
12 #[doc(hidden)]
13 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
14 pub(crate) struct ExtendedFloat {
15     /// Mantissa for the extended-precision float.
16     pub mant: u64,
17     /// Binary exponent for the extended-precision float.
18     pub exp: i32,
19 }
20 
21 impl ExtendedFloat {
22     // PROPERTIES
23 
24     // OPERATIONS
25 
26     /// Multiply two normalized extended-precision floats, as if by `a*b`.
27     ///
28     /// The precision is maximal when the numbers are normalized, however,
29     /// decent precision will occur as long as both values have high bits
30     /// set. The result is not normalized.
31     ///
32     /// Algorithm:
33     ///     1. Non-signed multiplication of mantissas (requires 2x as many bits as input).
34     ///     2. Normalization of the result (not done here).
35     ///     3. Addition of exponents.
mul(&self, b: &ExtendedFloat) -> ExtendedFloat36     pub(crate) fn mul(&self, b: &ExtendedFloat) -> ExtendedFloat {
37         // Logic check, values must be decently normalized prior to multiplication.
38         debug_assert!((self.mant & u64::HIMASK != 0) && (b.mant & u64::HIMASK != 0));
39 
40         // Extract high-and-low masks.
41         let ah = self.mant >> u64::HALF;
42         let al = self.mant & u64::LOMASK;
43         let bh = b.mant >> u64::HALF;
44         let bl = b.mant & u64::LOMASK;
45 
46         // Get our products
47         let ah_bl = ah * bl;
48         let al_bh = al * bh;
49         let al_bl = al * bl;
50         let ah_bh = ah * bh;
51 
52         let mut tmp = (ah_bl & u64::LOMASK) + (al_bh & u64::LOMASK) + (al_bl >> u64::HALF);
53         // round up
54         tmp += 1 << (u64::HALF - 1);
55 
56         ExtendedFloat {
57             mant: ah_bh + (ah_bl >> u64::HALF) + (al_bh >> u64::HALF) + (tmp >> u64::HALF),
58             exp: self.exp + b.exp + u64::FULL,
59         }
60     }
61 
62     /// Multiply in-place, as if by `a*b`.
63     ///
64     /// The result is not normalized.
65     #[inline]
imul(&mut self, b: &ExtendedFloat)66     pub(crate) fn imul(&mut self, b: &ExtendedFloat) {
67         *self = self.mul(b);
68     }
69 
70     // NORMALIZE
71 
72     /// Normalize float-point number.
73     ///
74     /// Shift the mantissa so the number of leading zeros is 0, or the value
75     /// itself is 0.
76     ///
77     /// Get the number of bytes shifted.
78     #[inline]
normalize(&mut self) -> u3279     pub(crate) fn normalize(&mut self) -> u32 {
80         // Note:
81         // Using the cltz intrinsic via leading_zeros is way faster (~10x)
82         // than shifting 1-bit at a time, via while loop, and also way
83         // faster (~2x) than an unrolled loop that checks at 32, 16, 4,
84         // 2, and 1 bit.
85         //
86         // Using a modulus of pow2 (which will get optimized to a bitwise
87         // and with 0x3F or faster) is slightly slower than an if/then,
88         // however, removing the if/then will likely optimize more branched
89         // code as it removes conditional logic.
90 
91         // Calculate the number of leading zeros, and then zero-out
92         // any overflowing bits, to avoid shl overflow when self.mant == 0.
93         let shift = if self.mant == 0 {
94             0
95         } else {
96             self.mant.leading_zeros()
97         };
98         shl(self, shift as i32);
99         shift
100     }
101 
102     // ROUND
103 
104     /// Lossy round float-point number to native mantissa boundaries.
105     #[inline]
round_to_native<F, Algorithm>(&mut self, algorithm: Algorithm) where F: Float, Algorithm: FnOnce(&mut ExtendedFloat, i32),106     pub(crate) fn round_to_native<F, Algorithm>(&mut self, algorithm: Algorithm)
107     where
108         F: Float,
109         Algorithm: FnOnce(&mut ExtendedFloat, i32),
110     {
111         round_to_native::<F, _>(self, algorithm)
112     }
113 
114     // FROM
115 
116     /// Create extended float from native float.
117     #[inline]
from_float<F: Float>(f: F) -> ExtendedFloat118     pub fn from_float<F: Float>(f: F) -> ExtendedFloat {
119         from_float(f)
120     }
121 
122     // INTO
123 
124     /// Convert into default-rounded, lower-precision native float.
125     #[inline]
into_float<F: Float>(mut self) -> F126     pub(crate) fn into_float<F: Float>(mut self) -> F {
127         self.round_to_native::<F, _>(round_nearest_tie_even);
128         into_float(self)
129     }
130 
131     /// Convert into downward-rounded, lower-precision native float.
132     #[inline]
into_downward_float<F: Float>(mut self) -> F133     pub(crate) fn into_downward_float<F: Float>(mut self) -> F {
134         self.round_to_native::<F, _>(round_downward);
135         into_float(self)
136     }
137 }
138 
139 // FROM FLOAT
140 
141 // Import ExtendedFloat from native float.
142 #[inline]
from_float<F>(f: F) -> ExtendedFloat where F: Float,143 pub(crate) fn from_float<F>(f: F) -> ExtendedFloat
144 where
145     F: Float,
146 {
147     ExtendedFloat {
148         mant: u64::as_cast(f.mantissa()),
149         exp: f.exponent(),
150     }
151 }
152 
153 // INTO FLOAT
154 
155 // Export extended-precision float to native float.
156 //
157 // The extended-precision float must be in native float representation,
158 // with overflow/underflow appropriately handled.
159 #[inline]
into_float<F>(fp: ExtendedFloat) -> F where F: Float,160 pub(crate) fn into_float<F>(fp: ExtendedFloat) -> F
161 where
162     F: Float,
163 {
164     // Export floating-point number.
165     if fp.mant == 0 || fp.exp < F::DENORMAL_EXPONENT {
166         // sub-denormal, underflow
167         F::ZERO
168     } else if fp.exp >= F::MAX_EXPONENT {
169         // overflow
170         F::from_bits(F::INFINITY_BITS)
171     } else {
172         // calculate the exp and fraction bits, and return a float from bits.
173         let exp: u64;
174         if (fp.exp == F::DENORMAL_EXPONENT) && (fp.mant & F::HIDDEN_BIT_MASK.as_u64()) == 0 {
175             exp = 0;
176         } else {
177             exp = (fp.exp + F::EXPONENT_BIAS) as u64;
178         }
179         let exp = exp << F::MANTISSA_SIZE;
180         let mant = fp.mant & F::MANTISSA_MASK.as_u64();
181         F::from_bits(F::Unsigned::as_cast(mant | exp))
182     }
183 }
184