• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- A class to store a normalized floating point number -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
10 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
11 
12 #include "FPBits.h"
13 
14 #include "src/__support/CPP/type_traits.h"
15 #include "src/__support/common.h"
16 
17 #include <stdint.h>
18 
19 namespace LIBC_NAMESPACE {
20 namespace fputil {
21 
// A class which stores the normalized form of a floating point value.
// The special IEEE-754 bit patterns of zero, infinity and NaNs are
// not handled by this class.
//
// A normalized floating point number is of this form:
//    (-1)^sign * 2^exponent * <mantissa>
// where <mantissa> is of the form 1.<...>.
29 template <typename T> struct NormalFloat {
30   static_assert(
31       cpp::is_floating_point_v<T>,
32       "NormalFloat template parameter has to be a floating point type.");
33 
34   using StorageType = typename FPBits<T>::StorageType;
35   static constexpr StorageType ONE =
36       (StorageType(1) << FPBits<T>::FRACTION_LEN);
37 
38   // Unbiased exponent value.
39   int32_t exponent;
40 
41   StorageType mantissa;
42   // We want |StorageType| to have atleast one bit more than the actual mantissa
43   // bit width to accommodate the implicit 1 value.
44   static_assert(sizeof(StorageType) * 8 >= FPBits<T>::FRACTION_LEN + 1,
45                 "Bad type for mantissa in NormalFloat.");
46 
47   Sign sign = Sign::POS;
48 
NormalFloatNormalFloat49   LIBC_INLINE NormalFloat(Sign s, int32_t e, StorageType m)
50       : exponent(e), mantissa(m), sign(s) {
51     if (mantissa >= ONE)
52       return;
53 
54     unsigned normalization_shift = evaluate_normalization_shift(mantissa);
55     mantissa = mantissa << normalization_shift;
56     exponent -= normalization_shift;
57   }
58 
NormalFloatNormalFloat59   LIBC_INLINE explicit NormalFloat(T x) { init_from_bits(FPBits<T>(x)); }
60 
NormalFloatNormalFloat61   LIBC_INLINE explicit NormalFloat(FPBits<T> bits) { init_from_bits(bits); }
62 
63   // Compares this normalized number with another normalized number.
64   // Returns -1 is this number is less than |other|, 0 if this number is equal
65   // to |other|, and 1 if this number is greater than |other|.
cmpNormalFloat66   LIBC_INLINE int cmp(const NormalFloat<T> &other) const {
67     const int result = sign.is_neg() ? -1 : 1;
68     if (sign != other.sign)
69       return result;
70 
71     if (exponent > other.exponent) {
72       return result;
73     } else if (exponent == other.exponent) {
74       if (mantissa > other.mantissa)
75         return result;
76       else if (mantissa == other.mantissa)
77         return 0;
78       else
79         return -result;
80     } else {
81       return -result;
82     }
83   }
84 
85   // Returns a new normalized floating point number which is equal in value
86   // to this number multiplied by 2^e. That is:
87   //     new = this *  2^e
mul2NormalFloat88   LIBC_INLINE NormalFloat<T> mul2(int e) const {
89     NormalFloat<T> result = *this;
90     result.exponent += e;
91     return result;
92   }
93 
TNormalFloat94   LIBC_INLINE operator T() const {
95     int biased_exponent = exponent + FPBits<T>::EXP_BIAS;
96     // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
97     constexpr int MAX_EXPONENT_VALUE = (1 << FPBits<T>::EXP_LEN) - 2;
98     if (biased_exponent > MAX_EXPONENT_VALUE) {
99       return FPBits<T>::inf(sign).get_val();
100     }
101 
102     FPBits<T> result(T(0.0));
103     result.set_sign(sign);
104 
105     constexpr int SUBNORMAL_EXPONENT = -FPBits<T>::EXP_BIAS + 1;
106     if (exponent < SUBNORMAL_EXPONENT) {
107       unsigned shift = SUBNORMAL_EXPONENT - exponent;
108       // Since exponent > subnormalExponent, shift is strictly greater than
109       // zero.
110       if (shift <= FPBits<T>::FRACTION_LEN + 1) {
111         // Generate a subnormal number. Might lead to loss of precision.
112         // We round to nearest and round halfway cases to even.
113         const StorageType shift_out_mask = (StorageType(1) << shift) - 1;
114         const StorageType shift_out_value = mantissa & shift_out_mask;
115         const StorageType halfway_value = StorageType(1) << (shift - 1);
116         result.set_biased_exponent(0);
117         result.set_mantissa(mantissa >> shift);
118         StorageType new_mantissa = result.get_mantissa();
119         if (shift_out_value > halfway_value) {
120           new_mantissa += 1;
121         } else if (shift_out_value == halfway_value) {
122           // Round to even.
123           if (result.get_mantissa() & 0x1)
124             new_mantissa += 1;
125         }
126         result.set_mantissa(new_mantissa);
127         // Adding 1 to mantissa can lead to overflow. This can only happen if
128         // mantissa was all ones (0b111..11). For such a case, we will carry
129         // the overflow into the exponent.
130         if (new_mantissa == ONE)
131           result.set_biased_exponent(1);
132         return result.get_val();
133       } else {
134         return result.get_val();
135       }
136     }
137 
138     result.set_biased_exponent(exponent + FPBits<T>::EXP_BIAS);
139     result.set_mantissa(mantissa);
140     return result.get_val();
141   }
142 
143 private:
init_from_bitsNormalFloat144   LIBC_INLINE void init_from_bits(FPBits<T> bits) {
145     sign = bits.sign();
146 
147     if (bits.is_inf_or_nan() || bits.is_zero()) {
148       // Ignore special bit patterns. Implementations deal with them separately
149       // anyway so this should not be a problem.
150       exponent = 0;
151       mantissa = 0;
152       return;
153     }
154 
155     // Normalize subnormal numbers.
156     if (bits.is_subnormal()) {
157       unsigned shift = evaluate_normalization_shift(bits.get_mantissa());
158       mantissa = StorageType(bits.get_mantissa()) << shift;
159       exponent = 1 - FPBits<T>::EXP_BIAS - shift;
160     } else {
161       exponent = bits.get_biased_exponent() - FPBits<T>::EXP_BIAS;
162       mantissa = ONE | bits.get_mantissa();
163     }
164   }
165 
evaluate_normalization_shiftNormalFloat166   LIBC_INLINE unsigned evaluate_normalization_shift(StorageType m) {
167     unsigned shift = 0;
168     for (; (ONE & m) == 0 && (shift < FPBits<T>::FRACTION_LEN);
169          m <<= 1, ++shift)
170       ;
171     return shift;
172   }
173 };
174 
175 #ifdef LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80
176 template <>
177 LIBC_INLINE void
init_from_bits(FPBits<long double> bits)178 NormalFloat<long double>::init_from_bits(FPBits<long double> bits) {
179   sign = bits.sign();
180 
181   if (bits.is_inf_or_nan() || bits.is_zero()) {
182     // Ignore special bit patterns. Implementations deal with them separately
183     // anyway so this should not be a problem.
184     exponent = 0;
185     mantissa = 0;
186     return;
187   }
188 
189   if (bits.is_subnormal()) {
190     if (bits.get_implicit_bit() == 0) {
191       // Since we ignore zero value, the mantissa in this case is non-zero.
192       int normalization_shift =
193           evaluate_normalization_shift(bits.get_mantissa());
194       exponent = -16382 - normalization_shift;
195       mantissa = (bits.get_mantissa() << normalization_shift);
196     } else {
197       exponent = -16382;
198       mantissa = ONE | bits.get_mantissa();
199     }
200   } else {
201     if (bits.get_implicit_bit() == 0) {
202       // Invalid number so just store 0 similar to a NaN.
203       exponent = 0;
204       mantissa = 0;
205     } else {
206       exponent = bits.get_biased_exponent() - 16383;
207       mantissa = ONE | bits.get_mantissa();
208     }
209   }
210 }
211 
212 template <> LIBC_INLINE NormalFloat<long double>::operator long double() const {
213   using LDBits = FPBits<long double>;
214   int biased_exponent = exponent + LDBits::EXP_BIAS;
215   // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
216   constexpr int MAX_EXPONENT_VALUE = (1 << LDBits::EXP_LEN) - 2;
217   if (biased_exponent > MAX_EXPONENT_VALUE) {
218     return LDBits::inf(sign).get_val();
219   }
220 
221   FPBits<long double> result(0.0l);
222   result.set_sign(sign);
223 
224   constexpr int SUBNORMAL_EXPONENT = -LDBits::EXP_BIAS + 1;
225   if (exponent < SUBNORMAL_EXPONENT) {
226     unsigned shift = SUBNORMAL_EXPONENT - exponent;
227     if (shift <= LDBits::FRACTION_LEN + 1) {
228       // Generate a subnormal number. Might lead to loss of precision.
229       // We round to nearest and round halfway cases to even.
230       const StorageType shift_out_mask = (StorageType(1) << shift) - 1;
231       const StorageType shift_out_value = mantissa & shift_out_mask;
232       const StorageType halfway_value = StorageType(1) << (shift - 1);
233       result.set_biased_exponent(0);
234       result.set_mantissa(mantissa >> shift);
235       StorageType new_mantissa = result.get_mantissa();
236       if (shift_out_value > halfway_value) {
237         new_mantissa += 1;
238       } else if (shift_out_value == halfway_value) {
239         // Round to even.
240         if (result.get_mantissa() & 0x1)
241           new_mantissa += 1;
242       }
243       result.set_mantissa(new_mantissa);
244       // Adding 1 to mantissa can lead to overflow. This can only happen if
245       // mantissa was all ones (0b111..11). For such a case, we will carry
246       // the overflow into the exponent and set the implicit bit to 1.
247       if (new_mantissa == ONE) {
248         result.set_biased_exponent(1);
249         result.set_implicit_bit(1);
250       } else {
251         result.set_implicit_bit(0);
252       }
253       return result.get_val();
254     } else {
255       return result.get_val();
256     }
257   }
258 
259   result.set_biased_exponent(biased_exponent);
260   result.set_mantissa(mantissa);
261   result.set_implicit_bit(1);
262   return result.get_val();
263 }
264 #endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80
265 
266 } // namespace fputil
267 } // namespace LIBC_NAMESPACE
268 
269 #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
270