1 //===-- Abstract class for bit manipulation of float numbers. ---*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H 10 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H 11 12 #include "src/__support/CPP/bit.h" 13 #include "src/__support/CPP/type_traits.h" 14 #include "src/__support/common.h" 15 #include "src/__support/libc_assert.h" // LIBC_ASSERT 16 #include "src/__support/macros/attributes.h" // LIBC_INLINE, LIBC_INLINE_VAR 17 #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_FLOAT128 18 #include "src/__support/math_extras.h" // mask_trailing_ones 19 #include "src/__support/sign.h" // Sign 20 #include "src/__support/uint128.h" 21 22 #include <stdint.h> 23 24 namespace LIBC_NAMESPACE { 25 namespace fputil { 26 27 // The supported floating point types. 
enum class FPType {
  IEEE754_Binary16,
  IEEE754_Binary32,
  IEEE754_Binary64,
  IEEE754_Binary128,
  X86_Binary80,
};

// Overview of the class hierarchy defined in this file:
//
//             ┌───────────────────┐
//             │ FPLayout<FPType>  │
//             └─────────▲─────────┘
//                       │
//             ┌─────────┴─────────┐
//             │ FPStorage<FPType> │
//             └─────────▲─────────┘
//                       │
//          ┌────────────┴─────────────┐
//          │                          │
// ┌────────┴─────────┐ ┌──────────────┴──────────────────┐
// │ FPRepSem<FPType> │ │ FPRepSem<FPType::X86_Binary80>  │
// └────────▲─────────┘ └──────────────▲──────────────────┘
//          │                          │
//          └────────────┬─────────────┘
//                       │
//               ┌───────┴───────┐
//               │  FPRepImpl<T> │
//               └───────▲───────┘
//                       │
//              ┌────────┴────────┐
//        ┌─────┴─────┐     ┌─────┴─────┐
//        │  FPRep<T> │     │ FPBits<T> │
//        └───────────┘     └───────────┘
//
// - 'FPLayout' provides only a handful of constants: the 'StorageType' and
//   the lengths of the sign, exponent, fraction and significand parts.
// - 'FPStorage' derives further constants from 'FPLayout', such as the
//   exponent bias and the masks. It also holds the bit representation of the
//   floating point value as a 'StorageType' and defines tools to assemble or
//   test these parts.
// - 'FPRepSem' defines the functions that interact semantically with the
//   floating point representation. The default implementation is the one for
//   'IEEE754'; a specialization is provided for X86 Extended Precision.
// - 'FPRepImpl' derives from 'FPRepSem' and adds the functions that are common
//   to all implementations or that build on the ones in 'FPRepSem'.
// - 'FPRep' exposes all functions from 'FPRepImpl' and returns 'FPRep'
//   instances when using Builders (static functions to create values).
// - 'FPBits' exposes all the functions from 'FPRepImpl' but operates on the
//   native C++ floating point type instead of 'FPType'.
An additional 'get_val' 78 // function allows getting the C++ floating point type value back. Builders 79 // called from 'FPBits' return 'FPBits' instances. 80 81 namespace internal { 82 83 // Defines the layout (sign, exponent, significand) of a floating point type in 84 // memory. It also defines its associated StorageType, i.e., the unsigned 85 // integer type used to manipulate its representation. 86 // Additionally we provide the fractional part length, i.e., the number of bits 87 // after the decimal dot when the number is in normal form. 88 template <FPType> struct FPLayout {}; 89 90 template <> struct FPLayout<FPType::IEEE754_Binary16> { 91 using StorageType = uint16_t; 92 LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 93 LIBC_INLINE_VAR static constexpr int EXP_LEN = 5; 94 LIBC_INLINE_VAR static constexpr int SIG_LEN = 10; 95 LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; 96 }; 97 98 template <> struct FPLayout<FPType::IEEE754_Binary32> { 99 using StorageType = uint32_t; 100 LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 101 LIBC_INLINE_VAR static constexpr int EXP_LEN = 8; 102 LIBC_INLINE_VAR static constexpr int SIG_LEN = 23; 103 LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; 104 }; 105 106 template <> struct FPLayout<FPType::IEEE754_Binary64> { 107 using StorageType = uint64_t; 108 LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 109 LIBC_INLINE_VAR static constexpr int EXP_LEN = 11; 110 LIBC_INLINE_VAR static constexpr int SIG_LEN = 52; 111 LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; 112 }; 113 114 template <> struct FPLayout<FPType::IEEE754_Binary128> { 115 using StorageType = UInt128; 116 LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 117 LIBC_INLINE_VAR static constexpr int EXP_LEN = 15; 118 LIBC_INLINE_VAR static constexpr int SIG_LEN = 112; 119 LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; 120 }; 121 122 template <> struct FPLayout<FPType::X86_Binary80> { 123 using 
StorageType = UInt128; 124 LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 125 LIBC_INLINE_VAR static constexpr int EXP_LEN = 15; 126 LIBC_INLINE_VAR static constexpr int SIG_LEN = 64; 127 LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN - 1; 128 }; 129 130 // FPStorage derives useful constants from the FPLayout above. 131 template <FPType fp_type> struct FPStorage : public FPLayout<fp_type> { 132 using UP = FPLayout<fp_type>; 133 134 using UP::EXP_LEN; // The number of bits for the *exponent* part 135 using UP::SIG_LEN; // The number of bits for the *significand* part 136 using UP::SIGN_LEN; // The number of bits for the *sign* part 137 // For convenience, the sum of `SIG_LEN`, `EXP_LEN`, and `SIGN_LEN`. 138 LIBC_INLINE_VAR static constexpr int TOTAL_LEN = SIGN_LEN + EXP_LEN + SIG_LEN; 139 140 // The number of bits after the decimal dot when the number is in normal form. 141 using UP::FRACTION_LEN; 142 143 // An unsigned integer that is wide enough to contain all of the floating 144 // point bits. 145 using StorageType = typename UP::StorageType; 146 147 // The number of bits in StorageType. 148 LIBC_INLINE_VAR static constexpr int STORAGE_LEN = 149 sizeof(StorageType) * CHAR_BIT; 150 static_assert(STORAGE_LEN >= TOTAL_LEN); 151 152 // The exponent bias. Always positive. 153 LIBC_INLINE_VAR static constexpr int32_t EXP_BIAS = 154 (1U << (EXP_LEN - 1U)) - 1U; 155 static_assert(EXP_BIAS > 0); 156 157 // The bit pattern that keeps only the *significand* part. 158 LIBC_INLINE_VAR static constexpr StorageType SIG_MASK = 159 mask_trailing_ones<StorageType, SIG_LEN>(); 160 // The bit pattern that keeps only the *exponent* part. 161 LIBC_INLINE_VAR static constexpr StorageType EXP_MASK = 162 mask_trailing_ones<StorageType, EXP_LEN>() << SIG_LEN; 163 // The bit pattern that keeps only the *sign* part. 
164 LIBC_INLINE_VAR static constexpr StorageType SIGN_MASK = 165 mask_trailing_ones<StorageType, SIGN_LEN>() << (EXP_LEN + SIG_LEN); 166 // The bit pattern that keeps only the *exponent + significand* part. 167 LIBC_INLINE_VAR static constexpr StorageType EXP_SIG_MASK = 168 mask_trailing_ones<StorageType, EXP_LEN + SIG_LEN>(); 169 // The bit pattern that keeps only the *sign + exponent + significand* part. 170 LIBC_INLINE_VAR static constexpr StorageType FP_MASK = 171 mask_trailing_ones<StorageType, TOTAL_LEN>(); 172 // The bit pattern that keeps only the *fraction* part. 173 // i.e., the *significand* without the leading one. 174 LIBC_INLINE_VAR static constexpr StorageType FRACTION_MASK = 175 mask_trailing_ones<StorageType, FRACTION_LEN>(); 176 177 static_assert((SIG_MASK & EXP_MASK & SIGN_MASK) == 0, "masks disjoint"); 178 static_assert((SIG_MASK | EXP_MASK | SIGN_MASK) == FP_MASK, "masks cover"); 179 180 protected: 181 // Merge bits from 'a' and 'b' values according to 'mask'. 182 // Use 'a' bits when corresponding 'mask' bits are zeroes and 'b' bits when 183 // corresponding bits are ones. 184 LIBC_INLINE static constexpr StorageType merge(StorageType a, StorageType b, 185 StorageType mask) { 186 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge 187 return a ^ ((a ^ b) & mask); 188 } 189 190 // A stongly typed integer that prevents mixing and matching integers with 191 // different semantics. 
192 template <typename T> struct TypedInt { 193 using value_type = T; 194 LIBC_INLINE constexpr explicit TypedInt(T value) : value(value) {} 195 LIBC_INLINE constexpr TypedInt(const TypedInt &value) = default; 196 LIBC_INLINE constexpr TypedInt &operator=(const TypedInt &value) = default; 197 198 LIBC_INLINE constexpr explicit operator T() const { return value; } 199 200 LIBC_INLINE constexpr StorageType to_storage_type() const { 201 return StorageType(value); 202 } 203 204 LIBC_INLINE friend constexpr bool operator==(TypedInt a, TypedInt b) { 205 return a.value == b.value; 206 } 207 LIBC_INLINE friend constexpr bool operator!=(TypedInt a, TypedInt b) { 208 return a.value != b.value; 209 } 210 211 protected: 212 T value; 213 }; 214 215 // An opaque type to store a floating point exponent. 216 // We define special values but it is valid to create arbitrary values as long 217 // as they are in the range [min, max]. 218 struct Exponent : public TypedInt<int32_t> { 219 using UP = TypedInt<int32_t>; 220 using UP::UP; 221 LIBC_INLINE static constexpr auto subnormal() { 222 return Exponent(-EXP_BIAS); 223 } 224 LIBC_INLINE static constexpr auto min() { return Exponent(1 - EXP_BIAS); } 225 LIBC_INLINE static constexpr auto zero() { return Exponent(0); } 226 LIBC_INLINE static constexpr auto max() { return Exponent(EXP_BIAS); } 227 LIBC_INLINE static constexpr auto inf() { return Exponent(EXP_BIAS + 1); } 228 }; 229 230 // An opaque type to store a floating point biased exponent. 231 // We define special values but it is valid to create arbitrary values as long 232 // as they are in the range [zero, bits_all_ones]. 233 // Values greater than bits_all_ones are truncated. 234 struct BiasedExponent : public TypedInt<uint32_t> { 235 using UP = TypedInt<uint32_t>; 236 using UP::UP; 237 238 LIBC_INLINE constexpr BiasedExponent(Exponent exp) 239 : UP(static_cast<int32_t>(exp) + EXP_BIAS) {} 240 241 // Cast operator to get convert from BiasedExponent to Exponent. 
242 LIBC_INLINE constexpr operator Exponent() const { 243 return Exponent(UP::value - EXP_BIAS); 244 } 245 246 LIBC_INLINE constexpr BiasedExponent &operator++() { 247 LIBC_ASSERT(*this != BiasedExponent(Exponent::inf())); 248 ++UP::value; 249 return *this; 250 } 251 252 LIBC_INLINE constexpr BiasedExponent &operator--() { 253 LIBC_ASSERT(*this != BiasedExponent(Exponent::subnormal())); 254 --UP::value; 255 return *this; 256 } 257 }; 258 259 // An opaque type to store a floating point significand. 260 // We define special values but it is valid to create arbitrary values as long 261 // as they are in the range [zero, bits_all_ones]. 262 // Note that the semantics of the Significand are implementation dependent. 263 // Values greater than bits_all_ones are truncated. 264 struct Significand : public TypedInt<StorageType> { 265 using UP = TypedInt<StorageType>; 266 using UP::UP; 267 268 LIBC_INLINE friend constexpr Significand operator|(const Significand a, 269 const Significand b) { 270 return Significand( 271 StorageType(a.to_storage_type() | b.to_storage_type())); 272 } 273 LIBC_INLINE friend constexpr Significand operator^(const Significand a, 274 const Significand b) { 275 return Significand( 276 StorageType(a.to_storage_type() ^ b.to_storage_type())); 277 } 278 LIBC_INLINE friend constexpr Significand operator>>(const Significand a, 279 int shift) { 280 return Significand(StorageType(a.to_storage_type() >> shift)); 281 } 282 283 LIBC_INLINE static constexpr auto zero() { 284 return Significand(StorageType(0)); 285 } 286 LIBC_INLINE static constexpr auto lsb() { 287 return Significand(StorageType(1)); 288 } 289 LIBC_INLINE static constexpr auto msb() { 290 return Significand(StorageType(1) << (SIG_LEN - 1)); 291 } 292 LIBC_INLINE static constexpr auto bits_all_ones() { 293 return Significand(SIG_MASK); 294 } 295 }; 296 297 LIBC_INLINE static constexpr StorageType encode(BiasedExponent exp) { 298 return (exp.to_storage_type() << SIG_LEN) & EXP_MASK; 299 } 300 301 
LIBC_INLINE static constexpr StorageType encode(Significand value) { 302 return value.to_storage_type() & SIG_MASK; 303 } 304 305 LIBC_INLINE static constexpr StorageType encode(BiasedExponent exp, 306 Significand sig) { 307 return encode(exp) | encode(sig); 308 } 309 310 LIBC_INLINE static constexpr StorageType encode(Sign sign, BiasedExponent exp, 311 Significand sig) { 312 if (sign.is_neg()) 313 return SIGN_MASK | encode(exp, sig); 314 return encode(exp, sig); 315 } 316 317 // The floating point number representation as an unsigned integer. 318 StorageType bits{}; 319 320 LIBC_INLINE constexpr FPStorage() : bits(0) {} 321 LIBC_INLINE constexpr FPStorage(StorageType value) : bits(value) {} 322 323 // Observers 324 LIBC_INLINE constexpr StorageType exp_bits() const { return bits & EXP_MASK; } 325 LIBC_INLINE constexpr StorageType sig_bits() const { return bits & SIG_MASK; } 326 LIBC_INLINE constexpr StorageType exp_sig_bits() const { 327 return bits & EXP_SIG_MASK; 328 } 329 330 // Parts 331 LIBC_INLINE constexpr BiasedExponent biased_exponent() const { 332 return BiasedExponent(static_cast<uint32_t>(exp_bits() >> SIG_LEN)); 333 } 334 LIBC_INLINE constexpr void set_biased_exponent(BiasedExponent biased) { 335 bits = merge(bits, encode(biased), EXP_MASK); 336 } 337 338 public: 339 LIBC_INLINE constexpr Sign sign() const { 340 return (bits & SIGN_MASK) ? Sign::NEG : Sign::POS; 341 } 342 LIBC_INLINE constexpr void set_sign(Sign signVal) { 343 if (sign() != signVal) 344 bits ^= SIGN_MASK; 345 } 346 }; 347 348 // This layer defines all functions that are specific to how the the floating 349 // point type is encoded. It enables constructions, modification and observation 350 // of values manipulated as 'StorageType'. 
351 template <FPType fp_type, typename RetT> 352 struct FPRepSem : public FPStorage<fp_type> { 353 using UP = FPStorage<fp_type>; 354 using typename UP::StorageType; 355 using UP::FRACTION_LEN; 356 using UP::FRACTION_MASK; 357 358 protected: 359 using typename UP::Exponent; 360 using typename UP::Significand; 361 using UP::bits; 362 using UP::encode; 363 using UP::exp_bits; 364 using UP::exp_sig_bits; 365 using UP::sig_bits; 366 using UP::UP; 367 368 public: 369 // Builders 370 LIBC_INLINE static constexpr RetT zero(Sign sign = Sign::POS) { 371 return RetT(encode(sign, Exponent::subnormal(), Significand::zero())); 372 } 373 LIBC_INLINE static constexpr RetT one(Sign sign = Sign::POS) { 374 return RetT(encode(sign, Exponent::zero(), Significand::zero())); 375 } 376 LIBC_INLINE static constexpr RetT min_subnormal(Sign sign = Sign::POS) { 377 return RetT(encode(sign, Exponent::subnormal(), Significand::lsb())); 378 } 379 LIBC_INLINE static constexpr RetT max_subnormal(Sign sign = Sign::POS) { 380 return RetT( 381 encode(sign, Exponent::subnormal(), Significand::bits_all_ones())); 382 } 383 LIBC_INLINE static constexpr RetT min_normal(Sign sign = Sign::POS) { 384 return RetT(encode(sign, Exponent::min(), Significand::zero())); 385 } 386 LIBC_INLINE static constexpr RetT max_normal(Sign sign = Sign::POS) { 387 return RetT(encode(sign, Exponent::max(), Significand::bits_all_ones())); 388 } 389 LIBC_INLINE static constexpr RetT inf(Sign sign = Sign::POS) { 390 return RetT(encode(sign, Exponent::inf(), Significand::zero())); 391 } 392 LIBC_INLINE static constexpr RetT signaling_nan(Sign sign = Sign::POS, 393 StorageType v = 0) { 394 return RetT(encode(sign, Exponent::inf(), 395 (v ? 
Significand(v) : (Significand::msb() >> 1)))); 396 } 397 LIBC_INLINE static constexpr RetT quiet_nan(Sign sign = Sign::POS, 398 StorageType v = 0) { 399 return RetT( 400 encode(sign, Exponent::inf(), Significand::msb() | Significand(v))); 401 } 402 403 // Observers 404 LIBC_INLINE constexpr bool is_zero() const { return exp_sig_bits() == 0; } 405 LIBC_INLINE constexpr bool is_nan() const { 406 return exp_sig_bits() > encode(Exponent::inf(), Significand::zero()); 407 } 408 LIBC_INLINE constexpr bool is_quiet_nan() const { 409 return exp_sig_bits() >= encode(Exponent::inf(), Significand::msb()); 410 } 411 LIBC_INLINE constexpr bool is_signaling_nan() const { 412 return is_nan() && !is_quiet_nan(); 413 } 414 LIBC_INLINE constexpr bool is_inf() const { 415 return exp_sig_bits() == encode(Exponent::inf(), Significand::zero()); 416 } 417 LIBC_INLINE constexpr bool is_finite() const { 418 return exp_bits() != encode(Exponent::inf()); 419 } 420 LIBC_INLINE 421 constexpr bool is_subnormal() const { 422 return exp_bits() == encode(Exponent::subnormal()); 423 } 424 LIBC_INLINE constexpr bool is_normal() const { 425 return is_finite() && !is_subnormal(); 426 } 427 LIBC_INLINE constexpr RetT next_toward_inf() const { 428 if (is_finite()) 429 return RetT(bits + StorageType(1)); 430 return RetT(bits); 431 } 432 433 // Returns the mantissa with the implicit bit set iff the current 434 // value is a valid normal number. 435 LIBC_INLINE constexpr StorageType get_explicit_mantissa() const { 436 if (is_subnormal()) 437 return sig_bits(); 438 return (StorageType(1) << UP::SIG_LEN) | sig_bits(); 439 } 440 }; 441 442 // Specialization for the X86 Extended Precision type. 
443 template <typename RetT> 444 struct FPRepSem<FPType::X86_Binary80, RetT> 445 : public FPStorage<FPType::X86_Binary80> { 446 using UP = FPStorage<FPType::X86_Binary80>; 447 using typename UP::StorageType; 448 using UP::FRACTION_LEN; 449 using UP::FRACTION_MASK; 450 451 // The x86 80 bit float represents the leading digit of the mantissa 452 // explicitly. This is the mask for that bit. 453 static constexpr StorageType EXPLICIT_BIT_MASK = StorageType(1) 454 << FRACTION_LEN; 455 // The X80 significand is made of an explicit bit and the fractional part. 456 static_assert((EXPLICIT_BIT_MASK & FRACTION_MASK) == 0, 457 "the explicit bit and the fractional part should not overlap"); 458 static_assert((EXPLICIT_BIT_MASK | FRACTION_MASK) == SIG_MASK, 459 "the explicit bit and the fractional part should cover the " 460 "whole significand"); 461 462 protected: 463 using typename UP::Exponent; 464 using typename UP::Significand; 465 using UP::encode; 466 using UP::UP; 467 468 public: 469 // Builders 470 LIBC_INLINE static constexpr RetT zero(Sign sign = Sign::POS) { 471 return RetT(encode(sign, Exponent::subnormal(), Significand::zero())); 472 } 473 LIBC_INLINE static constexpr RetT one(Sign sign = Sign::POS) { 474 return RetT(encode(sign, Exponent::zero(), Significand::msb())); 475 } 476 LIBC_INLINE static constexpr RetT min_subnormal(Sign sign = Sign::POS) { 477 return RetT(encode(sign, Exponent::subnormal(), Significand::lsb())); 478 } 479 LIBC_INLINE static constexpr RetT max_subnormal(Sign sign = Sign::POS) { 480 return RetT(encode(sign, Exponent::subnormal(), 481 Significand::bits_all_ones() ^ Significand::msb())); 482 } 483 LIBC_INLINE static constexpr RetT min_normal(Sign sign = Sign::POS) { 484 return RetT(encode(sign, Exponent::min(), Significand::msb())); 485 } 486 LIBC_INLINE static constexpr RetT max_normal(Sign sign = Sign::POS) { 487 return RetT(encode(sign, Exponent::max(), Significand::bits_all_ones())); 488 } 489 LIBC_INLINE static constexpr RetT inf(Sign 
sign = Sign::POS) { 490 return RetT(encode(sign, Exponent::inf(), Significand::msb())); 491 } 492 LIBC_INLINE static constexpr RetT signaling_nan(Sign sign = Sign::POS, 493 StorageType v = 0) { 494 return RetT(encode(sign, Exponent::inf(), 495 Significand::msb() | 496 (v ? Significand(v) : (Significand::msb() >> 2)))); 497 } 498 LIBC_INLINE static constexpr RetT quiet_nan(Sign sign = Sign::POS, 499 StorageType v = 0) { 500 return RetT(encode(sign, Exponent::inf(), 501 Significand::msb() | (Significand::msb() >> 1) | 502 Significand(v))); 503 } 504 505 // Observers 506 LIBC_INLINE constexpr bool is_zero() const { return exp_sig_bits() == 0; } 507 LIBC_INLINE constexpr bool is_nan() const { 508 // Most encoding forms from the table found in 509 // https://en.wikipedia.org/wiki/Extended_precision#x86_extended_precision_format 510 // are interpreted as NaN. 511 // More precisely : 512 // - Pseudo-Infinity 513 // - Pseudo Not a Number 514 // - Signalling Not a Number 515 // - Floating-point Indefinite 516 // - Quiet Not a Number 517 // - Unnormal 518 // This can be reduced to the following logic: 519 if (exp_bits() == encode(Exponent::inf())) 520 return !is_inf(); 521 if (exp_bits() != encode(Exponent::subnormal())) 522 return (sig_bits() & encode(Significand::msb())) == 0; 523 return false; 524 } 525 LIBC_INLINE constexpr bool is_quiet_nan() const { 526 return exp_sig_bits() >= 527 encode(Exponent::inf(), 528 Significand::msb() | (Significand::msb() >> 1)); 529 } 530 LIBC_INLINE constexpr bool is_signaling_nan() const { 531 return is_nan() && !is_quiet_nan(); 532 } 533 LIBC_INLINE constexpr bool is_inf() const { 534 return exp_sig_bits() == encode(Exponent::inf(), Significand::msb()); 535 } 536 LIBC_INLINE constexpr bool is_finite() const { 537 return !is_inf() && !is_nan(); 538 } 539 LIBC_INLINE 540 constexpr bool is_subnormal() const { 541 return exp_bits() == encode(Exponent::subnormal()); 542 } 543 LIBC_INLINE constexpr bool is_normal() const { 544 const auto exp = 
exp_bits(); 545 if (exp == encode(Exponent::subnormal()) || exp == encode(Exponent::inf())) 546 return false; 547 return get_implicit_bit(); 548 } 549 LIBC_INLINE constexpr RetT next_toward_inf() const { 550 if (is_finite()) { 551 if (exp_sig_bits() == max_normal().uintval()) { 552 return inf(sign()); 553 } else if (exp_sig_bits() == max_subnormal().uintval()) { 554 return min_normal(sign()); 555 } else if (sig_bits() == SIG_MASK) { 556 return RetT(encode(sign(), ++biased_exponent(), Significand::zero())); 557 } else { 558 return RetT(bits + StorageType(1)); 559 } 560 } 561 return RetT(bits); 562 } 563 564 LIBC_INLINE constexpr StorageType get_explicit_mantissa() const { 565 return sig_bits(); 566 } 567 568 // This functions is specific to FPRepSem<FPType::X86_Binary80>. 569 // TODO: Remove if possible. 570 LIBC_INLINE constexpr bool get_implicit_bit() const { 571 return static_cast<bool>(bits & EXPLICIT_BIT_MASK); 572 } 573 574 // This functions is specific to FPRepSem<FPType::X86_Binary80>. 575 // TODO: Remove if possible. 576 LIBC_INLINE constexpr void set_implicit_bit(bool implicitVal) { 577 if (get_implicit_bit() != implicitVal) 578 bits ^= EXPLICIT_BIT_MASK; 579 } 580 }; 581 582 // 'FPRepImpl' is the bottom of the class hierarchy that only deals with 583 // 'FPType'. The operations dealing with specific float semantics are 584 // implemented by 'FPRepSem' above and specialized when needed. 585 // 586 // The 'RetT' type is being propagated up to 'FPRepSem' so that the functions 587 // creating new values (Builders) can return the appropriate type. That is, when 588 // creating a value through 'FPBits' below the builder will return an 'FPBits' 589 // value. 590 // FPBits<float>::zero(); // returns an FPBits<> 591 // 592 // When we don't care about specific C++ floating point type we can use 593 // 'FPRep' and specify the 'FPType' directly. 
594 // FPRep<FPType::IEEE754_Binary32:>::zero() // returns an FPRep<> 595 template <FPType fp_type, typename RetT> 596 struct FPRepImpl : public FPRepSem<fp_type, RetT> { 597 using UP = FPRepSem<fp_type, RetT>; 598 using StorageType = typename UP::StorageType; 599 600 protected: 601 using UP::bits; 602 using UP::encode; 603 using UP::exp_bits; 604 using UP::exp_sig_bits; 605 606 using typename UP::BiasedExponent; 607 using typename UP::Exponent; 608 using typename UP::Significand; 609 610 using UP::FP_MASK; 611 612 public: 613 // Constants. 614 using UP::EXP_BIAS; 615 using UP::EXP_MASK; 616 using UP::FRACTION_MASK; 617 using UP::SIG_LEN; 618 using UP::SIG_MASK; 619 using UP::SIGN_MASK; 620 LIBC_INLINE_VAR static constexpr int MAX_BIASED_EXPONENT = 621 (1 << UP::EXP_LEN) - 1; 622 623 // CTors 624 LIBC_INLINE constexpr FPRepImpl() = default; 625 LIBC_INLINE constexpr explicit FPRepImpl(StorageType x) : UP(x) {} 626 627 // Comparison 628 LIBC_INLINE constexpr friend bool operator==(FPRepImpl a, FPRepImpl b) { 629 return a.uintval() == b.uintval(); 630 } 631 LIBC_INLINE constexpr friend bool operator!=(FPRepImpl a, FPRepImpl b) { 632 return a.uintval() != b.uintval(); 633 } 634 635 // Representation 636 LIBC_INLINE constexpr StorageType uintval() const { return bits & FP_MASK; } 637 LIBC_INLINE constexpr void set_uintval(StorageType value) { 638 bits = (value & FP_MASK); 639 } 640 641 // Builders 642 using UP::inf; 643 using UP::max_normal; 644 using UP::max_subnormal; 645 using UP::min_normal; 646 using UP::min_subnormal; 647 using UP::one; 648 using UP::quiet_nan; 649 using UP::signaling_nan; 650 using UP::zero; 651 652 // Modifiers 653 LIBC_INLINE constexpr RetT abs() const { 654 return RetT(static_cast<StorageType>(bits & UP::EXP_SIG_MASK)); 655 } 656 657 // Observers 658 using UP::get_explicit_mantissa; 659 using UP::is_finite; 660 using UP::is_inf; 661 using UP::is_nan; 662 using UP::is_normal; 663 using UP::is_quiet_nan; 664 using UP::is_signaling_nan; 665 
using UP::is_subnormal; 666 using UP::is_zero; 667 using UP::next_toward_inf; 668 using UP::sign; 669 LIBC_INLINE constexpr bool is_inf_or_nan() const { return !is_finite(); } 670 LIBC_INLINE constexpr bool is_neg() const { return sign().is_neg(); } 671 LIBC_INLINE constexpr bool is_pos() const { return sign().is_pos(); } 672 673 LIBC_INLINE constexpr uint16_t get_biased_exponent() const { 674 return static_cast<uint16_t>(static_cast<uint32_t>(UP::biased_exponent())); 675 } 676 677 LIBC_INLINE constexpr void set_biased_exponent(StorageType biased) { 678 UP::set_biased_exponent(BiasedExponent((int32_t)biased)); 679 } 680 681 LIBC_INLINE constexpr int get_exponent() const { 682 return static_cast<int32_t>(Exponent(UP::biased_exponent())); 683 } 684 685 // If the number is subnormal, the exponent is treated as if it were the 686 // minimum exponent for a normal number. This is to keep continuity between 687 // the normal and subnormal ranges, but it causes problems for functions where 688 // values are calculated from the exponent, since just subtracting the bias 689 // will give a slightly incorrect result. Additionally, zero has an exponent 690 // of zero, and that should actually be treated as zero. 691 LIBC_INLINE constexpr int get_explicit_exponent() const { 692 Exponent exponent(UP::biased_exponent()); 693 if (is_zero()) 694 exponent = Exponent::zero(); 695 if (exponent == Exponent::subnormal()) 696 exponent = Exponent::min(); 697 return static_cast<int32_t>(exponent); 698 } 699 700 LIBC_INLINE constexpr StorageType get_mantissa() const { 701 return bits & FRACTION_MASK; 702 } 703 704 LIBC_INLINE constexpr void set_mantissa(StorageType mantVal) { 705 bits = UP::merge(bits, mantVal, FRACTION_MASK); 706 } 707 708 LIBC_INLINE constexpr void set_significand(StorageType sigVal) { 709 bits = UP::merge(bits, sigVal, SIG_MASK); 710 } 711 // Unsafe function to create a floating point representation. 
712 // It simply packs the sign, biased exponent and mantissa values without 713 // checking bound nor normalization. 714 // 715 // WARNING: For X86 Extended Precision, implicit bit needs to be set correctly 716 // in the 'mantissa' by the caller. This function will not check for its 717 // validity. 718 // 719 // FIXME: Use an uint32_t for 'biased_exp'. 720 LIBC_INLINE static constexpr RetT 721 create_value(Sign sign, StorageType biased_exp, StorageType mantissa) { 722 return RetT(encode(sign, BiasedExponent(static_cast<uint32_t>(biased_exp)), 723 Significand(mantissa))); 724 } 725 726 // The function converts integer number and unbiased exponent to proper 727 // float T type: 728 // Result = number * 2^(ep+1 - exponent_bias) 729 // Be careful! 730 // 1) "ep" is the raw exponent value. 731 // 2) The function adds +1 to ep for seamless normalized to denormalized 732 // transition. 733 // 3) The function does not check exponent high limit. 734 // 4) "number" zero value is not processed correctly. 735 // 5) Number is unsigned, so the result can be only positive. 736 LIBC_INLINE static constexpr RetT make_value(StorageType number, int ep) { 737 FPRepImpl result(0); 738 int lz = 739 UP::FRACTION_LEN + 1 - (UP::STORAGE_LEN - cpp::countl_zero(number)); 740 741 number <<= lz; 742 ep -= lz; 743 744 if (LIBC_LIKELY(ep >= 0)) { 745 // Implicit number bit will be removed by mask 746 result.set_significand(number); 747 result.set_biased_exponent(static_cast<StorageType>(ep + 1)); 748 } else { 749 result.set_significand(number >> -ep); 750 } 751 return RetT(result.uintval()); 752 } 753 }; 754 755 // A generic class to manipulate floating point formats. 756 // It derives its functionality to FPRepImpl above. 
757 template <FPType fp_type> 758 struct FPRep : public FPRepImpl<fp_type, FPRep<fp_type>> { 759 using UP = FPRepImpl<fp_type, FPRep<fp_type>>; 760 using StorageType = typename UP::StorageType; 761 using UP::UP; 762 763 LIBC_INLINE constexpr explicit operator StorageType() const { 764 return UP::uintval(); 765 } 766 }; 767 768 } // namespace internal 769 770 // Returns the FPType corresponding to C++ type T on the host. 771 template <typename T> LIBC_INLINE static constexpr FPType get_fp_type() { 772 using UnqualT = cpp::remove_cv_t<T>; 773 if constexpr (cpp::is_same_v<UnqualT, float> && __FLT_MANT_DIG__ == 24) 774 return FPType::IEEE754_Binary32; 775 else if constexpr (cpp::is_same_v<UnqualT, double> && __DBL_MANT_DIG__ == 53) 776 return FPType::IEEE754_Binary64; 777 else if constexpr (cpp::is_same_v<UnqualT, long double>) { 778 if constexpr (__LDBL_MANT_DIG__ == 53) 779 return FPType::IEEE754_Binary64; 780 else if constexpr (__LDBL_MANT_DIG__ == 64) 781 return FPType::X86_Binary80; 782 else if constexpr (__LDBL_MANT_DIG__ == 113) 783 return FPType::IEEE754_Binary128; 784 } 785 #if defined(LIBC_TYPES_HAS_FLOAT16) 786 else if constexpr (cpp::is_same_v<UnqualT, float16>) 787 return FPType::IEEE754_Binary16; 788 #endif 789 #if defined(LIBC_TYPES_HAS_FLOAT128) 790 else if constexpr (cpp::is_same_v<UnqualT, float128>) 791 return FPType::IEEE754_Binary128; 792 #endif 793 else 794 static_assert(cpp::always_false<UnqualT>, "Unsupported type"); 795 } 796 797 // A generic class to manipulate C++ floating point formats. 798 // It derives its functionality to FPRepImpl above. 799 template <typename T> 800 struct FPBits final : public internal::FPRepImpl<get_fp_type<T>(), FPBits<T>> { 801 static_assert(cpp::is_floating_point_v<T>, 802 "FPBits instantiated with invalid type."); 803 using UP = internal::FPRepImpl<get_fp_type<T>(), FPBits<T>>; 804 using StorageType = typename UP::StorageType; 805 806 // Constructors. 
807 LIBC_INLINE constexpr FPBits() = default; 808 809 template <typename XType> LIBC_INLINE constexpr explicit FPBits(XType x) { 810 using Unqual = typename cpp::remove_cv_t<XType>; 811 if constexpr (cpp::is_same_v<Unqual, T>) { 812 UP::bits = cpp::bit_cast<StorageType>(x); 813 } else if constexpr (cpp::is_same_v<Unqual, StorageType>) { 814 UP::bits = x; 815 } else { 816 // We don't want accidental type promotions/conversions, so we require 817 // exact type match. 818 static_assert(cpp::always_false<XType>); 819 } 820 } 821 822 // Floating-point conversions. 823 LIBC_INLINE constexpr T get_val() const { return cpp::bit_cast<T>(UP::bits); } 824 }; 825 826 } // namespace fputil 827 } // namespace LIBC_NAMESPACE 828 829 #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H 830