1 //== llvm/Support/APFloat.h - Arbitrary Precision Floating Point -*- C++ -*-==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief 12 /// This file declares a class to represent arbitrary precision floating point 13 /// values and provide a variety of arithmetic operations on them. 14 /// 15 //===----------------------------------------------------------------------===// 16 17 #ifndef LLVM_ADT_APFLOAT_H 18 #define LLVM_ADT_APFLOAT_H 19 20 #include "llvm/ADT/APInt.h" 21 22 namespace llvm { 23 24 struct fltSemantics; 25 class APSInt; 26 class StringRef; 27 28 /// Enum that represents what fraction of the LSB truncated bits of an fp number 29 /// represent. 30 /// 31 /// This essentially combines the roles of guard and sticky bits. 32 enum lostFraction { // Example of truncated bits: 33 lfExactlyZero, // 000000 34 lfLessThanHalf, // 0xxxxx x's not all zero 35 lfExactlyHalf, // 100000 36 lfMoreThanHalf // 1xxxxx x's not all zero 37 }; 38 39 /// \brief A self-contained host- and target-independent arbitrary-precision 40 /// floating-point software implementation. 41 /// 42 /// APFloat uses bignum integer arithmetic as provided by static functions in 43 /// the APInt class. The library will work with bignum integers whose parts are 44 /// any unsigned type at least 16 bits wide, but 64 bits is recommended. 45 /// 46 /// Written for clarity rather than speed, in particular with a view to use in 47 /// the front-end of a cross compiler so that target arithmetic can be correctly 48 /// performed on the host. Performance should nonetheless be reasonable, 49 /// particularly for its intended use. It may be useful as a base 50 /// implementation for a run-time library during development of a faster 51 /// target-specific one. 
///
/// All 5 rounding modes in the IEEE-754R draft are handled correctly for all
/// implemented operations. Currently implemented operations are add, subtract,
/// multiply, divide, fused-multiply-add, conversion-to-float,
/// conversion-to-integer and conversion-from-integer. New rounding modes
/// (e.g. away from zero) can be added with three or four lines of code.
///
/// Four formats are built-in: IEEE single precision, double precision,
/// quadruple precision, and x87 80-bit extended double (when operating with
/// full extended precision). Adding a new format that obeys IEEE semantics
/// only requires adding two lines of code: a declaration and definition of the
/// format.
///
/// All operations return the status of that operation as an exception bit-mask,
/// so multiple operations can be done consecutively with their results or-ed
/// together. The returned status can be useful for compiler diagnostics; e.g.,
/// inexact, underflow and overflow can be easily diagnosed on constant folding,
/// and compiler optimizers can determine what exceptions would be raised by
/// folding operations and optimize, or perhaps not optimize, accordingly.
///
/// At present, underflow tininess is detected after rounding; it should be
/// straightforward to add support for the before-rounding case too.
///
/// The library reads hexadecimal floating point numbers as per C99, and
/// correctly rounds if necessary according to the specified rounding mode.
/// Syntax is required to have been validated by the caller. It also converts
/// floating point numbers to hexadecimal text as per the C99 %a and %A
/// conversions. The output precision (or alternatively the natural minimal
/// precision) can be specified; if the requested precision is less than the
/// natural precision the output is correctly rounded for the specified rounding
/// mode.
///
/// It also reads decimal floating point numbers and correctly rounds according
/// to the specified rounding mode.
///
/// Conversion to decimal text is provided by toString().
///
/// Non-zero finite numbers are represented internally as a sign bit, a 16-bit
/// signed exponent, and the significand as an array of integer parts. After
/// normalization of a number of precision P the exponent is within the range of
/// the format, and if the number is not denormal the P-th bit of the
/// significand is set as an explicit integer bit. For denormals the most
/// significant bit is shifted right so that the exponent is maintained at the
/// format's minimum, so that the smallest denormal has just the least
/// significant bit of the significand set. The sign of zeroes and infinities
/// is significant; the exponent and significand of such numbers are not stored,
/// but have a known implicit (deterministic) value: 0 for the significands, 0
/// for zero exponent, all 1 bits for infinity exponent. For NaNs the sign and
/// significand are deterministic, although not really meaningful, and preserved
/// in non-conversion operations. The exponent is implicitly all 1 bits.
///
/// APFloat does not provide any exception handling beyond default exception
/// handling. We represent Signaling NaNs via IEEE-754R 2008 6.2.1 should clause
/// by encoding Signaling NaNs with the first bit of its trailing significand as
/// 0.
///
/// TODO
/// ====
///
/// Some features that may or may not be worth adding:
///
/// Binary to decimal conversion (hard). NOTE(review): toString() already
/// produces decimal text, so this item may be stale -- confirm.
///
/// Optional ability to detect underflow tininess before rounding.
///
/// New formats: x87 in single and double precision mode (IEEE apart from
/// extended exponent range) (hard).
///
/// New operations: sqrt, IEEE remainder, C90 fmod, nexttoward.
121 /// 122 class APFloat { 123 public: 124 125 /// A signed type to represent a floating point numbers unbiased exponent. 126 typedef signed short ExponentType; 127 128 /// \name Floating Point Semantics. 129 /// @{ 130 131 static const fltSemantics IEEEhalf; 132 static const fltSemantics IEEEsingle; 133 static const fltSemantics IEEEdouble; 134 static const fltSemantics IEEEquad; 135 static const fltSemantics PPCDoubleDouble; 136 static const fltSemantics x87DoubleExtended; 137 138 /// A Pseudo fltsemantic used to construct APFloats that cannot conflict with 139 /// anything real. 140 static const fltSemantics Bogus; 141 142 /// @} 143 144 static unsigned int semanticsPrecision(const fltSemantics &); 145 146 /// IEEE-754R 5.11: Floating Point Comparison Relations. 147 enum cmpResult { 148 cmpLessThan, 149 cmpEqual, 150 cmpGreaterThan, 151 cmpUnordered 152 }; 153 154 /// IEEE-754R 4.3: Rounding-direction attributes. 155 enum roundingMode { 156 rmNearestTiesToEven, 157 rmTowardPositive, 158 rmTowardNegative, 159 rmTowardZero, 160 rmNearestTiesToAway 161 }; 162 163 /// IEEE-754R 7: Default exception handling. 164 /// 165 /// opUnderflow or opOverflow are always returned or-ed with opInexact. 166 enum opStatus { 167 opOK = 0x00, 168 opInvalidOp = 0x01, 169 opDivByZero = 0x02, 170 opOverflow = 0x04, 171 opUnderflow = 0x08, 172 opInexact = 0x10 173 }; 174 175 /// Category of internally-represented number. 176 enum fltCategory { 177 fcInfinity, 178 fcNaN, 179 fcNormal, 180 fcZero 181 }; 182 183 /// Convenience enum used to construct an uninitialized APFloat. 
184 enum uninitializedTag { 185 uninitialized 186 }; 187 188 /// \name Constructors 189 /// @{ 190 191 APFloat(const fltSemantics &); // Default construct to 0.0 192 APFloat(const fltSemantics &, StringRef); 193 APFloat(const fltSemantics &, integerPart); 194 APFloat(const fltSemantics &, uninitializedTag); 195 APFloat(const fltSemantics &, const APInt &); 196 explicit APFloat(double d); 197 explicit APFloat(float f); 198 APFloat(const APFloat &); 199 ~APFloat(); 200 201 /// @} 202 203 /// \brief Returns whether this instance allocated memory. needsCleanup()204 bool needsCleanup() const { return partCount() > 1; } 205 206 /// \name Convenience "constructors" 207 /// @{ 208 209 /// Factory for Positive and Negative Zero. 210 /// 211 /// \param Negative True iff the number should be negative. 212 static APFloat getZero(const fltSemantics &Sem, bool Negative = false) { 213 APFloat Val(Sem, uninitialized); 214 Val.makeZero(Negative); 215 return Val; 216 } 217 218 /// Factory for Positive and Negative Infinity. 219 /// 220 /// \param Negative True iff the number should be negative. 221 static APFloat getInf(const fltSemantics &Sem, bool Negative = false) { 222 APFloat Val(Sem, uninitialized); 223 Val.makeInf(Negative); 224 return Val; 225 } 226 227 /// Factory for QNaN values. 228 /// 229 /// \param Negative - True iff the NaN generated should be negative. 230 /// \param type - The unspecified fill bits for creating the NaN, 0 by 231 /// default. The value is truncated as necessary. 232 static APFloat getNaN(const fltSemantics &Sem, bool Negative = false, 233 unsigned type = 0) { 234 if (type) { 235 APInt fill(64, type); 236 return getQNaN(Sem, Negative, &fill); 237 } else { 238 return getQNaN(Sem, Negative, 0); 239 } 240 } 241 242 /// Factory for QNaN values. 243 static APFloat getQNaN(const fltSemantics &Sem, bool Negative = false, 244 const APInt *payload = 0) { 245 return makeNaN(Sem, false, Negative, payload); 246 } 247 248 /// Factory for SNaN values. 
249 static APFloat getSNaN(const fltSemantics &Sem, bool Negative = false, 250 const APInt *payload = 0) { 251 return makeNaN(Sem, true, Negative, payload); 252 } 253 254 /// Returns the largest finite number in the given semantics. 255 /// 256 /// \param Negative - True iff the number should be negative 257 static APFloat getLargest(const fltSemantics &Sem, bool Negative = false); 258 259 /// Returns the smallest (by magnitude) finite number in the given semantics. 260 /// Might be denormalized, which implies a relative loss of precision. 261 /// 262 /// \param Negative - True iff the number should be negative 263 static APFloat getSmallest(const fltSemantics &Sem, bool Negative = false); 264 265 /// Returns the smallest (by magnitude) normalized finite number in the given 266 /// semantics. 267 /// 268 /// \param Negative - True iff the number should be negative 269 static APFloat getSmallestNormalized(const fltSemantics &Sem, 270 bool Negative = false); 271 272 /// Returns a float which is bitcasted from an all one value int. 273 /// 274 /// \param BitWidth - Select float type 275 /// \param isIEEE - If 128 bit number, select between PPC and IEEE 276 static APFloat getAllOnesValue(unsigned BitWidth, bool isIEEE = false); 277 278 /// @} 279 280 /// Used to insert APFloat objects, or objects that contain APFloat objects, 281 /// into FoldingSets. 282 void Profile(FoldingSetNodeID &NID) const; 283 284 /// \brief Used by the Bitcode serializer to emit APInts to Bitcode. 285 void Emit(Serializer &S) const; 286 287 /// \brief Used by the Bitcode deserializer to deserialize APInts. 288 static APFloat ReadVal(Deserializer &D); 289 290 /// \name Arithmetic 291 /// @{ 292 293 opStatus add(const APFloat &, roundingMode); 294 opStatus subtract(const APFloat &, roundingMode); 295 opStatus multiply(const APFloat &, roundingMode); 296 opStatus divide(const APFloat &, roundingMode); 297 /// IEEE remainder. 298 opStatus remainder(const APFloat &); 299 /// C fmod, or llvm frem. 
300 opStatus mod(const APFloat &, roundingMode); 301 opStatus fusedMultiplyAdd(const APFloat &, const APFloat &, roundingMode); 302 opStatus roundToIntegral(roundingMode); 303 /// IEEE-754R 5.3.1: nextUp/nextDown. 304 opStatus next(bool nextDown); 305 306 /// @} 307 308 /// \name Sign operations. 309 /// @{ 310 311 void changeSign(); 312 void clearSign(); 313 void copySign(const APFloat &); 314 315 /// @} 316 317 /// \name Conversions 318 /// @{ 319 320 opStatus convert(const fltSemantics &, roundingMode, bool *); 321 opStatus convertToInteger(integerPart *, unsigned int, bool, roundingMode, 322 bool *) const; 323 opStatus convertToInteger(APSInt &, roundingMode, bool *) const; 324 opStatus convertFromAPInt(const APInt &, bool, roundingMode); 325 opStatus convertFromSignExtendedInteger(const integerPart *, unsigned int, 326 bool, roundingMode); 327 opStatus convertFromZeroExtendedInteger(const integerPart *, unsigned int, 328 bool, roundingMode); 329 opStatus convertFromString(StringRef, roundingMode); 330 APInt bitcastToAPInt() const; 331 double convertToDouble() const; 332 float convertToFloat() const; 333 334 /// @} 335 336 /// The definition of equality is not straightforward for floating point, so 337 /// we won't use operator==. Use one of the following, or write whatever it 338 /// is you really mean. 339 bool operator==(const APFloat &) const LLVM_DELETED_FUNCTION; 340 341 /// IEEE comparison with another floating point number (NaNs compare 342 /// unordered, 0==-0). 343 cmpResult compare(const APFloat &) const; 344 345 /// Bitwise comparison for equality (QNaNs compare equal, 0!=-0). 346 bool bitwiseIsEqual(const APFloat &) const; 347 348 /// Write out a hexadecimal representation of the floating point value to DST, 349 /// which must be of sufficient size, in the C99 form [-]0xh.hhhhp[+-]d. 350 /// Return the number of characters written, excluding the terminating NUL. 
351 unsigned int convertToHexString(char *dst, unsigned int hexDigits, 352 bool upperCase, roundingMode) const; 353 354 /// \name IEEE-754R 5.7.2 General operations. 355 /// @{ 356 357 /// IEEE-754R isSignMinus: Returns true if and only if the current value is 358 /// negative. 359 /// 360 /// This applies to zeros and NaNs as well. isNegative()361 bool isNegative() const { return sign; } 362 363 /// IEEE-754R isNormal: Returns true if and only if the current value is normal. 364 /// 365 /// This implies that the current value of the float is not zero, subnormal, 366 /// infinite, or NaN following the definition of normality from IEEE-754R. isNormal()367 bool isNormal() const { return !isDenormal() && isFiniteNonZero(); } 368 369 /// Returns true if and only if the current value is zero, subnormal, or 370 /// normal. 371 /// 372 /// This means that the value is not infinite or NaN. isFinite()373 bool isFinite() const { return !isNaN() && !isInfinity(); } 374 375 /// Returns true if and only if the float is plus or minus zero. isZero()376 bool isZero() const { return category == fcZero; } 377 378 /// IEEE-754R isSubnormal(): Returns true if and only if the float is a 379 /// denormal. 380 bool isDenormal() const; 381 382 /// IEEE-754R isInfinite(): Returns true if and only if the float is infinity. isInfinity()383 bool isInfinity() const { return category == fcInfinity; } 384 385 /// Returns true if and only if the float is a quiet or signaling NaN. isNaN()386 bool isNaN() const { return category == fcNaN; } 387 388 /// Returns true if and only if the float is a signaling NaN. 
389 bool isSignaling() const; 390 391 /// @} 392 393 /// \name Simple Queries 394 /// @{ 395 getCategory()396 fltCategory getCategory() const { return category; } getSemantics()397 const fltSemantics &getSemantics() const { return *semantics; } isNonZero()398 bool isNonZero() const { return category != fcZero; } isFiniteNonZero()399 bool isFiniteNonZero() const { return isFinite() && !isZero(); } isPosZero()400 bool isPosZero() const { return isZero() && !isNegative(); } isNegZero()401 bool isNegZero() const { return isZero() && isNegative(); } 402 403 /// Returns true if and only if the number has the smallest possible non-zero 404 /// magnitude in the current semantics. 405 bool isSmallest() const; 406 407 /// Returns true if and only if the number has the largest possible finite 408 /// magnitude in the current semantics. 409 bool isLargest() const; 410 411 /// @} 412 413 APFloat &operator=(const APFloat &); 414 415 /// \brief Overload to compute a hash code for an APFloat value. 416 /// 417 /// Note that the use of hash codes for floating point values is in general 418 /// frought with peril. Equality is hard to define for these values. For 419 /// example, should negative and positive zero hash to different codes? Are 420 /// they equal or not? This hash value implementation specifically 421 /// emphasizes producing different codes for different inputs in order to 422 /// be used in canonicalization and memoization. As such, equality is 423 /// bitwiseIsEqual, and 0 != -0. 424 friend hash_code hash_value(const APFloat &Arg); 425 426 /// Converts this value into a decimal string. 427 /// 428 /// \param FormatPrecision The maximum number of digits of 429 /// precision to output. If there are fewer digits available, 430 /// zero padding will not be used unless the value is 431 /// integral and small enough to be expressed in 432 /// FormatPrecision digits. 0 means to use the natural 433 /// precision of the number. 
434 /// \param FormatMaxPadding The maximum number of zeros to 435 /// consider inserting before falling back to scientific 436 /// notation. 0 means to always use scientific notation. 437 /// 438 /// Number Precision MaxPadding Result 439 /// ------ --------- ---------- ------ 440 /// 1.01E+4 5 2 10100 441 /// 1.01E+4 4 2 1.01E+4 442 /// 1.01E+4 5 1 1.01E+4 443 /// 1.01E-2 5 2 0.0101 444 /// 1.01E-2 4 2 0.0101 445 /// 1.01E-2 4 1 1.01E-2 446 void toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision = 0, 447 unsigned FormatMaxPadding = 3) const; 448 449 /// If this value has an exact multiplicative inverse, store it in inv and 450 /// return true. 451 bool getExactInverse(APFloat *inv) const; 452 453 private: 454 455 /// \name Simple Queries 456 /// @{ 457 458 integerPart *significandParts(); 459 const integerPart *significandParts() const; 460 unsigned int partCount() const; 461 462 /// @} 463 464 /// \name Significand operations. 465 /// @{ 466 467 integerPart addSignificand(const APFloat &); 468 integerPart subtractSignificand(const APFloat &, integerPart); 469 lostFraction addOrSubtractSignificand(const APFloat &, bool subtract); 470 lostFraction multiplySignificand(const APFloat &, const APFloat *); 471 lostFraction divideSignificand(const APFloat &); 472 void incrementSignificand(); 473 void initialize(const fltSemantics *); 474 void shiftSignificandLeft(unsigned int); 475 lostFraction shiftSignificandRight(unsigned int); 476 unsigned int significandLSB() const; 477 unsigned int significandMSB() const; 478 void zeroSignificand(); 479 /// Return true if the significand excluding the integral bit is all ones. 480 bool isSignificandAllOnes() const; 481 /// Return true if the significand excluding the integral bit is all zeros. 482 bool isSignificandAllZeros() const; 483 484 /// @} 485 486 /// \name Arithmetic on special values. 
487 /// @{ 488 489 opStatus addOrSubtractSpecials(const APFloat &, bool subtract); 490 opStatus divideSpecials(const APFloat &); 491 opStatus multiplySpecials(const APFloat &); 492 opStatus modSpecials(const APFloat &); 493 494 /// @} 495 496 /// \name Special value setters. 497 /// @{ 498 499 void makeLargest(bool Neg = false); 500 void makeSmallest(bool Neg = false); 501 void makeNaN(bool SNaN = false, bool Neg = false, const APInt *fill = 0); 502 static APFloat makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative, 503 const APInt *fill); 504 void makeInf(bool Neg = false); 505 void makeZero(bool Neg = false); 506 507 /// @} 508 509 /// \name Miscellany 510 /// @{ 511 512 bool convertFromStringSpecials(StringRef str); 513 opStatus normalize(roundingMode, lostFraction); 514 opStatus addOrSubtract(const APFloat &, roundingMode, bool subtract); 515 cmpResult compareAbsoluteValue(const APFloat &) const; 516 opStatus handleOverflow(roundingMode); 517 bool roundAwayFromZero(roundingMode, lostFraction, unsigned int) const; 518 opStatus convertToSignExtendedInteger(integerPart *, unsigned int, bool, 519 roundingMode, bool *) const; 520 opStatus convertFromUnsignedParts(const integerPart *, unsigned int, 521 roundingMode); 522 opStatus convertFromHexadecimalString(StringRef, roundingMode); 523 opStatus convertFromDecimalString(StringRef, roundingMode); 524 char *convertNormalToHexString(char *, unsigned int, bool, 525 roundingMode) const; 526 opStatus roundSignificandWithExponent(const integerPart *, unsigned int, int, 527 roundingMode); 528 529 /// @} 530 531 APInt convertHalfAPFloatToAPInt() const; 532 APInt convertFloatAPFloatToAPInt() const; 533 APInt convertDoubleAPFloatToAPInt() const; 534 APInt convertQuadrupleAPFloatToAPInt() const; 535 APInt convertF80LongDoubleAPFloatToAPInt() const; 536 APInt convertPPCDoubleDoubleAPFloatToAPInt() const; 537 void initFromAPInt(const fltSemantics *Sem, const APInt &api); 538 void initFromHalfAPInt(const APInt &api); 539 
void initFromFloatAPInt(const APInt &api); 540 void initFromDoubleAPInt(const APInt &api); 541 void initFromQuadrupleAPInt(const APInt &api); 542 void initFromF80LongDoubleAPInt(const APInt &api); 543 void initFromPPCDoubleDoubleAPInt(const APInt &api); 544 545 void assign(const APFloat &); 546 void copySignificand(const APFloat &); 547 void freeSignificand(); 548 549 /// The semantics that this value obeys. 550 const fltSemantics *semantics; 551 552 /// A binary fraction with an explicit integer bit. 553 /// 554 /// The significand must be at least one bit wider than the target precision. 555 union Significand { 556 integerPart part; 557 integerPart *parts; 558 } significand; 559 560 /// The signed unbiased exponent of the value. 561 ExponentType exponent; 562 563 /// What kind of floating point number this is. 564 /// 565 /// Only 2 bits are required, but VisualStudio incorrectly sign extends it. 566 /// Using the extra bit keeps it from failing under VisualStudio. 567 fltCategory category : 3; 568 569 /// Sign bit of the number. 570 unsigned int sign : 1; 571 }; 572 573 /// See friend declaration above. 574 /// 575 /// This additional declaration is required in order to compile LLVM with IBM 576 /// xlC compiler. 577 hash_code hash_value(const APFloat &Arg); 578 } // namespace llvm 579 580 #endif // LLVM_ADT_APFLOAT_H 581