//===-- include/flang/Common/real.h -----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef FORTRAN_COMMON_REAL_H_
#define FORTRAN_COMMON_REAL_H_

// Characteristics of IEEE-754 & related binary floating-point numbers.
// The various representations are distinguished by their binary precisions
// (number of explicit significand bits and any implicit MSB in the fraction).

#include <cinttypes>

namespace Fortran::common {

// Total representation size in bits for each type.
// Returns -1 when binaryPrecision does not name a known representation.
static constexpr int BitsForBinaryPrecision(int binaryPrecision) {
  switch (binaryPrecision) {
  case 8: // IEEE single (truncated): 1+8+7 with implicit bit
    return 16;
  case 11: // IEEE half precision: 1+5+10 with implicit bit
    return 16;
  case 24: // IEEE single precision: 1+8+23 with implicit bit
    return 32;
  case 53: // IEEE double precision: 1+11+52 with implicit bit
    return 64;
  case 64: // x87 extended precision: 1+15+64, no implicit bit
    return 80;
  case 106: // "double-double": 2*(1+11+52 with implicit bit)
    return 128;
  case 113: // IEEE quad precision: 1+15+112 with implicit bit
    return 128;
  default:
    return -1;
  }
}

// Maximum number of significant decimal digits in the fraction of an
// exact conversion in each type; computed by converting the value
// with the minimum exponent (biased to 1) and all fractional bits set.
// Returns -1 when binaryPrecision does not name a known representation.
static constexpr int MaxDecimalConversionDigits(int binaryPrecision) {
  switch (binaryPrecision) {
  case 8: // IEEE single (truncated): 1+8+7 with implicit bit
    return 96;
  case 11: // IEEE half precision: 1+5+10 with implicit bit
    return 21;
  case 24: // IEEE single precision: 1+8+23 with implicit bit
    return 112;
  case 53: // IEEE double precision: 1+11+52 with implicit bit
    return 767;
  case 64: // x87 extended precision: 1+15+64, no implicit bit
    return 11514;
  case 106: // "double-double": 2*(1+11+52 with implicit bit)
    return 2 * 767;
  case 113: // IEEE quad precision: 1+15+112 with implicit bit
    return 11563;
  default:
    return -1;
  }
}

// Maps a binary precision to the kind value of the corresponding real type.
// Returns -1 for an unknown precision; "double-double" (106) has no kind
// assigned yet and therefore also yields -1.
static constexpr int RealKindForPrecision(int binaryPrecision) {
  switch (binaryPrecision) {
  case 8: // IEEE single (truncated): 1+8+7 with implicit bit
    return 3;
  case 11: // IEEE half precision: 1+5+10 with implicit bit
    return 2;
  case 24: // IEEE single precision: 1+8+23 with implicit bit
    return 4;
  case 53: // IEEE double precision: 1+11+52 with implicit bit
    return 8;
  case 64: // x87 extended precision: 1+15+64, no implicit bit
    return 10;
  // TODO: case 106: return kind for double/double
  case 113: // IEEE quad precision: 1+15+112 with implicit bit
    return 16;
  default:
    return -1;
  }
}

// Inverse of RealKindForPrecision(): maps a real kind value to its binary
// precision.  Returns -1 for a kind that has no entry here.
static constexpr int PrecisionOfRealKind(int kind) {
  switch (kind) {
  case 2: // IEEE half precision: 1+5+10 with implicit bit
    return 11;
  case 3: // IEEE single (truncated): 1+8+7 with implicit bit
    return 8;
  case 4: // IEEE single precision: 1+8+23 with implicit bit
    return 24;
  case 8: // IEEE double precision: 1+11+52 with implicit bit
    return 53;
  case 10: // x87 extended precision: 1+15+64, no implicit bit
    return 64;
  // TODO: case kind for double/double: return 106;
  case 16: // IEEE quad precision: 1+15+112 with implicit bit
    return 113;
  default:
    return -1;
  }
}

// Compile-time layout and range characteristics of the binary floating-point
// representation identified by BINARY_PRECISION.
// NOTE: the "double-double" precision (106) is rejected by the
// exponentBits <= 15 assertion below, since 128 - 105 - 1 == 22.
template <int BINARY_PRECISION> class RealDetails {
private:
  // Converts bit widths to whole decimal digits: scales logb2 by
  // log10(2) in 12-digit fixed point and truncates the quotient.
  static constexpr int LogBaseTwoToLogBaseTen(int logb2) {
    constexpr std::int64_t LogBaseTenOfTwoTimesTenToThe12th{301029995664};
    constexpr std::int64_t TenToThe12th{1000000000000};
    // The multiplication is carried out in 64 bits, so it cannot overflow
    // for any int argument.
    std::int64_t logb10{
        (logb2 * LogBaseTenOfTwoTimesTenToThe12th) / TenToThe12th};
    return static_cast<int>(logb10);
  }

public:
  static constexpr int binaryPrecision{BINARY_PRECISION};
  // Total storage size in bits; -1 if the precision is not recognized.
  static constexpr int bits{BitsForBinaryPrecision(binaryPrecision)};
  // Every format except x87 extended keeps the significand's MSB implicit.
  static constexpr bool isImplicitMSB{binaryPrecision != 64 /*x87*/};
  // Number of significand bits that are explicitly stored.
  static constexpr int significandBits{binaryPrecision - isImplicitMSB};
  // Whatever is left after the stored significand and the sign bit.
  static constexpr int exponentBits{bits - significandBits - 1 /*sign*/};
  // Exponent field value with all bits set.
  static constexpr int maxExponent{(1 << exponentBits) - 1};
  static constexpr int exponentBias{maxExponent / 2};

  // Whole decimal digits covered by binaryPrecision - 1 bits.
  static constexpr int decimalPrecision{
      LogBaseTwoToLogBaseTen(binaryPrecision - 1)};
  // Whole decimal digits covered by a binary exponent of exponentBias - 1.
  static constexpr int decimalRange{LogBaseTwoToLogBaseTen(exponentBias - 1)};

  // Number of significant decimal digits in the fraction of the
  // exact conversion of the least nonzero subnormal.
  static constexpr int maxDecimalConversionDigits{
      MaxDecimalConversionDigits(binaryPrecision)};

  static_assert(binaryPrecision > 0, "binary precision must be positive");
  // Report an unknown precision directly instead of through the derived
  // exponent width.
  static_assert(bits > 0, "unsupported binary precision");
  static_assert(exponentBits > 1, "exponent field is too narrow");
  static_assert(exponentBits <= 15, "exponent field is too wide");
};

} // namespace Fortran::common
#endif // FORTRAN_COMMON_REAL_H_