• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- lib/Decimal/decimal-to-binary.cpp ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "big-radix-floating-point.h"
10 #include "flang/Common/bit-population-count.h"
11 #include "flang/Common/leading-zero-bit-count.h"
12 #include "flang/Decimal/binary-floating-point.h"
13 #include "flang/Decimal/decimal.h"
14 #include <cinttypes>
15 #include <cstring>
16 #include <ctype.h>
17 
18 namespace Fortran::decimal {
19 
20 template <int PREC, int LOG10RADIX>
ParseNumber(const char * & p,bool & inexact)21 bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
22     const char *&p, bool &inexact) {
23   SetToZero();
24   while (*p == ' ') {
25     ++p;
26   }
27   const char *q{p};
28   isNegative_ = *q == '-';
29   if (*q == '-' || *q == '+') {
30     ++q;
31   }
32   const char *start{q};
33   while (*q == '0') {
34     ++q;
35   }
36   const char *first{q};
37   for (; *q >= '0' && *q <= '9'; ++q) {
38   }
39   const char *point{nullptr};
40   if (*q == '.') {
41     point = q;
42     for (++q; *q >= '0' && *q <= '9'; ++q) {
43     }
44   }
45   if (q == start || (q == start + 1 && *start == '.')) {
46     return false; // require at least one digit
47   }
48   // There's a valid number here; set the reference argument to point to
49   // the first character afterward.
50   p = q;
51   // Strip off trailing zeroes
52   if (point) {
53     while (q[-1] == '0') {
54       --q;
55     }
56     if (q[-1] == '.') {
57       point = nullptr;
58       --q;
59     }
60   }
61   if (!point) {
62     while (q > first && q[-1] == '0') {
63       --q;
64       ++exponent_;
65     }
66   }
67   // Trim any excess digits
68   const char *limit{first + maxDigits * log10Radix + (point != nullptr)};
69   if (q > limit) {
70     inexact = true;
71     if (point >= limit) {
72       q = point;
73       point = nullptr;
74     }
75     if (!point) {
76       exponent_ += q - limit;
77     }
78     q = limit;
79   }
80   if (point) {
81     exponent_ -= static_cast<int>(q - point - 1);
82   }
83   if (q == first) {
84     exponent_ = 0; // all zeros
85   }
86   // Rack the decimal digits up into big Digits.
87   for (auto times{radix}; q-- > first;) {
88     if (*q != '.') {
89       if (times == radix) {
90         digit_[digits_++] = *q - '0';
91         times = 10;
92       } else {
93         digit_[digits_ - 1] += times * (*q - '0');
94         times *= 10;
95       }
96     }
97   }
98   // Look for an optional exponent field.
99   q = p;
100   switch (*q) {
101   case 'e':
102   case 'E':
103   case 'd':
104   case 'D':
105   case 'q':
106   case 'Q': {
107     bool negExpo{*++q == '-'};
108     if (*q == '-' || *q == '+') {
109       ++q;
110     }
111     if (*q >= '0' && *q <= '9') {
112       int expo{0};
113       while (*q == '0') {
114         ++q;
115       }
116       const char *expDig{q};
117       while (*q >= '0' && *q <= '9') {
118         expo = 10 * expo + *q++ - '0';
119       }
120       if (q >= expDig + 8) {
121         // There's a ridiculous number of nonzero exponent digits.
122         // The decimal->binary conversion routine will cope with
123         // returning 0 or Inf, but we must ensure that "expo" didn't
124         // overflow back around to something legal.
125         expo = 10 * Real::decimalRange;
126         exponent_ = 0;
127       }
128       p = q; // exponent was valid
129       if (negExpo) {
130         exponent_ -= expo;
131       } else {
132         exponent_ += expo;
133       }
134     }
135   } break;
136   default:
137     break;
138   }
139   return true;
140 }
141 
142 template <int PREC, int LOG10RADIX>
143 void BigRadixFloatingPointNumber<PREC,
LoseLeastSignificantDigit()144     LOG10RADIX>::LoseLeastSignificantDigit() {
145   Digit LSD{digit_[0]};
146   for (int j{0}; j < digits_ - 1; ++j) {
147     digit_[j] = digit_[j + 1];
148   }
149   digit_[digits_ - 1] = 0;
150   bool incr{false};
151   switch (rounding_) {
152   case RoundNearest:
153     incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0);
154     break;
155   case RoundUp:
156     incr = LSD > 0 && !isNegative_;
157     break;
158   case RoundDown:
159     incr = LSD > 0 && isNegative_;
160     break;
161   case RoundToZero:
162     break;
163   case RoundCompatible:
164     incr = LSD >= radix / 2;
165     break;
166   }
167   for (int j{0}; (digit_[j] += incr) == radix; ++j) {
168     digit_[j] = 0;
169   }
170 }
171 
172 // This local utility class represents an unrounded nonnegative
173 // binary floating-point value with an unbiased (i.e., signed)
174 // binary exponent, an integer value (not a fraction) with an implied
175 // binary point to its *right*, and some guard bits for rounding.
176 template <int PREC> class IntermediateFloat {
177 public:
178   static constexpr int precision{PREC};
179   using IntType = common::HostUnsignedIntType<precision>;
180   static constexpr IntType topBit{IntType{1} << (precision - 1)};
181   static constexpr IntType mask{topBit + (topBit - 1)};
182 
IntermediateFloat()183   IntermediateFloat() {}
184   IntermediateFloat(const IntermediateFloat &) = default;
185 
186   // Assumes that exponent_ is valid on entry, and may increment it.
187   // Returns the number of guard_ bits that have been determined.
SetTo(UINT n)188   template <typename UINT> bool SetTo(UINT n) {
189     static constexpr int nBits{CHAR_BIT * sizeof n};
190     if constexpr (precision >= nBits) {
191       value_ = n;
192       guard_ = 0;
193       return 0;
194     } else {
195       int shift{common::BitsNeededFor(n) - precision};
196       if (shift <= 0) {
197         value_ = n;
198         guard_ = 0;
199         return 0;
200       } else {
201         value_ = n >> shift;
202         exponent_ += shift;
203         n <<= nBits - shift;
204         guard_ = (n >> (nBits - guardBits)) | ((n << guardBits) != 0);
205         return shift;
206       }
207     }
208   }
209 
ShiftIn(int bit=0)210   void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; }
IsFull() const211   bool IsFull() const { return value_ >= topBit; }
AdjustExponent(int by)212   void AdjustExponent(int by) { exponent_ += by; }
SetGuard(int g)213   void SetGuard(int g) {
214     guard_ |= (static_cast<GuardType>(g & 6) << (guardBits - 3)) | (g & 1);
215   }
216 
217   ConversionToBinaryResult<PREC> ToBinary(
218       bool isNegative, FortranRounding) const;
219 
220 private:
221   static constexpr int guardBits{3}; // guard, round, sticky
222   using GuardType = int;
223   static constexpr GuardType oneHalf{GuardType{1} << (guardBits - 1)};
224 
225   IntType value_{0};
226   GuardType guard_{0};
227   int exponent_{0};
228 };
229 
230 template <int PREC>
ToBinary(bool isNegative,FortranRounding rounding) const231 ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
232     bool isNegative, FortranRounding rounding) const {
233   using Binary = BinaryFloatingPointNumber<PREC>;
234   // Create a fraction with a binary point to the left of the integer
235   // value_, and bias the exponent.
236   IntType fraction{value_};
237   GuardType guard{guard_};
238   int expo{exponent_ + Binary::exponentBias + (precision - 1)};
239   while (expo < 1 && (fraction > 0 || guard > oneHalf)) {
240     guard = (guard & 1) | (guard >> 1) |
241         ((static_cast<GuardType>(fraction) & 1) << (guardBits - 1));
242     fraction >>= 1;
243     ++expo;
244   }
245   int flags{Exact};
246   if (guard != 0) {
247     flags |= Inexact;
248   }
249   if (fraction == 0 && guard <= oneHalf) {
250     return {Binary{}, static_cast<enum ConversionResultFlags>(flags)};
251   }
252   // The value is nonzero; normalize it.
253   while (fraction < topBit && expo > 1) {
254     --expo;
255     fraction = fraction * 2 + (guard >> (guardBits - 2));
256     guard = (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1);
257   }
258   // Apply rounding
259   bool incr{false};
260   switch (rounding) {
261   case RoundNearest:
262     incr = guard > oneHalf || (guard == oneHalf && (fraction & 1));
263     break;
264   case RoundUp:
265     incr = guard != 0 && !isNegative;
266     break;
267   case RoundDown:
268     incr = guard != 0 && isNegative;
269     break;
270   case RoundToZero:
271     break;
272   case RoundCompatible:
273     incr = guard >= oneHalf;
274     break;
275   }
276   if (incr) {
277     if (fraction == mask) {
278       // rounding causes a carry
279       ++expo;
280       fraction = topBit;
281     } else {
282       ++fraction;
283     }
284   }
285   if (expo == 1 && fraction < topBit) {
286     expo = 0; // subnormal
287   }
288   if (expo >= Binary::maxExponent) {
289     expo = Binary::maxExponent; // Inf
290     flags |= Overflow;
291     fraction = 0;
292   }
293   using Raw = typename Binary::RawType;
294   Raw raw = static_cast<Raw>(isNegative) << (Binary::bits - 1);
295   raw |= static_cast<Raw>(expo) << Binary::significandBits;
296   if constexpr (Binary::isImplicitMSB) {
297     fraction &= ~topBit;
298   }
299   raw |= fraction;
300   return {Binary(raw), static_cast<enum ConversionResultFlags>(flags)};
301 }
302 
303 template <int PREC, int LOG10RADIX>
304 ConversionToBinaryResult<PREC>
ConvertToBinary()305 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() {
306   // On entry, *this holds a multi-precision integer value in a radix of a
307   // large power of ten.  Its radix point is defined to be to the right of its
308   // digits, and "exponent_" is the power of ten by which it is to be scaled.
309   Normalize();
310   if (digits_ == 0) { // zero value
311     return {Real{SignBit()}};
312   }
313   // The value is not zero:  x = D. * 10.**E
314   // Shift our perspective on the radix (& decimal) point so that
315   // it sits to the *left* of the digits: i.e., x = .D * 10.**E
316   exponent_ += digits_ * log10Radix;
317   // Sanity checks for ridiculous exponents
318   static constexpr int crazy{2 * Real::decimalRange + log10Radix};
319   if (exponent_ < -crazy) { // underflow to +/-0.
320     return {Real{SignBit()}, Inexact};
321   } else if (exponent_ > crazy) { // overflow to +/-Inf.
322     return {Real{Infinity()}, Overflow};
323   }
324   // Apply any negative decimal exponent by multiplication
325   // by a power of two, adjusting the binary exponent to compensate.
326   IntermediateFloat<PREC> f;
327   while (exponent_ < log10Radix) {
328     // x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9)
329     f.AdjustExponent(-9);
330     digitLimit_ = digits_;
331     if (int carry{MultiplyWithoutNormalization<512>()}) {
332       // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
333       PushCarry(carry);
334       exponent_ += log10Radix;
335     }
336   }
337   // Apply any positive decimal exponent greater than
338   // is needed to treat the topmost digit as an integer
339   // part by multiplying by 10 or 10000 repeatedly.
340   while (exponent_ > log10Radix) {
341     digitLimit_ = digits_;
342     int carry;
343     if (exponent_ >= log10Radix + 4) {
344       // x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4)
345       exponent_ -= 4;
346       carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>();
347       f.AdjustExponent(4);
348     } else {
349       // x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1)
350       --exponent_;
351       carry = MultiplyWithoutNormalization<5>();
352       f.AdjustExponent(1);
353     }
354     if (carry != 0) {
355       // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
356       PushCarry(carry);
357       exponent_ += log10Radix;
358     }
359   }
360   // So exponent_ is now log10Radix, meaning that the
361   // MSD can be taken as an integer part and transferred
362   // to the binary result.
363   // x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex)
364   int guardShift{f.SetTo(digit_[--digits_])};
365   // Transfer additional bits until the result is normal.
366   digitLimit_ = digits_;
367   while (!f.IsFull()) {
368     // x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1)
369     f.AdjustExponent(-1);
370     std::uint32_t carry = MultiplyWithoutNormalization<2>();
371     f.ShiftIn(carry);
372   }
373   // Get the next few bits for rounding.  Allow for some guard bits
374   // that may have already been set in f.SetTo() above.
375   int guard{0};
376   if (guardShift == 0) {
377     guard = MultiplyWithoutNormalization<4>();
378   } else if (guardShift == 1) {
379     guard = MultiplyWithoutNormalization<2>();
380   }
381   guard = guard + guard + !IsZero();
382   f.SetGuard(guard);
383   return f.ToBinary(isNegative_, rounding_);
384 }
385 
386 template <int PREC, int LOG10RADIX>
387 ConversionToBinaryResult<PREC>
ConvertToBinary(const char * & p)388 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary(const char *&p) {
389   bool inexact{false};
390   if (ParseNumber(p, inexact)) {
391     auto result{ConvertToBinary()};
392     if (inexact) {
393       result.flags =
394           static_cast<enum ConversionResultFlags>(result.flags | Inexact);
395     }
396     return result;
397   } else {
398     // Could not parse a decimal floating-point number.  p has been
399     // advanced over any leading spaces.
400     if (toupper(p[0]) == 'N' && toupper(p[1]) == 'A' && toupper(p[2]) == 'N') {
401       // NaN
402       p += 3;
403       return {Real{NaN()}};
404     } else {
405       // Try to parse Inf, maybe with a sign
406       const char *q{p};
407       isNegative_ = *q == '-';
408       if (*q == '-' || *q == '+') {
409         ++q;
410       }
411       if (toupper(q[0]) == 'I' && toupper(q[1]) == 'N' &&
412           toupper(q[2]) == 'F') {
413         p = q + 3;
414         return {Real{Infinity()}};
415       } else {
416         // Invalid input
417         return {Real{NaN()}, Invalid};
418       }
419     }
420   }
421 }
422 
423 template <int PREC>
ConvertToBinary(const char * & p,enum FortranRounding rounding)424 ConversionToBinaryResult<PREC> ConvertToBinary(
425     const char *&p, enum FortranRounding rounding) {
426   return BigRadixFloatingPointNumber<PREC>{rounding}.ConvertToBinary(p);
427 }
428 
429 template ConversionToBinaryResult<8> ConvertToBinary<8>(
430     const char *&, enum FortranRounding);
431 template ConversionToBinaryResult<11> ConvertToBinary<11>(
432     const char *&, enum FortranRounding);
433 template ConversionToBinaryResult<24> ConvertToBinary<24>(
434     const char *&, enum FortranRounding);
435 template ConversionToBinaryResult<53> ConvertToBinary<53>(
436     const char *&, enum FortranRounding);
437 template ConversionToBinaryResult<64> ConvertToBinary<64>(
438     const char *&, enum FortranRounding);
439 template ConversionToBinaryResult<113> ConvertToBinary<113>(
440     const char *&, enum FortranRounding);
441 
442 extern "C" {
ConvertDecimalToFloat(const char ** p,float * f,enum FortranRounding rounding)443 enum ConversionResultFlags ConvertDecimalToFloat(
444     const char **p, float *f, enum FortranRounding rounding) {
445   auto result{Fortran::decimal::ConvertToBinary<24>(*p, rounding)};
446   std::memcpy(reinterpret_cast<void *>(f),
447       reinterpret_cast<const void *>(&result.binary), sizeof *f);
448   return result.flags;
449 }
ConvertDecimalToDouble(const char ** p,double * d,enum FortranRounding rounding)450 enum ConversionResultFlags ConvertDecimalToDouble(
451     const char **p, double *d, enum FortranRounding rounding) {
452   auto result{Fortran::decimal::ConvertToBinary<53>(*p, rounding)};
453   std::memcpy(reinterpret_cast<void *>(d),
454       reinterpret_cast<const void *>(&result.binary), sizeof *d);
455   return result.flags;
456 }
457 #if __x86_64__ && !defined(_MSC_VER)
ConvertDecimalToLongDouble(const char ** p,long double * ld,enum FortranRounding rounding)458 enum ConversionResultFlags ConvertDecimalToLongDouble(
459     const char **p, long double *ld, enum FortranRounding rounding) {
460   auto result{Fortran::decimal::ConvertToBinary<64>(*p, rounding)};
461   std::memcpy(reinterpret_cast<void *>(ld),
462       reinterpret_cast<const void *>(&result.binary), sizeof *ld);
463   return result.flags;
464 }
465 #endif
466 }
467 } // namespace Fortran::decimal
468