• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <cstring>
27 #include <limits.h>
28 
29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
30   do {                                                                         \
31     if (usesLayout<IEEEFloat>(getSemantics()))                                 \
32       return U.IEEE.METHOD_CALL;                                               \
33     if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
34       return U.Double.METHOD_CALL;                                             \
35     llvm_unreachable("Unexpected semantics");                                  \
36   } while (false)
37 
38 using namespace llvm;
39 
40 /// A macro used to combine two fcCategory enums into one key which can be used
41 /// in a switch statement to classify how the interaction of two APFloat's
42 /// categories affects an operation.
43 ///
44 /// TODO: If clang source code is ever allowed to use constexpr in its own
45 /// codebase, change this into a static inline function.
46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
47 
48 /* Assumed in hexadecimal significand parsing, and conversion to
49    hexadecimal strings.  */
50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
51 
52 namespace llvm {
53   /* Represents floating point arithmetic semantics.  */
54   struct fltSemantics {
55     /* The largest E such that 2^E is representable; this matches the
56        definition of IEEE 754.  */
57     APFloatBase::ExponentType maxExponent;
58 
59     /* The smallest E such that 2^E is a normalized number; this
60        matches the definition of IEEE 754.  */
61     APFloatBase::ExponentType minExponent;
62 
63     /* Number of bits in the significand.  This includes the integer
64        bit.  */
65     unsigned int precision;
66 
67     /* Number of bits actually used in the semantics. */
68     unsigned int sizeInBits;
69   };
70 
71   static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
72   static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
73   static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
74   static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
75   static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
76   static const fltSemantics semBogus = {0, 0, 0, 0};
77 
78   /* The IBM double-double semantics. Such a number consists of a pair of IEEE
79      64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
80      (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
81      Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
82      to each other, and two 11-bit exponents.
83 
84      Note: we need to make the value different from semBogus as otherwise
85      an unsafe optimization may collapse both values to a single address,
86      and we heavily rely on them having distinct addresses.             */
87   static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
88 
89   /* These are legacy semantics for the fallback, inaccrurate implementation of
90      IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
91      operation. It's equivalent to having an IEEE number with consecutive 106
92      bits of mantissa and 11 bits of exponent.
93 
94      It's not equivalent to IBM double-double. For example, a legit IBM
95      double-double, 1 + epsilon:
96 
97        1 + epsilon = 1 + (1 >> 1076)
98 
99      is not representable by a consecutive 106 bits of mantissa.
100 
101      Currently, these semantics are used in the following way:
102 
103        semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
104        (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
105        semPPCDoubleDoubleLegacy -> IEEE operations
106 
107      We use bitcastToAPInt() to get the bit representation (in APInt) of the
108      underlying IEEEdouble, then use the APInt constructor to construct the
109      legacy IEEE float.
110 
111      TODO: Implement all operations in semPPCDoubleDouble, and delete these
112      semantics.  */
113   static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
114                                                         53 + 53, 128};
115 
EnumToSemantics(Semantics S)116   const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
117     switch (S) {
118     case S_IEEEhalf:
119       return IEEEhalf();
120     case S_IEEEsingle:
121       return IEEEsingle();
122     case S_IEEEdouble:
123       return IEEEdouble();
124     case S_x87DoubleExtended:
125       return x87DoubleExtended();
126     case S_IEEEquad:
127       return IEEEquad();
128     case S_PPCDoubleDouble:
129       return PPCDoubleDouble();
130     }
131     llvm_unreachable("Unrecognised floating semantics");
132   }
133 
134   APFloatBase::Semantics
SemanticsToEnum(const llvm::fltSemantics & Sem)135   APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
136     if (&Sem == &llvm::APFloat::IEEEhalf())
137       return S_IEEEhalf;
138     else if (&Sem == &llvm::APFloat::IEEEsingle())
139       return S_IEEEsingle;
140     else if (&Sem == &llvm::APFloat::IEEEdouble())
141       return S_IEEEdouble;
142     else if (&Sem == &llvm::APFloat::x87DoubleExtended())
143       return S_x87DoubleExtended;
144     else if (&Sem == &llvm::APFloat::IEEEquad())
145       return S_IEEEquad;
146     else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
147       return S_PPCDoubleDouble;
148     else
149       llvm_unreachable("Unknown floating semantics");
150   }
151 
IEEEhalf()152   const fltSemantics &APFloatBase::IEEEhalf() {
153     return semIEEEhalf;
154   }
IEEEsingle()155   const fltSemantics &APFloatBase::IEEEsingle() {
156     return semIEEEsingle;
157   }
IEEEdouble()158   const fltSemantics &APFloatBase::IEEEdouble() {
159     return semIEEEdouble;
160   }
IEEEquad()161   const fltSemantics &APFloatBase::IEEEquad() {
162     return semIEEEquad;
163   }
x87DoubleExtended()164   const fltSemantics &APFloatBase::x87DoubleExtended() {
165     return semX87DoubleExtended;
166   }
Bogus()167   const fltSemantics &APFloatBase::Bogus() {
168     return semBogus;
169   }
PPCDoubleDouble()170   const fltSemantics &APFloatBase::PPCDoubleDouble() {
171     return semPPCDoubleDouble;
172   }
173 
174   /* A tight upper bound on number of parts required to hold the value
175      pow(5, power) is
176 
177        power * 815 / (351 * integerPartWidth) + 1
178 
179      However, whilst the result may require only this many parts,
180      because we are multiplying two values to get it, the
181      multiplication may require an extra part with the excess part
182      being zero (consider the trivial case of 1 * 1, tcFullMultiply
183      requires two parts to hold the single-part result).  So we add an
184      extra one to guarantee enough space whilst multiplying.  */
185   const unsigned int maxExponent = 16383;
186   const unsigned int maxPrecision = 113;
187   const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
188   const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
189 
semanticsPrecision(const fltSemantics & semantics)190   unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
191     return semantics.precision;
192   }
193   APFloatBase::ExponentType
semanticsMaxExponent(const fltSemantics & semantics)194   APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
195     return semantics.maxExponent;
196   }
197   APFloatBase::ExponentType
semanticsMinExponent(const fltSemantics & semantics)198   APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
199     return semantics.minExponent;
200   }
semanticsSizeInBits(const fltSemantics & semantics)201   unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
202     return semantics.sizeInBits;
203   }
204 
getSizeInBits(const fltSemantics & Sem)205   unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
206     return Sem.sizeInBits;
207 }
208 
209 /* A bunch of private, handy routines.  */
210 
createError(const Twine & Err)211 static inline Error createError(const Twine &Err) {
212   return make_error<StringError>(Err, inconvertibleErrorCode());
213 }
214 
215 static inline unsigned int
partCountForBits(unsigned int bits)216 partCountForBits(unsigned int bits)
217 {
218   return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
219 }
220 
221 /* Returns 0U-9U.  Return values >= 10U are not digits.  */
222 static inline unsigned int
decDigitValue(unsigned int c)223 decDigitValue(unsigned int c)
224 {
225   return c - '0';
226 }
227 
228 /* Return the value of a decimal exponent of the form
229    [+-]ddddddd.
230 
231    If the exponent overflows, returns a large exponent with the
232    appropriate sign.  */
readExponent(StringRef::iterator begin,StringRef::iterator end)233 static Expected<int> readExponent(StringRef::iterator begin,
234                                   StringRef::iterator end) {
235   bool isNegative;
236   unsigned int absExponent;
237   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
238   StringRef::iterator p = begin;
239 
240   // Treat no exponent as 0 to match binutils
241   if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
242     return 0;
243   }
244 
245   isNegative = (*p == '-');
246   if (*p == '-' || *p == '+') {
247     p++;
248     if (p == end)
249       return createError("Exponent has no digits");
250   }
251 
252   absExponent = decDigitValue(*p++);
253   if (absExponent >= 10U)
254     return createError("Invalid character in exponent");
255 
256   for (; p != end; ++p) {
257     unsigned int value;
258 
259     value = decDigitValue(*p);
260     if (value >= 10U)
261       return createError("Invalid character in exponent");
262 
263     absExponent = absExponent * 10U + value;
264     if (absExponent >= overlargeExponent) {
265       absExponent = overlargeExponent;
266       break;
267     }
268   }
269 
270   if (isNegative)
271     return -(int) absExponent;
272   else
273     return (int) absExponent;
274 }
275 
276 /* This is ugly and needs cleaning up, but I don't immediately see
277    how whilst remaining safe.  */
totalExponent(StringRef::iterator p,StringRef::iterator end,int exponentAdjustment)278 static Expected<int> totalExponent(StringRef::iterator p,
279                                    StringRef::iterator end,
280                                    int exponentAdjustment) {
281   int unsignedExponent;
282   bool negative, overflow;
283   int exponent = 0;
284 
285   if (p == end)
286     return createError("Exponent has no digits");
287 
288   negative = *p == '-';
289   if (*p == '-' || *p == '+') {
290     p++;
291     if (p == end)
292       return createError("Exponent has no digits");
293   }
294 
295   unsignedExponent = 0;
296   overflow = false;
297   for (; p != end; ++p) {
298     unsigned int value;
299 
300     value = decDigitValue(*p);
301     if (value >= 10U)
302       return createError("Invalid character in exponent");
303 
304     unsignedExponent = unsignedExponent * 10 + value;
305     if (unsignedExponent > 32767) {
306       overflow = true;
307       break;
308     }
309   }
310 
311   if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
312     overflow = true;
313 
314   if (!overflow) {
315     exponent = unsignedExponent;
316     if (negative)
317       exponent = -exponent;
318     exponent += exponentAdjustment;
319     if (exponent > 32767 || exponent < -32768)
320       overflow = true;
321   }
322 
323   if (overflow)
324     exponent = negative ? -32768: 32767;
325 
326   return exponent;
327 }
328 
329 static Expected<StringRef::iterator>
skipLeadingZeroesAndAnyDot(StringRef::iterator begin,StringRef::iterator end,StringRef::iterator * dot)330 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
331                            StringRef::iterator *dot) {
332   StringRef::iterator p = begin;
333   *dot = end;
334   while (p != end && *p == '0')
335     p++;
336 
337   if (p != end && *p == '.') {
338     *dot = p++;
339 
340     if (end - begin == 1)
341       return createError("Significand has no digits");
342 
343     while (p != end && *p == '0')
344       p++;
345   }
346 
347   return p;
348 }
349 
350 /* Given a normal decimal floating point number of the form
351 
352      dddd.dddd[eE][+-]ddd
353 
354    where the decimal point and exponent are optional, fill out the
355    structure D.  Exponent is appropriate if the significand is
356    treated as an integer, and normalizedExponent if the significand
357    is taken to have the decimal point after a single leading
358    non-zero digit.
359 
360    If the value is zero, V->firstSigDigit points to a non-digit, and
361    the return exponent is zero.
362 */
363 struct decimalInfo {
364   const char *firstSigDigit;
365   const char *lastSigDigit;
366   int exponent;
367   int normalizedExponent;
368 };
369 
interpretDecimal(StringRef::iterator begin,StringRef::iterator end,decimalInfo * D)370 static Error interpretDecimal(StringRef::iterator begin,
371                               StringRef::iterator end, decimalInfo *D) {
372   StringRef::iterator dot = end;
373 
374   auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
375   if (!PtrOrErr)
376     return PtrOrErr.takeError();
377   StringRef::iterator p = *PtrOrErr;
378 
379   D->firstSigDigit = p;
380   D->exponent = 0;
381   D->normalizedExponent = 0;
382 
383   for (; p != end; ++p) {
384     if (*p == '.') {
385       if (dot != end)
386         return createError("String contains multiple dots");
387       dot = p++;
388       if (p == end)
389         break;
390     }
391     if (decDigitValue(*p) >= 10U)
392       break;
393   }
394 
395   if (p != end) {
396     if (*p != 'e' && *p != 'E')
397       return createError("Invalid character in significand");
398     if (p == begin)
399       return createError("Significand has no digits");
400     if (dot != end && p - begin == 1)
401       return createError("Significand has no digits");
402 
403     /* p points to the first non-digit in the string */
404     auto ExpOrErr = readExponent(p + 1, end);
405     if (!ExpOrErr)
406       return ExpOrErr.takeError();
407     D->exponent = *ExpOrErr;
408 
409     /* Implied decimal point?  */
410     if (dot == end)
411       dot = p;
412   }
413 
414   /* If number is all zeroes accept any exponent.  */
415   if (p != D->firstSigDigit) {
416     /* Drop insignificant trailing zeroes.  */
417     if (p != begin) {
418       do
419         do
420           p--;
421         while (p != begin && *p == '0');
422       while (p != begin && *p == '.');
423     }
424 
425     /* Adjust the exponents for any decimal point.  */
426     D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
427     D->normalizedExponent = (D->exponent +
428               static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
429                                       - (dot > D->firstSigDigit && dot < p)));
430   }
431 
432   D->lastSigDigit = p;
433   return Error::success();
434 }
435 
436 /* Return the trailing fraction of a hexadecimal number.
437    DIGITVALUE is the first hex digit of the fraction, P points to
438    the next digit.  */
439 static Expected<lostFraction>
trailingHexadecimalFraction(StringRef::iterator p,StringRef::iterator end,unsigned int digitValue)440 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
441                             unsigned int digitValue) {
442   unsigned int hexDigit;
443 
444   /* If the first trailing digit isn't 0 or 8 we can work out the
445      fraction immediately.  */
446   if (digitValue > 8)
447     return lfMoreThanHalf;
448   else if (digitValue < 8 && digitValue > 0)
449     return lfLessThanHalf;
450 
451   // Otherwise we need to find the first non-zero digit.
452   while (p != end && (*p == '0' || *p == '.'))
453     p++;
454 
455   if (p == end)
456     return createError("Invalid trailing hexadecimal fraction!");
457 
458   hexDigit = hexDigitValue(*p);
459 
460   /* If we ran off the end it is exactly zero or one-half, otherwise
461      a little more.  */
462   if (hexDigit == -1U)
463     return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
464   else
465     return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
466 }
467 
468 /* Return the fraction lost were a bignum truncated losing the least
469    significant BITS bits.  */
470 static lostFraction
lostFractionThroughTruncation(const APFloatBase::integerPart * parts,unsigned int partCount,unsigned int bits)471 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
472                               unsigned int partCount,
473                               unsigned int bits)
474 {
475   unsigned int lsb;
476 
477   lsb = APInt::tcLSB(parts, partCount);
478 
479   /* Note this is guaranteed true if bits == 0, or LSB == -1U.  */
480   if (bits <= lsb)
481     return lfExactlyZero;
482   if (bits == lsb + 1)
483     return lfExactlyHalf;
484   if (bits <= partCount * APFloatBase::integerPartWidth &&
485       APInt::tcExtractBit(parts, bits - 1))
486     return lfMoreThanHalf;
487 
488   return lfLessThanHalf;
489 }
490 
491 /* Shift DST right BITS bits noting lost fraction.  */
492 static lostFraction
shiftRight(APFloatBase::integerPart * dst,unsigned int parts,unsigned int bits)493 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
494 {
495   lostFraction lost_fraction;
496 
497   lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
498 
499   APInt::tcShiftRight(dst, parts, bits);
500 
501   return lost_fraction;
502 }
503 
504 /* Combine the effect of two lost fractions.  */
505 static lostFraction
combineLostFractions(lostFraction moreSignificant,lostFraction lessSignificant)506 combineLostFractions(lostFraction moreSignificant,
507                      lostFraction lessSignificant)
508 {
509   if (lessSignificant != lfExactlyZero) {
510     if (moreSignificant == lfExactlyZero)
511       moreSignificant = lfLessThanHalf;
512     else if (moreSignificant == lfExactlyHalf)
513       moreSignificant = lfMoreThanHalf;
514   }
515 
516   return moreSignificant;
517 }
518 
519 /* The error from the true value, in half-ulps, on multiplying two
520    floating point numbers, which differ from the value they
521    approximate by at most HUE1 and HUE2 half-ulps, is strictly less
522    than the returned value.
523 
524    See "How to Read Floating Point Numbers Accurately" by William D
525    Clinger.  */
526 static unsigned int
HUerrBound(bool inexactMultiply,unsigned int HUerr1,unsigned int HUerr2)527 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
528 {
529   assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
530 
531   if (HUerr1 + HUerr2 == 0)
532     return inexactMultiply * 2;  /* <= inexactMultiply half-ulps.  */
533   else
534     return inexactMultiply + 2 * (HUerr1 + HUerr2);
535 }
536 
537 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
538    when the least significant BITS are truncated.  BITS cannot be
539    zero.  */
540 static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart * parts,unsigned int bits,bool isNearest)541 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
542                  bool isNearest) {
543   unsigned int count, partBits;
544   APFloatBase::integerPart part, boundary;
545 
546   assert(bits != 0);
547 
548   bits--;
549   count = bits / APFloatBase::integerPartWidth;
550   partBits = bits % APFloatBase::integerPartWidth + 1;
551 
552   part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
553 
554   if (isNearest)
555     boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
556   else
557     boundary = 0;
558 
559   if (count == 0) {
560     if (part - boundary <= boundary - part)
561       return part - boundary;
562     else
563       return boundary - part;
564   }
565 
566   if (part == boundary) {
567     while (--count)
568       if (parts[count])
569         return ~(APFloatBase::integerPart) 0; /* A lot.  */
570 
571     return parts[0];
572   } else if (part == boundary - 1) {
573     while (--count)
574       if (~parts[count])
575         return ~(APFloatBase::integerPart) 0; /* A lot.  */
576 
577     return -parts[0];
578   }
579 
580   return ~(APFloatBase::integerPart) 0; /* A lot.  */
581 }
582 
583 /* Place pow(5, power) in DST, and return the number of parts used.
584    DST must be at least one part larger than size of the answer.  */
585 static unsigned int
powerOf5(APFloatBase::integerPart * dst,unsigned int power)586 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
587   static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
588   APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
589   pow5s[0] = 78125 * 5;
590 
591   unsigned int partsCount[16] = { 1 };
592   APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
593   unsigned int result;
594   assert(power <= maxExponent);
595 
596   p1 = dst;
597   p2 = scratch;
598 
599   *p1 = firstEightPowers[power & 7];
600   power >>= 3;
601 
602   result = 1;
603   pow5 = pow5s;
604 
605   for (unsigned int n = 0; power; power >>= 1, n++) {
606     unsigned int pc;
607 
608     pc = partsCount[n];
609 
610     /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
611     if (pc == 0) {
612       pc = partsCount[n - 1];
613       APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
614       pc *= 2;
615       if (pow5[pc - 1] == 0)
616         pc--;
617       partsCount[n] = pc;
618     }
619 
620     if (power & 1) {
621       APFloatBase::integerPart *tmp;
622 
623       APInt::tcFullMultiply(p2, p1, pow5, result, pc);
624       result += pc;
625       if (p2[result - 1] == 0)
626         result--;
627 
628       /* Now result is in p1 with partsCount parts and p2 is scratch
629          space.  */
630       tmp = p1;
631       p1 = p2;
632       p2 = tmp;
633     }
634 
635     pow5 += pc;
636   }
637 
638   if (p1 != dst)
639     APInt::tcAssign(dst, p1, result);
640 
641   return result;
642 }
643 
644 /* Zero at the end to avoid modular arithmetic when adding one; used
645    when rounding up during hexadecimal output.  */
646 static const char hexDigitsLower[] = "0123456789abcdef0";
647 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
648 static const char infinityL[] = "infinity";
649 static const char infinityU[] = "INFINITY";
650 static const char NaNL[] = "nan";
651 static const char NaNU[] = "NAN";
652 
653 /* Write out an integerPart in hexadecimal, starting with the most
654    significant nibble.  Write out exactly COUNT hexdigits, return
655    COUNT.  */
656 static unsigned int
partAsHex(char * dst,APFloatBase::integerPart part,unsigned int count,const char * hexDigitChars)657 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
658            const char *hexDigitChars)
659 {
660   unsigned int result = count;
661 
662   assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
663 
664   part >>= (APFloatBase::integerPartWidth - 4 * count);
665   while (count--) {
666     dst[count] = hexDigitChars[part & 0xf];
667     part >>= 4;
668   }
669 
670   return result;
671 }
672 
673 /* Write out an unsigned decimal integer.  */
674 static char *
writeUnsignedDecimal(char * dst,unsigned int n)675 writeUnsignedDecimal (char *dst, unsigned int n)
676 {
677   char buff[40], *p;
678 
679   p = buff;
680   do
681     *p++ = '0' + n % 10;
682   while (n /= 10);
683 
684   do
685     *dst++ = *--p;
686   while (p != buff);
687 
688   return dst;
689 }
690 
691 /* Write out a signed decimal integer.  */
692 static char *
writeSignedDecimal(char * dst,int value)693 writeSignedDecimal (char *dst, int value)
694 {
695   if (value < 0) {
696     *dst++ = '-';
697     dst = writeUnsignedDecimal(dst, -(unsigned) value);
698   } else
699     dst = writeUnsignedDecimal(dst, value);
700 
701   return dst;
702 }
703 
704 namespace detail {
705 /* Constructors.  */
initialize(const fltSemantics * ourSemantics)706 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
707   unsigned int count;
708 
709   semantics = ourSemantics;
710   count = partCount();
711   if (count > 1)
712     significand.parts = new integerPart[count];
713 }
714 
freeSignificand()715 void IEEEFloat::freeSignificand() {
716   if (needsCleanup())
717     delete [] significand.parts;
718 }
719 
assign(const IEEEFloat & rhs)720 void IEEEFloat::assign(const IEEEFloat &rhs) {
721   assert(semantics == rhs.semantics);
722 
723   sign = rhs.sign;
724   category = rhs.category;
725   exponent = rhs.exponent;
726   if (isFiniteNonZero() || category == fcNaN)
727     copySignificand(rhs);
728 }
729 
copySignificand(const IEEEFloat & rhs)730 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
731   assert(isFiniteNonZero() || category == fcNaN);
732   assert(rhs.partCount() >= partCount());
733 
734   APInt::tcAssign(significandParts(), rhs.significandParts(),
735                   partCount());
736 }
737 
738 /* Make this number a NaN, with an arbitrary but deterministic value
739    for the significand.  If double or longer, this is a signalling NaN,
740    which may not be ideal.  If float, this is QNaN(0).  */
makeNaN(bool SNaN,bool Negative,const APInt * fill)741 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
742   category = fcNaN;
743   sign = Negative;
744 
745   integerPart *significand = significandParts();
746   unsigned numParts = partCount();
747 
748   // Set the significand bits to the fill.
749   if (!fill || fill->getNumWords() < numParts)
750     APInt::tcSet(significand, 0, numParts);
751   if (fill) {
752     APInt::tcAssign(significand, fill->getRawData(),
753                     std::min(fill->getNumWords(), numParts));
754 
755     // Zero out the excess bits of the significand.
756     unsigned bitsToPreserve = semantics->precision - 1;
757     unsigned part = bitsToPreserve / 64;
758     bitsToPreserve %= 64;
759     significand[part] &= ((1ULL << bitsToPreserve) - 1);
760     for (part++; part != numParts; ++part)
761       significand[part] = 0;
762   }
763 
764   unsigned QNaNBit = semantics->precision - 2;
765 
766   if (SNaN) {
767     // We always have to clear the QNaN bit to make it an SNaN.
768     APInt::tcClearBit(significand, QNaNBit);
769 
770     // If there are no bits set in the payload, we have to set
771     // *something* to make it a NaN instead of an infinity;
772     // conventionally, this is the next bit down from the QNaN bit.
773     if (APInt::tcIsZero(significand, numParts))
774       APInt::tcSetBit(significand, QNaNBit - 1);
775   } else {
776     // We always have to set the QNaN bit to make it a QNaN.
777     APInt::tcSetBit(significand, QNaNBit);
778   }
779 
780   // For x87 extended precision, we want to make a NaN, not a
781   // pseudo-NaN.  Maybe we should expose the ability to make
782   // pseudo-NaNs?
783   if (semantics == &semX87DoubleExtended)
784     APInt::tcSetBit(significand, QNaNBit + 1);
785 }
786 
operator =(const IEEEFloat & rhs)787 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
788   if (this != &rhs) {
789     if (semantics != rhs.semantics) {
790       freeSignificand();
791       initialize(rhs.semantics);
792     }
793     assign(rhs);
794   }
795 
796   return *this;
797 }
798 
operator =(IEEEFloat && rhs)799 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
800   freeSignificand();
801 
802   semantics = rhs.semantics;
803   significand = rhs.significand;
804   exponent = rhs.exponent;
805   category = rhs.category;
806   sign = rhs.sign;
807 
808   rhs.semantics = &semBogus;
809   return *this;
810 }
811 
isDenormal() const812 bool IEEEFloat::isDenormal() const {
813   return isFiniteNonZero() && (exponent == semantics->minExponent) &&
814          (APInt::tcExtractBit(significandParts(),
815                               semantics->precision - 1) == 0);
816 }
817 
isSmallest() const818 bool IEEEFloat::isSmallest() const {
819   // The smallest number by magnitude in our format will be the smallest
820   // denormal, i.e. the floating point number with exponent being minimum
821   // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
822   return isFiniteNonZero() && exponent == semantics->minExponent &&
823     significandMSB() == 0;
824 }
825 
isSignificandAllOnes() const826 bool IEEEFloat::isSignificandAllOnes() const {
827   // Test if the significand excluding the integral bit is all ones. This allows
828   // us to test for binade boundaries.
829   const integerPart *Parts = significandParts();
830   const unsigned PartCount = partCount();
831   for (unsigned i = 0; i < PartCount - 1; i++)
832     if (~Parts[i])
833       return false;
834 
835   // Set the unused high bits to all ones when we compare.
836   const unsigned NumHighBits =
837     PartCount*integerPartWidth - semantics->precision + 1;
838   assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
839          "fill than integerPartWidth");
840   const integerPart HighBitFill =
841     ~integerPart(0) << (integerPartWidth - NumHighBits);
842   if (~(Parts[PartCount - 1] | HighBitFill))
843     return false;
844 
845   return true;
846 }
847 
isSignificandAllZeros() const848 bool IEEEFloat::isSignificandAllZeros() const {
849   // Test if the significand excluding the integral bit is all zeros. This
850   // allows us to test for binade boundaries.
851   const integerPart *Parts = significandParts();
852   const unsigned PartCount = partCount();
853 
854   for (unsigned i = 0; i < PartCount - 1; i++)
855     if (Parts[i])
856       return false;
857 
858   const unsigned NumHighBits =
859     PartCount*integerPartWidth - semantics->precision + 1;
860   assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
861          "clear than integerPartWidth");
862   const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
863 
864   if (Parts[PartCount - 1] & HighBitMask)
865     return false;
866 
867   return true;
868 }
869 
isLargest() const870 bool IEEEFloat::isLargest() const {
871   // The largest number by magnitude in our format will be the floating point
872   // number with maximum exponent and with significand that is all ones.
873   return isFiniteNonZero() && exponent == semantics->maxExponent
874     && isSignificandAllOnes();
875 }
876 
isInteger() const877 bool IEEEFloat::isInteger() const {
878   // This could be made more efficient; I'm going for obviously correct.
879   if (!isFinite()) return false;
880   IEEEFloat truncated = *this;
881   truncated.roundToIntegral(rmTowardZero);
882   return compare(truncated) == cmpEqual;
883 }
884 
bitwiseIsEqual(const IEEEFloat & rhs) const885 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
886   if (this == &rhs)
887     return true;
888   if (semantics != rhs.semantics ||
889       category != rhs.category ||
890       sign != rhs.sign)
891     return false;
892   if (category==fcZero || category==fcInfinity)
893     return true;
894 
895   if (isFiniteNonZero() && exponent != rhs.exponent)
896     return false;
897 
898   return std::equal(significandParts(), significandParts() + partCount(),
899                     rhs.significandParts());
900 }
901 
IEEEFloat(const fltSemantics & ourSemantics,integerPart value)902 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
903   initialize(&ourSemantics);
904   sign = 0;
905   category = fcNormal;
906   zeroSignificand();
907   exponent = ourSemantics.precision - 1;
908   significandParts()[0] = value;
909   normalize(rmNearestTiesToEven, lfExactlyZero);
910 }
911 
IEEEFloat(const fltSemantics & ourSemantics)912 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
913   initialize(&ourSemantics);
914   category = fcZero;
915   sign = false;
916 }
917 
918 // Delegate to the previous constructor, because later copy constructor may
919 // actually inspects category, which can't be garbage.
IEEEFloat(const fltSemantics & ourSemantics,uninitializedTag tag)920 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
921     : IEEEFloat(ourSemantics) {}
922 
IEEEFloat(const IEEEFloat & rhs)923 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
924   initialize(rhs.semantics);
925   assign(rhs);
926 }
927 
IEEEFloat(IEEEFloat && rhs)928 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
929   *this = std::move(rhs);
930 }
931 
~IEEEFloat()932 IEEEFloat::~IEEEFloat() { freeSignificand(); }
933 
partCount() const934 unsigned int IEEEFloat::partCount() const {
935   return partCountForBits(semantics->precision + 1);
936 }
937 
significandParts() const938 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
939   return const_cast<IEEEFloat *>(this)->significandParts();
940 }
941 
significandParts()942 IEEEFloat::integerPart *IEEEFloat::significandParts() {
943   if (partCount() > 1)
944     return significand.parts;
945   else
946     return &significand.part;
947 }
948 
zeroSignificand()949 void IEEEFloat::zeroSignificand() {
950   APInt::tcSet(significandParts(), 0, partCount());
951 }
952 
953 /* Increment an fcNormal floating point number's significand.  */
incrementSignificand()954 void IEEEFloat::incrementSignificand() {
955   integerPart carry;
956 
957   carry = APInt::tcIncrement(significandParts(), partCount());
958 
959   /* Our callers should never cause us to overflow.  */
960   assert(carry == 0);
961   (void)carry;
962 }
963 
964 /* Add the significand of the RHS.  Returns the carry flag.  */
addSignificand(const IEEEFloat & rhs)965 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
966   integerPart *parts;
967 
968   parts = significandParts();
969 
970   assert(semantics == rhs.semantics);
971   assert(exponent == rhs.exponent);
972 
973   return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
974 }
975 
976 /* Subtract the significand of the RHS with a borrow flag.  Returns
977    the borrow flag.  */
subtractSignificand(const IEEEFloat & rhs,integerPart borrow)978 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
979                                                       integerPart borrow) {
980   integerPart *parts;
981 
982   parts = significandParts();
983 
984   assert(semantics == rhs.semantics);
985   assert(exponent == rhs.exponent);
986 
987   return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
988                            partCount());
989 }
990 
991 /* Multiply the significand of the RHS.  If ADDEND is non-NULL, add it
992    on to the full-precision result of the multiplication.  Returns the
993    lost fraction.  */
multiplySignificand(const IEEEFloat & rhs,IEEEFloat addend)994 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
995                                             IEEEFloat addend) {
996   unsigned int omsb;        // One, not zero, based MSB.
997   unsigned int partsCount, newPartsCount, precision;
998   integerPart *lhsSignificand;
999   integerPart scratch[4];
1000   integerPart *fullSignificand;
1001   lostFraction lost_fraction;
1002   bool ignored;
1003 
1004   assert(semantics == rhs.semantics);
1005 
1006   precision = semantics->precision;
1007 
1008   // Allocate space for twice as many bits as the original significand, plus one
1009   // extra bit for the addition to overflow into.
1010   newPartsCount = partCountForBits(precision * 2 + 1);
1011 
1012   if (newPartsCount > 4)
1013     fullSignificand = new integerPart[newPartsCount];
1014   else
1015     fullSignificand = scratch;
1016 
1017   lhsSignificand = significandParts();
1018   partsCount = partCount();
1019 
1020   APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1021                         rhs.significandParts(), partsCount, partsCount);
1022 
1023   lost_fraction = lfExactlyZero;
1024   omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1025   exponent += rhs.exponent;
1026 
1027   // Assume the operands involved in the multiplication are single-precision
1028   // FP, and the two multiplicants are:
1029   //   *this = a23 . a22 ... a0 * 2^e1
1030   //     rhs = b23 . b22 ... b0 * 2^e2
1031   // the result of multiplication is:
1032   //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1033   // Note that there are three significant bits at the left-hand side of the
1034   // radix point: two for the multiplication, and an overflow bit for the
1035   // addition (that will always be zero at this point). Move the radix point
1036   // toward left by two bits, and adjust exponent accordingly.
1037   exponent += 2;
1038 
1039   if (addend.isNonZero()) {
1040     // The intermediate result of the multiplication has "2 * precision"
1041     // signicant bit; adjust the addend to be consistent with mul result.
1042     //
1043     Significand savedSignificand = significand;
1044     const fltSemantics *savedSemantics = semantics;
1045     fltSemantics extendedSemantics;
1046     opStatus status;
1047     unsigned int extendedPrecision;
1048 
1049     // Normalize our MSB to one below the top bit to allow for overflow.
1050     extendedPrecision = 2 * precision + 1;
1051     if (omsb != extendedPrecision - 1) {
1052       assert(extendedPrecision > omsb);
1053       APInt::tcShiftLeft(fullSignificand, newPartsCount,
1054                          (extendedPrecision - 1) - omsb);
1055       exponent -= (extendedPrecision - 1) - omsb;
1056     }
1057 
1058     /* Create new semantics.  */
1059     extendedSemantics = *semantics;
1060     extendedSemantics.precision = extendedPrecision;
1061 
1062     if (newPartsCount == 1)
1063       significand.part = fullSignificand[0];
1064     else
1065       significand.parts = fullSignificand;
1066     semantics = &extendedSemantics;
1067 
1068     // Make a copy so we can convert it to the extended semantics.
1069     // Note that we cannot convert the addend directly, as the extendedSemantics
1070     // is a local variable (which we take a reference to).
1071     IEEEFloat extendedAddend(addend);
1072     status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1073     assert(status == opOK);
1074     (void)status;
1075 
1076     // Shift the significand of the addend right by one bit. This guarantees
1077     // that the high bit of the significand is zero (same as fullSignificand),
1078     // so the addition will overflow (if it does overflow at all) into the top bit.
1079     lost_fraction = extendedAddend.shiftSignificandRight(1);
1080     assert(lost_fraction == lfExactlyZero &&
1081            "Lost precision while shifting addend for fused-multiply-add.");
1082 
1083     lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1084 
1085     /* Restore our state.  */
1086     if (newPartsCount == 1)
1087       fullSignificand[0] = significand.part;
1088     significand = savedSignificand;
1089     semantics = savedSemantics;
1090 
1091     omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1092   }
1093 
1094   // Convert the result having "2 * precision" significant-bits back to the one
1095   // having "precision" significant-bits. First, move the radix point from
1096   // poision "2*precision - 1" to "precision - 1". The exponent need to be
1097   // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1098   exponent -= precision + 1;
1099 
1100   // In case MSB resides at the left-hand side of radix point, shift the
1101   // mantissa right by some amount to make sure the MSB reside right before
1102   // the radix point (i.e. "MSB . rest-significant-bits").
1103   //
1104   // Note that the result is not normalized when "omsb < precision". So, the
1105   // caller needs to call IEEEFloat::normalize() if normalized value is
1106   // expected.
1107   if (omsb > precision) {
1108     unsigned int bits, significantParts;
1109     lostFraction lf;
1110 
1111     bits = omsb - precision;
1112     significantParts = partCountForBits(omsb);
1113     lf = shiftRight(fullSignificand, significantParts, bits);
1114     lost_fraction = combineLostFractions(lf, lost_fraction);
1115     exponent += bits;
1116   }
1117 
1118   APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1119 
1120   if (newPartsCount > 4)
1121     delete [] fullSignificand;
1122 
1123   return lost_fraction;
1124 }
1125 
multiplySignificand(const IEEEFloat & rhs)1126 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1127   return multiplySignificand(rhs, IEEEFloat(*semantics));
1128 }
1129 
1130 /* Multiply the significands of LHS and RHS to DST.  */
divideSignificand(const IEEEFloat & rhs)1131 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1132   unsigned int bit, i, partsCount;
1133   const integerPart *rhsSignificand;
1134   integerPart *lhsSignificand, *dividend, *divisor;
1135   integerPart scratch[4];
1136   lostFraction lost_fraction;
1137 
1138   assert(semantics == rhs.semantics);
1139 
1140   lhsSignificand = significandParts();
1141   rhsSignificand = rhs.significandParts();
1142   partsCount = partCount();
1143 
1144   if (partsCount > 2)
1145     dividend = new integerPart[partsCount * 2];
1146   else
1147     dividend = scratch;
1148 
1149   divisor = dividend + partsCount;
1150 
1151   /* Copy the dividend and divisor as they will be modified in-place.  */
1152   for (i = 0; i < partsCount; i++) {
1153     dividend[i] = lhsSignificand[i];
1154     divisor[i] = rhsSignificand[i];
1155     lhsSignificand[i] = 0;
1156   }
1157 
1158   exponent -= rhs.exponent;
1159 
1160   unsigned int precision = semantics->precision;
1161 
1162   /* Normalize the divisor.  */
1163   bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1164   if (bit) {
1165     exponent += bit;
1166     APInt::tcShiftLeft(divisor, partsCount, bit);
1167   }
1168 
1169   /* Normalize the dividend.  */
1170   bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1171   if (bit) {
1172     exponent -= bit;
1173     APInt::tcShiftLeft(dividend, partsCount, bit);
1174   }
1175 
1176   /* Ensure the dividend >= divisor initially for the loop below.
1177      Incidentally, this means that the division loop below is
1178      guaranteed to set the integer bit to one.  */
1179   if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1180     exponent--;
1181     APInt::tcShiftLeft(dividend, partsCount, 1);
1182     assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1183   }
1184 
1185   /* Long division.  */
1186   for (bit = precision; bit; bit -= 1) {
1187     if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1188       APInt::tcSubtract(dividend, divisor, 0, partsCount);
1189       APInt::tcSetBit(lhsSignificand, bit - 1);
1190     }
1191 
1192     APInt::tcShiftLeft(dividend, partsCount, 1);
1193   }
1194 
1195   /* Figure out the lost fraction.  */
1196   int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1197 
1198   if (cmp > 0)
1199     lost_fraction = lfMoreThanHalf;
1200   else if (cmp == 0)
1201     lost_fraction = lfExactlyHalf;
1202   else if (APInt::tcIsZero(dividend, partsCount))
1203     lost_fraction = lfExactlyZero;
1204   else
1205     lost_fraction = lfLessThanHalf;
1206 
1207   if (partsCount > 2)
1208     delete [] dividend;
1209 
1210   return lost_fraction;
1211 }
1212 
significandMSB() const1213 unsigned int IEEEFloat::significandMSB() const {
1214   return APInt::tcMSB(significandParts(), partCount());
1215 }
1216 
significandLSB() const1217 unsigned int IEEEFloat::significandLSB() const {
1218   return APInt::tcLSB(significandParts(), partCount());
1219 }
1220 
1221 /* Note that a zero result is NOT normalized to fcZero.  */
shiftSignificandRight(unsigned int bits)1222 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1223   /* Our exponent should not overflow.  */
1224   assert((ExponentType) (exponent + bits) >= exponent);
1225 
1226   exponent += bits;
1227 
1228   return shiftRight(significandParts(), partCount(), bits);
1229 }
1230 
1231 /* Shift the significand left BITS bits, subtract BITS from its exponent.  */
shiftSignificandLeft(unsigned int bits)1232 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1233   assert(bits < semantics->precision);
1234 
1235   if (bits) {
1236     unsigned int partsCount = partCount();
1237 
1238     APInt::tcShiftLeft(significandParts(), partsCount, bits);
1239     exponent -= bits;
1240 
1241     assert(!APInt::tcIsZero(significandParts(), partsCount));
1242   }
1243 }
1244 
1245 IEEEFloat::cmpResult
compareAbsoluteValue(const IEEEFloat & rhs) const1246 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1247   int compare;
1248 
1249   assert(semantics == rhs.semantics);
1250   assert(isFiniteNonZero());
1251   assert(rhs.isFiniteNonZero());
1252 
1253   compare = exponent - rhs.exponent;
1254 
1255   /* If exponents are equal, do an unsigned bignum comparison of the
1256      significands.  */
1257   if (compare == 0)
1258     compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1259                                partCount());
1260 
1261   if (compare > 0)
1262     return cmpGreaterThan;
1263   else if (compare < 0)
1264     return cmpLessThan;
1265   else
1266     return cmpEqual;
1267 }
1268 
1269 /* Handle overflow.  Sign is preserved.  We either become infinity or
1270    the largest finite number.  */
handleOverflow(roundingMode rounding_mode)1271 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1272   /* Infinity?  */
1273   if (rounding_mode == rmNearestTiesToEven ||
1274       rounding_mode == rmNearestTiesToAway ||
1275       (rounding_mode == rmTowardPositive && !sign) ||
1276       (rounding_mode == rmTowardNegative && sign)) {
1277     category = fcInfinity;
1278     return (opStatus) (opOverflow | opInexact);
1279   }
1280 
1281   /* Otherwise we become the largest finite number.  */
1282   category = fcNormal;
1283   exponent = semantics->maxExponent;
1284   APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1285                                    semantics->precision);
1286 
1287   return opInexact;
1288 }
1289 
1290 /* Returns TRUE if, when truncating the current number, with BIT the
1291    new LSB, with the given lost fraction and rounding mode, the result
1292    would need to be rounded away from zero (i.e., by increasing the
1293    signficand).  This routine must work for fcZero of both signs, and
1294    fcNormal numbers.  */
roundAwayFromZero(roundingMode rounding_mode,lostFraction lost_fraction,unsigned int bit) const1295 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1296                                   lostFraction lost_fraction,
1297                                   unsigned int bit) const {
1298   /* NaNs and infinities should not have lost fractions.  */
1299   assert(isFiniteNonZero() || category == fcZero);
1300 
1301   /* Current callers never pass this so we don't handle it.  */
1302   assert(lost_fraction != lfExactlyZero);
1303 
1304   switch (rounding_mode) {
1305   case rmNearestTiesToAway:
1306     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1307 
1308   case rmNearestTiesToEven:
1309     if (lost_fraction == lfMoreThanHalf)
1310       return true;
1311 
1312     /* Our zeroes don't have a significand to test.  */
1313     if (lost_fraction == lfExactlyHalf && category != fcZero)
1314       return APInt::tcExtractBit(significandParts(), bit);
1315 
1316     return false;
1317 
1318   case rmTowardZero:
1319     return false;
1320 
1321   case rmTowardPositive:
1322     return !sign;
1323 
1324   case rmTowardNegative:
1325     return sign;
1326   }
1327   llvm_unreachable("Invalid rounding mode found");
1328 }
1329 
normalize(roundingMode rounding_mode,lostFraction lost_fraction)1330 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1331                                          lostFraction lost_fraction) {
1332   unsigned int omsb;                /* One, not zero, based MSB.  */
1333   int exponentChange;
1334 
1335   if (!isFiniteNonZero())
1336     return opOK;
1337 
1338   /* Before rounding normalize the exponent of fcNormal numbers.  */
1339   omsb = significandMSB() + 1;
1340 
1341   if (omsb) {
1342     /* OMSB is numbered from 1.  We want to place it in the integer
1343        bit numbered PRECISION if possible, with a compensating change in
1344        the exponent.  */
1345     exponentChange = omsb - semantics->precision;
1346 
1347     /* If the resulting exponent is too high, overflow according to
1348        the rounding mode.  */
1349     if (exponent + exponentChange > semantics->maxExponent)
1350       return handleOverflow(rounding_mode);
1351 
1352     /* Subnormal numbers have exponent minExponent, and their MSB
1353        is forced based on that.  */
1354     if (exponent + exponentChange < semantics->minExponent)
1355       exponentChange = semantics->minExponent - exponent;
1356 
1357     /* Shifting left is easy as we don't lose precision.  */
1358     if (exponentChange < 0) {
1359       assert(lost_fraction == lfExactlyZero);
1360 
1361       shiftSignificandLeft(-exponentChange);
1362 
1363       return opOK;
1364     }
1365 
1366     if (exponentChange > 0) {
1367       lostFraction lf;
1368 
1369       /* Shift right and capture any new lost fraction.  */
1370       lf = shiftSignificandRight(exponentChange);
1371 
1372       lost_fraction = combineLostFractions(lf, lost_fraction);
1373 
1374       /* Keep OMSB up-to-date.  */
1375       if (omsb > (unsigned) exponentChange)
1376         omsb -= exponentChange;
1377       else
1378         omsb = 0;
1379     }
1380   }
1381 
1382   /* Now round the number according to rounding_mode given the lost
1383      fraction.  */
1384 
1385   /* As specified in IEEE 754, since we do not trap we do not report
1386      underflow for exact results.  */
1387   if (lost_fraction == lfExactlyZero) {
1388     /* Canonicalize zeroes.  */
1389     if (omsb == 0)
1390       category = fcZero;
1391 
1392     return opOK;
1393   }
1394 
1395   /* Increment the significand if we're rounding away from zero.  */
1396   if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1397     if (omsb == 0)
1398       exponent = semantics->minExponent;
1399 
1400     incrementSignificand();
1401     omsb = significandMSB() + 1;
1402 
1403     /* Did the significand increment overflow?  */
1404     if (omsb == (unsigned) semantics->precision + 1) {
1405       /* Renormalize by incrementing the exponent and shifting our
1406          significand right one.  However if we already have the
1407          maximum exponent we overflow to infinity.  */
1408       if (exponent == semantics->maxExponent) {
1409         category = fcInfinity;
1410 
1411         return (opStatus) (opOverflow | opInexact);
1412       }
1413 
1414       shiftSignificandRight(1);
1415 
1416       return opInexact;
1417     }
1418   }
1419 
1420   /* The normal case - we were and are not denormal, and any
1421      significand increment above didn't overflow.  */
1422   if (omsb == semantics->precision)
1423     return opInexact;
1424 
1425   /* We have a non-zero denormal.  */
1426   assert(omsb < semantics->precision);
1427 
1428   /* Canonicalize zeroes.  */
1429   if (omsb == 0)
1430     category = fcZero;
1431 
1432   /* The fcZero case is a denormal that underflowed to zero.  */
1433   return (opStatus) (opUnderflow | opInexact);
1434 }
1435 
addOrSubtractSpecials(const IEEEFloat & rhs,bool subtract)1436 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1437                                                      bool subtract) {
1438   switch (PackCategoriesIntoKey(category, rhs.category)) {
1439   default:
1440     llvm_unreachable(nullptr);
1441 
1442   case PackCategoriesIntoKey(fcNaN, fcZero):
1443   case PackCategoriesIntoKey(fcNaN, fcNormal):
1444   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1445   case PackCategoriesIntoKey(fcNaN, fcNaN):
1446   case PackCategoriesIntoKey(fcNormal, fcZero):
1447   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1448   case PackCategoriesIntoKey(fcInfinity, fcZero):
1449     return opOK;
1450 
1451   case PackCategoriesIntoKey(fcZero, fcNaN):
1452   case PackCategoriesIntoKey(fcNormal, fcNaN):
1453   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1454     // We need to be sure to flip the sign here for subtraction because we
1455     // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1456     sign = rhs.sign ^ subtract;
1457     category = fcNaN;
1458     copySignificand(rhs);
1459     return opOK;
1460 
1461   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1462   case PackCategoriesIntoKey(fcZero, fcInfinity):
1463     category = fcInfinity;
1464     sign = rhs.sign ^ subtract;
1465     return opOK;
1466 
1467   case PackCategoriesIntoKey(fcZero, fcNormal):
1468     assign(rhs);
1469     sign = rhs.sign ^ subtract;
1470     return opOK;
1471 
1472   case PackCategoriesIntoKey(fcZero, fcZero):
1473     /* Sign depends on rounding mode; handled by caller.  */
1474     return opOK;
1475 
1476   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1477     /* Differently signed infinities can only be validly
1478        subtracted.  */
1479     if (((sign ^ rhs.sign)!=0) != subtract) {
1480       makeNaN();
1481       return opInvalidOp;
1482     }
1483 
1484     return opOK;
1485 
1486   case PackCategoriesIntoKey(fcNormal, fcNormal):
1487     return opDivByZero;
1488   }
1489 }
1490 
1491 /* Add or subtract two normal numbers.  */
addOrSubtractSignificand(const IEEEFloat & rhs,bool subtract)1492 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1493                                                  bool subtract) {
1494   integerPart carry;
1495   lostFraction lost_fraction;
1496   int bits;
1497 
1498   /* Determine if the operation on the absolute values is effectively
1499      an addition or subtraction.  */
1500   subtract ^= static_cast<bool>(sign ^ rhs.sign);
1501 
1502   /* Are we bigger exponent-wise than the RHS?  */
1503   bits = exponent - rhs.exponent;
1504 
1505   /* Subtraction is more subtle than one might naively expect.  */
1506   if (subtract) {
1507     IEEEFloat temp_rhs(rhs);
1508 
1509     if (bits == 0)
1510       lost_fraction = lfExactlyZero;
1511     else if (bits > 0) {
1512       lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1513       shiftSignificandLeft(1);
1514     } else {
1515       lost_fraction = shiftSignificandRight(-bits - 1);
1516       temp_rhs.shiftSignificandLeft(1);
1517     }
1518 
1519     // Should we reverse the subtraction.
1520     if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1521       carry = temp_rhs.subtractSignificand
1522         (*this, lost_fraction != lfExactlyZero);
1523       copySignificand(temp_rhs);
1524       sign = !sign;
1525     } else {
1526       carry = subtractSignificand
1527         (temp_rhs, lost_fraction != lfExactlyZero);
1528     }
1529 
1530     /* Invert the lost fraction - it was on the RHS and
1531        subtracted.  */
1532     if (lost_fraction == lfLessThanHalf)
1533       lost_fraction = lfMoreThanHalf;
1534     else if (lost_fraction == lfMoreThanHalf)
1535       lost_fraction = lfLessThanHalf;
1536 
1537     /* The code above is intended to ensure that no borrow is
1538        necessary.  */
1539     assert(!carry);
1540     (void)carry;
1541   } else {
1542     if (bits > 0) {
1543       IEEEFloat temp_rhs(rhs);
1544 
1545       lost_fraction = temp_rhs.shiftSignificandRight(bits);
1546       carry = addSignificand(temp_rhs);
1547     } else {
1548       lost_fraction = shiftSignificandRight(-bits);
1549       carry = addSignificand(rhs);
1550     }
1551 
1552     /* We have a guard bit; generating a carry cannot happen.  */
1553     assert(!carry);
1554     (void)carry;
1555   }
1556 
1557   return lost_fraction;
1558 }
1559 
multiplySpecials(const IEEEFloat & rhs)1560 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1561   switch (PackCategoriesIntoKey(category, rhs.category)) {
1562   default:
1563     llvm_unreachable(nullptr);
1564 
1565   case PackCategoriesIntoKey(fcNaN, fcZero):
1566   case PackCategoriesIntoKey(fcNaN, fcNormal):
1567   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1568   case PackCategoriesIntoKey(fcNaN, fcNaN):
1569     sign = false;
1570     return opOK;
1571 
1572   case PackCategoriesIntoKey(fcZero, fcNaN):
1573   case PackCategoriesIntoKey(fcNormal, fcNaN):
1574   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1575     sign = false;
1576     category = fcNaN;
1577     copySignificand(rhs);
1578     return opOK;
1579 
1580   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1581   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1582   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1583     category = fcInfinity;
1584     return opOK;
1585 
1586   case PackCategoriesIntoKey(fcZero, fcNormal):
1587   case PackCategoriesIntoKey(fcNormal, fcZero):
1588   case PackCategoriesIntoKey(fcZero, fcZero):
1589     category = fcZero;
1590     return opOK;
1591 
1592   case PackCategoriesIntoKey(fcZero, fcInfinity):
1593   case PackCategoriesIntoKey(fcInfinity, fcZero):
1594     makeNaN();
1595     return opInvalidOp;
1596 
1597   case PackCategoriesIntoKey(fcNormal, fcNormal):
1598     return opOK;
1599   }
1600 }
1601 
divideSpecials(const IEEEFloat & rhs)1602 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1603   switch (PackCategoriesIntoKey(category, rhs.category)) {
1604   default:
1605     llvm_unreachable(nullptr);
1606 
1607   case PackCategoriesIntoKey(fcZero, fcNaN):
1608   case PackCategoriesIntoKey(fcNormal, fcNaN):
1609   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1610     category = fcNaN;
1611     copySignificand(rhs);
1612     LLVM_FALLTHROUGH;
1613   case PackCategoriesIntoKey(fcNaN, fcZero):
1614   case PackCategoriesIntoKey(fcNaN, fcNormal):
1615   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1616   case PackCategoriesIntoKey(fcNaN, fcNaN):
1617     sign = false;
1618     LLVM_FALLTHROUGH;
1619   case PackCategoriesIntoKey(fcInfinity, fcZero):
1620   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1621   case PackCategoriesIntoKey(fcZero, fcInfinity):
1622   case PackCategoriesIntoKey(fcZero, fcNormal):
1623     return opOK;
1624 
1625   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1626     category = fcZero;
1627     return opOK;
1628 
1629   case PackCategoriesIntoKey(fcNormal, fcZero):
1630     category = fcInfinity;
1631     return opDivByZero;
1632 
1633   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1634   case PackCategoriesIntoKey(fcZero, fcZero):
1635     makeNaN();
1636     return opInvalidOp;
1637 
1638   case PackCategoriesIntoKey(fcNormal, fcNormal):
1639     return opOK;
1640   }
1641 }
1642 
modSpecials(const IEEEFloat & rhs)1643 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1644   switch (PackCategoriesIntoKey(category, rhs.category)) {
1645   default:
1646     llvm_unreachable(nullptr);
1647 
1648   case PackCategoriesIntoKey(fcNaN, fcZero):
1649   case PackCategoriesIntoKey(fcNaN, fcNormal):
1650   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1651   case PackCategoriesIntoKey(fcNaN, fcNaN):
1652   case PackCategoriesIntoKey(fcZero, fcInfinity):
1653   case PackCategoriesIntoKey(fcZero, fcNormal):
1654   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1655     return opOK;
1656 
1657   case PackCategoriesIntoKey(fcZero, fcNaN):
1658   case PackCategoriesIntoKey(fcNormal, fcNaN):
1659   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1660     sign = false;
1661     category = fcNaN;
1662     copySignificand(rhs);
1663     return opOK;
1664 
1665   case PackCategoriesIntoKey(fcNormal, fcZero):
1666   case PackCategoriesIntoKey(fcInfinity, fcZero):
1667   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1668   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1669   case PackCategoriesIntoKey(fcZero, fcZero):
1670     makeNaN();
1671     return opInvalidOp;
1672 
1673   case PackCategoriesIntoKey(fcNormal, fcNormal):
1674     return opOK;
1675   }
1676 }
1677 
1678 /* Change sign.  */
changeSign()1679 void IEEEFloat::changeSign() {
1680   /* Look mummy, this one's easy.  */
1681   sign = !sign;
1682 }
1683 
1684 /* Normalized addition or subtraction.  */
addOrSubtract(const IEEEFloat & rhs,roundingMode rounding_mode,bool subtract)1685 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1686                                              roundingMode rounding_mode,
1687                                              bool subtract) {
1688   opStatus fs;
1689 
1690   fs = addOrSubtractSpecials(rhs, subtract);
1691 
1692   /* This return code means it was not a simple case.  */
1693   if (fs == opDivByZero) {
1694     lostFraction lost_fraction;
1695 
1696     lost_fraction = addOrSubtractSignificand(rhs, subtract);
1697     fs = normalize(rounding_mode, lost_fraction);
1698 
1699     /* Can only be zero if we lost no fraction.  */
1700     assert(category != fcZero || lost_fraction == lfExactlyZero);
1701   }
1702 
1703   /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1704      positive zero unless rounding to minus infinity, except that
1705      adding two like-signed zeroes gives that zero.  */
1706   if (category == fcZero) {
1707     if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1708       sign = (rounding_mode == rmTowardNegative);
1709   }
1710 
1711   return fs;
1712 }
1713 
1714 /* Normalized addition.  */
add(const IEEEFloat & rhs,roundingMode rounding_mode)1715 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1716                                    roundingMode rounding_mode) {
1717   return addOrSubtract(rhs, rounding_mode, false);
1718 }
1719 
1720 /* Normalized subtraction.  */
subtract(const IEEEFloat & rhs,roundingMode rounding_mode)1721 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1722                                         roundingMode rounding_mode) {
1723   return addOrSubtract(rhs, rounding_mode, true);
1724 }
1725 
1726 /* Normalized multiply.  */
multiply(const IEEEFloat & rhs,roundingMode rounding_mode)1727 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1728                                         roundingMode rounding_mode) {
1729   opStatus fs;
1730 
1731   sign ^= rhs.sign;
1732   fs = multiplySpecials(rhs);
1733 
1734   if (isFiniteNonZero()) {
1735     lostFraction lost_fraction = multiplySignificand(rhs);
1736     fs = normalize(rounding_mode, lost_fraction);
1737     if (lost_fraction != lfExactlyZero)
1738       fs = (opStatus) (fs | opInexact);
1739   }
1740 
1741   return fs;
1742 }
1743 
1744 /* Normalized divide.  */
divide(const IEEEFloat & rhs,roundingMode rounding_mode)1745 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1746                                       roundingMode rounding_mode) {
1747   opStatus fs;
1748 
1749   sign ^= rhs.sign;
1750   fs = divideSpecials(rhs);
1751 
1752   if (isFiniteNonZero()) {
1753     lostFraction lost_fraction = divideSignificand(rhs);
1754     fs = normalize(rounding_mode, lost_fraction);
1755     if (lost_fraction != lfExactlyZero)
1756       fs = (opStatus) (fs | opInexact);
1757   }
1758 
1759   return fs;
1760 }
1761 
1762 /* Normalized remainder.  This is not currently correct in all cases.  */
remainder(const IEEEFloat & rhs)1763 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
1764   opStatus fs;
1765   IEEEFloat V = *this;
1766   unsigned int origSign = sign;
1767 
1768   fs = V.divide(rhs, rmNearestTiesToEven);
1769   if (fs == opDivByZero)
1770     return fs;
1771 
1772   int parts = partCount();
1773   integerPart *x = new integerPart[parts];
1774   bool ignored;
1775   fs = V.convertToInteger(makeMutableArrayRef(x, parts),
1776                           parts * integerPartWidth, true, rmNearestTiesToEven,
1777                           &ignored);
1778   if (fs == opInvalidOp) {
1779     delete[] x;
1780     return fs;
1781   }
1782 
1783   fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1784                                         rmNearestTiesToEven);
1785   assert(fs==opOK);   // should always work
1786 
1787   fs = V.multiply(rhs, rmNearestTiesToEven);
1788   assert(fs==opOK || fs==opInexact);   // should not overflow or underflow
1789 
1790   fs = subtract(V, rmNearestTiesToEven);
1791   assert(fs==opOK || fs==opInexact);   // likewise
1792 
1793   if (isZero())
1794     sign = origSign;    // IEEE754 requires this
1795   delete[] x;
1796   return fs;
1797 }
1798 
1799 /* Normalized llvm frem (C fmod). */
mod(const IEEEFloat & rhs)1800 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
1801   opStatus fs;
1802   fs = modSpecials(rhs);
1803   unsigned int origSign = sign;
1804 
1805   while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
1806          compareAbsoluteValue(rhs) != cmpLessThan) {
1807     IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven);
1808     if (compareAbsoluteValue(V) == cmpLessThan)
1809       V = scalbn(V, -1, rmNearestTiesToEven);
1810     V.sign = sign;
1811 
1812     fs = subtract(V, rmNearestTiesToEven);
1813     assert(fs==opOK);
1814   }
1815   if (isZero())
1816     sign = origSign; // fmod requires this
1817   return fs;
1818 }
1819 
1820 /* Normalized fused-multiply-add.  */
fusedMultiplyAdd(const IEEEFloat & multiplicand,const IEEEFloat & addend,roundingMode rounding_mode)1821 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
1822                                                 const IEEEFloat &addend,
1823                                                 roundingMode rounding_mode) {
1824   opStatus fs;
1825 
1826   /* Post-multiplication sign, before addition.  */
1827   sign ^= multiplicand.sign;
1828 
1829   /* If and only if all arguments are normal do we need to do an
1830      extended-precision calculation.  */
1831   if (isFiniteNonZero() &&
1832       multiplicand.isFiniteNonZero() &&
1833       addend.isFinite()) {
1834     lostFraction lost_fraction;
1835 
1836     lost_fraction = multiplySignificand(multiplicand, addend);
1837     fs = normalize(rounding_mode, lost_fraction);
1838     if (lost_fraction != lfExactlyZero)
1839       fs = (opStatus) (fs | opInexact);
1840 
1841     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1842        positive zero unless rounding to minus infinity, except that
1843        adding two like-signed zeroes gives that zero.  */
1844     if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1845       sign = (rounding_mode == rmTowardNegative);
1846   } else {
1847     fs = multiplySpecials(multiplicand);
1848 
1849     /* FS can only be opOK or opInvalidOp.  There is no more work
1850        to do in the latter case.  The IEEE-754R standard says it is
1851        implementation-defined in this case whether, if ADDEND is a
1852        quiet NaN, we raise invalid op; this implementation does so.
1853 
1854        If we need to do the addition we can do so with normal
1855        precision.  */
1856     if (fs == opOK)
1857       fs = addOrSubtract(addend, rounding_mode, false);
1858   }
1859 
1860   return fs;
1861 }
1862 
1863 /* Rounding-mode corrrect round to integral value.  */
roundToIntegral(roundingMode rounding_mode)1864 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
1865   opStatus fs;
1866 
1867   // If the exponent is large enough, we know that this value is already
1868   // integral, and the arithmetic below would potentially cause it to saturate
1869   // to +/-Inf.  Bail out early instead.
1870   if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1871     return opOK;
1872 
1873   // The algorithm here is quite simple: we add 2^(p-1), where p is the
1874   // precision of our format, and then subtract it back off again.  The choice
1875   // of rounding modes for the addition/subtraction determines the rounding mode
1876   // for our integral rounding as well.
1877   // NOTE: When the input value is negative, we do subtraction followed by
1878   // addition instead.
1879   APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1880   IntegerConstant <<= semanticsPrecision(*semantics)-1;
1881   IEEEFloat MagicConstant(*semantics);
1882   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1883                                       rmNearestTiesToEven);
1884   MagicConstant.sign = sign;
1885 
1886   if (fs != opOK)
1887     return fs;
1888 
1889   // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1890   bool inputSign = isNegative();
1891 
1892   fs = add(MagicConstant, rounding_mode);
1893   if (fs != opOK && fs != opInexact)
1894     return fs;
1895 
1896   fs = subtract(MagicConstant, rounding_mode);
1897 
1898   // Restore the input sign.
1899   if (inputSign != isNegative())
1900     changeSign();
1901 
1902   return fs;
1903 }
1904 
1905 
1906 /* Comparison requires normalized numbers.  */
compare(const IEEEFloat & rhs) const1907 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
1908   cmpResult result;
1909 
1910   assert(semantics == rhs.semantics);
1911 
1912   switch (PackCategoriesIntoKey(category, rhs.category)) {
1913   default:
1914     llvm_unreachable(nullptr);
1915 
1916   case PackCategoriesIntoKey(fcNaN, fcZero):
1917   case PackCategoriesIntoKey(fcNaN, fcNormal):
1918   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1919   case PackCategoriesIntoKey(fcNaN, fcNaN):
1920   case PackCategoriesIntoKey(fcZero, fcNaN):
1921   case PackCategoriesIntoKey(fcNormal, fcNaN):
1922   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1923     return cmpUnordered;
1924 
1925   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1926   case PackCategoriesIntoKey(fcInfinity, fcZero):
1927   case PackCategoriesIntoKey(fcNormal, fcZero):
1928     if (sign)
1929       return cmpLessThan;
1930     else
1931       return cmpGreaterThan;
1932 
1933   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1934   case PackCategoriesIntoKey(fcZero, fcInfinity):
1935   case PackCategoriesIntoKey(fcZero, fcNormal):
1936     if (rhs.sign)
1937       return cmpGreaterThan;
1938     else
1939       return cmpLessThan;
1940 
1941   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1942     if (sign == rhs.sign)
1943       return cmpEqual;
1944     else if (sign)
1945       return cmpLessThan;
1946     else
1947       return cmpGreaterThan;
1948 
1949   case PackCategoriesIntoKey(fcZero, fcZero):
1950     return cmpEqual;
1951 
1952   case PackCategoriesIntoKey(fcNormal, fcNormal):
1953     break;
1954   }
1955 
1956   /* Two normal numbers.  Do they have the same sign?  */
1957   if (sign != rhs.sign) {
1958     if (sign)
1959       result = cmpLessThan;
1960     else
1961       result = cmpGreaterThan;
1962   } else {
1963     /* Compare absolute values; invert result if negative.  */
1964     result = compareAbsoluteValue(rhs);
1965 
1966     if (sign) {
1967       if (result == cmpLessThan)
1968         result = cmpGreaterThan;
1969       else if (result == cmpGreaterThan)
1970         result = cmpLessThan;
1971     }
1972   }
1973 
1974   return result;
1975 }
1976 
1977 /// IEEEFloat::convert - convert a value of one floating point type to another.
1978 /// The return value corresponds to the IEEE754 exceptions.  *losesInfo
1979 /// records whether the transformation lost information, i.e. whether
1980 /// converting the result back to the original type will produce the
1981 /// original value (this is almost the same as return value==fsOK, but there
1982 /// are edge cases where this is not so).
1983 
convert(const fltSemantics & toSemantics,roundingMode rounding_mode,bool * losesInfo)1984 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
1985                                        roundingMode rounding_mode,
1986                                        bool *losesInfo) {
1987   lostFraction lostFraction;
1988   unsigned int newPartCount, oldPartCount;
1989   opStatus fs;
1990   int shift;
1991   const fltSemantics &fromSemantics = *semantics;
1992 
1993   lostFraction = lfExactlyZero;
1994   newPartCount = partCountForBits(toSemantics.precision + 1);
1995   oldPartCount = partCount();
1996   shift = toSemantics.precision - fromSemantics.precision;
1997 
1998   bool X86SpecialNan = false;
1999   if (&fromSemantics == &semX87DoubleExtended &&
2000       &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2001       (!(*significandParts() & 0x8000000000000000ULL) ||
2002        !(*significandParts() & 0x4000000000000000ULL))) {
2003     // x86 has some unusual NaNs which cannot be represented in any other
2004     // format; note them here.
2005     X86SpecialNan = true;
2006   }
2007 
2008   // If this is a truncation of a denormal number, and the target semantics
2009   // has larger exponent range than the source semantics (this can happen
2010   // when truncating from PowerPC double-double to double format), the
2011   // right shift could lose result mantissa bits.  Adjust exponent instead
2012   // of performing excessive shift.
2013   if (shift < 0 && isFiniteNonZero()) {
2014     int exponentChange = significandMSB() + 1 - fromSemantics.precision;
2015     if (exponent + exponentChange < toSemantics.minExponent)
2016       exponentChange = toSemantics.minExponent - exponent;
2017     if (exponentChange < shift)
2018       exponentChange = shift;
2019     if (exponentChange < 0) {
2020       shift -= exponentChange;
2021       exponent += exponentChange;
2022     }
2023   }
2024 
2025   // If this is a truncation, perform the shift before we narrow the storage.
2026   if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
2027     lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2028 
2029   // Fix the storage so it can hold to new value.
2030   if (newPartCount > oldPartCount) {
2031     // The new type requires more storage; make it available.
2032     integerPart *newParts;
2033     newParts = new integerPart[newPartCount];
2034     APInt::tcSet(newParts, 0, newPartCount);
2035     if (isFiniteNonZero() || category==fcNaN)
2036       APInt::tcAssign(newParts, significandParts(), oldPartCount);
2037     freeSignificand();
2038     significand.parts = newParts;
2039   } else if (newPartCount == 1 && oldPartCount != 1) {
2040     // Switch to built-in storage for a single part.
2041     integerPart newPart = 0;
2042     if (isFiniteNonZero() || category==fcNaN)
2043       newPart = significandParts()[0];
2044     freeSignificand();
2045     significand.part = newPart;
2046   }
2047 
2048   // Now that we have the right storage, switch the semantics.
2049   semantics = &toSemantics;
2050 
2051   // If this is an extension, perform the shift now that the storage is
2052   // available.
2053   if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2054     APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2055 
2056   if (isFiniteNonZero()) {
2057     fs = normalize(rounding_mode, lostFraction);
2058     *losesInfo = (fs != opOK);
2059   } else if (category == fcNaN) {
2060     *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2061 
2062     // For x87 extended precision, we want to make a NaN, not a special NaN if
2063     // the input wasn't special either.
2064     if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2065       APInt::tcSetBit(significandParts(), semantics->precision - 1);
2066 
2067     // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2068     // does not give you back the same bits.  This is dubious, and we
2069     // don't currently do it.  You're really supposed to get
2070     // an invalid operation signal at runtime, but nobody does that.
2071     fs = opOK;
2072   } else {
2073     *losesInfo = false;
2074     fs = opOK;
2075   }
2076 
2077   return fs;
2078 }
2079 
2080 /* Convert a floating point number to an integer according to the
2081    rounding mode.  If the rounded integer value is out of range this
2082    returns an invalid operation exception and the contents of the
2083    destination parts are unspecified.  If the rounded value is in
2084    range but the floating point number is not the exact integer, the C
2085    standard doesn't require an inexact exception to be raised.  IEEE
2086    854 does require it so we do that.
2087 
2088    Note that for conversions to integer type the C standard requires
2089    round-to-zero to always be used.  */
convertToSignExtendedInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2090 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2091     MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2092     roundingMode rounding_mode, bool *isExact) const {
2093   lostFraction lost_fraction;
2094   const integerPart *src;
2095   unsigned int dstPartsCount, truncatedBits;
2096 
2097   *isExact = false;
2098 
2099   /* Handle the three special cases first.  */
2100   if (category == fcInfinity || category == fcNaN)
2101     return opInvalidOp;
2102 
2103   dstPartsCount = partCountForBits(width);
2104   assert(dstPartsCount <= parts.size() && "Integer too big");
2105 
2106   if (category == fcZero) {
2107     APInt::tcSet(parts.data(), 0, dstPartsCount);
2108     // Negative zero can't be represented as an int.
2109     *isExact = !sign;
2110     return opOK;
2111   }
2112 
2113   src = significandParts();
2114 
2115   /* Step 1: place our absolute value, with any fraction truncated, in
2116      the destination.  */
2117   if (exponent < 0) {
2118     /* Our absolute value is less than one; truncate everything.  */
2119     APInt::tcSet(parts.data(), 0, dstPartsCount);
2120     /* For exponent -1 the integer bit represents .5, look at that.
2121        For smaller exponents leftmost truncated bit is 0. */
2122     truncatedBits = semantics->precision -1U - exponent;
2123   } else {
2124     /* We want the most significant (exponent + 1) bits; the rest are
2125        truncated.  */
2126     unsigned int bits = exponent + 1U;
2127 
2128     /* Hopelessly large in magnitude?  */
2129     if (bits > width)
2130       return opInvalidOp;
2131 
2132     if (bits < semantics->precision) {
2133       /* We truncate (semantics->precision - bits) bits.  */
2134       truncatedBits = semantics->precision - bits;
2135       APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2136     } else {
2137       /* We want at least as many bits as are available.  */
2138       APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2139                        0);
2140       APInt::tcShiftLeft(parts.data(), dstPartsCount,
2141                          bits - semantics->precision);
2142       truncatedBits = 0;
2143     }
2144   }
2145 
2146   /* Step 2: work out any lost fraction, and increment the absolute
2147      value if we would round away from zero.  */
2148   if (truncatedBits) {
2149     lost_fraction = lostFractionThroughTruncation(src, partCount(),
2150                                                   truncatedBits);
2151     if (lost_fraction != lfExactlyZero &&
2152         roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2153       if (APInt::tcIncrement(parts.data(), dstPartsCount))
2154         return opInvalidOp;     /* Overflow.  */
2155     }
2156   } else {
2157     lost_fraction = lfExactlyZero;
2158   }
2159 
2160   /* Step 3: check if we fit in the destination.  */
2161   unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2162 
2163   if (sign) {
2164     if (!isSigned) {
2165       /* Negative numbers cannot be represented as unsigned.  */
2166       if (omsb != 0)
2167         return opInvalidOp;
2168     } else {
2169       /* It takes omsb bits to represent the unsigned integer value.
2170          We lose a bit for the sign, but care is needed as the
2171          maximally negative integer is a special case.  */
2172       if (omsb == width &&
2173           APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2174         return opInvalidOp;
2175 
2176       /* This case can happen because of rounding.  */
2177       if (omsb > width)
2178         return opInvalidOp;
2179     }
2180 
2181     APInt::tcNegate (parts.data(), dstPartsCount);
2182   } else {
2183     if (omsb >= width + !isSigned)
2184       return opInvalidOp;
2185   }
2186 
2187   if (lost_fraction == lfExactlyZero) {
2188     *isExact = true;
2189     return opOK;
2190   } else
2191     return opInexact;
2192 }
2193 
2194 /* Same as convertToSignExtendedInteger, except we provide
2195    deterministic values in case of an invalid operation exception,
2196    namely zero for NaNs and the minimal or maximal value respectively
2197    for underflow or overflow.
2198    The *isExact output tells whether the result is exact, in the sense
2199    that converting it back to the original floating point type produces
2200    the original value.  This is almost equivalent to result==opOK,
2201    except for negative zeroes.
2202 */
2203 IEEEFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2204 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2205                             unsigned int width, bool isSigned,
2206                             roundingMode rounding_mode, bool *isExact) const {
2207   opStatus fs;
2208 
2209   fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2210                                     isExact);
2211 
2212   if (fs == opInvalidOp) {
2213     unsigned int bits, dstPartsCount;
2214 
2215     dstPartsCount = partCountForBits(width);
2216     assert(dstPartsCount <= parts.size() && "Integer too big");
2217 
2218     if (category == fcNaN)
2219       bits = 0;
2220     else if (sign)
2221       bits = isSigned;
2222     else
2223       bits = width - isSigned;
2224 
2225     APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2226     if (sign && isSigned)
2227       APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2228   }
2229 
2230   return fs;
2231 }
2232 
2233 /* Convert an unsigned integer SRC to a floating point number,
2234    rounding according to ROUNDING_MODE.  The sign of the floating
2235    point number is not modified.  */
convertFromUnsignedParts(const integerPart * src,unsigned int srcCount,roundingMode rounding_mode)2236 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2237     const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2238   unsigned int omsb, precision, dstCount;
2239   integerPart *dst;
2240   lostFraction lost_fraction;
2241 
2242   category = fcNormal;
2243   omsb = APInt::tcMSB(src, srcCount) + 1;
2244   dst = significandParts();
2245   dstCount = partCount();
2246   precision = semantics->precision;
2247 
2248   /* We want the most significant PRECISION bits of SRC.  There may not
2249      be that many; extract what we can.  */
2250   if (precision <= omsb) {
2251     exponent = omsb - 1;
2252     lost_fraction = lostFractionThroughTruncation(src, srcCount,
2253                                                   omsb - precision);
2254     APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2255   } else {
2256     exponent = precision - 1;
2257     lost_fraction = lfExactlyZero;
2258     APInt::tcExtract(dst, dstCount, src, omsb, 0);
2259   }
2260 
2261   return normalize(rounding_mode, lost_fraction);
2262 }
2263 
convertFromAPInt(const APInt & Val,bool isSigned,roundingMode rounding_mode)2264 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2265                                                 roundingMode rounding_mode) {
2266   unsigned int partCount = Val.getNumWords();
2267   APInt api = Val;
2268 
2269   sign = false;
2270   if (isSigned && api.isNegative()) {
2271     sign = true;
2272     api = -api;
2273   }
2274 
2275   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2276 }
2277 
2278 /* Convert a two's complement integer SRC to a floating point number,
2279    rounding according to ROUNDING_MODE.  ISSIGNED is true if the
2280    integer is signed, in which case it must be sign-extended.  */
2281 IEEEFloat::opStatus
convertFromSignExtendedInteger(const integerPart * src,unsigned int srcCount,bool isSigned,roundingMode rounding_mode)2282 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2283                                           unsigned int srcCount, bool isSigned,
2284                                           roundingMode rounding_mode) {
2285   opStatus status;
2286 
2287   if (isSigned &&
2288       APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2289     integerPart *copy;
2290 
2291     /* If we're signed and negative negate a copy.  */
2292     sign = true;
2293     copy = new integerPart[srcCount];
2294     APInt::tcAssign(copy, src, srcCount);
2295     APInt::tcNegate(copy, srcCount);
2296     status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2297     delete [] copy;
2298   } else {
2299     sign = false;
2300     status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2301   }
2302 
2303   return status;
2304 }
2305 
2306 /* FIXME: should this just take a const APInt reference?  */
2307 IEEEFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode)2308 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2309                                           unsigned int width, bool isSigned,
2310                                           roundingMode rounding_mode) {
2311   unsigned int partCount = partCountForBits(width);
2312   APInt api = APInt(width, makeArrayRef(parts, partCount));
2313 
2314   sign = false;
2315   if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2316     sign = true;
2317     api = -api;
2318   }
2319 
2320   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2321 }
2322 
2323 Expected<IEEEFloat::opStatus>
convertFromHexadecimalString(StringRef s,roundingMode rounding_mode)2324 IEEEFloat::convertFromHexadecimalString(StringRef s,
2325                                         roundingMode rounding_mode) {
2326   lostFraction lost_fraction = lfExactlyZero;
2327 
2328   category = fcNormal;
2329   zeroSignificand();
2330   exponent = 0;
2331 
2332   integerPart *significand = significandParts();
2333   unsigned partsCount = partCount();
2334   unsigned bitPos = partsCount * integerPartWidth;
2335   bool computedTrailingFraction = false;
2336 
2337   // Skip leading zeroes and any (hexa)decimal point.
2338   StringRef::iterator begin = s.begin();
2339   StringRef::iterator end = s.end();
2340   StringRef::iterator dot;
2341   auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2342   if (!PtrOrErr)
2343     return PtrOrErr.takeError();
2344   StringRef::iterator p = *PtrOrErr;
2345   StringRef::iterator firstSignificantDigit = p;
2346 
2347   while (p != end) {
2348     integerPart hex_value;
2349 
2350     if (*p == '.') {
2351       if (dot != end)
2352         return createError("String contains multiple dots");
2353       dot = p++;
2354       continue;
2355     }
2356 
2357     hex_value = hexDigitValue(*p);
2358     if (hex_value == -1U)
2359       break;
2360 
2361     p++;
2362 
2363     // Store the number while we have space.
2364     if (bitPos) {
2365       bitPos -= 4;
2366       hex_value <<= bitPos % integerPartWidth;
2367       significand[bitPos / integerPartWidth] |= hex_value;
2368     } else if (!computedTrailingFraction) {
2369       auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2370       if (!FractOrErr)
2371         return FractOrErr.takeError();
2372       lost_fraction = *FractOrErr;
2373       computedTrailingFraction = true;
2374     }
2375   }
2376 
2377   /* Hex floats require an exponent but not a hexadecimal point.  */
2378   if (p == end)
2379     return createError("Hex strings require an exponent");
2380   if (*p != 'p' && *p != 'P')
2381     return createError("Invalid character in significand");
2382   if (p == begin)
2383     return createError("Significand has no digits");
2384   if (dot != end && p - begin == 1)
2385     return createError("Significand has no digits");
2386 
2387   /* Ignore the exponent if we are zero.  */
2388   if (p != firstSignificantDigit) {
2389     int expAdjustment;
2390 
2391     /* Implicit hexadecimal point?  */
2392     if (dot == end)
2393       dot = p;
2394 
2395     /* Calculate the exponent adjustment implicit in the number of
2396        significant digits.  */
2397     expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2398     if (expAdjustment < 0)
2399       expAdjustment++;
2400     expAdjustment = expAdjustment * 4 - 1;
2401 
2402     /* Adjust for writing the significand starting at the most
2403        significant nibble.  */
2404     expAdjustment += semantics->precision;
2405     expAdjustment -= partsCount * integerPartWidth;
2406 
2407     /* Adjust for the given exponent.  */
2408     auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2409     if (!ExpOrErr)
2410       return ExpOrErr.takeError();
2411     exponent = *ExpOrErr;
2412   }
2413 
2414   return normalize(rounding_mode, lost_fraction);
2415 }
2416 
2417 IEEEFloat::opStatus
roundSignificandWithExponent(const integerPart * decSigParts,unsigned sigPartCount,int exp,roundingMode rounding_mode)2418 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2419                                         unsigned sigPartCount, int exp,
2420                                         roundingMode rounding_mode) {
2421   unsigned int parts, pow5PartCount;
2422   fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2423   integerPart pow5Parts[maxPowerOfFiveParts];
2424   bool isNearest;
2425 
2426   isNearest = (rounding_mode == rmNearestTiesToEven ||
2427                rounding_mode == rmNearestTiesToAway);
2428 
2429   parts = partCountForBits(semantics->precision + 11);
2430 
2431   /* Calculate pow(5, abs(exp)).  */
2432   pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2433 
2434   for (;; parts *= 2) {
2435     opStatus sigStatus, powStatus;
2436     unsigned int excessPrecision, truncatedBits;
2437 
2438     calcSemantics.precision = parts * integerPartWidth - 1;
2439     excessPrecision = calcSemantics.precision - semantics->precision;
2440     truncatedBits = excessPrecision;
2441 
2442     IEEEFloat decSig(calcSemantics, uninitialized);
2443     decSig.makeZero(sign);
2444     IEEEFloat pow5(calcSemantics);
2445 
2446     sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2447                                                 rmNearestTiesToEven);
2448     powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2449                                               rmNearestTiesToEven);
2450     /* Add exp, as 10^n = 5^n * 2^n.  */
2451     decSig.exponent += exp;
2452 
2453     lostFraction calcLostFraction;
2454     integerPart HUerr, HUdistance;
2455     unsigned int powHUerr;
2456 
2457     if (exp >= 0) {
2458       /* multiplySignificand leaves the precision-th bit set to 1.  */
2459       calcLostFraction = decSig.multiplySignificand(pow5);
2460       powHUerr = powStatus != opOK;
2461     } else {
2462       calcLostFraction = decSig.divideSignificand(pow5);
2463       /* Denormal numbers have less precision.  */
2464       if (decSig.exponent < semantics->minExponent) {
2465         excessPrecision += (semantics->minExponent - decSig.exponent);
2466         truncatedBits = excessPrecision;
2467         if (excessPrecision > calcSemantics.precision)
2468           excessPrecision = calcSemantics.precision;
2469       }
2470       /* Extra half-ulp lost in reciprocal of exponent.  */
2471       powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2472     }
2473 
2474     /* Both multiplySignificand and divideSignificand return the
2475        result with the integer bit set.  */
2476     assert(APInt::tcExtractBit
2477            (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2478 
2479     HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2480                        powHUerr);
2481     HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2482                                       excessPrecision, isNearest);
2483 
2484     /* Are we guaranteed to round correctly if we truncate?  */
2485     if (HUdistance >= HUerr) {
2486       APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2487                        calcSemantics.precision - excessPrecision,
2488                        excessPrecision);
2489       /* Take the exponent of decSig.  If we tcExtract-ed less bits
2490          above we must adjust our exponent to compensate for the
2491          implicit right shift.  */
2492       exponent = (decSig.exponent + semantics->precision
2493                   - (calcSemantics.precision - excessPrecision));
2494       calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2495                                                        decSig.partCount(),
2496                                                        truncatedBits);
2497       return normalize(rounding_mode, calcLostFraction);
2498     }
2499   }
2500 }
2501 
2502 Expected<IEEEFloat::opStatus>
convertFromDecimalString(StringRef str,roundingMode rounding_mode)2503 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2504   decimalInfo D;
2505   opStatus fs;
2506 
2507   /* Scan the text.  */
2508   StringRef::iterator p = str.begin();
2509   if (Error Err = interpretDecimal(p, str.end(), &D))
2510     return std::move(Err);
2511 
2512   /* Handle the quick cases.  First the case of no significant digits,
2513      i.e. zero, and then exponents that are obviously too large or too
2514      small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
2515      definitely overflows if
2516 
2517            (exp - 1) * L >= maxExponent
2518 
2519      and definitely underflows to zero where
2520 
2521            (exp + 1) * L <= minExponent - precision
2522 
2523      With integer arithmetic the tightest bounds for L are
2524 
2525            93/28 < L < 196/59            [ numerator <= 256 ]
2526            42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
2527   */
2528 
2529   // Test if we have a zero number allowing for strings with no null terminators
2530   // and zero decimals with non-zero exponents.
2531   //
2532   // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2533   // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2534   // be at most one dot. On the other hand, if we have a zero with a non-zero
2535   // exponent, then we know that D.firstSigDigit will be non-numeric.
2536   if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2537     category = fcZero;
2538     fs = opOK;
2539 
2540   /* Check whether the normalized exponent is high enough to overflow
2541      max during the log-rebasing in the max-exponent check below. */
2542   } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2543     fs = handleOverflow(rounding_mode);
2544 
2545   /* If it wasn't, then it also wasn't high enough to overflow max
2546      during the log-rebasing in the min-exponent check.  Check that it
2547      won't overflow min in either check, then perform the min-exponent
2548      check. */
2549   } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2550              (D.normalizedExponent + 1) * 28738 <=
2551                8651 * (semantics->minExponent - (int) semantics->precision)) {
2552     /* Underflow to zero and round.  */
2553     category = fcNormal;
2554     zeroSignificand();
2555     fs = normalize(rounding_mode, lfLessThanHalf);
2556 
2557   /* We can finally safely perform the max-exponent check. */
2558   } else if ((D.normalizedExponent - 1) * 42039
2559              >= 12655 * semantics->maxExponent) {
2560     /* Overflow and round.  */
2561     fs = handleOverflow(rounding_mode);
2562   } else {
2563     integerPart *decSignificand;
2564     unsigned int partCount;
2565 
2566     /* A tight upper bound on number of bits required to hold an
2567        N-digit decimal integer is N * 196 / 59.  Allocate enough space
2568        to hold the full significand, and an extra part required by
2569        tcMultiplyPart.  */
2570     partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2571     partCount = partCountForBits(1 + 196 * partCount / 59);
2572     decSignificand = new integerPart[partCount + 1];
2573     partCount = 0;
2574 
2575     /* Convert to binary efficiently - we do almost all multiplication
2576        in an integerPart.  When this would overflow do we do a single
2577        bignum multiplication, and then revert again to multiplication
2578        in an integerPart.  */
2579     do {
2580       integerPart decValue, val, multiplier;
2581 
2582       val = 0;
2583       multiplier = 1;
2584 
2585       do {
2586         if (*p == '.') {
2587           p++;
2588           if (p == str.end()) {
2589             break;
2590           }
2591         }
2592         decValue = decDigitValue(*p++);
2593         if (decValue >= 10U) {
2594           delete[] decSignificand;
2595           return createError("Invalid character in significand");
2596         }
2597         multiplier *= 10;
2598         val = val * 10 + decValue;
2599         /* The maximum number that can be multiplied by ten with any
2600            digit added without overflowing an integerPart.  */
2601       } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2602 
2603       /* Multiply out the current part.  */
2604       APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2605                             partCount, partCount + 1, false);
2606 
2607       /* If we used another part (likely but not guaranteed), increase
2608          the count.  */
2609       if (decSignificand[partCount])
2610         partCount++;
2611     } while (p <= D.lastSigDigit);
2612 
2613     category = fcNormal;
2614     fs = roundSignificandWithExponent(decSignificand, partCount,
2615                                       D.exponent, rounding_mode);
2616 
2617     delete [] decSignificand;
2618   }
2619 
2620   return fs;
2621 }
2622 
convertFromStringSpecials(StringRef str)2623 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2624   if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
2625     makeInf(false);
2626     return true;
2627   }
2628 
2629   if (str.equals("-inf") || str.equals("-INFINITY") || str.equals("-Inf")) {
2630     makeInf(true);
2631     return true;
2632   }
2633 
2634   if (str.equals("nan") || str.equals("NaN")) {
2635     makeNaN(false, false);
2636     return true;
2637   }
2638 
2639   if (str.equals("-nan") || str.equals("-NaN")) {
2640     makeNaN(false, true);
2641     return true;
2642   }
2643 
2644   return false;
2645 }
2646 
2647 Expected<IEEEFloat::opStatus>
convertFromString(StringRef str,roundingMode rounding_mode)2648 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
2649   if (str.empty())
2650     return createError("Invalid string length");
2651 
2652   // Handle special cases.
2653   if (convertFromStringSpecials(str))
2654     return opOK;
2655 
2656   /* Handle a leading minus sign.  */
2657   StringRef::iterator p = str.begin();
2658   size_t slen = str.size();
2659   sign = *p == '-' ? 1 : 0;
2660   if (*p == '-' || *p == '+') {
2661     p++;
2662     slen--;
2663     if (!slen)
2664       return createError("String has no digits");
2665   }
2666 
2667   if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2668     if (slen == 2)
2669       return createError("Invalid string");
2670     return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2671                                         rounding_mode);
2672   }
2673 
2674   return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2675 }
2676 
2677 /* Write out a hexadecimal representation of the floating point value
2678    to DST, which must be of sufficient size, in the C99 form
2679    [-]0xh.hhhhp[+-]d.  Return the number of characters written,
2680    excluding the terminating NUL.
2681 
2682    If UPPERCASE, the output is in upper case, otherwise in lower case.
2683 
2684    HEXDIGITS digits appear altogether, rounding the value if
2685    necessary.  If HEXDIGITS is 0, the minimal precision to display the
2686    number precisely is used instead.  If nothing would appear after
2687    the decimal point it is suppressed.
2688 
2689    The decimal exponent is always printed and has at least one digit.
2690    Zero values display an exponent of zero.  Infinities and NaNs
2691    appear as "infinity" or "nan" respectively.
2692 
2693    The above rules are as specified by C99.  There is ambiguity about
2694    what the leading hexadecimal digit should be.  This implementation
2695    uses whatever is necessary so that the exponent is displayed as
2696    stored.  This implies the exponent will fall within the IEEE format
2697    range, and the leading hexadecimal digit will be 0 (for denormals),
2698    1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2699    any other digits zero).
2700 */
convertToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2701 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
2702                                            bool upperCase,
2703                                            roundingMode rounding_mode) const {
2704   char *p;
2705 
2706   p = dst;
2707   if (sign)
2708     *dst++ = '-';
2709 
2710   switch (category) {
2711   case fcInfinity:
2712     memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2713     dst += sizeof infinityL - 1;
2714     break;
2715 
2716   case fcNaN:
2717     memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2718     dst += sizeof NaNU - 1;
2719     break;
2720 
2721   case fcZero:
2722     *dst++ = '0';
2723     *dst++ = upperCase ? 'X': 'x';
2724     *dst++ = '0';
2725     if (hexDigits > 1) {
2726       *dst++ = '.';
2727       memset (dst, '0', hexDigits - 1);
2728       dst += hexDigits - 1;
2729     }
2730     *dst++ = upperCase ? 'P': 'p';
2731     *dst++ = '0';
2732     break;
2733 
2734   case fcNormal:
2735     dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2736     break;
2737   }
2738 
2739   *dst = 0;
2740 
2741   return static_cast<unsigned int>(dst - p);
2742 }
2743 
2744 /* Does the hard work of outputting the correctly rounded hexadecimal
2745    form of a normal floating point number with the specified number of
2746    hexadecimal digits.  If HEXDIGITS is zero the minimum number of
2747    digits necessary to print the value precisely is output.  */
convertNormalToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2748 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2749                                           bool upperCase,
2750                                           roundingMode rounding_mode) const {
2751   unsigned int count, valueBits, shift, partsCount, outputDigits;
2752   const char *hexDigitChars;
2753   const integerPart *significand;
2754   char *p;
2755   bool roundUp;
2756 
2757   *dst++ = '0';
2758   *dst++ = upperCase ? 'X': 'x';
2759 
2760   roundUp = false;
2761   hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2762 
2763   significand = significandParts();
2764   partsCount = partCount();
2765 
2766   /* +3 because the first digit only uses the single integer bit, so
2767      we have 3 virtual zero most-significant-bits.  */
2768   valueBits = semantics->precision + 3;
2769   shift = integerPartWidth - valueBits % integerPartWidth;
2770 
2771   /* The natural number of digits required ignoring trailing
2772      insignificant zeroes.  */
2773   outputDigits = (valueBits - significandLSB () + 3) / 4;
2774 
2775   /* hexDigits of zero means use the required number for the
2776      precision.  Otherwise, see if we are truncating.  If we are,
2777      find out if we need to round away from zero.  */
2778   if (hexDigits) {
2779     if (hexDigits < outputDigits) {
2780       /* We are dropping non-zero bits, so need to check how to round.
2781          "bits" is the number of dropped bits.  */
2782       unsigned int bits;
2783       lostFraction fraction;
2784 
2785       bits = valueBits - hexDigits * 4;
2786       fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2787       roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2788     }
2789     outputDigits = hexDigits;
2790   }
2791 
2792   /* Write the digits consecutively, and start writing in the location
2793      of the hexadecimal point.  We move the most significant digit
2794      left and add the hexadecimal point later.  */
2795   p = ++dst;
2796 
2797   count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2798 
2799   while (outputDigits && count) {
2800     integerPart part;
2801 
2802     /* Put the most significant integerPartWidth bits in "part".  */
2803     if (--count == partsCount)
2804       part = 0;  /* An imaginary higher zero part.  */
2805     else
2806       part = significand[count] << shift;
2807 
2808     if (count && shift)
2809       part |= significand[count - 1] >> (integerPartWidth - shift);
2810 
2811     /* Convert as much of "part" to hexdigits as we can.  */
2812     unsigned int curDigits = integerPartWidth / 4;
2813 
2814     if (curDigits > outputDigits)
2815       curDigits = outputDigits;
2816     dst += partAsHex (dst, part, curDigits, hexDigitChars);
2817     outputDigits -= curDigits;
2818   }
2819 
2820   if (roundUp) {
2821     char *q = dst;
2822 
2823     /* Note that hexDigitChars has a trailing '0'.  */
2824     do {
2825       q--;
2826       *q = hexDigitChars[hexDigitValue (*q) + 1];
2827     } while (*q == '0');
2828     assert(q >= p);
2829   } else {
2830     /* Add trailing zeroes.  */
2831     memset (dst, '0', outputDigits);
2832     dst += outputDigits;
2833   }
2834 
2835   /* Move the most significant digit to before the point, and if there
2836      is something after the decimal point add it.  This must come
2837      after rounding above.  */
2838   p[-1] = p[0];
2839   if (dst -1 == p)
2840     dst--;
2841   else
2842     p[0] = '.';
2843 
2844   /* Finally output the exponent.  */
2845   *dst++ = upperCase ? 'P': 'p';
2846 
2847   return writeSignedDecimal (dst, exponent);
2848 }
2849 
hash_value(const IEEEFloat & Arg)2850 hash_code hash_value(const IEEEFloat &Arg) {
2851   if (!Arg.isFiniteNonZero())
2852     return hash_combine((uint8_t)Arg.category,
2853                         // NaN has no sign, fix it at zero.
2854                         Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2855                         Arg.semantics->precision);
2856 
2857   // Normal floats need their exponent and significand hashed.
2858   return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2859                       Arg.semantics->precision, Arg.exponent,
2860                       hash_combine_range(
2861                         Arg.significandParts(),
2862                         Arg.significandParts() + Arg.partCount()));
2863 }
2864 
2865 // Conversion from APFloat to/from host float/double.  It may eventually be
2866 // possible to eliminate these and have everybody deal with APFloats, but that
2867 // will take a while.  This approach will not easily extend to long double.
2868 // Current implementation requires integerPartWidth==64, which is correct at
2869 // the moment but could be made more general.
2870 
2871 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2872 // the actual IEEE respresentations.  We compensate for that here.
2873 
convertF80LongDoubleAPFloatToAPInt() const2874 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
2875   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
2876   assert(partCount()==2);
2877 
2878   uint64_t myexponent, mysignificand;
2879 
2880   if (isFiniteNonZero()) {
2881     myexponent = exponent+16383; //bias
2882     mysignificand = significandParts()[0];
2883     if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2884       myexponent = 0;   // denormal
2885   } else if (category==fcZero) {
2886     myexponent = 0;
2887     mysignificand = 0;
2888   } else if (category==fcInfinity) {
2889     myexponent = 0x7fff;
2890     mysignificand = 0x8000000000000000ULL;
2891   } else {
2892     assert(category == fcNaN && "Unknown category");
2893     myexponent = 0x7fff;
2894     mysignificand = significandParts()[0];
2895   }
2896 
2897   uint64_t words[2];
2898   words[0] = mysignificand;
2899   words[1] =  ((uint64_t)(sign & 1) << 15) |
2900               (myexponent & 0x7fffLL);
2901   return APInt(80, words);
2902 }
2903 
convertPPCDoubleDoubleAPFloatToAPInt() const2904 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
2905   assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
2906   assert(partCount()==2);
2907 
2908   uint64_t words[2];
2909   opStatus fs;
2910   bool losesInfo;
2911 
2912   // Convert number to double.  To avoid spurious underflows, we re-
2913   // normalize against the "double" minExponent first, and only *then*
2914   // truncate the mantissa.  The result of that second conversion
2915   // may be inexact, but should never underflow.
2916   // Declare fltSemantics before APFloat that uses it (and
2917   // saves pointer to it) to ensure correct destruction order.
2918   fltSemantics extendedSemantics = *semantics;
2919   extendedSemantics.minExponent = semIEEEdouble.minExponent;
2920   IEEEFloat extended(*this);
2921   fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2922   assert(fs == opOK && !losesInfo);
2923   (void)fs;
2924 
2925   IEEEFloat u(extended);
2926   fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2927   assert(fs == opOK || fs == opInexact);
2928   (void)fs;
2929   words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2930 
2931   // If conversion was exact or resulted in a special case, we're done;
2932   // just set the second double to zero.  Otherwise, re-convert back to
2933   // the extended format and compute the difference.  This now should
2934   // convert exactly to double.
2935   if (u.isFiniteNonZero() && losesInfo) {
2936     fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2937     assert(fs == opOK && !losesInfo);
2938     (void)fs;
2939 
2940     IEEEFloat v(extended);
2941     v.subtract(u, rmNearestTiesToEven);
2942     fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2943     assert(fs == opOK && !losesInfo);
2944     (void)fs;
2945     words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2946   } else {
2947     words[1] = 0;
2948   }
2949 
2950   return APInt(128, words);
2951 }
2952 
convertQuadrupleAPFloatToAPInt() const2953 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
2954   assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
2955   assert(partCount()==2);
2956 
2957   uint64_t myexponent, mysignificand, mysignificand2;
2958 
2959   if (isFiniteNonZero()) {
2960     myexponent = exponent+16383; //bias
2961     mysignificand = significandParts()[0];
2962     mysignificand2 = significandParts()[1];
2963     if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2964       myexponent = 0;   // denormal
2965   } else if (category==fcZero) {
2966     myexponent = 0;
2967     mysignificand = mysignificand2 = 0;
2968   } else if (category==fcInfinity) {
2969     myexponent = 0x7fff;
2970     mysignificand = mysignificand2 = 0;
2971   } else {
2972     assert(category == fcNaN && "Unknown category!");
2973     myexponent = 0x7fff;
2974     mysignificand = significandParts()[0];
2975     mysignificand2 = significandParts()[1];
2976   }
2977 
2978   uint64_t words[2];
2979   words[0] = mysignificand;
2980   words[1] = ((uint64_t)(sign & 1) << 63) |
2981              ((myexponent & 0x7fff) << 48) |
2982              (mysignificand2 & 0xffffffffffffLL);
2983 
2984   return APInt(128, words);
2985 }
2986 
convertDoubleAPFloatToAPInt() const2987 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
2988   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
2989   assert(partCount()==1);
2990 
2991   uint64_t myexponent, mysignificand;
2992 
2993   if (isFiniteNonZero()) {
2994     myexponent = exponent+1023; //bias
2995     mysignificand = *significandParts();
2996     if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2997       myexponent = 0;   // denormal
2998   } else if (category==fcZero) {
2999     myexponent = 0;
3000     mysignificand = 0;
3001   } else if (category==fcInfinity) {
3002     myexponent = 0x7ff;
3003     mysignificand = 0;
3004   } else {
3005     assert(category == fcNaN && "Unknown category!");
3006     myexponent = 0x7ff;
3007     mysignificand = *significandParts();
3008   }
3009 
3010   return APInt(64, ((((uint64_t)(sign & 1) << 63) |
3011                      ((myexponent & 0x7ff) <<  52) |
3012                      (mysignificand & 0xfffffffffffffLL))));
3013 }
3014 
convertFloatAPFloatToAPInt() const3015 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3016   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
3017   assert(partCount()==1);
3018 
3019   uint32_t myexponent, mysignificand;
3020 
3021   if (isFiniteNonZero()) {
3022     myexponent = exponent+127; //bias
3023     mysignificand = (uint32_t)*significandParts();
3024     if (myexponent == 1 && !(mysignificand & 0x800000))
3025       myexponent = 0;   // denormal
3026   } else if (category==fcZero) {
3027     myexponent = 0;
3028     mysignificand = 0;
3029   } else if (category==fcInfinity) {
3030     myexponent = 0xff;
3031     mysignificand = 0;
3032   } else {
3033     assert(category == fcNaN && "Unknown category!");
3034     myexponent = 0xff;
3035     mysignificand = (uint32_t)*significandParts();
3036   }
3037 
3038   return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3039                     (mysignificand & 0x7fffff)));
3040 }
3041 
convertHalfAPFloatToAPInt() const3042 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3043   assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
3044   assert(partCount()==1);
3045 
3046   uint32_t myexponent, mysignificand;
3047 
3048   if (isFiniteNonZero()) {
3049     myexponent = exponent+15; //bias
3050     mysignificand = (uint32_t)*significandParts();
3051     if (myexponent == 1 && !(mysignificand & 0x400))
3052       myexponent = 0;   // denormal
3053   } else if (category==fcZero) {
3054     myexponent = 0;
3055     mysignificand = 0;
3056   } else if (category==fcInfinity) {
3057     myexponent = 0x1f;
3058     mysignificand = 0;
3059   } else {
3060     assert(category == fcNaN && "Unknown category!");
3061     myexponent = 0x1f;
3062     mysignificand = (uint32_t)*significandParts();
3063   }
3064 
3065   return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3066                     (mysignificand & 0x3ff)));
3067 }
3068 
3069 // This function creates an APInt that is just a bit map of the floating
3070 // point constant as it would appear in memory.  It is not a conversion,
3071 // and treating the result as a normal integer is unlikely to be useful.
3072 
bitcastToAPInt() const3073 APInt IEEEFloat::bitcastToAPInt() const {
3074   if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3075     return convertHalfAPFloatToAPInt();
3076 
3077   if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3078     return convertFloatAPFloatToAPInt();
3079 
3080   if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3081     return convertDoubleAPFloatToAPInt();
3082 
3083   if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3084     return convertQuadrupleAPFloatToAPInt();
3085 
3086   if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3087     return convertPPCDoubleDoubleAPFloatToAPInt();
3088 
3089   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3090          "unknown format!");
3091   return convertF80LongDoubleAPFloatToAPInt();
3092 }
3093 
convertToFloat() const3094 float IEEEFloat::convertToFloat() const {
3095   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3096          "Float semantics are not IEEEsingle");
3097   APInt api = bitcastToAPInt();
3098   return api.bitsToFloat();
3099 }
3100 
convertToDouble() const3101 double IEEEFloat::convertToDouble() const {
3102   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3103          "Float semantics are not IEEEdouble");
3104   APInt api = bitcastToAPInt();
3105   return api.bitsToDouble();
3106 }
3107 
3108 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
3109 /// does not support these bit patterns:
3110 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3111 ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3112 ///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3113 ///  exponent = 0, integer bit 1 ("pseudodenormal")
3114 /// At the moment, the first three are treated as NaNs, the last one as Normal.
initFromF80LongDoubleAPInt(const APInt & api)3115 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3116   assert(api.getBitWidth()==80);
3117   uint64_t i1 = api.getRawData()[0];
3118   uint64_t i2 = api.getRawData()[1];
3119   uint64_t myexponent = (i2 & 0x7fff);
3120   uint64_t mysignificand = i1;
3121   uint8_t myintegerbit = mysignificand >> 63;
3122 
3123   initialize(&semX87DoubleExtended);
3124   assert(partCount()==2);
3125 
3126   sign = static_cast<unsigned int>(i2>>15);
3127   if (myexponent == 0 && mysignificand == 0) {
3128     // exponent, significand meaningless
3129     category = fcZero;
3130   } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3131     // exponent, significand meaningless
3132     category = fcInfinity;
3133   } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3134              (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3135     // exponent meaningless
3136     category = fcNaN;
3137     significandParts()[0] = mysignificand;
3138     significandParts()[1] = 0;
3139   } else {
3140     category = fcNormal;
3141     exponent = myexponent - 16383;
3142     significandParts()[0] = mysignificand;
3143     significandParts()[1] = 0;
3144     if (myexponent==0)          // denormal
3145       exponent = -16382;
3146   }
3147 }
3148 
initFromPPCDoubleDoubleAPInt(const APInt & api)3149 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3150   assert(api.getBitWidth()==128);
3151   uint64_t i1 = api.getRawData()[0];
3152   uint64_t i2 = api.getRawData()[1];
3153   opStatus fs;
3154   bool losesInfo;
3155 
3156   // Get the first double and convert to our format.
3157   initFromDoubleAPInt(APInt(64, i1));
3158   fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3159   assert(fs == opOK && !losesInfo);
3160   (void)fs;
3161 
3162   // Unless we have a special case, add in second double.
3163   if (isFiniteNonZero()) {
3164     IEEEFloat v(semIEEEdouble, APInt(64, i2));
3165     fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3166     assert(fs == opOK && !losesInfo);
3167     (void)fs;
3168 
3169     add(v, rmNearestTiesToEven);
3170   }
3171 }
3172 
initFromQuadrupleAPInt(const APInt & api)3173 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3174   assert(api.getBitWidth()==128);
3175   uint64_t i1 = api.getRawData()[0];
3176   uint64_t i2 = api.getRawData()[1];
3177   uint64_t myexponent = (i2 >> 48) & 0x7fff;
3178   uint64_t mysignificand  = i1;
3179   uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3180 
3181   initialize(&semIEEEquad);
3182   assert(partCount()==2);
3183 
3184   sign = static_cast<unsigned int>(i2>>63);
3185   if (myexponent==0 &&
3186       (mysignificand==0 && mysignificand2==0)) {
3187     // exponent, significand meaningless
3188     category = fcZero;
3189   } else if (myexponent==0x7fff &&
3190              (mysignificand==0 && mysignificand2==0)) {
3191     // exponent, significand meaningless
3192     category = fcInfinity;
3193   } else if (myexponent==0x7fff &&
3194              (mysignificand!=0 || mysignificand2 !=0)) {
3195     // exponent meaningless
3196     category = fcNaN;
3197     significandParts()[0] = mysignificand;
3198     significandParts()[1] = mysignificand2;
3199   } else {
3200     category = fcNormal;
3201     exponent = myexponent - 16383;
3202     significandParts()[0] = mysignificand;
3203     significandParts()[1] = mysignificand2;
3204     if (myexponent==0)          // denormal
3205       exponent = -16382;
3206     else
3207       significandParts()[1] |= 0x1000000000000LL;  // integer bit
3208   }
3209 }
3210 
initFromDoubleAPInt(const APInt & api)3211 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3212   assert(api.getBitWidth()==64);
3213   uint64_t i = *api.getRawData();
3214   uint64_t myexponent = (i >> 52) & 0x7ff;
3215   uint64_t mysignificand = i & 0xfffffffffffffLL;
3216 
3217   initialize(&semIEEEdouble);
3218   assert(partCount()==1);
3219 
3220   sign = static_cast<unsigned int>(i>>63);
3221   if (myexponent==0 && mysignificand==0) {
3222     // exponent, significand meaningless
3223     category = fcZero;
3224   } else if (myexponent==0x7ff && mysignificand==0) {
3225     // exponent, significand meaningless
3226     category = fcInfinity;
3227   } else if (myexponent==0x7ff && mysignificand!=0) {
3228     // exponent meaningless
3229     category = fcNaN;
3230     *significandParts() = mysignificand;
3231   } else {
3232     category = fcNormal;
3233     exponent = myexponent - 1023;
3234     *significandParts() = mysignificand;
3235     if (myexponent==0)          // denormal
3236       exponent = -1022;
3237     else
3238       *significandParts() |= 0x10000000000000LL;  // integer bit
3239   }
3240 }
3241 
initFromFloatAPInt(const APInt & api)3242 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3243   assert(api.getBitWidth()==32);
3244   uint32_t i = (uint32_t)*api.getRawData();
3245   uint32_t myexponent = (i >> 23) & 0xff;
3246   uint32_t mysignificand = i & 0x7fffff;
3247 
3248   initialize(&semIEEEsingle);
3249   assert(partCount()==1);
3250 
3251   sign = i >> 31;
3252   if (myexponent==0 && mysignificand==0) {
3253     // exponent, significand meaningless
3254     category = fcZero;
3255   } else if (myexponent==0xff && mysignificand==0) {
3256     // exponent, significand meaningless
3257     category = fcInfinity;
3258   } else if (myexponent==0xff && mysignificand!=0) {
3259     // sign, exponent, significand meaningless
3260     category = fcNaN;
3261     *significandParts() = mysignificand;
3262   } else {
3263     category = fcNormal;
3264     exponent = myexponent - 127;  //bias
3265     *significandParts() = mysignificand;
3266     if (myexponent==0)    // denormal
3267       exponent = -126;
3268     else
3269       *significandParts() |= 0x800000; // integer bit
3270   }
3271 }
3272 
initFromHalfAPInt(const APInt & api)3273 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3274   assert(api.getBitWidth()==16);
3275   uint32_t i = (uint32_t)*api.getRawData();
3276   uint32_t myexponent = (i >> 10) & 0x1f;
3277   uint32_t mysignificand = i & 0x3ff;
3278 
3279   initialize(&semIEEEhalf);
3280   assert(partCount()==1);
3281 
3282   sign = i >> 15;
3283   if (myexponent==0 && mysignificand==0) {
3284     // exponent, significand meaningless
3285     category = fcZero;
3286   } else if (myexponent==0x1f && mysignificand==0) {
3287     // exponent, significand meaningless
3288     category = fcInfinity;
3289   } else if (myexponent==0x1f && mysignificand!=0) {
3290     // sign, exponent, significand meaningless
3291     category = fcNaN;
3292     *significandParts() = mysignificand;
3293   } else {
3294     category = fcNormal;
3295     exponent = myexponent - 15;  //bias
3296     *significandParts() = mysignificand;
3297     if (myexponent==0)    // denormal
3298       exponent = -14;
3299     else
3300       *significandParts() |= 0x400; // integer bit
3301   }
3302 }
3303 
3304 /// Treat api as containing the bits of a floating point number.  Currently
3305 /// we infer the floating point type from the size of the APInt.  The
3306 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3307 /// when the size is anything else).
initFromAPInt(const fltSemantics * Sem,const APInt & api)3308 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3309   if (Sem == &semIEEEhalf)
3310     return initFromHalfAPInt(api);
3311   if (Sem == &semIEEEsingle)
3312     return initFromFloatAPInt(api);
3313   if (Sem == &semIEEEdouble)
3314     return initFromDoubleAPInt(api);
3315   if (Sem == &semX87DoubleExtended)
3316     return initFromF80LongDoubleAPInt(api);
3317   if (Sem == &semIEEEquad)
3318     return initFromQuadrupleAPInt(api);
3319   if (Sem == &semPPCDoubleDoubleLegacy)
3320     return initFromPPCDoubleDoubleAPInt(api);
3321 
3322   llvm_unreachable(nullptr);
3323 }
3324 
3325 /// Make this number the largest magnitude normal number in the given
3326 /// semantics.
makeLargest(bool Negative)3327 void IEEEFloat::makeLargest(bool Negative) {
3328   // We want (in interchange format):
3329   //   sign = {Negative}
3330   //   exponent = 1..10
3331   //   significand = 1..1
3332   category = fcNormal;
3333   sign = Negative;
3334   exponent = semantics->maxExponent;
3335 
3336   // Use memset to set all but the highest integerPart to all ones.
3337   integerPart *significand = significandParts();
3338   unsigned PartCount = partCount();
3339   memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3340 
3341   // Set the high integerPart especially setting all unused top bits for
3342   // internal consistency.
3343   const unsigned NumUnusedHighBits =
3344     PartCount*integerPartWidth - semantics->precision;
3345   significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3346                                    ? (~integerPart(0) >> NumUnusedHighBits)
3347                                    : 0;
3348 }
3349 
3350 /// Make this number the smallest magnitude denormal number in the given
3351 /// semantics.
makeSmallest(bool Negative)3352 void IEEEFloat::makeSmallest(bool Negative) {
3353   // We want (in interchange format):
3354   //   sign = {Negative}
3355   //   exponent = 0..0
3356   //   significand = 0..01
3357   category = fcNormal;
3358   sign = Negative;
3359   exponent = semantics->minExponent;
3360   APInt::tcSet(significandParts(), 1, partCount());
3361 }
3362 
makeSmallestNormalized(bool Negative)3363 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3364   // We want (in interchange format):
3365   //   sign = {Negative}
3366   //   exponent = 0..0
3367   //   significand = 10..0
3368 
3369   category = fcNormal;
3370   zeroSignificand();
3371   sign = Negative;
3372   exponent = semantics->minExponent;
3373   significandParts()[partCountForBits(semantics->precision) - 1] |=
3374       (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth));
3375 }
3376 
IEEEFloat(const fltSemantics & Sem,const APInt & API)3377 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3378   initFromAPInt(&Sem, API);
3379 }
3380 
IEEEFloat(float f)3381 IEEEFloat::IEEEFloat(float f) {
3382   initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3383 }
3384 
IEEEFloat(double d)3385 IEEEFloat::IEEEFloat(double d) {
3386   initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3387 }
3388 
3389 namespace {
append(SmallVectorImpl<char> & Buffer,StringRef Str)3390   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3391     Buffer.append(Str.begin(), Str.end());
3392   }
3393 
3394   /// Removes data from the given significand until it is no more
3395   /// precise than is required for the desired precision.
AdjustToPrecision(APInt & significand,int & exp,unsigned FormatPrecision)3396   void AdjustToPrecision(APInt &significand,
3397                          int &exp, unsigned FormatPrecision) {
3398     unsigned bits = significand.getActiveBits();
3399 
3400     // 196/59 is a very slight overestimate of lg_2(10).
3401     unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3402 
3403     if (bits <= bitsRequired) return;
3404 
3405     unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3406     if (!tensRemovable) return;
3407 
3408     exp += tensRemovable;
3409 
3410     APInt divisor(significand.getBitWidth(), 1);
3411     APInt powten(significand.getBitWidth(), 10);
3412     while (true) {
3413       if (tensRemovable & 1)
3414         divisor *= powten;
3415       tensRemovable >>= 1;
3416       if (!tensRemovable) break;
3417       powten *= powten;
3418     }
3419 
3420     significand = significand.udiv(divisor);
3421 
3422     // Truncate the significand down to its active bit count.
3423     significand = significand.trunc(significand.getActiveBits());
3424   }
3425 
3426 
AdjustToPrecision(SmallVectorImpl<char> & buffer,int & exp,unsigned FormatPrecision)3427   void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3428                          int &exp, unsigned FormatPrecision) {
3429     unsigned N = buffer.size();
3430     if (N <= FormatPrecision) return;
3431 
3432     // The most significant figures are the last ones in the buffer.
3433     unsigned FirstSignificant = N - FormatPrecision;
3434 
3435     // Round.
3436     // FIXME: this probably shouldn't use 'round half up'.
3437 
3438     // Rounding down is just a truncation, except we also want to drop
3439     // trailing zeros from the new result.
3440     if (buffer[FirstSignificant - 1] < '5') {
3441       while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3442         FirstSignificant++;
3443 
3444       exp += FirstSignificant;
3445       buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3446       return;
3447     }
3448 
3449     // Rounding up requires a decimal add-with-carry.  If we continue
3450     // the carry, the newly-introduced zeros will just be truncated.
3451     for (unsigned I = FirstSignificant; I != N; ++I) {
3452       if (buffer[I] == '9') {
3453         FirstSignificant++;
3454       } else {
3455         buffer[I]++;
3456         break;
3457       }
3458     }
3459 
3460     // If we carried through, we have exactly one digit of precision.
3461     if (FirstSignificant == N) {
3462       exp += FirstSignificant;
3463       buffer.clear();
3464       buffer.push_back('1');
3465       return;
3466     }
3467 
3468     exp += FirstSignificant;
3469     buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3470   }
3471 }
3472 
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const3473 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
3474                          unsigned FormatMaxPadding, bool TruncateZero) const {
3475   switch (category) {
3476   case fcInfinity:
3477     if (isNegative())
3478       return append(Str, "-Inf");
3479     else
3480       return append(Str, "+Inf");
3481 
3482   case fcNaN: return append(Str, "NaN");
3483 
3484   case fcZero:
3485     if (isNegative())
3486       Str.push_back('-');
3487 
3488     if (!FormatMaxPadding) {
3489       if (TruncateZero)
3490         append(Str, "0.0E+0");
3491       else {
3492         append(Str, "0.0");
3493         if (FormatPrecision > 1)
3494           Str.append(FormatPrecision - 1, '0');
3495         append(Str, "e+00");
3496       }
3497     } else
3498       Str.push_back('0');
3499     return;
3500 
3501   case fcNormal:
3502     break;
3503   }
3504 
3505   if (isNegative())
3506     Str.push_back('-');
3507 
3508   // Decompose the number into an APInt and an exponent.
3509   int exp = exponent - ((int) semantics->precision - 1);
3510   APInt significand(semantics->precision,
3511                     makeArrayRef(significandParts(),
3512                                  partCountForBits(semantics->precision)));
3513 
3514   // Set FormatPrecision if zero.  We want to do this before we
3515   // truncate trailing zeros, as those are part of the precision.
3516   if (!FormatPrecision) {
3517     // We use enough digits so the number can be round-tripped back to an
3518     // APFloat. The formula comes from "How to Print Floating-Point Numbers
3519     // Accurately" by Steele and White.
3520     // FIXME: Using a formula based purely on the precision is conservative;
3521     // we can print fewer digits depending on the actual value being printed.
3522 
3523     // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3524     FormatPrecision = 2 + semantics->precision * 59 / 196;
3525   }
3526 
3527   // Ignore trailing binary zeros.
3528   int trailingZeros = significand.countTrailingZeros();
3529   exp += trailingZeros;
3530   significand.lshrInPlace(trailingZeros);
3531 
3532   // Change the exponent from 2^e to 10^e.
3533   if (exp == 0) {
3534     // Nothing to do.
3535   } else if (exp > 0) {
3536     // Just shift left.
3537     significand = significand.zext(semantics->precision + exp);
3538     significand <<= exp;
3539     exp = 0;
3540   } else { /* exp < 0 */
3541     int texp = -exp;
3542 
3543     // We transform this using the identity:
3544     //   (N)(2^-e) == (N)(5^e)(10^-e)
3545     // This means we have to multiply N (the significand) by 5^e.
3546     // To avoid overflow, we have to operate on numbers large
3547     // enough to store N * 5^e:
3548     //   log2(N * 5^e) == log2(N) + e * log2(5)
3549     //                 <= semantics->precision + e * 137 / 59
3550     //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3551 
3552     unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3553 
3554     // Multiply significand by 5^e.
3555     //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3556     significand = significand.zext(precision);
3557     APInt five_to_the_i(precision, 5);
3558     while (true) {
3559       if (texp & 1) significand *= five_to_the_i;
3560 
3561       texp >>= 1;
3562       if (!texp) break;
3563       five_to_the_i *= five_to_the_i;
3564     }
3565   }
3566 
3567   AdjustToPrecision(significand, exp, FormatPrecision);
3568 
3569   SmallVector<char, 256> buffer;
3570 
3571   // Fill the buffer.
3572   unsigned precision = significand.getBitWidth();
3573   APInt ten(precision, 10);
3574   APInt digit(precision, 0);
3575 
3576   bool inTrail = true;
3577   while (significand != 0) {
3578     // digit <- significand % 10
3579     // significand <- significand / 10
3580     APInt::udivrem(significand, ten, significand, digit);
3581 
3582     unsigned d = digit.getZExtValue();
3583 
3584     // Drop trailing zeros.
3585     if (inTrail && !d) exp++;
3586     else {
3587       buffer.push_back((char) ('0' + d));
3588       inTrail = false;
3589     }
3590   }
3591 
3592   assert(!buffer.empty() && "no characters in buffer!");
3593 
3594   // Drop down to FormatPrecision.
3595   // TODO: don't do more precise calculations above than are required.
3596   AdjustToPrecision(buffer, exp, FormatPrecision);
3597 
3598   unsigned NDigits = buffer.size();
3599 
3600   // Check whether we should use scientific notation.
3601   bool FormatScientific;
3602   if (!FormatMaxPadding)
3603     FormatScientific = true;
3604   else {
3605     if (exp >= 0) {
3606       // 765e3 --> 765000
3607       //              ^^^
3608       // But we shouldn't make the number look more precise than it is.
3609       FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3610                           NDigits + (unsigned) exp > FormatPrecision);
3611     } else {
3612       // Power of the most significant digit.
3613       int MSD = exp + (int) (NDigits - 1);
3614       if (MSD >= 0) {
3615         // 765e-2 == 7.65
3616         FormatScientific = false;
3617       } else {
3618         // 765e-5 == 0.00765
3619         //           ^ ^^
3620         FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3621       }
3622     }
3623   }
3624 
3625   // Scientific formatting is pretty straightforward.
3626   if (FormatScientific) {
3627     exp += (NDigits - 1);
3628 
3629     Str.push_back(buffer[NDigits-1]);
3630     Str.push_back('.');
3631     if (NDigits == 1 && TruncateZero)
3632       Str.push_back('0');
3633     else
3634       for (unsigned I = 1; I != NDigits; ++I)
3635         Str.push_back(buffer[NDigits-1-I]);
3636     // Fill with zeros up to FormatPrecision.
3637     if (!TruncateZero && FormatPrecision > NDigits - 1)
3638       Str.append(FormatPrecision - NDigits + 1, '0');
3639     // For !TruncateZero we use lower 'e'.
3640     Str.push_back(TruncateZero ? 'E' : 'e');
3641 
3642     Str.push_back(exp >= 0 ? '+' : '-');
3643     if (exp < 0) exp = -exp;
3644     SmallVector<char, 6> expbuf;
3645     do {
3646       expbuf.push_back((char) ('0' + (exp % 10)));
3647       exp /= 10;
3648     } while (exp);
3649     // Exponent always at least two digits if we do not truncate zeros.
3650     if (!TruncateZero && expbuf.size() < 2)
3651       expbuf.push_back('0');
3652     for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3653       Str.push_back(expbuf[E-1-I]);
3654     return;
3655   }
3656 
3657   // Non-scientific, positive exponents.
3658   if (exp >= 0) {
3659     for (unsigned I = 0; I != NDigits; ++I)
3660       Str.push_back(buffer[NDigits-1-I]);
3661     for (unsigned I = 0; I != (unsigned) exp; ++I)
3662       Str.push_back('0');
3663     return;
3664   }
3665 
3666   // Non-scientific, negative exponents.
3667 
3668   // The number of digits to the left of the decimal point.
3669   int NWholeDigits = exp + (int) NDigits;
3670 
3671   unsigned I = 0;
3672   if (NWholeDigits > 0) {
3673     for (; I != (unsigned) NWholeDigits; ++I)
3674       Str.push_back(buffer[NDigits-I-1]);
3675     Str.push_back('.');
3676   } else {
3677     unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3678 
3679     Str.push_back('0');
3680     Str.push_back('.');
3681     for (unsigned Z = 1; Z != NZeros; ++Z)
3682       Str.push_back('0');
3683   }
3684 
3685   for (; I != NDigits; ++I)
3686     Str.push_back(buffer[NDigits-I-1]);
3687 }
3688 
getExactInverse(APFloat * inv) const3689 bool IEEEFloat::getExactInverse(APFloat *inv) const {
3690   // Special floats and denormals have no exact inverse.
3691   if (!isFiniteNonZero())
3692     return false;
3693 
3694   // Check that the number is a power of two by making sure that only the
3695   // integer bit is set in the significand.
3696   if (significandLSB() != semantics->precision - 1)
3697     return false;
3698 
3699   // Get the inverse.
3700   IEEEFloat reciprocal(*semantics, 1ULL);
3701   if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3702     return false;
3703 
3704   // Avoid multiplication with a denormal, it is not safe on all platforms and
3705   // may be slower than a normal division.
3706   if (reciprocal.isDenormal())
3707     return false;
3708 
3709   assert(reciprocal.isFiniteNonZero() &&
3710          reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3711 
3712   if (inv)
3713     *inv = APFloat(reciprocal, *semantics);
3714 
3715   return true;
3716 }
3717 
isSignaling() const3718 bool IEEEFloat::isSignaling() const {
3719   if (!isNaN())
3720     return false;
3721 
3722   // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3723   // first bit of the trailing significand being 0.
3724   return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3725 }
3726 
3727 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3728 ///
3729 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3730 /// appropriate sign switching before/after the computation.
next(bool nextDown)3731 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
3732   // If we are performing nextDown, swap sign so we have -x.
3733   if (nextDown)
3734     changeSign();
3735 
3736   // Compute nextUp(x)
3737   opStatus result = opOK;
3738 
3739   // Handle each float category separately.
3740   switch (category) {
3741   case fcInfinity:
3742     // nextUp(+inf) = +inf
3743     if (!isNegative())
3744       break;
3745     // nextUp(-inf) = -getLargest()
3746     makeLargest(true);
3747     break;
3748   case fcNaN:
3749     // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3750     // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3751     //                     change the payload.
3752     if (isSignaling()) {
3753       result = opInvalidOp;
3754       // For consistency, propagate the sign of the sNaN to the qNaN.
3755       makeNaN(false, isNegative(), nullptr);
3756     }
3757     break;
3758   case fcZero:
3759     // nextUp(pm 0) = +getSmallest()
3760     makeSmallest(false);
3761     break;
3762   case fcNormal:
3763     // nextUp(-getSmallest()) = -0
3764     if (isSmallest() && isNegative()) {
3765       APInt::tcSet(significandParts(), 0, partCount());
3766       category = fcZero;
3767       exponent = 0;
3768       break;
3769     }
3770 
3771     // nextUp(getLargest()) == INFINITY
3772     if (isLargest() && !isNegative()) {
3773       APInt::tcSet(significandParts(), 0, partCount());
3774       category = fcInfinity;
3775       exponent = semantics->maxExponent + 1;
3776       break;
3777     }
3778 
3779     // nextUp(normal) == normal + inc.
3780     if (isNegative()) {
3781       // If we are negative, we need to decrement the significand.
3782 
3783       // We only cross a binade boundary that requires adjusting the exponent
3784       // if:
3785       //   1. exponent != semantics->minExponent. This implies we are not in the
3786       //   smallest binade or are dealing with denormals.
3787       //   2. Our significand excluding the integral bit is all zeros.
3788       bool WillCrossBinadeBoundary =
3789         exponent != semantics->minExponent && isSignificandAllZeros();
3790 
3791       // Decrement the significand.
3792       //
3793       // We always do this since:
3794       //   1. If we are dealing with a non-binade decrement, by definition we
3795       //   just decrement the significand.
3796       //   2. If we are dealing with a normal -> normal binade decrement, since
3797       //   we have an explicit integral bit the fact that all bits but the
3798       //   integral bit are zero implies that subtracting one will yield a
3799       //   significand with 0 integral bit and 1 in all other spots. Thus we
3800       //   must just adjust the exponent and set the integral bit to 1.
3801       //   3. If we are dealing with a normal -> denormal binade decrement,
3802       //   since we set the integral bit to 0 when we represent denormals, we
3803       //   just decrement the significand.
3804       integerPart *Parts = significandParts();
3805       APInt::tcDecrement(Parts, partCount());
3806 
3807       if (WillCrossBinadeBoundary) {
3808         // Our result is a normal number. Do the following:
3809         // 1. Set the integral bit to 1.
3810         // 2. Decrement the exponent.
3811         APInt::tcSetBit(Parts, semantics->precision - 1);
3812         exponent--;
3813       }
3814     } else {
3815       // If we are positive, we need to increment the significand.
3816 
3817       // We only cross a binade boundary that requires adjusting the exponent if
3818       // the input is not a denormal and all of said input's significand bits
3819       // are set. If all of said conditions are true: clear the significand, set
3820       // the integral bit to 1, and increment the exponent. If we have a
3821       // denormal always increment since moving denormals and the numbers in the
3822       // smallest normal binade have the same exponent in our representation.
3823       bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3824 
3825       if (WillCrossBinadeBoundary) {
3826         integerPart *Parts = significandParts();
3827         APInt::tcSet(Parts, 0, partCount());
3828         APInt::tcSetBit(Parts, semantics->precision - 1);
3829         assert(exponent != semantics->maxExponent &&
3830                "We can not increment an exponent beyond the maxExponent allowed"
3831                " by the given floating point semantics.");
3832         exponent++;
3833       } else {
3834         incrementSignificand();
3835       }
3836     }
3837     break;
3838   }
3839 
3840   // If we are performing nextDown, swap sign so we have -nextUp(-x)
3841   if (nextDown)
3842     changeSign();
3843 
3844   return result;
3845 }
3846 
makeInf(bool Negative)3847 void IEEEFloat::makeInf(bool Negative) {
3848   category = fcInfinity;
3849   sign = Negative;
3850   exponent = semantics->maxExponent + 1;
3851   APInt::tcSet(significandParts(), 0, partCount());
3852 }
3853 
makeZero(bool Negative)3854 void IEEEFloat::makeZero(bool Negative) {
3855   category = fcZero;
3856   sign = Negative;
3857   exponent = semantics->minExponent-1;
3858   APInt::tcSet(significandParts(), 0, partCount());
3859 }
3860 
makeQuiet()3861 void IEEEFloat::makeQuiet() {
3862   assert(isNaN());
3863   APInt::tcSetBit(significandParts(), semantics->precision - 2);
3864 }
3865 
ilogb(const IEEEFloat & Arg)3866 int ilogb(const IEEEFloat &Arg) {
3867   if (Arg.isNaN())
3868     return IEEEFloat::IEK_NaN;
3869   if (Arg.isZero())
3870     return IEEEFloat::IEK_Zero;
3871   if (Arg.isInfinity())
3872     return IEEEFloat::IEK_Inf;
3873   if (!Arg.isDenormal())
3874     return Arg.exponent;
3875 
3876   IEEEFloat Normalized(Arg);
3877   int SignificandBits = Arg.getSemantics().precision - 1;
3878 
3879   Normalized.exponent += SignificandBits;
3880   Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
3881   return Normalized.exponent - SignificandBits;
3882 }
3883 
scalbn(IEEEFloat X,int Exp,IEEEFloat::roundingMode RoundingMode)3884 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
3885   auto MaxExp = X.getSemantics().maxExponent;
3886   auto MinExp = X.getSemantics().minExponent;
3887 
3888   // If Exp is wildly out-of-scale, simply adding it to X.exponent will
3889   // overflow; clamp it to a safe range before adding, but ensure that the range
3890   // is large enough that the clamp does not change the result. The range we
3891   // need to support is the difference between the largest possible exponent and
3892   // the normalized exponent of half the smallest denormal.
3893 
3894   int SignificandBits = X.getSemantics().precision - 1;
3895   int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
3896 
3897   // Clamp to one past the range ends to let normalize handle overlflow.
3898   X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
3899   X.normalize(RoundingMode, lfExactlyZero);
3900   if (X.isNaN())
3901     X.makeQuiet();
3902   return X;
3903 }
3904 
frexp(const IEEEFloat & Val,int & Exp,IEEEFloat::roundingMode RM)3905 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
3906   Exp = ilogb(Val);
3907 
3908   // Quiet signalling nans.
3909   if (Exp == IEEEFloat::IEK_NaN) {
3910     IEEEFloat Quiet(Val);
3911     Quiet.makeQuiet();
3912     return Quiet;
3913   }
3914 
3915   if (Exp == IEEEFloat::IEK_Inf)
3916     return Val;
3917 
3918   // 1 is added because frexp is defined to return a normalized fraction in
3919   // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
3920   Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
3921   return scalbn(Val, -Exp, RM);
3922 }
3923 
DoubleAPFloat(const fltSemantics & S)3924 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
3925     : Semantics(&S),
3926       Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
3927   assert(Semantics == &semPPCDoubleDouble);
3928 }
3929 
DoubleAPFloat(const fltSemantics & S,uninitializedTag)3930 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
3931     : Semantics(&S),
3932       Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
3933                             APFloat(semIEEEdouble, uninitialized)}) {
3934   assert(Semantics == &semPPCDoubleDouble);
3935 }
3936 
DoubleAPFloat(const fltSemantics & S,integerPart I)3937 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
3938     : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
3939                                            APFloat(semIEEEdouble)}) {
3940   assert(Semantics == &semPPCDoubleDouble);
3941 }
3942 
DoubleAPFloat(const fltSemantics & S,const APInt & I)3943 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
3944     : Semantics(&S),
3945       Floats(new APFloat[2]{
3946           APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
3947           APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
3948   assert(Semantics == &semPPCDoubleDouble);
3949 }
3950 
DoubleAPFloat(const fltSemantics & S,APFloat && First,APFloat && Second)3951 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
3952                              APFloat &&Second)
3953     : Semantics(&S),
3954       Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
3955   assert(Semantics == &semPPCDoubleDouble);
3956   assert(&Floats[0].getSemantics() == &semIEEEdouble);
3957   assert(&Floats[1].getSemantics() == &semIEEEdouble);
3958 }
3959 
DoubleAPFloat(const DoubleAPFloat & RHS)3960 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
3961     : Semantics(RHS.Semantics),
3962       Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
3963                                          APFloat(RHS.Floats[1])}
3964                         : nullptr) {
3965   assert(Semantics == &semPPCDoubleDouble);
3966 }
3967 
DoubleAPFloat(DoubleAPFloat && RHS)3968 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
3969     : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
3970   RHS.Semantics = &semBogus;
3971   assert(Semantics == &semPPCDoubleDouble);
3972 }
3973 
operator =(const DoubleAPFloat & RHS)3974 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
3975   if (Semantics == RHS.Semantics && RHS.Floats) {
3976     Floats[0] = RHS.Floats[0];
3977     Floats[1] = RHS.Floats[1];
3978   } else if (this != &RHS) {
3979     this->~DoubleAPFloat();
3980     new (this) DoubleAPFloat(RHS);
3981   }
3982   return *this;
3983 }
3984 
3985 // Implement addition, subtraction, multiplication and division based on:
3986 // "Software for Doubled-Precision Floating-Point Computations",
3987 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
addImpl(const APFloat & a,const APFloat & aa,const APFloat & c,const APFloat & cc,roundingMode RM)3988 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
3989                                          const APFloat &c, const APFloat &cc,
3990                                          roundingMode RM) {
3991   int Status = opOK;
3992   APFloat z = a;
3993   Status |= z.add(c, RM);
3994   if (!z.isFinite()) {
3995     if (!z.isInfinity()) {
3996       Floats[0] = std::move(z);
3997       Floats[1].makeZero(/* Neg = */ false);
3998       return (opStatus)Status;
3999     }
4000     Status = opOK;
4001     auto AComparedToC = a.compareAbsoluteValue(c);
4002     z = cc;
4003     Status |= z.add(aa, RM);
4004     if (AComparedToC == APFloat::cmpGreaterThan) {
4005       // z = cc + aa + c + a;
4006       Status |= z.add(c, RM);
4007       Status |= z.add(a, RM);
4008     } else {
4009       // z = cc + aa + a + c;
4010       Status |= z.add(a, RM);
4011       Status |= z.add(c, RM);
4012     }
4013     if (!z.isFinite()) {
4014       Floats[0] = std::move(z);
4015       Floats[1].makeZero(/* Neg = */ false);
4016       return (opStatus)Status;
4017     }
4018     Floats[0] = z;
4019     APFloat zz = aa;
4020     Status |= zz.add(cc, RM);
4021     if (AComparedToC == APFloat::cmpGreaterThan) {
4022       // Floats[1] = a - z + c + zz;
4023       Floats[1] = a;
4024       Status |= Floats[1].subtract(z, RM);
4025       Status |= Floats[1].add(c, RM);
4026       Status |= Floats[1].add(zz, RM);
4027     } else {
4028       // Floats[1] = c - z + a + zz;
4029       Floats[1] = c;
4030       Status |= Floats[1].subtract(z, RM);
4031       Status |= Floats[1].add(a, RM);
4032       Status |= Floats[1].add(zz, RM);
4033     }
4034   } else {
4035     // q = a - z;
4036     APFloat q = a;
4037     Status |= q.subtract(z, RM);
4038 
4039     // zz = q + c + (a - (q + z)) + aa + cc;
4040     // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4041     auto zz = q;
4042     Status |= zz.add(c, RM);
4043     Status |= q.add(z, RM);
4044     Status |= q.subtract(a, RM);
4045     q.changeSign();
4046     Status |= zz.add(q, RM);
4047     Status |= zz.add(aa, RM);
4048     Status |= zz.add(cc, RM);
4049     if (zz.isZero() && !zz.isNegative()) {
4050       Floats[0] = std::move(z);
4051       Floats[1].makeZero(/* Neg = */ false);
4052       return opOK;
4053     }
4054     Floats[0] = z;
4055     Status |= Floats[0].add(zz, RM);
4056     if (!Floats[0].isFinite()) {
4057       Floats[1].makeZero(/* Neg = */ false);
4058       return (opStatus)Status;
4059     }
4060     Floats[1] = std::move(z);
4061     Status |= Floats[1].subtract(Floats[0], RM);
4062     Status |= Floats[1].add(zz, RM);
4063   }
4064   return (opStatus)Status;
4065 }
4066 
addWithSpecial(const DoubleAPFloat & LHS,const DoubleAPFloat & RHS,DoubleAPFloat & Out,roundingMode RM)4067 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4068                                                 const DoubleAPFloat &RHS,
4069                                                 DoubleAPFloat &Out,
4070                                                 roundingMode RM) {
4071   if (LHS.getCategory() == fcNaN) {
4072     Out = LHS;
4073     return opOK;
4074   }
4075   if (RHS.getCategory() == fcNaN) {
4076     Out = RHS;
4077     return opOK;
4078   }
4079   if (LHS.getCategory() == fcZero) {
4080     Out = RHS;
4081     return opOK;
4082   }
4083   if (RHS.getCategory() == fcZero) {
4084     Out = LHS;
4085     return opOK;
4086   }
4087   if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4088       LHS.isNegative() != RHS.isNegative()) {
4089     Out.makeNaN(false, Out.isNegative(), nullptr);
4090     return opInvalidOp;
4091   }
4092   if (LHS.getCategory() == fcInfinity) {
4093     Out = LHS;
4094     return opOK;
4095   }
4096   if (RHS.getCategory() == fcInfinity) {
4097     Out = RHS;
4098     return opOK;
4099   }
4100   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4101 
4102   APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4103       CC(RHS.Floats[1]);
4104   assert(&A.getSemantics() == &semIEEEdouble);
4105   assert(&AA.getSemantics() == &semIEEEdouble);
4106   assert(&C.getSemantics() == &semIEEEdouble);
4107   assert(&CC.getSemantics() == &semIEEEdouble);
4108   assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4109   assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4110   return Out.addImpl(A, AA, C, CC, RM);
4111 }
4112 
add(const DoubleAPFloat & RHS,roundingMode RM)4113 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4114                                      roundingMode RM) {
4115   return addWithSpecial(*this, RHS, *this, RM);
4116 }
4117 
subtract(const DoubleAPFloat & RHS,roundingMode RM)4118 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4119                                           roundingMode RM) {
4120   changeSign();
4121   auto Ret = add(RHS, RM);
4122   changeSign();
4123   return Ret;
4124 }
4125 
multiply(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4126 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4127                                           APFloat::roundingMode RM) {
4128   const auto &LHS = *this;
4129   auto &Out = *this;
4130   /* Interesting observation: For special categories, finding the lowest
4131      common ancestor of the following layered graph gives the correct
4132      return category:
4133 
4134         NaN
4135        /   \
4136      Zero  Inf
4137        \   /
4138        Normal
4139 
4140      e.g. NaN * NaN = NaN
4141           Zero * Inf = NaN
4142           Normal * Zero = Zero
4143           Normal * Inf = Inf
4144   */
4145   if (LHS.getCategory() == fcNaN) {
4146     Out = LHS;
4147     return opOK;
4148   }
4149   if (RHS.getCategory() == fcNaN) {
4150     Out = RHS;
4151     return opOK;
4152   }
4153   if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4154       (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4155     Out.makeNaN(false, false, nullptr);
4156     return opOK;
4157   }
4158   if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4159     Out = LHS;
4160     return opOK;
4161   }
4162   if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4163     Out = RHS;
4164     return opOK;
4165   }
4166   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4167          "Special cases not handled exhaustively");
4168 
4169   int Status = opOK;
4170   APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4171   // t = a * c
4172   APFloat T = A;
4173   Status |= T.multiply(C, RM);
4174   if (!T.isFiniteNonZero()) {
4175     Floats[0] = T;
4176     Floats[1].makeZero(/* Neg = */ false);
4177     return (opStatus)Status;
4178   }
4179 
4180   // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4181   APFloat Tau = A;
4182   T.changeSign();
4183   Status |= Tau.fusedMultiplyAdd(C, T, RM);
4184   T.changeSign();
4185   {
4186     // v = a * d
4187     APFloat V = A;
4188     Status |= V.multiply(D, RM);
4189     // w = b * c
4190     APFloat W = B;
4191     Status |= W.multiply(C, RM);
4192     Status |= V.add(W, RM);
4193     // tau += v + w
4194     Status |= Tau.add(V, RM);
4195   }
4196   // u = t + tau
4197   APFloat U = T;
4198   Status |= U.add(Tau, RM);
4199 
4200   Floats[0] = U;
4201   if (!U.isFinite()) {
4202     Floats[1].makeZero(/* Neg = */ false);
4203   } else {
4204     // Floats[1] = (t - u) + tau
4205     Status |= T.subtract(U, RM);
4206     Status |= T.add(Tau, RM);
4207     Floats[1] = T;
4208   }
4209   return (opStatus)Status;
4210 }
4211 
divide(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4212 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4213                                         APFloat::roundingMode RM) {
4214   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4215   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4216   auto Ret =
4217       Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4218   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4219   return Ret;
4220 }
4221 
remainder(const DoubleAPFloat & RHS)4222 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4223   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4224   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4225   auto Ret =
4226       Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4227   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4228   return Ret;
4229 }
4230 
mod(const DoubleAPFloat & RHS)4231 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4232   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4233   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4234   auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4235   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4236   return Ret;
4237 }
4238 
4239 APFloat::opStatus
fusedMultiplyAdd(const DoubleAPFloat & Multiplicand,const DoubleAPFloat & Addend,APFloat::roundingMode RM)4240 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4241                                 const DoubleAPFloat &Addend,
4242                                 APFloat::roundingMode RM) {
4243   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4244   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4245   auto Ret = Tmp.fusedMultiplyAdd(
4246       APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4247       APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4248   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4249   return Ret;
4250 }
4251 
roundToIntegral(APFloat::roundingMode RM)4252 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4253   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4254   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4255   auto Ret = Tmp.roundToIntegral(RM);
4256   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4257   return Ret;
4258 }
4259 
changeSign()4260 void DoubleAPFloat::changeSign() {
4261   Floats[0].changeSign();
4262   Floats[1].changeSign();
4263 }
4264 
4265 APFloat::cmpResult
compareAbsoluteValue(const DoubleAPFloat & RHS) const4266 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4267   auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4268   if (Result != cmpEqual)
4269     return Result;
4270   Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4271   if (Result == cmpLessThan || Result == cmpGreaterThan) {
4272     auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4273     auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4274     if (Against && !RHSAgainst)
4275       return cmpLessThan;
4276     if (!Against && RHSAgainst)
4277       return cmpGreaterThan;
4278     if (!Against && !RHSAgainst)
4279       return Result;
4280     if (Against && RHSAgainst)
4281       return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4282   }
4283   return Result;
4284 }
4285 
getCategory() const4286 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4287   return Floats[0].getCategory();
4288 }
4289 
isNegative() const4290 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4291 
makeInf(bool Neg)4292 void DoubleAPFloat::makeInf(bool Neg) {
4293   Floats[0].makeInf(Neg);
4294   Floats[1].makeZero(/* Neg = */ false);
4295 }
4296 
makeZero(bool Neg)4297 void DoubleAPFloat::makeZero(bool Neg) {
4298   Floats[0].makeZero(Neg);
4299   Floats[1].makeZero(/* Neg = */ false);
4300 }
4301 
makeLargest(bool Neg)4302 void DoubleAPFloat::makeLargest(bool Neg) {
4303   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4304   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4305   Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4306   if (Neg)
4307     changeSign();
4308 }
4309 
makeSmallest(bool Neg)4310 void DoubleAPFloat::makeSmallest(bool Neg) {
4311   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4312   Floats[0].makeSmallest(Neg);
4313   Floats[1].makeZero(/* Neg = */ false);
4314 }
4315 
makeSmallestNormalized(bool Neg)4316 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4317   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4318   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4319   if (Neg)
4320     Floats[0].changeSign();
4321   Floats[1].makeZero(/* Neg = */ false);
4322 }
4323 
makeNaN(bool SNaN,bool Neg,const APInt * fill)4324 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4325   Floats[0].makeNaN(SNaN, Neg, fill);
4326   Floats[1].makeZero(/* Neg = */ false);
4327 }
4328 
compare(const DoubleAPFloat & RHS) const4329 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4330   auto Result = Floats[0].compare(RHS.Floats[0]);
4331   // |Float[0]| > |Float[1]|
4332   if (Result == APFloat::cmpEqual)
4333     return Floats[1].compare(RHS.Floats[1]);
4334   return Result;
4335 }
4336 
bitwiseIsEqual(const DoubleAPFloat & RHS) const4337 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4338   return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4339          Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4340 }
4341 
hash_value(const DoubleAPFloat & Arg)4342 hash_code hash_value(const DoubleAPFloat &Arg) {
4343   if (Arg.Floats)
4344     return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4345   return hash_combine(Arg.Semantics);
4346 }
4347 
bitcastToAPInt() const4348 APInt DoubleAPFloat::bitcastToAPInt() const {
4349   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4350   uint64_t Data[] = {
4351       Floats[0].bitcastToAPInt().getRawData()[0],
4352       Floats[1].bitcastToAPInt().getRawData()[0],
4353   };
4354   return APInt(128, 2, Data);
4355 }
4356 
convertFromString(StringRef S,roundingMode RM)4357 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
4358                                                              roundingMode RM) {
4359   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4360   APFloat Tmp(semPPCDoubleDoubleLegacy);
4361   auto Ret = Tmp.convertFromString(S, RM);
4362   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4363   return Ret;
4364 }
4365 
next(bool nextDown)4366 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
4367   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4368   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4369   auto Ret = Tmp.next(nextDown);
4370   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4371   return Ret;
4372 }
4373 
4374 APFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> Input,unsigned int Width,bool IsSigned,roundingMode RM,bool * IsExact) const4375 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
4376                                 unsigned int Width, bool IsSigned,
4377                                 roundingMode RM, bool *IsExact) const {
4378   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4379   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4380       .convertToInteger(Input, Width, IsSigned, RM, IsExact);
4381 }
4382 
convertFromAPInt(const APInt & Input,bool IsSigned,roundingMode RM)4383 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
4384                                                   bool IsSigned,
4385                                                   roundingMode RM) {
4386   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4387   APFloat Tmp(semPPCDoubleDoubleLegacy);
4388   auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4389   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4390   return Ret;
4391 }
4392 
4393 APFloat::opStatus
convertFromSignExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)4394 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
4395                                               unsigned int InputSize,
4396                                               bool IsSigned, roundingMode RM) {
4397   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4398   APFloat Tmp(semPPCDoubleDoubleLegacy);
4399   auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4400   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4401   return Ret;
4402 }
4403 
4404 APFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)4405 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
4406                                               unsigned int InputSize,
4407                                               bool IsSigned, roundingMode RM) {
4408   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4409   APFloat Tmp(semPPCDoubleDoubleLegacy);
4410   auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
4411   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4412   return Ret;
4413 }
4414 
convertToHexString(char * DST,unsigned int HexDigits,bool UpperCase,roundingMode RM) const4415 unsigned int DoubleAPFloat::convertToHexString(char *DST,
4416                                                unsigned int HexDigits,
4417                                                bool UpperCase,
4418                                                roundingMode RM) const {
4419   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4420   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4421       .convertToHexString(DST, HexDigits, UpperCase, RM);
4422 }
4423 
isDenormal() const4424 bool DoubleAPFloat::isDenormal() const {
4425   return getCategory() == fcNormal &&
4426          (Floats[0].isDenormal() || Floats[1].isDenormal() ||
4427           // (double)(Hi + Lo) == Hi defines a normal number.
4428           Floats[0].compare(Floats[0] + Floats[1]) != cmpEqual);
4429 }
4430 
isSmallest() const4431 bool DoubleAPFloat::isSmallest() const {
4432   if (getCategory() != fcNormal)
4433     return false;
4434   DoubleAPFloat Tmp(*this);
4435   Tmp.makeSmallest(this->isNegative());
4436   return Tmp.compare(*this) == cmpEqual;
4437 }
4438 
isLargest() const4439 bool DoubleAPFloat::isLargest() const {
4440   if (getCategory() != fcNormal)
4441     return false;
4442   DoubleAPFloat Tmp(*this);
4443   Tmp.makeLargest(this->isNegative());
4444   return Tmp.compare(*this) == cmpEqual;
4445 }
4446 
isInteger() const4447 bool DoubleAPFloat::isInteger() const {
4448   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4449   return Floats[0].isInteger() && Floats[1].isInteger();
4450 }
4451 
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const4452 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
4453                              unsigned FormatPrecision,
4454                              unsigned FormatMaxPadding,
4455                              bool TruncateZero) const {
4456   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4457   APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4458       .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
4459 }
4460 
getExactInverse(APFloat * inv) const4461 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
4462   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4463   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4464   if (!inv)
4465     return Tmp.getExactInverse(nullptr);
4466   APFloat Inv(semPPCDoubleDoubleLegacy);
4467   auto Ret = Tmp.getExactInverse(&Inv);
4468   *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
4469   return Ret;
4470 }
4471 
scalbn(DoubleAPFloat Arg,int Exp,APFloat::roundingMode RM)4472 DoubleAPFloat scalbn(DoubleAPFloat Arg, int Exp, APFloat::roundingMode RM) {
4473   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4474   return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
4475                        scalbn(Arg.Floats[1], Exp, RM));
4476 }
4477 
frexp(const DoubleAPFloat & Arg,int & Exp,APFloat::roundingMode RM)4478 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
4479                     APFloat::roundingMode RM) {
4480   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4481   APFloat First = frexp(Arg.Floats[0], Exp, RM);
4482   APFloat Second = Arg.Floats[1];
4483   if (Arg.getCategory() == APFloat::fcNormal)
4484     Second = scalbn(Second, -Exp, RM);
4485   return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
4486 }
4487 
4488 } // End detail namespace
4489 
Storage(IEEEFloat F,const fltSemantics & Semantics)4490 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
4491   if (usesLayout<IEEEFloat>(Semantics)) {
4492     new (&IEEE) IEEEFloat(std::move(F));
4493     return;
4494   }
4495   if (usesLayout<DoubleAPFloat>(Semantics)) {
4496     const fltSemantics& S = F.getSemantics();
4497     new (&Double)
4498         DoubleAPFloat(Semantics, APFloat(std::move(F), S),
4499                       APFloat(semIEEEdouble));
4500     return;
4501   }
4502   llvm_unreachable("Unexpected semantics");
4503 }
4504 
convertFromString(StringRef Str,roundingMode RM)4505 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
4506                                                        roundingMode RM) {
4507   APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
4508 }
4509 
hash_value(const APFloat & Arg)4510 hash_code hash_value(const APFloat &Arg) {
4511   if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
4512     return hash_value(Arg.U.IEEE);
4513   if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
4514     return hash_value(Arg.U.Double);
4515   llvm_unreachable("Unexpected semantics");
4516 }
4517 
APFloat(const fltSemantics & Semantics,StringRef S)4518 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
4519     : APFloat(Semantics) {
4520   auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
4521   assert(StatusOrErr && "Invalid floating point representation");
4522   consumeError(StatusOrErr.takeError());
4523 }
4524 
convert(const fltSemantics & ToSemantics,roundingMode RM,bool * losesInfo)4525 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
4526                                    roundingMode RM, bool *losesInfo) {
4527   if (&getSemantics() == &ToSemantics) {
4528     *losesInfo = false;
4529     return opOK;
4530   }
4531   if (usesLayout<IEEEFloat>(getSemantics()) &&
4532       usesLayout<IEEEFloat>(ToSemantics))
4533     return U.IEEE.convert(ToSemantics, RM, losesInfo);
4534   if (usesLayout<IEEEFloat>(getSemantics()) &&
4535       usesLayout<DoubleAPFloat>(ToSemantics)) {
4536     assert(&ToSemantics == &semPPCDoubleDouble);
4537     auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
4538     *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
4539     return Ret;
4540   }
4541   if (usesLayout<DoubleAPFloat>(getSemantics()) &&
4542       usesLayout<IEEEFloat>(ToSemantics)) {
4543     auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
4544     *this = APFloat(std::move(getIEEE()), ToSemantics);
4545     return Ret;
4546   }
4547   llvm_unreachable("Unexpected semantics");
4548 }
4549 
getAllOnesValue(unsigned BitWidth,bool isIEEE)4550 APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) {
4551   if (isIEEE) {
4552     switch (BitWidth) {
4553     case 16:
4554       return APFloat(semIEEEhalf, APInt::getAllOnesValue(BitWidth));
4555     case 32:
4556       return APFloat(semIEEEsingle, APInt::getAllOnesValue(BitWidth));
4557     case 64:
4558       return APFloat(semIEEEdouble, APInt::getAllOnesValue(BitWidth));
4559     case 80:
4560       return APFloat(semX87DoubleExtended, APInt::getAllOnesValue(BitWidth));
4561     case 128:
4562       return APFloat(semIEEEquad, APInt::getAllOnesValue(BitWidth));
4563     default:
4564       llvm_unreachable("Unknown floating bit width");
4565     }
4566   } else {
4567     assert(BitWidth == 128);
4568     return APFloat(semPPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
4569   }
4570 }
4571 
print(raw_ostream & OS) const4572 void APFloat::print(raw_ostream &OS) const {
4573   SmallVector<char, 16> Buffer;
4574   toString(Buffer);
4575   OS << Buffer << "\n";
4576 }
4577 
4578 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump() const4579 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
4580 #endif
4581 
Profile(FoldingSetNodeID & NID) const4582 void APFloat::Profile(FoldingSetNodeID &NID) const {
4583   NID.Add(bitcastToAPInt());
4584 }
4585 
4586 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4587    an APSInt, whose initial bit-width and signed-ness are used to determine the
4588    precision of the conversion.
4589  */
convertToInteger(APSInt & result,roundingMode rounding_mode,bool * isExact) const4590 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
4591                                             roundingMode rounding_mode,
4592                                             bool *isExact) const {
4593   unsigned bitWidth = result.getBitWidth();
4594   SmallVector<uint64_t, 4> parts(result.getNumWords());
4595   opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
4596                                      rounding_mode, isExact);
4597   // Keeps the original signed-ness.
4598   result = APInt(bitWidth, parts);
4599   return status;
4600 }
4601 
4602 } // End llvm namespace
4603 
4604 #undef APFLOAT_DISPATCH_ON_SEMANTICS
4605