• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/MathExtras.h"
23 #include <cstring>
24 #include <limits.h>
25 
26 using namespace llvm;
27 
28 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
29 
30 /* Assumed in hexadecimal significand parsing, and conversion to
31    hexadecimal strings.  */
32 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
33 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
34 
35 namespace llvm {
36 
37   /* Represents floating point arithmetic semantics.  */
38   struct fltSemantics {
39     /* The largest E such that 2^E is representable; this matches the
40        definition of IEEE 754.  */
41     exponent_t maxExponent;
42 
43     /* The smallest E such that 2^E is a normalized number; this
44        matches the definition of IEEE 754.  */
45     exponent_t minExponent;
46 
47     /* Number of bits in the significand.  This includes the integer
48        bit.  */
49     unsigned int precision;
50   };
51 
52   const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 };
53   const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 };
54   const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 };
55   const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 };
56   const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 };
57   const fltSemantics APFloat::Bogus = { 0, 0, 0 };
58 
59   /* The PowerPC format consists of two doubles.  It does not map cleanly
60      onto the usual format above.  It is approximated using twice the
61      mantissa bits.  Note that for exponents near the double minimum,
62      we no longer can represent the full 106 mantissa bits, so those
63      will be treated as denormal numbers.
64 
65      FIXME: While this approximation is equivalent to what GCC uses for
66      compile-time arithmetic on PPC double-double numbers, it is not able
67      to represent all possible values held by a PPC double-double number,
68      for example: (long double) 1.0 + (long double) 0x1p-106
69      Should this be replaced by a full emulation of PPC double-double?  */
70   const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 };
71 
72   /* A tight upper bound on number of parts required to hold the value
73      pow(5, power) is
74 
75        power * 815 / (351 * integerPartWidth) + 1
76 
77      However, whilst the result may require only this many parts,
78      because we are multiplying two values to get it, the
79      multiplication may require an extra part with the excess part
80      being zero (consider the trivial case of 1 * 1, tcFullMultiply
81      requires two parts to hold the single-part result).  So we add an
82      extra one to guarantee enough space whilst multiplying.  */
83   const unsigned int maxExponent = 16383;
84   const unsigned int maxPrecision = 113;
85   const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
86   const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
87                                                 / (351 * integerPartWidth));
88 }
89 
90 /* A bunch of private, handy routines.  */
91 
92 static inline unsigned int
partCountForBits(unsigned int bits)93 partCountForBits(unsigned int bits)
94 {
95   return ((bits) + integerPartWidth - 1) / integerPartWidth;
96 }
97 
98 /* Returns 0U-9U.  Return values >= 10U are not digits.  */
99 static inline unsigned int
decDigitValue(unsigned int c)100 decDigitValue(unsigned int c)
101 {
102   return c - '0';
103 }
104 
105 /* Return the value of a decimal exponent of the form
106    [+-]ddddddd.
107 
108    If the exponent overflows, returns a large exponent with the
109    appropriate sign.  */
110 static int
readExponent(StringRef::iterator begin,StringRef::iterator end)111 readExponent(StringRef::iterator begin, StringRef::iterator end)
112 {
113   bool isNegative;
114   unsigned int absExponent;
115   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
116   StringRef::iterator p = begin;
117 
118   assert(p != end && "Exponent has no digits");
119 
120   isNegative = (*p == '-');
121   if (*p == '-' || *p == '+') {
122     p++;
123     assert(p != end && "Exponent has no digits");
124   }
125 
126   absExponent = decDigitValue(*p++);
127   assert(absExponent < 10U && "Invalid character in exponent");
128 
129   for (; p != end; ++p) {
130     unsigned int value;
131 
132     value = decDigitValue(*p);
133     assert(value < 10U && "Invalid character in exponent");
134 
135     value += absExponent * 10;
136     if (absExponent >= overlargeExponent) {
137       absExponent = overlargeExponent;
138       p = end;  /* outwit assert below */
139       break;
140     }
141     absExponent = value;
142   }
143 
144   assert(p == end && "Invalid exponent in exponent");
145 
146   if (isNegative)
147     return -(int) absExponent;
148   else
149     return (int) absExponent;
150 }
151 
152 /* This is ugly and needs cleaning up, but I don't immediately see
153    how whilst remaining safe.  */
154 static int
totalExponent(StringRef::iterator p,StringRef::iterator end,int exponentAdjustment)155 totalExponent(StringRef::iterator p, StringRef::iterator end,
156               int exponentAdjustment)
157 {
158   int unsignedExponent;
159   bool negative, overflow;
160   int exponent = 0;
161 
162   assert(p != end && "Exponent has no digits");
163 
164   negative = *p == '-';
165   if (*p == '-' || *p == '+') {
166     p++;
167     assert(p != end && "Exponent has no digits");
168   }
169 
170   unsignedExponent = 0;
171   overflow = false;
172   for (; p != end; ++p) {
173     unsigned int value;
174 
175     value = decDigitValue(*p);
176     assert(value < 10U && "Invalid character in exponent");
177 
178     unsignedExponent = unsignedExponent * 10 + value;
179     if (unsignedExponent > 32767) {
180       overflow = true;
181       break;
182     }
183   }
184 
185   if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
186     overflow = true;
187 
188   if (!overflow) {
189     exponent = unsignedExponent;
190     if (negative)
191       exponent = -exponent;
192     exponent += exponentAdjustment;
193     if (exponent > 32767 || exponent < -32768)
194       overflow = true;
195   }
196 
197   if (overflow)
198     exponent = negative ? -32768: 32767;
199 
200   return exponent;
201 }
202 
203 static StringRef::iterator
skipLeadingZeroesAndAnyDot(StringRef::iterator begin,StringRef::iterator end,StringRef::iterator * dot)204 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
205                            StringRef::iterator *dot)
206 {
207   StringRef::iterator p = begin;
208   *dot = end;
209   while (*p == '0' && p != end)
210     p++;
211 
212   if (*p == '.') {
213     *dot = p++;
214 
215     assert(end - begin != 1 && "Significand has no digits");
216 
217     while (*p == '0' && p != end)
218       p++;
219   }
220 
221   return p;
222 }
223 
224 /* Given a normal decimal floating point number of the form
225 
226      dddd.dddd[eE][+-]ddd
227 
228    where the decimal point and exponent are optional, fill out the
229    structure D.  Exponent is appropriate if the significand is
230    treated as an integer, and normalizedExponent if the significand
231    is taken to have the decimal point after a single leading
232    non-zero digit.
233 
234    If the value is zero, V->firstSigDigit points to a non-digit, and
235    the return exponent is zero.
236 */
237 struct decimalInfo {
238   const char *firstSigDigit;
239   const char *lastSigDigit;
240   int exponent;
241   int normalizedExponent;
242 };
243 
244 static void
interpretDecimal(StringRef::iterator begin,StringRef::iterator end,decimalInfo * D)245 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
246                  decimalInfo *D)
247 {
248   StringRef::iterator dot = end;
249   StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
250 
251   D->firstSigDigit = p;
252   D->exponent = 0;
253   D->normalizedExponent = 0;
254 
255   for (; p != end; ++p) {
256     if (*p == '.') {
257       assert(dot == end && "String contains multiple dots");
258       dot = p++;
259       if (p == end)
260         break;
261     }
262     if (decDigitValue(*p) >= 10U)
263       break;
264   }
265 
266   if (p != end) {
267     assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
268     assert(p != begin && "Significand has no digits");
269     assert((dot == end || p - begin != 1) && "Significand has no digits");
270 
271     /* p points to the first non-digit in the string */
272     D->exponent = readExponent(p + 1, end);
273 
274     /* Implied decimal point?  */
275     if (dot == end)
276       dot = p;
277   }
278 
279   /* If number is all zeroes accept any exponent.  */
280   if (p != D->firstSigDigit) {
281     /* Drop insignificant trailing zeroes.  */
282     if (p != begin) {
283       do
284         do
285           p--;
286         while (p != begin && *p == '0');
287       while (p != begin && *p == '.');
288     }
289 
290     /* Adjust the exponents for any decimal point.  */
291     D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
292     D->normalizedExponent = (D->exponent +
293               static_cast<exponent_t>((p - D->firstSigDigit)
294                                       - (dot > D->firstSigDigit && dot < p)));
295   }
296 
297   D->lastSigDigit = p;
298 }
299 
300 /* Return the trailing fraction of a hexadecimal number.
301    DIGITVALUE is the first hex digit of the fraction, P points to
302    the next digit.  */
303 static lostFraction
trailingHexadecimalFraction(StringRef::iterator p,StringRef::iterator end,unsigned int digitValue)304 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
305                             unsigned int digitValue)
306 {
307   unsigned int hexDigit;
308 
309   /* If the first trailing digit isn't 0 or 8 we can work out the
310      fraction immediately.  */
311   if (digitValue > 8)
312     return lfMoreThanHalf;
313   else if (digitValue < 8 && digitValue > 0)
314     return lfLessThanHalf;
315 
316   /* Otherwise we need to find the first non-zero digit.  */
317   while (*p == '0')
318     p++;
319 
320   assert(p != end && "Invalid trailing hexadecimal fraction!");
321 
322   hexDigit = hexDigitValue(*p);
323 
324   /* If we ran off the end it is exactly zero or one-half, otherwise
325      a little more.  */
326   if (hexDigit == -1U)
327     return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
328   else
329     return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
330 }
331 
332 /* Return the fraction lost were a bignum truncated losing the least
333    significant BITS bits.  */
334 static lostFraction
lostFractionThroughTruncation(const integerPart * parts,unsigned int partCount,unsigned int bits)335 lostFractionThroughTruncation(const integerPart *parts,
336                               unsigned int partCount,
337                               unsigned int bits)
338 {
339   unsigned int lsb;
340 
341   lsb = APInt::tcLSB(parts, partCount);
342 
343   /* Note this is guaranteed true if bits == 0, or LSB == -1U.  */
344   if (bits <= lsb)
345     return lfExactlyZero;
346   if (bits == lsb + 1)
347     return lfExactlyHalf;
348   if (bits <= partCount * integerPartWidth &&
349       APInt::tcExtractBit(parts, bits - 1))
350     return lfMoreThanHalf;
351 
352   return lfLessThanHalf;
353 }
354 
355 /* Shift DST right BITS bits noting lost fraction.  */
356 static lostFraction
shiftRight(integerPart * dst,unsigned int parts,unsigned int bits)357 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
358 {
359   lostFraction lost_fraction;
360 
361   lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
362 
363   APInt::tcShiftRight(dst, parts, bits);
364 
365   return lost_fraction;
366 }
367 
368 /* Combine the effect of two lost fractions.  */
369 static lostFraction
combineLostFractions(lostFraction moreSignificant,lostFraction lessSignificant)370 combineLostFractions(lostFraction moreSignificant,
371                      lostFraction lessSignificant)
372 {
373   if (lessSignificant != lfExactlyZero) {
374     if (moreSignificant == lfExactlyZero)
375       moreSignificant = lfLessThanHalf;
376     else if (moreSignificant == lfExactlyHalf)
377       moreSignificant = lfMoreThanHalf;
378   }
379 
380   return moreSignificant;
381 }
382 
383 /* The error from the true value, in half-ulps, on multiplying two
384    floating point numbers, which differ from the value they
385    approximate by at most HUE1 and HUE2 half-ulps, is strictly less
386    than the returned value.
387 
388    See "How to Read Floating Point Numbers Accurately" by William D
389    Clinger.  */
390 static unsigned int
HUerrBound(bool inexactMultiply,unsigned int HUerr1,unsigned int HUerr2)391 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
392 {
393   assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
394 
395   if (HUerr1 + HUerr2 == 0)
396     return inexactMultiply * 2;  /* <= inexactMultiply half-ulps.  */
397   else
398     return inexactMultiply + 2 * (HUerr1 + HUerr2);
399 }
400 
401 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
402    when the least significant BITS are truncated.  BITS cannot be
403    zero.  */
404 static integerPart
ulpsFromBoundary(const integerPart * parts,unsigned int bits,bool isNearest)405 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
406 {
407   unsigned int count, partBits;
408   integerPart part, boundary;
409 
410   assert(bits != 0);
411 
412   bits--;
413   count = bits / integerPartWidth;
414   partBits = bits % integerPartWidth + 1;
415 
416   part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
417 
418   if (isNearest)
419     boundary = (integerPart) 1 << (partBits - 1);
420   else
421     boundary = 0;
422 
423   if (count == 0) {
424     if (part - boundary <= boundary - part)
425       return part - boundary;
426     else
427       return boundary - part;
428   }
429 
430   if (part == boundary) {
431     while (--count)
432       if (parts[count])
433         return ~(integerPart) 0; /* A lot.  */
434 
435     return parts[0];
436   } else if (part == boundary - 1) {
437     while (--count)
438       if (~parts[count])
439         return ~(integerPart) 0; /* A lot.  */
440 
441     return -parts[0];
442   }
443 
444   return ~(integerPart) 0; /* A lot.  */
445 }
446 
447 /* Place pow(5, power) in DST, and return the number of parts used.
448    DST must be at least one part larger than size of the answer.  */
449 static unsigned int
powerOf5(integerPart * dst,unsigned int power)450 powerOf5(integerPart *dst, unsigned int power)
451 {
452   static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
453                                                   15625, 78125 };
454   integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
455   pow5s[0] = 78125 * 5;
456 
457   unsigned int partsCount[16] = { 1 };
458   integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
459   unsigned int result;
460   assert(power <= maxExponent);
461 
462   p1 = dst;
463   p2 = scratch;
464 
465   *p1 = firstEightPowers[power & 7];
466   power >>= 3;
467 
468   result = 1;
469   pow5 = pow5s;
470 
471   for (unsigned int n = 0; power; power >>= 1, n++) {
472     unsigned int pc;
473 
474     pc = partsCount[n];
475 
476     /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
477     if (pc == 0) {
478       pc = partsCount[n - 1];
479       APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
480       pc *= 2;
481       if (pow5[pc - 1] == 0)
482         pc--;
483       partsCount[n] = pc;
484     }
485 
486     if (power & 1) {
487       integerPart *tmp;
488 
489       APInt::tcFullMultiply(p2, p1, pow5, result, pc);
490       result += pc;
491       if (p2[result - 1] == 0)
492         result--;
493 
494       /* Now result is in p1 with partsCount parts and p2 is scratch
495          space.  */
496       tmp = p1, p1 = p2, p2 = tmp;
497     }
498 
499     pow5 += pc;
500   }
501 
502   if (p1 != dst)
503     APInt::tcAssign(dst, p1, result);
504 
505   return result;
506 }
507 
508 /* Zero at the end to avoid modular arithmetic when adding one; used
509    when rounding up during hexadecimal output.  */
510 static const char hexDigitsLower[] = "0123456789abcdef0";
511 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
512 static const char infinityL[] = "infinity";
513 static const char infinityU[] = "INFINITY";
514 static const char NaNL[] = "nan";
515 static const char NaNU[] = "NAN";
516 
517 /* Write out an integerPart in hexadecimal, starting with the most
518    significant nibble.  Write out exactly COUNT hexdigits, return
519    COUNT.  */
520 static unsigned int
partAsHex(char * dst,integerPart part,unsigned int count,const char * hexDigitChars)521 partAsHex (char *dst, integerPart part, unsigned int count,
522            const char *hexDigitChars)
523 {
524   unsigned int result = count;
525 
526   assert(count != 0 && count <= integerPartWidth / 4);
527 
528   part >>= (integerPartWidth - 4 * count);
529   while (count--) {
530     dst[count] = hexDigitChars[part & 0xf];
531     part >>= 4;
532   }
533 
534   return result;
535 }
536 
537 /* Write out an unsigned decimal integer.  */
538 static char *
writeUnsignedDecimal(char * dst,unsigned int n)539 writeUnsignedDecimal (char *dst, unsigned int n)
540 {
541   char buff[40], *p;
542 
543   p = buff;
544   do
545     *p++ = '0' + n % 10;
546   while (n /= 10);
547 
548   do
549     *dst++ = *--p;
550   while (p != buff);
551 
552   return dst;
553 }
554 
555 /* Write out a signed decimal integer.  */
556 static char *
writeSignedDecimal(char * dst,int value)557 writeSignedDecimal (char *dst, int value)
558 {
559   if (value < 0) {
560     *dst++ = '-';
561     dst = writeUnsignedDecimal(dst, -(unsigned) value);
562   } else
563     dst = writeUnsignedDecimal(dst, value);
564 
565   return dst;
566 }
567 
568 /* Constructors.  */
569 void
initialize(const fltSemantics * ourSemantics)570 APFloat::initialize(const fltSemantics *ourSemantics)
571 {
572   unsigned int count;
573 
574   semantics = ourSemantics;
575   count = partCount();
576   if (count > 1)
577     significand.parts = new integerPart[count];
578 }
579 
580 void
freeSignificand()581 APFloat::freeSignificand()
582 {
583   if (partCount() > 1)
584     delete [] significand.parts;
585 }
586 
587 void
assign(const APFloat & rhs)588 APFloat::assign(const APFloat &rhs)
589 {
590   assert(semantics == rhs.semantics);
591 
592   sign = rhs.sign;
593   category = rhs.category;
594   exponent = rhs.exponent;
595   if (category == fcNormal || category == fcNaN)
596     copySignificand(rhs);
597 }
598 
599 void
copySignificand(const APFloat & rhs)600 APFloat::copySignificand(const APFloat &rhs)
601 {
602   assert(category == fcNormal || category == fcNaN);
603   assert(rhs.partCount() >= partCount());
604 
605   APInt::tcAssign(significandParts(), rhs.significandParts(),
606                   partCount());
607 }
608 
609 /* Make this number a NaN, with an arbitrary but deterministic value
610    for the significand.  If double or longer, this is a signalling NaN,
611    which may not be ideal.  If float, this is QNaN(0).  */
makeNaN(bool SNaN,bool Negative,const APInt * fill)612 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
613 {
614   category = fcNaN;
615   sign = Negative;
616 
617   integerPart *significand = significandParts();
618   unsigned numParts = partCount();
619 
620   // Set the significand bits to the fill.
621   if (!fill || fill->getNumWords() < numParts)
622     APInt::tcSet(significand, 0, numParts);
623   if (fill) {
624     APInt::tcAssign(significand, fill->getRawData(),
625                     std::min(fill->getNumWords(), numParts));
626 
627     // Zero out the excess bits of the significand.
628     unsigned bitsToPreserve = semantics->precision - 1;
629     unsigned part = bitsToPreserve / 64;
630     bitsToPreserve %= 64;
631     significand[part] &= ((1ULL << bitsToPreserve) - 1);
632     for (part++; part != numParts; ++part)
633       significand[part] = 0;
634   }
635 
636   unsigned QNaNBit = semantics->precision - 2;
637 
638   if (SNaN) {
639     // We always have to clear the QNaN bit to make it an SNaN.
640     APInt::tcClearBit(significand, QNaNBit);
641 
642     // If there are no bits set in the payload, we have to set
643     // *something* to make it a NaN instead of an infinity;
644     // conventionally, this is the next bit down from the QNaN bit.
645     if (APInt::tcIsZero(significand, numParts))
646       APInt::tcSetBit(significand, QNaNBit - 1);
647   } else {
648     // We always have to set the QNaN bit to make it a QNaN.
649     APInt::tcSetBit(significand, QNaNBit);
650   }
651 
652   // For x87 extended precision, we want to make a NaN, not a
653   // pseudo-NaN.  Maybe we should expose the ability to make
654   // pseudo-NaNs?
655   if (semantics == &APFloat::x87DoubleExtended)
656     APInt::tcSetBit(significand, QNaNBit + 1);
657 }
658 
makeNaN(const fltSemantics & Sem,bool SNaN,bool Negative,const APInt * fill)659 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
660                          const APInt *fill) {
661   APFloat value(Sem, uninitialized);
662   value.makeNaN(SNaN, Negative, fill);
663   return value;
664 }
665 
666 APFloat &
operator =(const APFloat & rhs)667 APFloat::operator=(const APFloat &rhs)
668 {
669   if (this != &rhs) {
670     if (semantics != rhs.semantics) {
671       freeSignificand();
672       initialize(rhs.semantics);
673     }
674     assign(rhs);
675   }
676 
677   return *this;
678 }
679 
680 bool
isDenormal() const681 APFloat::isDenormal() const {
682   return isNormal() && (exponent == semantics->minExponent) &&
683          (APInt::tcExtractBit(significandParts(),
684                               semantics->precision - 1) == 0);
685 }
686 
687 bool
bitwiseIsEqual(const APFloat & rhs) const688 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
689   if (this == &rhs)
690     return true;
691   if (semantics != rhs.semantics ||
692       category != rhs.category ||
693       sign != rhs.sign)
694     return false;
695   if (category==fcZero || category==fcInfinity)
696     return true;
697   else if (category==fcNormal && exponent!=rhs.exponent)
698     return false;
699   else {
700     int i= partCount();
701     const integerPart* p=significandParts();
702     const integerPart* q=rhs.significandParts();
703     for (; i>0; i--, p++, q++) {
704       if (*p != *q)
705         return false;
706     }
707     return true;
708   }
709 }
710 
APFloat(const fltSemantics & ourSemantics,integerPart value)711 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
712   initialize(&ourSemantics);
713   sign = 0;
714   zeroSignificand();
715   exponent = ourSemantics.precision - 1;
716   significandParts()[0] = value;
717   normalize(rmNearestTiesToEven, lfExactlyZero);
718 }
719 
APFloat(const fltSemantics & ourSemantics)720 APFloat::APFloat(const fltSemantics &ourSemantics) {
721   initialize(&ourSemantics);
722   category = fcZero;
723   sign = false;
724 }
725 
APFloat(const fltSemantics & ourSemantics,uninitializedTag tag)726 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
727   // Allocates storage if necessary but does not initialize it.
728   initialize(&ourSemantics);
729 }
730 
APFloat(const fltSemantics & ourSemantics,fltCategory ourCategory,bool negative)731 APFloat::APFloat(const fltSemantics &ourSemantics,
732                  fltCategory ourCategory, bool negative) {
733   initialize(&ourSemantics);
734   category = ourCategory;
735   sign = negative;
736   if (category == fcNormal)
737     category = fcZero;
738   else if (ourCategory == fcNaN)
739     makeNaN();
740 }
741 
APFloat(const fltSemantics & ourSemantics,StringRef text)742 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
743   initialize(&ourSemantics);
744   convertFromString(text, rmNearestTiesToEven);
745 }
746 
APFloat(const APFloat & rhs)747 APFloat::APFloat(const APFloat &rhs) {
748   initialize(rhs.semantics);
749   assign(rhs);
750 }
751 
~APFloat()752 APFloat::~APFloat()
753 {
754   freeSignificand();
755 }
756 
757 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
Profile(FoldingSetNodeID & ID) const758 void APFloat::Profile(FoldingSetNodeID& ID) const {
759   ID.Add(bitcastToAPInt());
760 }
761 
762 unsigned int
partCount() const763 APFloat::partCount() const
764 {
765   return partCountForBits(semantics->precision + 1);
766 }
767 
768 unsigned int
semanticsPrecision(const fltSemantics & semantics)769 APFloat::semanticsPrecision(const fltSemantics &semantics)
770 {
771   return semantics.precision;
772 }
773 
774 const integerPart *
significandParts() const775 APFloat::significandParts() const
776 {
777   return const_cast<APFloat *>(this)->significandParts();
778 }
779 
780 integerPart *
significandParts()781 APFloat::significandParts()
782 {
783   assert(category == fcNormal || category == fcNaN);
784 
785   if (partCount() > 1)
786     return significand.parts;
787   else
788     return &significand.part;
789 }
790 
791 void
zeroSignificand()792 APFloat::zeroSignificand()
793 {
794   category = fcNormal;
795   APInt::tcSet(significandParts(), 0, partCount());
796 }
797 
798 /* Increment an fcNormal floating point number's significand.  */
799 void
incrementSignificand()800 APFloat::incrementSignificand()
801 {
802   integerPart carry;
803 
804   carry = APInt::tcIncrement(significandParts(), partCount());
805 
806   /* Our callers should never cause us to overflow.  */
807   assert(carry == 0);
808   (void)carry;
809 }
810 
811 /* Add the significand of the RHS.  Returns the carry flag.  */
812 integerPart
addSignificand(const APFloat & rhs)813 APFloat::addSignificand(const APFloat &rhs)
814 {
815   integerPart *parts;
816 
817   parts = significandParts();
818 
819   assert(semantics == rhs.semantics);
820   assert(exponent == rhs.exponent);
821 
822   return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
823 }
824 
825 /* Subtract the significand of the RHS with a borrow flag.  Returns
826    the borrow flag.  */
827 integerPart
subtractSignificand(const APFloat & rhs,integerPart borrow)828 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
829 {
830   integerPart *parts;
831 
832   parts = significandParts();
833 
834   assert(semantics == rhs.semantics);
835   assert(exponent == rhs.exponent);
836 
837   return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
838                            partCount());
839 }
840 
841 /* Multiply the significand of the RHS.  If ADDEND is non-NULL, add it
842    on to the full-precision result of the multiplication.  Returns the
843    lost fraction.  */
844 lostFraction
multiplySignificand(const APFloat & rhs,const APFloat * addend)845 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
846 {
847   unsigned int omsb;        // One, not zero, based MSB.
848   unsigned int partsCount, newPartsCount, precision;
849   integerPart *lhsSignificand;
850   integerPart scratch[4];
851   integerPart *fullSignificand;
852   lostFraction lost_fraction;
853   bool ignored;
854 
855   assert(semantics == rhs.semantics);
856 
857   precision = semantics->precision;
858   newPartsCount = partCountForBits(precision * 2);
859 
860   if (newPartsCount > 4)
861     fullSignificand = new integerPart[newPartsCount];
862   else
863     fullSignificand = scratch;
864 
865   lhsSignificand = significandParts();
866   partsCount = partCount();
867 
868   APInt::tcFullMultiply(fullSignificand, lhsSignificand,
869                         rhs.significandParts(), partsCount, partsCount);
870 
871   lost_fraction = lfExactlyZero;
872   omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
873   exponent += rhs.exponent;
874 
875   if (addend) {
876     Significand savedSignificand = significand;
877     const fltSemantics *savedSemantics = semantics;
878     fltSemantics extendedSemantics;
879     opStatus status;
880     unsigned int extendedPrecision;
881 
882     /* Normalize our MSB.  */
883     extendedPrecision = precision + precision - 1;
884     if (omsb != extendedPrecision) {
885       APInt::tcShiftLeft(fullSignificand, newPartsCount,
886                          extendedPrecision - omsb);
887       exponent -= extendedPrecision - omsb;
888     }
889 
890     /* Create new semantics.  */
891     extendedSemantics = *semantics;
892     extendedSemantics.precision = extendedPrecision;
893 
894     if (newPartsCount == 1)
895       significand.part = fullSignificand[0];
896     else
897       significand.parts = fullSignificand;
898     semantics = &extendedSemantics;
899 
900     APFloat extendedAddend(*addend);
901     status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
902     assert(status == opOK);
903     (void)status;
904     lost_fraction = addOrSubtractSignificand(extendedAddend, false);
905 
906     /* Restore our state.  */
907     if (newPartsCount == 1)
908       fullSignificand[0] = significand.part;
909     significand = savedSignificand;
910     semantics = savedSemantics;
911 
912     omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
913   }
914 
915   exponent -= (precision - 1);
916 
917   if (omsb > precision) {
918     unsigned int bits, significantParts;
919     lostFraction lf;
920 
921     bits = omsb - precision;
922     significantParts = partCountForBits(omsb);
923     lf = shiftRight(fullSignificand, significantParts, bits);
924     lost_fraction = combineLostFractions(lf, lost_fraction);
925     exponent += bits;
926   }
927 
928   APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
929 
930   if (newPartsCount > 4)
931     delete [] fullSignificand;
932 
933   return lost_fraction;
934 }
935 
936 /* Multiply the significands of LHS and RHS to DST.  */
937 lostFraction
divideSignificand(const APFloat & rhs)938 APFloat::divideSignificand(const APFloat &rhs)
939 {
940   unsigned int bit, i, partsCount;
941   const integerPart *rhsSignificand;
942   integerPart *lhsSignificand, *dividend, *divisor;
943   integerPart scratch[4];
944   lostFraction lost_fraction;
945 
946   assert(semantics == rhs.semantics);
947 
948   lhsSignificand = significandParts();
949   rhsSignificand = rhs.significandParts();
950   partsCount = partCount();
951 
952   if (partsCount > 2)
953     dividend = new integerPart[partsCount * 2];
954   else
955     dividend = scratch;
956 
957   divisor = dividend + partsCount;
958 
959   /* Copy the dividend and divisor as they will be modified in-place.  */
960   for (i = 0; i < partsCount; i++) {
961     dividend[i] = lhsSignificand[i];
962     divisor[i] = rhsSignificand[i];
963     lhsSignificand[i] = 0;
964   }
965 
966   exponent -= rhs.exponent;
967 
968   unsigned int precision = semantics->precision;
969 
970   /* Normalize the divisor.  */
971   bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
972   if (bit) {
973     exponent += bit;
974     APInt::tcShiftLeft(divisor, partsCount, bit);
975   }
976 
977   /* Normalize the dividend.  */
978   bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
979   if (bit) {
980     exponent -= bit;
981     APInt::tcShiftLeft(dividend, partsCount, bit);
982   }
983 
984   /* Ensure the dividend >= divisor initially for the loop below.
985      Incidentally, this means that the division loop below is
986      guaranteed to set the integer bit to one.  */
987   if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
988     exponent--;
989     APInt::tcShiftLeft(dividend, partsCount, 1);
990     assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
991   }
992 
993   /* Long division.  */
994   for (bit = precision; bit; bit -= 1) {
995     if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
996       APInt::tcSubtract(dividend, divisor, 0, partsCount);
997       APInt::tcSetBit(lhsSignificand, bit - 1);
998     }
999 
1000     APInt::tcShiftLeft(dividend, partsCount, 1);
1001   }
1002 
1003   /* Figure out the lost fraction.  */
1004   int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1005 
1006   if (cmp > 0)
1007     lost_fraction = lfMoreThanHalf;
1008   else if (cmp == 0)
1009     lost_fraction = lfExactlyHalf;
1010   else if (APInt::tcIsZero(dividend, partsCount))
1011     lost_fraction = lfExactlyZero;
1012   else
1013     lost_fraction = lfLessThanHalf;
1014 
1015   if (partsCount > 2)
1016     delete [] dividend;
1017 
1018   return lost_fraction;
1019 }
1020 
1021 unsigned int
significandMSB() const1022 APFloat::significandMSB() const
1023 {
1024   return APInt::tcMSB(significandParts(), partCount());
1025 }
1026 
1027 unsigned int
significandLSB() const1028 APFloat::significandLSB() const
1029 {
1030   return APInt::tcLSB(significandParts(), partCount());
1031 }
1032 
1033 /* Note that a zero result is NOT normalized to fcZero.  */
1034 lostFraction
shiftSignificandRight(unsigned int bits)1035 APFloat::shiftSignificandRight(unsigned int bits)
1036 {
1037   /* Our exponent should not overflow.  */
1038   assert((exponent_t) (exponent + bits) >= exponent);
1039 
1040   exponent += bits;
1041 
1042   return shiftRight(significandParts(), partCount(), bits);
1043 }
1044 
1045 /* Shift the significand left BITS bits, subtract BITS from its exponent.  */
1046 void
shiftSignificandLeft(unsigned int bits)1047 APFloat::shiftSignificandLeft(unsigned int bits)
1048 {
1049   assert(bits < semantics->precision);
1050 
1051   if (bits) {
1052     unsigned int partsCount = partCount();
1053 
1054     APInt::tcShiftLeft(significandParts(), partsCount, bits);
1055     exponent -= bits;
1056 
1057     assert(!APInt::tcIsZero(significandParts(), partsCount));
1058   }
1059 }
1060 
1061 APFloat::cmpResult
compareAbsoluteValue(const APFloat & rhs) const1062 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1063 {
1064   int compare;
1065 
1066   assert(semantics == rhs.semantics);
1067   assert(category == fcNormal);
1068   assert(rhs.category == fcNormal);
1069 
1070   compare = exponent - rhs.exponent;
1071 
1072   /* If exponents are equal, do an unsigned bignum comparison of the
1073      significands.  */
1074   if (compare == 0)
1075     compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1076                                partCount());
1077 
1078   if (compare > 0)
1079     return cmpGreaterThan;
1080   else if (compare < 0)
1081     return cmpLessThan;
1082   else
1083     return cmpEqual;
1084 }
1085 
1086 /* Handle overflow.  Sign is preserved.  We either become infinity or
1087    the largest finite number.  */
1088 APFloat::opStatus
handleOverflow(roundingMode rounding_mode)1089 APFloat::handleOverflow(roundingMode rounding_mode)
1090 {
1091   /* Infinity?  */
1092   if (rounding_mode == rmNearestTiesToEven ||
1093       rounding_mode == rmNearestTiesToAway ||
1094       (rounding_mode == rmTowardPositive && !sign) ||
1095       (rounding_mode == rmTowardNegative && sign)) {
1096     category = fcInfinity;
1097     return (opStatus) (opOverflow | opInexact);
1098   }
1099 
1100   /* Otherwise we become the largest finite number.  */
1101   category = fcNormal;
1102   exponent = semantics->maxExponent;
1103   APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1104                                    semantics->precision);
1105 
1106   return opInexact;
1107 }
1108 
1109 /* Returns TRUE if, when truncating the current number, with BIT the
1110    new LSB, with the given lost fraction and rounding mode, the result
1111    would need to be rounded away from zero (i.e., by increasing the
1112    signficand).  This routine must work for fcZero of both signs, and
1113    fcNormal numbers.  */
1114 bool
roundAwayFromZero(roundingMode rounding_mode,lostFraction lost_fraction,unsigned int bit) const1115 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1116                            lostFraction lost_fraction,
1117                            unsigned int bit) const
1118 {
1119   /* NaNs and infinities should not have lost fractions.  */
1120   assert(category == fcNormal || category == fcZero);
1121 
1122   /* Current callers never pass this so we don't handle it.  */
1123   assert(lost_fraction != lfExactlyZero);
1124 
1125   switch (rounding_mode) {
1126   case rmNearestTiesToAway:
1127     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1128 
1129   case rmNearestTiesToEven:
1130     if (lost_fraction == lfMoreThanHalf)
1131       return true;
1132 
1133     /* Our zeroes don't have a significand to test.  */
1134     if (lost_fraction == lfExactlyHalf && category != fcZero)
1135       return APInt::tcExtractBit(significandParts(), bit);
1136 
1137     return false;
1138 
1139   case rmTowardZero:
1140     return false;
1141 
1142   case rmTowardPositive:
1143     return sign == false;
1144 
1145   case rmTowardNegative:
1146     return sign == true;
1147   }
1148   llvm_unreachable("Invalid rounding mode found");
1149 }
1150 
1151 APFloat::opStatus
normalize(roundingMode rounding_mode,lostFraction lost_fraction)1152 APFloat::normalize(roundingMode rounding_mode,
1153                    lostFraction lost_fraction)
1154 {
1155   unsigned int omsb;                /* One, not zero, based MSB.  */
1156   int exponentChange;
1157 
1158   if (category != fcNormal)
1159     return opOK;
1160 
1161   /* Before rounding normalize the exponent of fcNormal numbers.  */
1162   omsb = significandMSB() + 1;
1163 
1164   if (omsb) {
1165     /* OMSB is numbered from 1.  We want to place it in the integer
1166        bit numbered PRECISION if possible, with a compensating change in
1167        the exponent.  */
1168     exponentChange = omsb - semantics->precision;
1169 
1170     /* If the resulting exponent is too high, overflow according to
1171        the rounding mode.  */
1172     if (exponent + exponentChange > semantics->maxExponent)
1173       return handleOverflow(rounding_mode);
1174 
1175     /* Subnormal numbers have exponent minExponent, and their MSB
1176        is forced based on that.  */
1177     if (exponent + exponentChange < semantics->minExponent)
1178       exponentChange = semantics->minExponent - exponent;
1179 
1180     /* Shifting left is easy as we don't lose precision.  */
1181     if (exponentChange < 0) {
1182       assert(lost_fraction == lfExactlyZero);
1183 
1184       shiftSignificandLeft(-exponentChange);
1185 
1186       return opOK;
1187     }
1188 
1189     if (exponentChange > 0) {
1190       lostFraction lf;
1191 
1192       /* Shift right and capture any new lost fraction.  */
1193       lf = shiftSignificandRight(exponentChange);
1194 
1195       lost_fraction = combineLostFractions(lf, lost_fraction);
1196 
1197       /* Keep OMSB up-to-date.  */
1198       if (omsb > (unsigned) exponentChange)
1199         omsb -= exponentChange;
1200       else
1201         omsb = 0;
1202     }
1203   }
1204 
1205   /* Now round the number according to rounding_mode given the lost
1206      fraction.  */
1207 
1208   /* As specified in IEEE 754, since we do not trap we do not report
1209      underflow for exact results.  */
1210   if (lost_fraction == lfExactlyZero) {
1211     /* Canonicalize zeroes.  */
1212     if (omsb == 0)
1213       category = fcZero;
1214 
1215     return opOK;
1216   }
1217 
1218   /* Increment the significand if we're rounding away from zero.  */
1219   if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1220     if (omsb == 0)
1221       exponent = semantics->minExponent;
1222 
1223     incrementSignificand();
1224     omsb = significandMSB() + 1;
1225 
1226     /* Did the significand increment overflow?  */
1227     if (omsb == (unsigned) semantics->precision + 1) {
1228       /* Renormalize by incrementing the exponent and shifting our
1229          significand right one.  However if we already have the
1230          maximum exponent we overflow to infinity.  */
1231       if (exponent == semantics->maxExponent) {
1232         category = fcInfinity;
1233 
1234         return (opStatus) (opOverflow | opInexact);
1235       }
1236 
1237       shiftSignificandRight(1);
1238 
1239       return opInexact;
1240     }
1241   }
1242 
1243   /* The normal case - we were and are not denormal, and any
1244      significand increment above didn't overflow.  */
1245   if (omsb == semantics->precision)
1246     return opInexact;
1247 
1248   /* We have a non-zero denormal.  */
1249   assert(omsb < semantics->precision);
1250 
1251   /* Canonicalize zeroes.  */
1252   if (omsb == 0)
1253     category = fcZero;
1254 
1255   /* The fcZero case is a denormal that underflowed to zero.  */
1256   return (opStatus) (opUnderflow | opInexact);
1257 }
1258 
1259 APFloat::opStatus
addOrSubtractSpecials(const APFloat & rhs,bool subtract)1260 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1261 {
1262   switch (convolve(category, rhs.category)) {
1263   default:
1264     llvm_unreachable(0);
1265 
1266   case convolve(fcNaN, fcZero):
1267   case convolve(fcNaN, fcNormal):
1268   case convolve(fcNaN, fcInfinity):
1269   case convolve(fcNaN, fcNaN):
1270   case convolve(fcNormal, fcZero):
1271   case convolve(fcInfinity, fcNormal):
1272   case convolve(fcInfinity, fcZero):
1273     return opOK;
1274 
1275   case convolve(fcZero, fcNaN):
1276   case convolve(fcNormal, fcNaN):
1277   case convolve(fcInfinity, fcNaN):
1278     category = fcNaN;
1279     copySignificand(rhs);
1280     return opOK;
1281 
1282   case convolve(fcNormal, fcInfinity):
1283   case convolve(fcZero, fcInfinity):
1284     category = fcInfinity;
1285     sign = rhs.sign ^ subtract;
1286     return opOK;
1287 
1288   case convolve(fcZero, fcNormal):
1289     assign(rhs);
1290     sign = rhs.sign ^ subtract;
1291     return opOK;
1292 
1293   case convolve(fcZero, fcZero):
1294     /* Sign depends on rounding mode; handled by caller.  */
1295     return opOK;
1296 
1297   case convolve(fcInfinity, fcInfinity):
1298     /* Differently signed infinities can only be validly
1299        subtracted.  */
1300     if (((sign ^ rhs.sign)!=0) != subtract) {
1301       makeNaN();
1302       return opInvalidOp;
1303     }
1304 
1305     return opOK;
1306 
1307   case convolve(fcNormal, fcNormal):
1308     return opDivByZero;
1309   }
1310 }
1311 
1312 /* Add or subtract two normal numbers.  */
1313 lostFraction
addOrSubtractSignificand(const APFloat & rhs,bool subtract)1314 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1315 {
1316   integerPart carry;
1317   lostFraction lost_fraction;
1318   int bits;
1319 
1320   /* Determine if the operation on the absolute values is effectively
1321      an addition or subtraction.  */
1322   subtract ^= (sign ^ rhs.sign) ? true : false;
1323 
1324   /* Are we bigger exponent-wise than the RHS?  */
1325   bits = exponent - rhs.exponent;
1326 
1327   /* Subtraction is more subtle than one might naively expect.  */
1328   if (subtract) {
1329     APFloat temp_rhs(rhs);
1330     bool reverse;
1331 
1332     if (bits == 0) {
1333       reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1334       lost_fraction = lfExactlyZero;
1335     } else if (bits > 0) {
1336       lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1337       shiftSignificandLeft(1);
1338       reverse = false;
1339     } else {
1340       lost_fraction = shiftSignificandRight(-bits - 1);
1341       temp_rhs.shiftSignificandLeft(1);
1342       reverse = true;
1343     }
1344 
1345     if (reverse) {
1346       carry = temp_rhs.subtractSignificand
1347         (*this, lost_fraction != lfExactlyZero);
1348       copySignificand(temp_rhs);
1349       sign = !sign;
1350     } else {
1351       carry = subtractSignificand
1352         (temp_rhs, lost_fraction != lfExactlyZero);
1353     }
1354 
1355     /* Invert the lost fraction - it was on the RHS and
1356        subtracted.  */
1357     if (lost_fraction == lfLessThanHalf)
1358       lost_fraction = lfMoreThanHalf;
1359     else if (lost_fraction == lfMoreThanHalf)
1360       lost_fraction = lfLessThanHalf;
1361 
1362     /* The code above is intended to ensure that no borrow is
1363        necessary.  */
1364     assert(!carry);
1365     (void)carry;
1366   } else {
1367     if (bits > 0) {
1368       APFloat temp_rhs(rhs);
1369 
1370       lost_fraction = temp_rhs.shiftSignificandRight(bits);
1371       carry = addSignificand(temp_rhs);
1372     } else {
1373       lost_fraction = shiftSignificandRight(-bits);
1374       carry = addSignificand(rhs);
1375     }
1376 
1377     /* We have a guard bit; generating a carry cannot happen.  */
1378     assert(!carry);
1379     (void)carry;
1380   }
1381 
1382   return lost_fraction;
1383 }
1384 
1385 APFloat::opStatus
multiplySpecials(const APFloat & rhs)1386 APFloat::multiplySpecials(const APFloat &rhs)
1387 {
1388   switch (convolve(category, rhs.category)) {
1389   default:
1390     llvm_unreachable(0);
1391 
1392   case convolve(fcNaN, fcZero):
1393   case convolve(fcNaN, fcNormal):
1394   case convolve(fcNaN, fcInfinity):
1395   case convolve(fcNaN, fcNaN):
1396     return opOK;
1397 
1398   case convolve(fcZero, fcNaN):
1399   case convolve(fcNormal, fcNaN):
1400   case convolve(fcInfinity, fcNaN):
1401     category = fcNaN;
1402     copySignificand(rhs);
1403     return opOK;
1404 
1405   case convolve(fcNormal, fcInfinity):
1406   case convolve(fcInfinity, fcNormal):
1407   case convolve(fcInfinity, fcInfinity):
1408     category = fcInfinity;
1409     return opOK;
1410 
1411   case convolve(fcZero, fcNormal):
1412   case convolve(fcNormal, fcZero):
1413   case convolve(fcZero, fcZero):
1414     category = fcZero;
1415     return opOK;
1416 
1417   case convolve(fcZero, fcInfinity):
1418   case convolve(fcInfinity, fcZero):
1419     makeNaN();
1420     return opInvalidOp;
1421 
1422   case convolve(fcNormal, fcNormal):
1423     return opOK;
1424   }
1425 }
1426 
1427 APFloat::opStatus
divideSpecials(const APFloat & rhs)1428 APFloat::divideSpecials(const APFloat &rhs)
1429 {
1430   switch (convolve(category, rhs.category)) {
1431   default:
1432     llvm_unreachable(0);
1433 
1434   case convolve(fcNaN, fcZero):
1435   case convolve(fcNaN, fcNormal):
1436   case convolve(fcNaN, fcInfinity):
1437   case convolve(fcNaN, fcNaN):
1438   case convolve(fcInfinity, fcZero):
1439   case convolve(fcInfinity, fcNormal):
1440   case convolve(fcZero, fcInfinity):
1441   case convolve(fcZero, fcNormal):
1442     return opOK;
1443 
1444   case convolve(fcZero, fcNaN):
1445   case convolve(fcNormal, fcNaN):
1446   case convolve(fcInfinity, fcNaN):
1447     category = fcNaN;
1448     copySignificand(rhs);
1449     return opOK;
1450 
1451   case convolve(fcNormal, fcInfinity):
1452     category = fcZero;
1453     return opOK;
1454 
1455   case convolve(fcNormal, fcZero):
1456     category = fcInfinity;
1457     return opDivByZero;
1458 
1459   case convolve(fcInfinity, fcInfinity):
1460   case convolve(fcZero, fcZero):
1461     makeNaN();
1462     return opInvalidOp;
1463 
1464   case convolve(fcNormal, fcNormal):
1465     return opOK;
1466   }
1467 }
1468 
1469 APFloat::opStatus
modSpecials(const APFloat & rhs)1470 APFloat::modSpecials(const APFloat &rhs)
1471 {
1472   switch (convolve(category, rhs.category)) {
1473   default:
1474     llvm_unreachable(0);
1475 
1476   case convolve(fcNaN, fcZero):
1477   case convolve(fcNaN, fcNormal):
1478   case convolve(fcNaN, fcInfinity):
1479   case convolve(fcNaN, fcNaN):
1480   case convolve(fcZero, fcInfinity):
1481   case convolve(fcZero, fcNormal):
1482   case convolve(fcNormal, fcInfinity):
1483     return opOK;
1484 
1485   case convolve(fcZero, fcNaN):
1486   case convolve(fcNormal, fcNaN):
1487   case convolve(fcInfinity, fcNaN):
1488     category = fcNaN;
1489     copySignificand(rhs);
1490     return opOK;
1491 
1492   case convolve(fcNormal, fcZero):
1493   case convolve(fcInfinity, fcZero):
1494   case convolve(fcInfinity, fcNormal):
1495   case convolve(fcInfinity, fcInfinity):
1496   case convolve(fcZero, fcZero):
1497     makeNaN();
1498     return opInvalidOp;
1499 
1500   case convolve(fcNormal, fcNormal):
1501     return opOK;
1502   }
1503 }
1504 
1505 /* Change sign.  */
1506 void
changeSign()1507 APFloat::changeSign()
1508 {
1509   /* Look mummy, this one's easy.  */
1510   sign = !sign;
1511 }
1512 
1513 void
clearSign()1514 APFloat::clearSign()
1515 {
1516   /* So is this one. */
1517   sign = 0;
1518 }
1519 
1520 void
copySign(const APFloat & rhs)1521 APFloat::copySign(const APFloat &rhs)
1522 {
1523   /* And this one. */
1524   sign = rhs.sign;
1525 }
1526 
1527 /* Normalized addition or subtraction.  */
1528 APFloat::opStatus
addOrSubtract(const APFloat & rhs,roundingMode rounding_mode,bool subtract)1529 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1530                        bool subtract)
1531 {
1532   opStatus fs;
1533 
1534   fs = addOrSubtractSpecials(rhs, subtract);
1535 
1536   /* This return code means it was not a simple case.  */
1537   if (fs == opDivByZero) {
1538     lostFraction lost_fraction;
1539 
1540     lost_fraction = addOrSubtractSignificand(rhs, subtract);
1541     fs = normalize(rounding_mode, lost_fraction);
1542 
1543     /* Can only be zero if we lost no fraction.  */
1544     assert(category != fcZero || lost_fraction == lfExactlyZero);
1545   }
1546 
1547   /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1548      positive zero unless rounding to minus infinity, except that
1549      adding two like-signed zeroes gives that zero.  */
1550   if (category == fcZero) {
1551     if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1552       sign = (rounding_mode == rmTowardNegative);
1553   }
1554 
1555   return fs;
1556 }
1557 
1558 /* Normalized addition.  */
1559 APFloat::opStatus
add(const APFloat & rhs,roundingMode rounding_mode)1560 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1561 {
1562   return addOrSubtract(rhs, rounding_mode, false);
1563 }
1564 
1565 /* Normalized subtraction.  */
1566 APFloat::opStatus
subtract(const APFloat & rhs,roundingMode rounding_mode)1567 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1568 {
1569   return addOrSubtract(rhs, rounding_mode, true);
1570 }
1571 
1572 /* Normalized multiply.  */
1573 APFloat::opStatus
multiply(const APFloat & rhs,roundingMode rounding_mode)1574 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1575 {
1576   opStatus fs;
1577 
1578   sign ^= rhs.sign;
1579   fs = multiplySpecials(rhs);
1580 
1581   if (category == fcNormal) {
1582     lostFraction lost_fraction = multiplySignificand(rhs, 0);
1583     fs = normalize(rounding_mode, lost_fraction);
1584     if (lost_fraction != lfExactlyZero)
1585       fs = (opStatus) (fs | opInexact);
1586   }
1587 
1588   return fs;
1589 }
1590 
1591 /* Normalized divide.  */
1592 APFloat::opStatus
divide(const APFloat & rhs,roundingMode rounding_mode)1593 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1594 {
1595   opStatus fs;
1596 
1597   sign ^= rhs.sign;
1598   fs = divideSpecials(rhs);
1599 
1600   if (category == fcNormal) {
1601     lostFraction lost_fraction = divideSignificand(rhs);
1602     fs = normalize(rounding_mode, lost_fraction);
1603     if (lost_fraction != lfExactlyZero)
1604       fs = (opStatus) (fs | opInexact);
1605   }
1606 
1607   return fs;
1608 }
1609 
1610 /* Normalized remainder.  This is not currently correct in all cases.  */
1611 APFloat::opStatus
remainder(const APFloat & rhs)1612 APFloat::remainder(const APFloat &rhs)
1613 {
1614   opStatus fs;
1615   APFloat V = *this;
1616   unsigned int origSign = sign;
1617 
1618   fs = V.divide(rhs, rmNearestTiesToEven);
1619   if (fs == opDivByZero)
1620     return fs;
1621 
1622   int parts = partCount();
1623   integerPart *x = new integerPart[parts];
1624   bool ignored;
1625   fs = V.convertToInteger(x, parts * integerPartWidth, true,
1626                           rmNearestTiesToEven, &ignored);
1627   if (fs==opInvalidOp)
1628     return fs;
1629 
1630   fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1631                                         rmNearestTiesToEven);
1632   assert(fs==opOK);   // should always work
1633 
1634   fs = V.multiply(rhs, rmNearestTiesToEven);
1635   assert(fs==opOK || fs==opInexact);   // should not overflow or underflow
1636 
1637   fs = subtract(V, rmNearestTiesToEven);
1638   assert(fs==opOK || fs==opInexact);   // likewise
1639 
1640   if (isZero())
1641     sign = origSign;    // IEEE754 requires this
1642   delete[] x;
1643   return fs;
1644 }
1645 
1646 /* Normalized llvm frem (C fmod).
1647    This is not currently correct in all cases.  */
1648 APFloat::opStatus
mod(const APFloat & rhs,roundingMode rounding_mode)1649 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1650 {
1651   opStatus fs;
1652   fs = modSpecials(rhs);
1653 
1654   if (category == fcNormal && rhs.category == fcNormal) {
1655     APFloat V = *this;
1656     unsigned int origSign = sign;
1657 
1658     fs = V.divide(rhs, rmNearestTiesToEven);
1659     if (fs == opDivByZero)
1660       return fs;
1661 
1662     int parts = partCount();
1663     integerPart *x = new integerPart[parts];
1664     bool ignored;
1665     fs = V.convertToInteger(x, parts * integerPartWidth, true,
1666                             rmTowardZero, &ignored);
1667     if (fs==opInvalidOp)
1668       return fs;
1669 
1670     fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1671                                           rmNearestTiesToEven);
1672     assert(fs==opOK);   // should always work
1673 
1674     fs = V.multiply(rhs, rounding_mode);
1675     assert(fs==opOK || fs==opInexact);   // should not overflow or underflow
1676 
1677     fs = subtract(V, rounding_mode);
1678     assert(fs==opOK || fs==opInexact);   // likewise
1679 
1680     if (isZero())
1681       sign = origSign;    // IEEE754 requires this
1682     delete[] x;
1683   }
1684   return fs;
1685 }
1686 
1687 /* Normalized fused-multiply-add.  */
1688 APFloat::opStatus
fusedMultiplyAdd(const APFloat & multiplicand,const APFloat & addend,roundingMode rounding_mode)1689 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1690                           const APFloat &addend,
1691                           roundingMode rounding_mode)
1692 {
1693   opStatus fs;
1694 
1695   /* Post-multiplication sign, before addition.  */
1696   sign ^= multiplicand.sign;
1697 
1698   /* If and only if all arguments are normal do we need to do an
1699      extended-precision calculation.  */
1700   if (category == fcNormal &&
1701       multiplicand.category == fcNormal &&
1702       addend.category == fcNormal) {
1703     lostFraction lost_fraction;
1704 
1705     lost_fraction = multiplySignificand(multiplicand, &addend);
1706     fs = normalize(rounding_mode, lost_fraction);
1707     if (lost_fraction != lfExactlyZero)
1708       fs = (opStatus) (fs | opInexact);
1709 
1710     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1711        positive zero unless rounding to minus infinity, except that
1712        adding two like-signed zeroes gives that zero.  */
1713     if (category == fcZero && sign != addend.sign)
1714       sign = (rounding_mode == rmTowardNegative);
1715   } else {
1716     fs = multiplySpecials(multiplicand);
1717 
1718     /* FS can only be opOK or opInvalidOp.  There is no more work
1719        to do in the latter case.  The IEEE-754R standard says it is
1720        implementation-defined in this case whether, if ADDEND is a
1721        quiet NaN, we raise invalid op; this implementation does so.
1722 
1723        If we need to do the addition we can do so with normal
1724        precision.  */
1725     if (fs == opOK)
1726       fs = addOrSubtract(addend, rounding_mode, false);
1727   }
1728 
1729   return fs;
1730 }
1731 
1732 /* Rounding-mode corrrect round to integral value.  */
roundToIntegral(roundingMode rounding_mode)1733 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1734   opStatus fs;
1735 
1736   // If the exponent is large enough, we know that this value is already
1737   // integral, and the arithmetic below would potentially cause it to saturate
1738   // to +/-Inf.  Bail out early instead.
1739   if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics))
1740     return opOK;
1741 
1742   // The algorithm here is quite simple: we add 2^(p-1), where p is the
1743   // precision of our format, and then subtract it back off again.  The choice
1744   // of rounding modes for the addition/subtraction determines the rounding mode
1745   // for our integral rounding as well.
1746   // NOTE: When the input value is negative, we do subtraction followed by
1747   // addition instead.
1748   APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1749   IntegerConstant <<= semanticsPrecision(*semantics)-1;
1750   APFloat MagicConstant(*semantics);
1751   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1752                                       rmNearestTiesToEven);
1753   MagicConstant.copySign(*this);
1754 
1755   if (fs != opOK)
1756     return fs;
1757 
1758   // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1759   bool inputSign = isNegative();
1760 
1761   fs = add(MagicConstant, rounding_mode);
1762   if (fs != opOK && fs != opInexact)
1763     return fs;
1764 
1765   fs = subtract(MagicConstant, rounding_mode);
1766 
1767   // Restore the input sign.
1768   if (inputSign != isNegative())
1769     changeSign();
1770 
1771   return fs;
1772 }
1773 
1774 
1775 /* Comparison requires normalized numbers.  */
1776 APFloat::cmpResult
compare(const APFloat & rhs) const1777 APFloat::compare(const APFloat &rhs) const
1778 {
1779   cmpResult result;
1780 
1781   assert(semantics == rhs.semantics);
1782 
1783   switch (convolve(category, rhs.category)) {
1784   default:
1785     llvm_unreachable(0);
1786 
1787   case convolve(fcNaN, fcZero):
1788   case convolve(fcNaN, fcNormal):
1789   case convolve(fcNaN, fcInfinity):
1790   case convolve(fcNaN, fcNaN):
1791   case convolve(fcZero, fcNaN):
1792   case convolve(fcNormal, fcNaN):
1793   case convolve(fcInfinity, fcNaN):
1794     return cmpUnordered;
1795 
1796   case convolve(fcInfinity, fcNormal):
1797   case convolve(fcInfinity, fcZero):
1798   case convolve(fcNormal, fcZero):
1799     if (sign)
1800       return cmpLessThan;
1801     else
1802       return cmpGreaterThan;
1803 
1804   case convolve(fcNormal, fcInfinity):
1805   case convolve(fcZero, fcInfinity):
1806   case convolve(fcZero, fcNormal):
1807     if (rhs.sign)
1808       return cmpGreaterThan;
1809     else
1810       return cmpLessThan;
1811 
1812   case convolve(fcInfinity, fcInfinity):
1813     if (sign == rhs.sign)
1814       return cmpEqual;
1815     else if (sign)
1816       return cmpLessThan;
1817     else
1818       return cmpGreaterThan;
1819 
1820   case convolve(fcZero, fcZero):
1821     return cmpEqual;
1822 
1823   case convolve(fcNormal, fcNormal):
1824     break;
1825   }
1826 
1827   /* Two normal numbers.  Do they have the same sign?  */
1828   if (sign != rhs.sign) {
1829     if (sign)
1830       result = cmpLessThan;
1831     else
1832       result = cmpGreaterThan;
1833   } else {
1834     /* Compare absolute values; invert result if negative.  */
1835     result = compareAbsoluteValue(rhs);
1836 
1837     if (sign) {
1838       if (result == cmpLessThan)
1839         result = cmpGreaterThan;
1840       else if (result == cmpGreaterThan)
1841         result = cmpLessThan;
1842     }
1843   }
1844 
1845   return result;
1846 }
1847 
1848 /// APFloat::convert - convert a value of one floating point type to another.
1849 /// The return value corresponds to the IEEE754 exceptions.  *losesInfo
1850 /// records whether the transformation lost information, i.e. whether
1851 /// converting the result back to the original type will produce the
1852 /// original value (this is almost the same as return value==fsOK, but there
1853 /// are edge cases where this is not so).
1854 
1855 APFloat::opStatus
convert(const fltSemantics & toSemantics,roundingMode rounding_mode,bool * losesInfo)1856 APFloat::convert(const fltSemantics &toSemantics,
1857                  roundingMode rounding_mode, bool *losesInfo)
1858 {
1859   lostFraction lostFraction;
1860   unsigned int newPartCount, oldPartCount;
1861   opStatus fs;
1862   int shift;
1863   const fltSemantics &fromSemantics = *semantics;
1864 
1865   lostFraction = lfExactlyZero;
1866   newPartCount = partCountForBits(toSemantics.precision + 1);
1867   oldPartCount = partCount();
1868   shift = toSemantics.precision - fromSemantics.precision;
1869 
1870   bool X86SpecialNan = false;
1871   if (&fromSemantics == &APFloat::x87DoubleExtended &&
1872       &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1873       (!(*significandParts() & 0x8000000000000000ULL) ||
1874        !(*significandParts() & 0x4000000000000000ULL))) {
1875     // x86 has some unusual NaNs which cannot be represented in any other
1876     // format; note them here.
1877     X86SpecialNan = true;
1878   }
1879 
1880   // If this is a truncation, perform the shift before we narrow the storage.
1881   if (shift < 0 && (category==fcNormal || category==fcNaN))
1882     lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1883 
1884   // Fix the storage so it can hold to new value.
1885   if (newPartCount > oldPartCount) {
1886     // The new type requires more storage; make it available.
1887     integerPart *newParts;
1888     newParts = new integerPart[newPartCount];
1889     APInt::tcSet(newParts, 0, newPartCount);
1890     if (category==fcNormal || category==fcNaN)
1891       APInt::tcAssign(newParts, significandParts(), oldPartCount);
1892     freeSignificand();
1893     significand.parts = newParts;
1894   } else if (newPartCount == 1 && oldPartCount != 1) {
1895     // Switch to built-in storage for a single part.
1896     integerPart newPart = 0;
1897     if (category==fcNormal || category==fcNaN)
1898       newPart = significandParts()[0];
1899     freeSignificand();
1900     significand.part = newPart;
1901   }
1902 
1903   // Now that we have the right storage, switch the semantics.
1904   semantics = &toSemantics;
1905 
1906   // If this is an extension, perform the shift now that the storage is
1907   // available.
1908   if (shift > 0 && (category==fcNormal || category==fcNaN))
1909     APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1910 
1911   if (category == fcNormal) {
1912     fs = normalize(rounding_mode, lostFraction);
1913     *losesInfo = (fs != opOK);
1914   } else if (category == fcNaN) {
1915     *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
1916 
1917     // For x87 extended precision, we want to make a NaN, not a special NaN if
1918     // the input wasn't special either.
1919     if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended)
1920       APInt::tcSetBit(significandParts(), semantics->precision - 1);
1921 
1922     // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1923     // does not give you back the same bits.  This is dubious, and we
1924     // don't currently do it.  You're really supposed to get
1925     // an invalid operation signal at runtime, but nobody does that.
1926     fs = opOK;
1927   } else {
1928     *losesInfo = false;
1929     fs = opOK;
1930   }
1931 
1932   return fs;
1933 }
1934 
1935 /* Convert a floating point number to an integer according to the
1936    rounding mode.  If the rounded integer value is out of range this
1937    returns an invalid operation exception and the contents of the
1938    destination parts are unspecified.  If the rounded value is in
1939    range but the floating point number is not the exact integer, the C
1940    standard doesn't require an inexact exception to be raised.  IEEE
1941    854 does require it so we do that.
1942 
1943    Note that for conversions to integer type the C standard requires
1944    round-to-zero to always be used.  */
1945 APFloat::opStatus
convertToSignExtendedInteger(integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const1946 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1947                                       bool isSigned,
1948                                       roundingMode rounding_mode,
1949                                       bool *isExact) const
1950 {
1951   lostFraction lost_fraction;
1952   const integerPart *src;
1953   unsigned int dstPartsCount, truncatedBits;
1954 
1955   *isExact = false;
1956 
1957   /* Handle the three special cases first.  */
1958   if (category == fcInfinity || category == fcNaN)
1959     return opInvalidOp;
1960 
1961   dstPartsCount = partCountForBits(width);
1962 
1963   if (category == fcZero) {
1964     APInt::tcSet(parts, 0, dstPartsCount);
1965     // Negative zero can't be represented as an int.
1966     *isExact = !sign;
1967     return opOK;
1968   }
1969 
1970   src = significandParts();
1971 
1972   /* Step 1: place our absolute value, with any fraction truncated, in
1973      the destination.  */
1974   if (exponent < 0) {
1975     /* Our absolute value is less than one; truncate everything.  */
1976     APInt::tcSet(parts, 0, dstPartsCount);
1977     /* For exponent -1 the integer bit represents .5, look at that.
1978        For smaller exponents leftmost truncated bit is 0. */
1979     truncatedBits = semantics->precision -1U - exponent;
1980   } else {
1981     /* We want the most significant (exponent + 1) bits; the rest are
1982        truncated.  */
1983     unsigned int bits = exponent + 1U;
1984 
1985     /* Hopelessly large in magnitude?  */
1986     if (bits > width)
1987       return opInvalidOp;
1988 
1989     if (bits < semantics->precision) {
1990       /* We truncate (semantics->precision - bits) bits.  */
1991       truncatedBits = semantics->precision - bits;
1992       APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1993     } else {
1994       /* We want at least as many bits as are available.  */
1995       APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1996       APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1997       truncatedBits = 0;
1998     }
1999   }
2000 
2001   /* Step 2: work out any lost fraction, and increment the absolute
2002      value if we would round away from zero.  */
2003   if (truncatedBits) {
2004     lost_fraction = lostFractionThroughTruncation(src, partCount(),
2005                                                   truncatedBits);
2006     if (lost_fraction != lfExactlyZero &&
2007         roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2008       if (APInt::tcIncrement(parts, dstPartsCount))
2009         return opInvalidOp;     /* Overflow.  */
2010     }
2011   } else {
2012     lost_fraction = lfExactlyZero;
2013   }
2014 
2015   /* Step 3: check if we fit in the destination.  */
2016   unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2017 
2018   if (sign) {
2019     if (!isSigned) {
2020       /* Negative numbers cannot be represented as unsigned.  */
2021       if (omsb != 0)
2022         return opInvalidOp;
2023     } else {
2024       /* It takes omsb bits to represent the unsigned integer value.
2025          We lose a bit for the sign, but care is needed as the
2026          maximally negative integer is a special case.  */
2027       if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2028         return opInvalidOp;
2029 
2030       /* This case can happen because of rounding.  */
2031       if (omsb > width)
2032         return opInvalidOp;
2033     }
2034 
2035     APInt::tcNegate (parts, dstPartsCount);
2036   } else {
2037     if (omsb >= width + !isSigned)
2038       return opInvalidOp;
2039   }
2040 
2041   if (lost_fraction == lfExactlyZero) {
2042     *isExact = true;
2043     return opOK;
2044   } else
2045     return opInexact;
2046 }
2047 
2048 /* Same as convertToSignExtendedInteger, except we provide
2049    deterministic values in case of an invalid operation exception,
2050    namely zero for NaNs and the minimal or maximal value respectively
2051    for underflow or overflow.
2052    The *isExact output tells whether the result is exact, in the sense
2053    that converting it back to the original floating point type produces
2054    the original value.  This is almost equivalent to result==opOK,
2055    except for negative zeroes.
2056 */
2057 APFloat::opStatus
convertToInteger(integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2058 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2059                           bool isSigned,
2060                           roundingMode rounding_mode, bool *isExact) const
2061 {
2062   opStatus fs;
2063 
2064   fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2065                                     isExact);
2066 
2067   if (fs == opInvalidOp) {
2068     unsigned int bits, dstPartsCount;
2069 
2070     dstPartsCount = partCountForBits(width);
2071 
2072     if (category == fcNaN)
2073       bits = 0;
2074     else if (sign)
2075       bits = isSigned;
2076     else
2077       bits = width - isSigned;
2078 
2079     APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2080     if (sign && isSigned)
2081       APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2082   }
2083 
2084   return fs;
2085 }
2086 
2087 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2088    an APSInt, whose initial bit-width and signed-ness are used to determine the
2089    precision of the conversion.
2090  */
2091 APFloat::opStatus
convertToInteger(APSInt & result,roundingMode rounding_mode,bool * isExact) const2092 APFloat::convertToInteger(APSInt &result,
2093                           roundingMode rounding_mode, bool *isExact) const
2094 {
2095   unsigned bitWidth = result.getBitWidth();
2096   SmallVector<uint64_t, 4> parts(result.getNumWords());
2097   opStatus status = convertToInteger(
2098     parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2099   // Keeps the original signed-ness.
2100   result = APInt(bitWidth, parts);
2101   return status;
2102 }
2103 
2104 /* Convert an unsigned integer SRC to a floating point number,
2105    rounding according to ROUNDING_MODE.  The sign of the floating
2106    point number is not modified.  */
2107 APFloat::opStatus
convertFromUnsignedParts(const integerPart * src,unsigned int srcCount,roundingMode rounding_mode)2108 APFloat::convertFromUnsignedParts(const integerPart *src,
2109                                   unsigned int srcCount,
2110                                   roundingMode rounding_mode)
2111 {
2112   unsigned int omsb, precision, dstCount;
2113   integerPart *dst;
2114   lostFraction lost_fraction;
2115 
2116   category = fcNormal;
2117   omsb = APInt::tcMSB(src, srcCount) + 1;
2118   dst = significandParts();
2119   dstCount = partCount();
2120   precision = semantics->precision;
2121 
2122   /* We want the most significant PRECISION bits of SRC.  There may not
2123      be that many; extract what we can.  */
2124   if (precision <= omsb) {
2125     exponent = omsb - 1;
2126     lost_fraction = lostFractionThroughTruncation(src, srcCount,
2127                                                   omsb - precision);
2128     APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2129   } else {
2130     exponent = precision - 1;
2131     lost_fraction = lfExactlyZero;
2132     APInt::tcExtract(dst, dstCount, src, omsb, 0);
2133   }
2134 
2135   return normalize(rounding_mode, lost_fraction);
2136 }
2137 
2138 APFloat::opStatus
convertFromAPInt(const APInt & Val,bool isSigned,roundingMode rounding_mode)2139 APFloat::convertFromAPInt(const APInt &Val,
2140                           bool isSigned,
2141                           roundingMode rounding_mode)
2142 {
2143   unsigned int partCount = Val.getNumWords();
2144   APInt api = Val;
2145 
2146   sign = false;
2147   if (isSigned && api.isNegative()) {
2148     sign = true;
2149     api = -api;
2150   }
2151 
2152   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2153 }
2154 
2155 /* Convert a two's complement integer SRC to a floating point number,
2156    rounding according to ROUNDING_MODE.  ISSIGNED is true if the
2157    integer is signed, in which case it must be sign-extended.  */
2158 APFloat::opStatus
convertFromSignExtendedInteger(const integerPart * src,unsigned int srcCount,bool isSigned,roundingMode rounding_mode)2159 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2160                                         unsigned int srcCount,
2161                                         bool isSigned,
2162                                         roundingMode rounding_mode)
2163 {
2164   opStatus status;
2165 
2166   if (isSigned &&
2167       APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2168     integerPart *copy;
2169 
2170     /* If we're signed and negative negate a copy.  */
2171     sign = true;
2172     copy = new integerPart[srcCount];
2173     APInt::tcAssign(copy, src, srcCount);
2174     APInt::tcNegate(copy, srcCount);
2175     status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2176     delete [] copy;
2177   } else {
2178     sign = false;
2179     status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2180   }
2181 
2182   return status;
2183 }
2184 
2185 /* FIXME: should this just take a const APInt reference?  */
2186 APFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode)2187 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2188                                         unsigned int width, bool isSigned,
2189                                         roundingMode rounding_mode)
2190 {
2191   unsigned int partCount = partCountForBits(width);
2192   APInt api = APInt(width, makeArrayRef(parts, partCount));
2193 
2194   sign = false;
2195   if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2196     sign = true;
2197     api = -api;
2198   }
2199 
2200   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2201 }
2202 
2203 APFloat::opStatus
convertFromHexadecimalString(StringRef s,roundingMode rounding_mode)2204 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2205 {
2206   lostFraction lost_fraction = lfExactlyZero;
2207   integerPart *significand;
2208   unsigned int bitPos, partsCount;
2209   StringRef::iterator dot, firstSignificantDigit;
2210 
2211   zeroSignificand();
2212   exponent = 0;
2213   category = fcNormal;
2214 
2215   significand = significandParts();
2216   partsCount = partCount();
2217   bitPos = partsCount * integerPartWidth;
2218 
2219   /* Skip leading zeroes and any (hexa)decimal point.  */
2220   StringRef::iterator begin = s.begin();
2221   StringRef::iterator end = s.end();
2222   StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2223   firstSignificantDigit = p;
2224 
2225   for (; p != end;) {
2226     integerPart hex_value;
2227 
2228     if (*p == '.') {
2229       assert(dot == end && "String contains multiple dots");
2230       dot = p++;
2231       if (p == end) {
2232         break;
2233       }
2234     }
2235 
2236     hex_value = hexDigitValue(*p);
2237     if (hex_value == -1U) {
2238       break;
2239     }
2240 
2241     p++;
2242 
2243     if (p == end) {
2244       break;
2245     } else {
2246       /* Store the number whilst 4-bit nibbles remain.  */
2247       if (bitPos) {
2248         bitPos -= 4;
2249         hex_value <<= bitPos % integerPartWidth;
2250         significand[bitPos / integerPartWidth] |= hex_value;
2251       } else {
2252         lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2253         while (p != end && hexDigitValue(*p) != -1U)
2254           p++;
2255         break;
2256       }
2257     }
2258   }
2259 
2260   /* Hex floats require an exponent but not a hexadecimal point.  */
2261   assert(p != end && "Hex strings require an exponent");
2262   assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2263   assert(p != begin && "Significand has no digits");
2264   assert((dot == end || p - begin != 1) && "Significand has no digits");
2265 
2266   /* Ignore the exponent if we are zero.  */
2267   if (p != firstSignificantDigit) {
2268     int expAdjustment;
2269 
2270     /* Implicit hexadecimal point?  */
2271     if (dot == end)
2272       dot = p;
2273 
2274     /* Calculate the exponent adjustment implicit in the number of
2275        significant digits.  */
2276     expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2277     if (expAdjustment < 0)
2278       expAdjustment++;
2279     expAdjustment = expAdjustment * 4 - 1;
2280 
2281     /* Adjust for writing the significand starting at the most
2282        significant nibble.  */
2283     expAdjustment += semantics->precision;
2284     expAdjustment -= partsCount * integerPartWidth;
2285 
2286     /* Adjust for the given exponent.  */
2287     exponent = totalExponent(p + 1, end, expAdjustment);
2288   }
2289 
2290   return normalize(rounding_mode, lost_fraction);
2291 }
2292 
2293 APFloat::opStatus
roundSignificandWithExponent(const integerPart * decSigParts,unsigned sigPartCount,int exp,roundingMode rounding_mode)2294 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2295                                       unsigned sigPartCount, int exp,
2296                                       roundingMode rounding_mode)
2297 {
2298   unsigned int parts, pow5PartCount;
2299   fltSemantics calcSemantics = { 32767, -32767, 0 };
2300   integerPart pow5Parts[maxPowerOfFiveParts];
2301   bool isNearest;
2302 
2303   isNearest = (rounding_mode == rmNearestTiesToEven ||
2304                rounding_mode == rmNearestTiesToAway);
2305 
2306   parts = partCountForBits(semantics->precision + 11);
2307 
2308   /* Calculate pow(5, abs(exp)).  */
2309   pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2310 
2311   for (;; parts *= 2) {
2312     opStatus sigStatus, powStatus;
2313     unsigned int excessPrecision, truncatedBits;
2314 
2315     calcSemantics.precision = parts * integerPartWidth - 1;
2316     excessPrecision = calcSemantics.precision - semantics->precision;
2317     truncatedBits = excessPrecision;
2318 
2319     APFloat decSig(calcSemantics, fcZero, sign);
2320     APFloat pow5(calcSemantics, fcZero, false);
2321 
2322     sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2323                                                 rmNearestTiesToEven);
2324     powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2325                                               rmNearestTiesToEven);
2326     /* Add exp, as 10^n = 5^n * 2^n.  */
2327     decSig.exponent += exp;
2328 
2329     lostFraction calcLostFraction;
2330     integerPart HUerr, HUdistance;
2331     unsigned int powHUerr;
2332 
2333     if (exp >= 0) {
2334       /* multiplySignificand leaves the precision-th bit set to 1.  */
2335       calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2336       powHUerr = powStatus != opOK;
2337     } else {
2338       calcLostFraction = decSig.divideSignificand(pow5);
2339       /* Denormal numbers have less precision.  */
2340       if (decSig.exponent < semantics->minExponent) {
2341         excessPrecision += (semantics->minExponent - decSig.exponent);
2342         truncatedBits = excessPrecision;
2343         if (excessPrecision > calcSemantics.precision)
2344           excessPrecision = calcSemantics.precision;
2345       }
2346       /* Extra half-ulp lost in reciprocal of exponent.  */
2347       powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2348     }
2349 
2350     /* Both multiplySignificand and divideSignificand return the
2351        result with the integer bit set.  */
2352     assert(APInt::tcExtractBit
2353            (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2354 
2355     HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2356                        powHUerr);
2357     HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2358                                       excessPrecision, isNearest);
2359 
2360     /* Are we guaranteed to round correctly if we truncate?  */
2361     if (HUdistance >= HUerr) {
2362       APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2363                        calcSemantics.precision - excessPrecision,
2364                        excessPrecision);
2365       /* Take the exponent of decSig.  If we tcExtract-ed less bits
2366          above we must adjust our exponent to compensate for the
2367          implicit right shift.  */
2368       exponent = (decSig.exponent + semantics->precision
2369                   - (calcSemantics.precision - excessPrecision));
2370       calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2371                                                        decSig.partCount(),
2372                                                        truncatedBits);
2373       return normalize(rounding_mode, calcLostFraction);
2374     }
2375   }
2376 }
2377 
2378 APFloat::opStatus
convertFromDecimalString(StringRef str,roundingMode rounding_mode)2379 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2380 {
2381   decimalInfo D;
2382   opStatus fs;
2383 
2384   /* Scan the text.  */
2385   StringRef::iterator p = str.begin();
2386   interpretDecimal(p, str.end(), &D);
2387 
2388   /* Handle the quick cases.  First the case of no significant digits,
2389      i.e. zero, and then exponents that are obviously too large or too
2390      small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
2391      definitely overflows if
2392 
2393            (exp - 1) * L >= maxExponent
2394 
2395      and definitely underflows to zero where
2396 
2397            (exp + 1) * L <= minExponent - precision
2398 
2399      With integer arithmetic the tightest bounds for L are
2400 
2401            93/28 < L < 196/59            [ numerator <= 256 ]
2402            42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
2403   */
2404 
2405   if (decDigitValue(*D.firstSigDigit) >= 10U) {
2406     category = fcZero;
2407     fs = opOK;
2408 
2409   /* Check whether the normalized exponent is high enough to overflow
2410      max during the log-rebasing in the max-exponent check below. */
2411   } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2412     fs = handleOverflow(rounding_mode);
2413 
2414   /* If it wasn't, then it also wasn't high enough to overflow max
2415      during the log-rebasing in the min-exponent check.  Check that it
2416      won't overflow min in either check, then perform the min-exponent
2417      check. */
2418   } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2419              (D.normalizedExponent + 1) * 28738 <=
2420                8651 * (semantics->minExponent - (int) semantics->precision)) {
2421     /* Underflow to zero and round.  */
2422     zeroSignificand();
2423     fs = normalize(rounding_mode, lfLessThanHalf);
2424 
2425   /* We can finally safely perform the max-exponent check. */
2426   } else if ((D.normalizedExponent - 1) * 42039
2427              >= 12655 * semantics->maxExponent) {
2428     /* Overflow and round.  */
2429     fs = handleOverflow(rounding_mode);
2430   } else {
2431     integerPart *decSignificand;
2432     unsigned int partCount;
2433 
2434     /* A tight upper bound on number of bits required to hold an
2435        N-digit decimal integer is N * 196 / 59.  Allocate enough space
2436        to hold the full significand, and an extra part required by
2437        tcMultiplyPart.  */
2438     partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2439     partCount = partCountForBits(1 + 196 * partCount / 59);
2440     decSignificand = new integerPart[partCount + 1];
2441     partCount = 0;
2442 
2443     /* Convert to binary efficiently - we do almost all multiplication
2444        in an integerPart.  When this would overflow do we do a single
2445        bignum multiplication, and then revert again to multiplication
2446        in an integerPart.  */
2447     do {
2448       integerPart decValue, val, multiplier;
2449 
2450       val = 0;
2451       multiplier = 1;
2452 
2453       do {
2454         if (*p == '.') {
2455           p++;
2456           if (p == str.end()) {
2457             break;
2458           }
2459         }
2460         decValue = decDigitValue(*p++);
2461         assert(decValue < 10U && "Invalid character in significand");
2462         multiplier *= 10;
2463         val = val * 10 + decValue;
2464         /* The maximum number that can be multiplied by ten with any
2465            digit added without overflowing an integerPart.  */
2466       } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2467 
2468       /* Multiply out the current part.  */
2469       APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2470                             partCount, partCount + 1, false);
2471 
2472       /* If we used another part (likely but not guaranteed), increase
2473          the count.  */
2474       if (decSignificand[partCount])
2475         partCount++;
2476     } while (p <= D.lastSigDigit);
2477 
2478     category = fcNormal;
2479     fs = roundSignificandWithExponent(decSignificand, partCount,
2480                                       D.exponent, rounding_mode);
2481 
2482     delete [] decSignificand;
2483   }
2484 
2485   return fs;
2486 }
2487 
2488 APFloat::opStatus
convertFromString(StringRef str,roundingMode rounding_mode)2489 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2490 {
2491   assert(!str.empty() && "Invalid string length");
2492 
2493   /* Handle a leading minus sign.  */
2494   StringRef::iterator p = str.begin();
2495   size_t slen = str.size();
2496   sign = *p == '-' ? 1 : 0;
2497   if (*p == '-' || *p == '+') {
2498     p++;
2499     slen--;
2500     assert(slen && "String has no digits");
2501   }
2502 
2503   if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2504     assert(slen - 2 && "Invalid string");
2505     return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2506                                         rounding_mode);
2507   }
2508 
2509   return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2510 }
2511 
2512 /* Write out a hexadecimal representation of the floating point value
2513    to DST, which must be of sufficient size, in the C99 form
2514    [-]0xh.hhhhp[+-]d.  Return the number of characters written,
2515    excluding the terminating NUL.
2516 
2517    If UPPERCASE, the output is in upper case, otherwise in lower case.
2518 
2519    HEXDIGITS digits appear altogether, rounding the value if
2520    necessary.  If HEXDIGITS is 0, the minimal precision to display the
2521    number precisely is used instead.  If nothing would appear after
2522    the decimal point it is suppressed.
2523 
2524    The decimal exponent is always printed and has at least one digit.
2525    Zero values display an exponent of zero.  Infinities and NaNs
2526    appear as "infinity" or "nan" respectively.
2527 
2528    The above rules are as specified by C99.  There is ambiguity about
2529    what the leading hexadecimal digit should be.  This implementation
2530    uses whatever is necessary so that the exponent is displayed as
2531    stored.  This implies the exponent will fall within the IEEE format
2532    range, and the leading hexadecimal digit will be 0 (for denormals),
2533    1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2534    any other digits zero).
2535 */
2536 unsigned int
convertToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2537 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2538                             bool upperCase, roundingMode rounding_mode) const
2539 {
2540   char *p;
2541 
2542   p = dst;
2543   if (sign)
2544     *dst++ = '-';
2545 
2546   switch (category) {
2547   case fcInfinity:
2548     memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2549     dst += sizeof infinityL - 1;
2550     break;
2551 
2552   case fcNaN:
2553     memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2554     dst += sizeof NaNU - 1;
2555     break;
2556 
2557   case fcZero:
2558     *dst++ = '0';
2559     *dst++ = upperCase ? 'X': 'x';
2560     *dst++ = '0';
2561     if (hexDigits > 1) {
2562       *dst++ = '.';
2563       memset (dst, '0', hexDigits - 1);
2564       dst += hexDigits - 1;
2565     }
2566     *dst++ = upperCase ? 'P': 'p';
2567     *dst++ = '0';
2568     break;
2569 
2570   case fcNormal:
2571     dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2572     break;
2573   }
2574 
2575   *dst = 0;
2576 
2577   return static_cast<unsigned int>(dst - p);
2578 }
2579 
2580 /* Does the hard work of outputting the correctly rounded hexadecimal
2581    form of a normal floating point number with the specified number of
2582    hexadecimal digits.  If HEXDIGITS is zero the minimum number of
2583    digits necessary to print the value precisely is output.  */
2584 char *
convertNormalToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2585 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2586                                   bool upperCase,
2587                                   roundingMode rounding_mode) const
2588 {
2589   unsigned int count, valueBits, shift, partsCount, outputDigits;
2590   const char *hexDigitChars;
2591   const integerPart *significand;
2592   char *p;
2593   bool roundUp;
2594 
2595   *dst++ = '0';
2596   *dst++ = upperCase ? 'X': 'x';
2597 
2598   roundUp = false;
2599   hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2600 
2601   significand = significandParts();
2602   partsCount = partCount();
2603 
2604   /* +3 because the first digit only uses the single integer bit, so
2605      we have 3 virtual zero most-significant-bits.  */
2606   valueBits = semantics->precision + 3;
2607   shift = integerPartWidth - valueBits % integerPartWidth;
2608 
2609   /* The natural number of digits required ignoring trailing
2610      insignificant zeroes.  */
2611   outputDigits = (valueBits - significandLSB () + 3) / 4;
2612 
2613   /* hexDigits of zero means use the required number for the
2614      precision.  Otherwise, see if we are truncating.  If we are,
2615      find out if we need to round away from zero.  */
2616   if (hexDigits) {
2617     if (hexDigits < outputDigits) {
2618       /* We are dropping non-zero bits, so need to check how to round.
2619          "bits" is the number of dropped bits.  */
2620       unsigned int bits;
2621       lostFraction fraction;
2622 
2623       bits = valueBits - hexDigits * 4;
2624       fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2625       roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2626     }
2627     outputDigits = hexDigits;
2628   }
2629 
2630   /* Write the digits consecutively, and start writing in the location
2631      of the hexadecimal point.  We move the most significant digit
2632      left and add the hexadecimal point later.  */
2633   p = ++dst;
2634 
2635   count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2636 
2637   while (outputDigits && count) {
2638     integerPart part;
2639 
2640     /* Put the most significant integerPartWidth bits in "part".  */
2641     if (--count == partsCount)
2642       part = 0;  /* An imaginary higher zero part.  */
2643     else
2644       part = significand[count] << shift;
2645 
2646     if (count && shift)
2647       part |= significand[count - 1] >> (integerPartWidth - shift);
2648 
2649     /* Convert as much of "part" to hexdigits as we can.  */
2650     unsigned int curDigits = integerPartWidth / 4;
2651 
2652     if (curDigits > outputDigits)
2653       curDigits = outputDigits;
2654     dst += partAsHex (dst, part, curDigits, hexDigitChars);
2655     outputDigits -= curDigits;
2656   }
2657 
2658   if (roundUp) {
2659     char *q = dst;
2660 
2661     /* Note that hexDigitChars has a trailing '0'.  */
2662     do {
2663       q--;
2664       *q = hexDigitChars[hexDigitValue (*q) + 1];
2665     } while (*q == '0');
2666     assert(q >= p);
2667   } else {
2668     /* Add trailing zeroes.  */
2669     memset (dst, '0', outputDigits);
2670     dst += outputDigits;
2671   }
2672 
2673   /* Move the most significant digit to before the point, and if there
2674      is something after the decimal point add it.  This must come
2675      after rounding above.  */
2676   p[-1] = p[0];
2677   if (dst -1 == p)
2678     dst--;
2679   else
2680     p[0] = '.';
2681 
2682   /* Finally output the exponent.  */
2683   *dst++ = upperCase ? 'P': 'p';
2684 
2685   return writeSignedDecimal (dst, exponent);
2686 }
2687 
hash_value(const APFloat & Arg)2688 hash_code llvm::hash_value(const APFloat &Arg) {
2689   if (Arg.category != APFloat::fcNormal)
2690     return hash_combine((uint8_t)Arg.category,
2691                         // NaN has no sign, fix it at zero.
2692                         Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2693                         Arg.semantics->precision);
2694 
2695   // Normal floats need their exponent and significand hashed.
2696   return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2697                       Arg.semantics->precision, Arg.exponent,
2698                       hash_combine_range(
2699                         Arg.significandParts(),
2700                         Arg.significandParts() + Arg.partCount()));
2701 }
2702 
2703 // Conversion from APFloat to/from host float/double.  It may eventually be
2704 // possible to eliminate these and have everybody deal with APFloats, but that
2705 // will take a while.  This approach will not easily extend to long double.
2706 // Current implementation requires integerPartWidth==64, which is correct at
2707 // the moment but could be made more general.
2708 
2709 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2710 // the actual IEEE respresentations.  We compensate for that here.
2711 
2712 APInt
convertF80LongDoubleAPFloatToAPInt() const2713 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2714 {
2715   assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2716   assert(partCount()==2);
2717 
2718   uint64_t myexponent, mysignificand;
2719 
2720   if (category==fcNormal) {
2721     myexponent = exponent+16383; //bias
2722     mysignificand = significandParts()[0];
2723     if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2724       myexponent = 0;   // denormal
2725   } else if (category==fcZero) {
2726     myexponent = 0;
2727     mysignificand = 0;
2728   } else if (category==fcInfinity) {
2729     myexponent = 0x7fff;
2730     mysignificand = 0x8000000000000000ULL;
2731   } else {
2732     assert(category == fcNaN && "Unknown category");
2733     myexponent = 0x7fff;
2734     mysignificand = significandParts()[0];
2735   }
2736 
2737   uint64_t words[2];
2738   words[0] = mysignificand;
2739   words[1] =  ((uint64_t)(sign & 1) << 15) |
2740               (myexponent & 0x7fffLL);
2741   return APInt(80, words);
2742 }
2743 
2744 APInt
convertPPCDoubleDoubleAPFloatToAPInt() const2745 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2746 {
2747   assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2748   assert(partCount()==2);
2749 
2750   uint64_t words[2];
2751   opStatus fs;
2752   bool losesInfo;
2753 
2754   // Convert number to double.  To avoid spurious underflows, we re-
2755   // normalize against the "double" minExponent first, and only *then*
2756   // truncate the mantissa.  The result of that second conversion
2757   // may be inexact, but should never underflow.
2758   // Declare fltSemantics before APFloat that uses it (and
2759   // saves pointer to it) to ensure correct destruction order.
2760   fltSemantics extendedSemantics = *semantics;
2761   extendedSemantics.minExponent = IEEEdouble.minExponent;
2762   APFloat extended(*this);
2763   fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2764   assert(fs == opOK && !losesInfo);
2765   (void)fs;
2766 
2767   APFloat u(extended);
2768   fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2769   assert(fs == opOK || fs == opInexact);
2770   (void)fs;
2771   words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2772 
2773   // If conversion was exact or resulted in a special case, we're done;
2774   // just set the second double to zero.  Otherwise, re-convert back to
2775   // the extended format and compute the difference.  This now should
2776   // convert exactly to double.
2777   if (u.category == fcNormal && losesInfo) {
2778     fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2779     assert(fs == opOK && !losesInfo);
2780     (void)fs;
2781 
2782     APFloat v(extended);
2783     v.subtract(u, rmNearestTiesToEven);
2784     fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2785     assert(fs == opOK && !losesInfo);
2786     (void)fs;
2787     words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2788   } else {
2789     words[1] = 0;
2790   }
2791 
2792   return APInt(128, words);
2793 }
2794 
2795 APInt
convertQuadrupleAPFloatToAPInt() const2796 APFloat::convertQuadrupleAPFloatToAPInt() const
2797 {
2798   assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2799   assert(partCount()==2);
2800 
2801   uint64_t myexponent, mysignificand, mysignificand2;
2802 
2803   if (category==fcNormal) {
2804     myexponent = exponent+16383; //bias
2805     mysignificand = significandParts()[0];
2806     mysignificand2 = significandParts()[1];
2807     if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2808       myexponent = 0;   // denormal
2809   } else if (category==fcZero) {
2810     myexponent = 0;
2811     mysignificand = mysignificand2 = 0;
2812   } else if (category==fcInfinity) {
2813     myexponent = 0x7fff;
2814     mysignificand = mysignificand2 = 0;
2815   } else {
2816     assert(category == fcNaN && "Unknown category!");
2817     myexponent = 0x7fff;
2818     mysignificand = significandParts()[0];
2819     mysignificand2 = significandParts()[1];
2820   }
2821 
2822   uint64_t words[2];
2823   words[0] = mysignificand;
2824   words[1] = ((uint64_t)(sign & 1) << 63) |
2825              ((myexponent & 0x7fff) << 48) |
2826              (mysignificand2 & 0xffffffffffffLL);
2827 
2828   return APInt(128, words);
2829 }
2830 
2831 APInt
convertDoubleAPFloatToAPInt() const2832 APFloat::convertDoubleAPFloatToAPInt() const
2833 {
2834   assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2835   assert(partCount()==1);
2836 
2837   uint64_t myexponent, mysignificand;
2838 
2839   if (category==fcNormal) {
2840     myexponent = exponent+1023; //bias
2841     mysignificand = *significandParts();
2842     if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2843       myexponent = 0;   // denormal
2844   } else if (category==fcZero) {
2845     myexponent = 0;
2846     mysignificand = 0;
2847   } else if (category==fcInfinity) {
2848     myexponent = 0x7ff;
2849     mysignificand = 0;
2850   } else {
2851     assert(category == fcNaN && "Unknown category!");
2852     myexponent = 0x7ff;
2853     mysignificand = *significandParts();
2854   }
2855 
2856   return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2857                      ((myexponent & 0x7ff) <<  52) |
2858                      (mysignificand & 0xfffffffffffffLL))));
2859 }
2860 
2861 APInt
convertFloatAPFloatToAPInt() const2862 APFloat::convertFloatAPFloatToAPInt() const
2863 {
2864   assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2865   assert(partCount()==1);
2866 
2867   uint32_t myexponent, mysignificand;
2868 
2869   if (category==fcNormal) {
2870     myexponent = exponent+127; //bias
2871     mysignificand = (uint32_t)*significandParts();
2872     if (myexponent == 1 && !(mysignificand & 0x800000))
2873       myexponent = 0;   // denormal
2874   } else if (category==fcZero) {
2875     myexponent = 0;
2876     mysignificand = 0;
2877   } else if (category==fcInfinity) {
2878     myexponent = 0xff;
2879     mysignificand = 0;
2880   } else {
2881     assert(category == fcNaN && "Unknown category!");
2882     myexponent = 0xff;
2883     mysignificand = (uint32_t)*significandParts();
2884   }
2885 
2886   return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2887                     (mysignificand & 0x7fffff)));
2888 }
2889 
2890 APInt
convertHalfAPFloatToAPInt() const2891 APFloat::convertHalfAPFloatToAPInt() const
2892 {
2893   assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2894   assert(partCount()==1);
2895 
2896   uint32_t myexponent, mysignificand;
2897 
2898   if (category==fcNormal) {
2899     myexponent = exponent+15; //bias
2900     mysignificand = (uint32_t)*significandParts();
2901     if (myexponent == 1 && !(mysignificand & 0x400))
2902       myexponent = 0;   // denormal
2903   } else if (category==fcZero) {
2904     myexponent = 0;
2905     mysignificand = 0;
2906   } else if (category==fcInfinity) {
2907     myexponent = 0x1f;
2908     mysignificand = 0;
2909   } else {
2910     assert(category == fcNaN && "Unknown category!");
2911     myexponent = 0x1f;
2912     mysignificand = (uint32_t)*significandParts();
2913   }
2914 
2915   return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2916                     (mysignificand & 0x3ff)));
2917 }
2918 
2919 // This function creates an APInt that is just a bit map of the floating
2920 // point constant as it would appear in memory.  It is not a conversion,
2921 // and treating the result as a normal integer is unlikely to be useful.
2922 
2923 APInt
bitcastToAPInt() const2924 APFloat::bitcastToAPInt() const
2925 {
2926   if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2927     return convertHalfAPFloatToAPInt();
2928 
2929   if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2930     return convertFloatAPFloatToAPInt();
2931 
2932   if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2933     return convertDoubleAPFloatToAPInt();
2934 
2935   if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2936     return convertQuadrupleAPFloatToAPInt();
2937 
2938   if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2939     return convertPPCDoubleDoubleAPFloatToAPInt();
2940 
2941   assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2942          "unknown format!");
2943   return convertF80LongDoubleAPFloatToAPInt();
2944 }
2945 
2946 float
convertToFloat() const2947 APFloat::convertToFloat() const
2948 {
2949   assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2950          "Float semantics are not IEEEsingle");
2951   APInt api = bitcastToAPInt();
2952   return api.bitsToFloat();
2953 }
2954 
2955 double
convertToDouble() const2956 APFloat::convertToDouble() const
2957 {
2958   assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2959          "Float semantics are not IEEEdouble");
2960   APInt api = bitcastToAPInt();
2961   return api.bitsToDouble();
2962 }
2963 
2964 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
2965 /// does not support these bit patterns:
2966 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2967 ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2968 ///  exponent = 0, integer bit 1 ("pseudodenormal")
2969 ///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2970 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2971 void
initFromF80LongDoubleAPInt(const APInt & api)2972 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2973 {
2974   assert(api.getBitWidth()==80);
2975   uint64_t i1 = api.getRawData()[0];
2976   uint64_t i2 = api.getRawData()[1];
2977   uint64_t myexponent = (i2 & 0x7fff);
2978   uint64_t mysignificand = i1;
2979 
2980   initialize(&APFloat::x87DoubleExtended);
2981   assert(partCount()==2);
2982 
2983   sign = static_cast<unsigned int>(i2>>15);
2984   if (myexponent==0 && mysignificand==0) {
2985     // exponent, significand meaningless
2986     category = fcZero;
2987   } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2988     // exponent, significand meaningless
2989     category = fcInfinity;
2990   } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2991     // exponent meaningless
2992     category = fcNaN;
2993     significandParts()[0] = mysignificand;
2994     significandParts()[1] = 0;
2995   } else {
2996     category = fcNormal;
2997     exponent = myexponent - 16383;
2998     significandParts()[0] = mysignificand;
2999     significandParts()[1] = 0;
3000     if (myexponent==0)          // denormal
3001       exponent = -16382;
3002   }
3003 }
3004 
3005 void
initFromPPCDoubleDoubleAPInt(const APInt & api)3006 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3007 {
3008   assert(api.getBitWidth()==128);
3009   uint64_t i1 = api.getRawData()[0];
3010   uint64_t i2 = api.getRawData()[1];
3011   opStatus fs;
3012   bool losesInfo;
3013 
3014   // Get the first double and convert to our format.
3015   initFromDoubleAPInt(APInt(64, i1));
3016   fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3017   assert(fs == opOK && !losesInfo);
3018   (void)fs;
3019 
3020   // Unless we have a special case, add in second double.
3021   if (category == fcNormal) {
3022     APFloat v(IEEEdouble, APInt(64, i2));
3023     fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3024     assert(fs == opOK && !losesInfo);
3025     (void)fs;
3026 
3027     add(v, rmNearestTiesToEven);
3028   }
3029 }
3030 
3031 void
initFromQuadrupleAPInt(const APInt & api)3032 APFloat::initFromQuadrupleAPInt(const APInt &api)
3033 {
3034   assert(api.getBitWidth()==128);
3035   uint64_t i1 = api.getRawData()[0];
3036   uint64_t i2 = api.getRawData()[1];
3037   uint64_t myexponent = (i2 >> 48) & 0x7fff;
3038   uint64_t mysignificand  = i1;
3039   uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3040 
3041   initialize(&APFloat::IEEEquad);
3042   assert(partCount()==2);
3043 
3044   sign = static_cast<unsigned int>(i2>>63);
3045   if (myexponent==0 &&
3046       (mysignificand==0 && mysignificand2==0)) {
3047     // exponent, significand meaningless
3048     category = fcZero;
3049   } else if (myexponent==0x7fff &&
3050              (mysignificand==0 && mysignificand2==0)) {
3051     // exponent, significand meaningless
3052     category = fcInfinity;
3053   } else if (myexponent==0x7fff &&
3054              (mysignificand!=0 || mysignificand2 !=0)) {
3055     // exponent meaningless
3056     category = fcNaN;
3057     significandParts()[0] = mysignificand;
3058     significandParts()[1] = mysignificand2;
3059   } else {
3060     category = fcNormal;
3061     exponent = myexponent - 16383;
3062     significandParts()[0] = mysignificand;
3063     significandParts()[1] = mysignificand2;
3064     if (myexponent==0)          // denormal
3065       exponent = -16382;
3066     else
3067       significandParts()[1] |= 0x1000000000000LL;  // integer bit
3068   }
3069 }
3070 
3071 void
initFromDoubleAPInt(const APInt & api)3072 APFloat::initFromDoubleAPInt(const APInt &api)
3073 {
3074   assert(api.getBitWidth()==64);
3075   uint64_t i = *api.getRawData();
3076   uint64_t myexponent = (i >> 52) & 0x7ff;
3077   uint64_t mysignificand = i & 0xfffffffffffffLL;
3078 
3079   initialize(&APFloat::IEEEdouble);
3080   assert(partCount()==1);
3081 
3082   sign = static_cast<unsigned int>(i>>63);
3083   if (myexponent==0 && mysignificand==0) {
3084     // exponent, significand meaningless
3085     category = fcZero;
3086   } else if (myexponent==0x7ff && mysignificand==0) {
3087     // exponent, significand meaningless
3088     category = fcInfinity;
3089   } else if (myexponent==0x7ff && mysignificand!=0) {
3090     // exponent meaningless
3091     category = fcNaN;
3092     *significandParts() = mysignificand;
3093   } else {
3094     category = fcNormal;
3095     exponent = myexponent - 1023;
3096     *significandParts() = mysignificand;
3097     if (myexponent==0)          // denormal
3098       exponent = -1022;
3099     else
3100       *significandParts() |= 0x10000000000000LL;  // integer bit
3101   }
3102 }
3103 
3104 void
initFromFloatAPInt(const APInt & api)3105 APFloat::initFromFloatAPInt(const APInt & api)
3106 {
3107   assert(api.getBitWidth()==32);
3108   uint32_t i = (uint32_t)*api.getRawData();
3109   uint32_t myexponent = (i >> 23) & 0xff;
3110   uint32_t mysignificand = i & 0x7fffff;
3111 
3112   initialize(&APFloat::IEEEsingle);
3113   assert(partCount()==1);
3114 
3115   sign = i >> 31;
3116   if (myexponent==0 && mysignificand==0) {
3117     // exponent, significand meaningless
3118     category = fcZero;
3119   } else if (myexponent==0xff && mysignificand==0) {
3120     // exponent, significand meaningless
3121     category = fcInfinity;
3122   } else if (myexponent==0xff && mysignificand!=0) {
3123     // sign, exponent, significand meaningless
3124     category = fcNaN;
3125     *significandParts() = mysignificand;
3126   } else {
3127     category = fcNormal;
3128     exponent = myexponent - 127;  //bias
3129     *significandParts() = mysignificand;
3130     if (myexponent==0)    // denormal
3131       exponent = -126;
3132     else
3133       *significandParts() |= 0x800000; // integer bit
3134   }
3135 }
3136 
3137 void
initFromHalfAPInt(const APInt & api)3138 APFloat::initFromHalfAPInt(const APInt & api)
3139 {
3140   assert(api.getBitWidth()==16);
3141   uint32_t i = (uint32_t)*api.getRawData();
3142   uint32_t myexponent = (i >> 10) & 0x1f;
3143   uint32_t mysignificand = i & 0x3ff;
3144 
3145   initialize(&APFloat::IEEEhalf);
3146   assert(partCount()==1);
3147 
3148   sign = i >> 15;
3149   if (myexponent==0 && mysignificand==0) {
3150     // exponent, significand meaningless
3151     category = fcZero;
3152   } else if (myexponent==0x1f && mysignificand==0) {
3153     // exponent, significand meaningless
3154     category = fcInfinity;
3155   } else if (myexponent==0x1f && mysignificand!=0) {
3156     // sign, exponent, significand meaningless
3157     category = fcNaN;
3158     *significandParts() = mysignificand;
3159   } else {
3160     category = fcNormal;
3161     exponent = myexponent - 15;  //bias
3162     *significandParts() = mysignificand;
3163     if (myexponent==0)    // denormal
3164       exponent = -14;
3165     else
3166       *significandParts() |= 0x400; // integer bit
3167   }
3168 }
3169 
3170 /// Treat api as containing the bits of a floating point number.  Currently
3171 /// we infer the floating point type from the size of the APInt.  The
3172 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3173 /// when the size is anything else).
3174 void
initFromAPInt(const fltSemantics * Sem,const APInt & api)3175 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
3176 {
3177   if (Sem == &IEEEhalf)
3178     return initFromHalfAPInt(api);
3179   if (Sem == &IEEEsingle)
3180     return initFromFloatAPInt(api);
3181   if (Sem == &IEEEdouble)
3182     return initFromDoubleAPInt(api);
3183   if (Sem == &x87DoubleExtended)
3184     return initFromF80LongDoubleAPInt(api);
3185   if (Sem == &IEEEquad)
3186     return initFromQuadrupleAPInt(api);
3187   if (Sem == &PPCDoubleDouble)
3188     return initFromPPCDoubleDoubleAPInt(api);
3189 
3190   llvm_unreachable(0);
3191 }
3192 
3193 APFloat
getAllOnesValue(unsigned BitWidth,bool isIEEE)3194 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3195 {
3196   switch (BitWidth) {
3197   case 16:
3198     return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth));
3199   case 32:
3200     return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth));
3201   case 64:
3202     return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth));
3203   case 80:
3204     return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth));
3205   case 128:
3206     if (isIEEE)
3207       return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth));
3208     return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
3209   default:
3210     llvm_unreachable("Unknown floating bit width");
3211   }
3212 }
3213 
getLargest(const fltSemantics & Sem,bool Negative)3214 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3215   APFloat Val(Sem, fcNormal, Negative);
3216 
3217   // We want (in interchange format):
3218   //   sign = {Negative}
3219   //   exponent = 1..10
3220   //   significand = 1..1
3221 
3222   Val.exponent = Sem.maxExponent; // unbiased
3223 
3224   // 1-initialize all bits....
3225   Val.zeroSignificand();
3226   integerPart *significand = Val.significandParts();
3227   unsigned N = partCountForBits(Sem.precision);
3228   for (unsigned i = 0; i != N; ++i)
3229     significand[i] = ~((integerPart) 0);
3230 
3231   // ...and then clear the top bits for internal consistency.
3232   if (Sem.precision % integerPartWidth != 0)
3233     significand[N-1] &=
3234       (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
3235 
3236   return Val;
3237 }
3238 
getSmallest(const fltSemantics & Sem,bool Negative)3239 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3240   APFloat Val(Sem, fcNormal, Negative);
3241 
3242   // We want (in interchange format):
3243   //   sign = {Negative}
3244   //   exponent = 0..0
3245   //   significand = 0..01
3246 
3247   Val.exponent = Sem.minExponent; // unbiased
3248   Val.zeroSignificand();
3249   Val.significandParts()[0] = 1;
3250   return Val;
3251 }
3252 
getSmallestNormalized(const fltSemantics & Sem,bool Negative)3253 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3254   APFloat Val(Sem, fcNormal, Negative);
3255 
3256   // We want (in interchange format):
3257   //   sign = {Negative}
3258   //   exponent = 0..0
3259   //   significand = 10..0
3260 
3261   Val.exponent = Sem.minExponent;
3262   Val.zeroSignificand();
3263   Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3264     (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3265 
3266   return Val;
3267 }
3268 
APFloat(const fltSemantics & Sem,const APInt & API)3269 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) {
3270   initFromAPInt(&Sem, API);
3271 }
3272 
APFloat(float f)3273 APFloat::APFloat(float f) {
3274   initFromAPInt(&IEEEsingle, APInt::floatToBits(f));
3275 }
3276 
APFloat(double d)3277 APFloat::APFloat(double d) {
3278   initFromAPInt(&IEEEdouble, APInt::doubleToBits(d));
3279 }
3280 
3281 namespace {
append(SmallVectorImpl<char> & Buffer,StringRef Str)3282   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3283     Buffer.append(Str.begin(), Str.end());
3284   }
3285 
3286   /// Removes data from the given significand until it is no more
3287   /// precise than is required for the desired precision.
AdjustToPrecision(APInt & significand,int & exp,unsigned FormatPrecision)3288   void AdjustToPrecision(APInt &significand,
3289                          int &exp, unsigned FormatPrecision) {
3290     unsigned bits = significand.getActiveBits();
3291 
3292     // 196/59 is a very slight overestimate of lg_2(10).
3293     unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3294 
3295     if (bits <= bitsRequired) return;
3296 
3297     unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3298     if (!tensRemovable) return;
3299 
3300     exp += tensRemovable;
3301 
3302     APInt divisor(significand.getBitWidth(), 1);
3303     APInt powten(significand.getBitWidth(), 10);
3304     while (true) {
3305       if (tensRemovable & 1)
3306         divisor *= powten;
3307       tensRemovable >>= 1;
3308       if (!tensRemovable) break;
3309       powten *= powten;
3310     }
3311 
3312     significand = significand.udiv(divisor);
3313 
3314     // Truncate the significand down to its active bit count, but
3315     // don't try to drop below 32.
3316     unsigned newPrecision = std::max(32U, significand.getActiveBits());
3317     significand = significand.trunc(newPrecision);
3318   }
3319 
3320 
AdjustToPrecision(SmallVectorImpl<char> & buffer,int & exp,unsigned FormatPrecision)3321   void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3322                          int &exp, unsigned FormatPrecision) {
3323     unsigned N = buffer.size();
3324     if (N <= FormatPrecision) return;
3325 
3326     // The most significant figures are the last ones in the buffer.
3327     unsigned FirstSignificant = N - FormatPrecision;
3328 
3329     // Round.
3330     // FIXME: this probably shouldn't use 'round half up'.
3331 
3332     // Rounding down is just a truncation, except we also want to drop
3333     // trailing zeros from the new result.
3334     if (buffer[FirstSignificant - 1] < '5') {
3335       while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3336         FirstSignificant++;
3337 
3338       exp += FirstSignificant;
3339       buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3340       return;
3341     }
3342 
3343     // Rounding up requires a decimal add-with-carry.  If we continue
3344     // the carry, the newly-introduced zeros will just be truncated.
3345     for (unsigned I = FirstSignificant; I != N; ++I) {
3346       if (buffer[I] == '9') {
3347         FirstSignificant++;
3348       } else {
3349         buffer[I]++;
3350         break;
3351       }
3352     }
3353 
3354     // If we carried through, we have exactly one digit of precision.
3355     if (FirstSignificant == N) {
3356       exp += FirstSignificant;
3357       buffer.clear();
3358       buffer.push_back('1');
3359       return;
3360     }
3361 
3362     exp += FirstSignificant;
3363     buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3364   }
3365 }
3366 
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding) const3367 void APFloat::toString(SmallVectorImpl<char> &Str,
3368                        unsigned FormatPrecision,
3369                        unsigned FormatMaxPadding) const {
3370   switch (category) {
3371   case fcInfinity:
3372     if (isNegative())
3373       return append(Str, "-Inf");
3374     else
3375       return append(Str, "+Inf");
3376 
3377   case fcNaN: return append(Str, "NaN");
3378 
3379   case fcZero:
3380     if (isNegative())
3381       Str.push_back('-');
3382 
3383     if (!FormatMaxPadding)
3384       append(Str, "0.0E+0");
3385     else
3386       Str.push_back('0');
3387     return;
3388 
3389   case fcNormal:
3390     break;
3391   }
3392 
3393   if (isNegative())
3394     Str.push_back('-');
3395 
3396   // Decompose the number into an APInt and an exponent.
3397   int exp = exponent - ((int) semantics->precision - 1);
3398   APInt significand(semantics->precision,
3399                     makeArrayRef(significandParts(),
3400                                  partCountForBits(semantics->precision)));
3401 
3402   // Set FormatPrecision if zero.  We want to do this before we
3403   // truncate trailing zeros, as those are part of the precision.
3404   if (!FormatPrecision) {
3405     // It's an interesting question whether to use the nominal
3406     // precision or the active precision here for denormals.
3407 
3408     // FormatPrecision = ceil(significandBits / lg_2(10))
3409     FormatPrecision = (semantics->precision * 59 + 195) / 196;
3410   }
3411 
3412   // Ignore trailing binary zeros.
3413   int trailingZeros = significand.countTrailingZeros();
3414   exp += trailingZeros;
3415   significand = significand.lshr(trailingZeros);
3416 
3417   // Change the exponent from 2^e to 10^e.
3418   if (exp == 0) {
3419     // Nothing to do.
3420   } else if (exp > 0) {
3421     // Just shift left.
3422     significand = significand.zext(semantics->precision + exp);
3423     significand <<= exp;
3424     exp = 0;
3425   } else { /* exp < 0 */
3426     int texp = -exp;
3427 
3428     // We transform this using the identity:
3429     //   (N)(2^-e) == (N)(5^e)(10^-e)
3430     // This means we have to multiply N (the significand) by 5^e.
3431     // To avoid overflow, we have to operate on numbers large
3432     // enough to store N * 5^e:
3433     //   log2(N * 5^e) == log2(N) + e * log2(5)
3434     //                 <= semantics->precision + e * 137 / 59
3435     //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3436 
3437     unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3438 
3439     // Multiply significand by 5^e.
3440     //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3441     significand = significand.zext(precision);
3442     APInt five_to_the_i(precision, 5);
3443     while (true) {
3444       if (texp & 1) significand *= five_to_the_i;
3445 
3446       texp >>= 1;
3447       if (!texp) break;
3448       five_to_the_i *= five_to_the_i;
3449     }
3450   }
3451 
3452   AdjustToPrecision(significand, exp, FormatPrecision);
3453 
3454   SmallVector<char, 256> buffer;
3455 
3456   // Fill the buffer.
3457   unsigned precision = significand.getBitWidth();
3458   APInt ten(precision, 10);
3459   APInt digit(precision, 0);
3460 
3461   bool inTrail = true;
3462   while (significand != 0) {
3463     // digit <- significand % 10
3464     // significand <- significand / 10
3465     APInt::udivrem(significand, ten, significand, digit);
3466 
3467     unsigned d = digit.getZExtValue();
3468 
3469     // Drop trailing zeros.
3470     if (inTrail && !d) exp++;
3471     else {
3472       buffer.push_back((char) ('0' + d));
3473       inTrail = false;
3474     }
3475   }
3476 
3477   assert(!buffer.empty() && "no characters in buffer!");
3478 
3479   // Drop down to FormatPrecision.
3480   // TODO: don't do more precise calculations above than are required.
3481   AdjustToPrecision(buffer, exp, FormatPrecision);
3482 
3483   unsigned NDigits = buffer.size();
3484 
3485   // Check whether we should use scientific notation.
3486   bool FormatScientific;
3487   if (!FormatMaxPadding)
3488     FormatScientific = true;
3489   else {
3490     if (exp >= 0) {
3491       // 765e3 --> 765000
3492       //              ^^^
3493       // But we shouldn't make the number look more precise than it is.
3494       FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3495                           NDigits + (unsigned) exp > FormatPrecision);
3496     } else {
3497       // Power of the most significant digit.
3498       int MSD = exp + (int) (NDigits - 1);
3499       if (MSD >= 0) {
3500         // 765e-2 == 7.65
3501         FormatScientific = false;
3502       } else {
3503         // 765e-5 == 0.00765
3504         //           ^ ^^
3505         FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3506       }
3507     }
3508   }
3509 
3510   // Scientific formatting is pretty straightforward.
3511   if (FormatScientific) {
3512     exp += (NDigits - 1);
3513 
3514     Str.push_back(buffer[NDigits-1]);
3515     Str.push_back('.');
3516     if (NDigits == 1)
3517       Str.push_back('0');
3518     else
3519       for (unsigned I = 1; I != NDigits; ++I)
3520         Str.push_back(buffer[NDigits-1-I]);
3521     Str.push_back('E');
3522 
3523     Str.push_back(exp >= 0 ? '+' : '-');
3524     if (exp < 0) exp = -exp;
3525     SmallVector<char, 6> expbuf;
3526     do {
3527       expbuf.push_back((char) ('0' + (exp % 10)));
3528       exp /= 10;
3529     } while (exp);
3530     for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3531       Str.push_back(expbuf[E-1-I]);
3532     return;
3533   }
3534 
3535   // Non-scientific, positive exponents.
3536   if (exp >= 0) {
3537     for (unsigned I = 0; I != NDigits; ++I)
3538       Str.push_back(buffer[NDigits-1-I]);
3539     for (unsigned I = 0; I != (unsigned) exp; ++I)
3540       Str.push_back('0');
3541     return;
3542   }
3543 
3544   // Non-scientific, negative exponents.
3545 
3546   // The number of digits to the left of the decimal point.
3547   int NWholeDigits = exp + (int) NDigits;
3548 
3549   unsigned I = 0;
3550   if (NWholeDigits > 0) {
3551     for (; I != (unsigned) NWholeDigits; ++I)
3552       Str.push_back(buffer[NDigits-I-1]);
3553     Str.push_back('.');
3554   } else {
3555     unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3556 
3557     Str.push_back('0');
3558     Str.push_back('.');
3559     for (unsigned Z = 1; Z != NZeros; ++Z)
3560       Str.push_back('0');
3561   }
3562 
3563   for (; I != NDigits; ++I)
3564     Str.push_back(buffer[NDigits-I-1]);
3565 }
3566 
getExactInverse(APFloat * inv) const3567 bool APFloat::getExactInverse(APFloat *inv) const {
3568   // Special floats and denormals have no exact inverse.
3569   if (category != fcNormal)
3570     return false;
3571 
3572   // Check that the number is a power of two by making sure that only the
3573   // integer bit is set in the significand.
3574   if (significandLSB() != semantics->precision - 1)
3575     return false;
3576 
3577   // Get the inverse.
3578   APFloat reciprocal(*semantics, 1ULL);
3579   if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3580     return false;
3581 
3582   // Avoid multiplication with a denormal, it is not safe on all platforms and
3583   // may be slower than a normal division.
3584   if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
3585     return false;
3586 
3587   assert(reciprocal.category == fcNormal &&
3588          reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3589 
3590   if (inv)
3591     *inv = reciprocal;
3592 
3593   return true;
3594 }
3595