1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/MathExtras.h"
23 #include <cstring>
24 #include <limits.h>
25
26 using namespace llvm;
27
28 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
29
30 /* Assumed in hexadecimal significand parsing, and conversion to
31 hexadecimal strings. */
32 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
33 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
34
35 namespace llvm {
36
37 /* Represents floating point arithmetic semantics. */
38 struct fltSemantics {
39 /* The largest E such that 2^E is representable; this matches the
40 definition of IEEE 754. */
41 exponent_t maxExponent;
42
43 /* The smallest E such that 2^E is a normalized number; this
44 matches the definition of IEEE 754. */
45 exponent_t minExponent;
46
47 /* Number of bits in the significand. This includes the integer
48 bit. */
49 unsigned int precision;
50 };
51
52 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 };
53 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 };
54 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 };
55 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 };
56 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 };
57 const fltSemantics APFloat::Bogus = { 0, 0, 0 };
58
59 /* The PowerPC format consists of two doubles. It does not map cleanly
60 onto the usual format above. It is approximated using twice the
61 mantissa bits. Note that for exponents near the double minimum,
62 we no longer can represent the full 106 mantissa bits, so those
63 will be treated as denormal numbers.
64
65 FIXME: While this approximation is equivalent to what GCC uses for
66 compile-time arithmetic on PPC double-double numbers, it is not able
67 to represent all possible values held by a PPC double-double number,
68 for example: (long double) 1.0 + (long double) 0x1p-106
69 Should this be replaced by a full emulation of PPC double-double? */
70 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 };
71
72 /* A tight upper bound on number of parts required to hold the value
73 pow(5, power) is
74
75 power * 815 / (351 * integerPartWidth) + 1
76
77 However, whilst the result may require only this many parts,
78 because we are multiplying two values to get it, the
79 multiplication may require an extra part with the excess part
80 being zero (consider the trivial case of 1 * 1, tcFullMultiply
81 requires two parts to hold the single-part result). So we add an
82 extra one to guarantee enough space whilst multiplying. */
83 const unsigned int maxExponent = 16383;
84 const unsigned int maxPrecision = 113;
85 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
86 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
87 / (351 * integerPartWidth));
88 }
89
90 /* A bunch of private, handy routines. */
91
92 static inline unsigned int
partCountForBits(unsigned int bits)93 partCountForBits(unsigned int bits)
94 {
95 return ((bits) + integerPartWidth - 1) / integerPartWidth;
96 }
97
98 /* Returns 0U-9U. Return values >= 10U are not digits. */
99 static inline unsigned int
decDigitValue(unsigned int c)100 decDigitValue(unsigned int c)
101 {
102 return c - '0';
103 }
104
105 /* Return the value of a decimal exponent of the form
106 [+-]ddddddd.
107
108 If the exponent overflows, returns a large exponent with the
109 appropriate sign. */
110 static int
readExponent(StringRef::iterator begin,StringRef::iterator end)111 readExponent(StringRef::iterator begin, StringRef::iterator end)
112 {
113 bool isNegative;
114 unsigned int absExponent;
115 const unsigned int overlargeExponent = 24000; /* FIXME. */
116 StringRef::iterator p = begin;
117
118 assert(p != end && "Exponent has no digits");
119
120 isNegative = (*p == '-');
121 if (*p == '-' || *p == '+') {
122 p++;
123 assert(p != end && "Exponent has no digits");
124 }
125
126 absExponent = decDigitValue(*p++);
127 assert(absExponent < 10U && "Invalid character in exponent");
128
129 for (; p != end; ++p) {
130 unsigned int value;
131
132 value = decDigitValue(*p);
133 assert(value < 10U && "Invalid character in exponent");
134
135 value += absExponent * 10;
136 if (absExponent >= overlargeExponent) {
137 absExponent = overlargeExponent;
138 p = end; /* outwit assert below */
139 break;
140 }
141 absExponent = value;
142 }
143
144 assert(p == end && "Invalid exponent in exponent");
145
146 if (isNegative)
147 return -(int) absExponent;
148 else
149 return (int) absExponent;
150 }
151
152 /* This is ugly and needs cleaning up, but I don't immediately see
153 how whilst remaining safe. */
154 static int
totalExponent(StringRef::iterator p,StringRef::iterator end,int exponentAdjustment)155 totalExponent(StringRef::iterator p, StringRef::iterator end,
156 int exponentAdjustment)
157 {
158 int unsignedExponent;
159 bool negative, overflow;
160 int exponent = 0;
161
162 assert(p != end && "Exponent has no digits");
163
164 negative = *p == '-';
165 if (*p == '-' || *p == '+') {
166 p++;
167 assert(p != end && "Exponent has no digits");
168 }
169
170 unsignedExponent = 0;
171 overflow = false;
172 for (; p != end; ++p) {
173 unsigned int value;
174
175 value = decDigitValue(*p);
176 assert(value < 10U && "Invalid character in exponent");
177
178 unsignedExponent = unsignedExponent * 10 + value;
179 if (unsignedExponent > 32767) {
180 overflow = true;
181 break;
182 }
183 }
184
185 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
186 overflow = true;
187
188 if (!overflow) {
189 exponent = unsignedExponent;
190 if (negative)
191 exponent = -exponent;
192 exponent += exponentAdjustment;
193 if (exponent > 32767 || exponent < -32768)
194 overflow = true;
195 }
196
197 if (overflow)
198 exponent = negative ? -32768: 32767;
199
200 return exponent;
201 }
202
203 static StringRef::iterator
skipLeadingZeroesAndAnyDot(StringRef::iterator begin,StringRef::iterator end,StringRef::iterator * dot)204 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
205 StringRef::iterator *dot)
206 {
207 StringRef::iterator p = begin;
208 *dot = end;
209 while (*p == '0' && p != end)
210 p++;
211
212 if (*p == '.') {
213 *dot = p++;
214
215 assert(end - begin != 1 && "Significand has no digits");
216
217 while (*p == '0' && p != end)
218 p++;
219 }
220
221 return p;
222 }
223
224 /* Given a normal decimal floating point number of the form
225
226 dddd.dddd[eE][+-]ddd
227
228 where the decimal point and exponent are optional, fill out the
229 structure D. Exponent is appropriate if the significand is
230 treated as an integer, and normalizedExponent if the significand
231 is taken to have the decimal point after a single leading
232 non-zero digit.
233
234 If the value is zero, V->firstSigDigit points to a non-digit, and
235 the return exponent is zero.
236 */
237 struct decimalInfo {
238 const char *firstSigDigit;
239 const char *lastSigDigit;
240 int exponent;
241 int normalizedExponent;
242 };
243
244 static void
interpretDecimal(StringRef::iterator begin,StringRef::iterator end,decimalInfo * D)245 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
246 decimalInfo *D)
247 {
248 StringRef::iterator dot = end;
249 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
250
251 D->firstSigDigit = p;
252 D->exponent = 0;
253 D->normalizedExponent = 0;
254
255 for (; p != end; ++p) {
256 if (*p == '.') {
257 assert(dot == end && "String contains multiple dots");
258 dot = p++;
259 if (p == end)
260 break;
261 }
262 if (decDigitValue(*p) >= 10U)
263 break;
264 }
265
266 if (p != end) {
267 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
268 assert(p != begin && "Significand has no digits");
269 assert((dot == end || p - begin != 1) && "Significand has no digits");
270
271 /* p points to the first non-digit in the string */
272 D->exponent = readExponent(p + 1, end);
273
274 /* Implied decimal point? */
275 if (dot == end)
276 dot = p;
277 }
278
279 /* If number is all zeroes accept any exponent. */
280 if (p != D->firstSigDigit) {
281 /* Drop insignificant trailing zeroes. */
282 if (p != begin) {
283 do
284 do
285 p--;
286 while (p != begin && *p == '0');
287 while (p != begin && *p == '.');
288 }
289
290 /* Adjust the exponents for any decimal point. */
291 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
292 D->normalizedExponent = (D->exponent +
293 static_cast<exponent_t>((p - D->firstSigDigit)
294 - (dot > D->firstSigDigit && dot < p)));
295 }
296
297 D->lastSigDigit = p;
298 }
299
300 /* Return the trailing fraction of a hexadecimal number.
301 DIGITVALUE is the first hex digit of the fraction, P points to
302 the next digit. */
303 static lostFraction
trailingHexadecimalFraction(StringRef::iterator p,StringRef::iterator end,unsigned int digitValue)304 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
305 unsigned int digitValue)
306 {
307 unsigned int hexDigit;
308
309 /* If the first trailing digit isn't 0 or 8 we can work out the
310 fraction immediately. */
311 if (digitValue > 8)
312 return lfMoreThanHalf;
313 else if (digitValue < 8 && digitValue > 0)
314 return lfLessThanHalf;
315
316 /* Otherwise we need to find the first non-zero digit. */
317 while (*p == '0')
318 p++;
319
320 assert(p != end && "Invalid trailing hexadecimal fraction!");
321
322 hexDigit = hexDigitValue(*p);
323
324 /* If we ran off the end it is exactly zero or one-half, otherwise
325 a little more. */
326 if (hexDigit == -1U)
327 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
328 else
329 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
330 }
331
332 /* Return the fraction lost were a bignum truncated losing the least
333 significant BITS bits. */
334 static lostFraction
lostFractionThroughTruncation(const integerPart * parts,unsigned int partCount,unsigned int bits)335 lostFractionThroughTruncation(const integerPart *parts,
336 unsigned int partCount,
337 unsigned int bits)
338 {
339 unsigned int lsb;
340
341 lsb = APInt::tcLSB(parts, partCount);
342
343 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
344 if (bits <= lsb)
345 return lfExactlyZero;
346 if (bits == lsb + 1)
347 return lfExactlyHalf;
348 if (bits <= partCount * integerPartWidth &&
349 APInt::tcExtractBit(parts, bits - 1))
350 return lfMoreThanHalf;
351
352 return lfLessThanHalf;
353 }
354
355 /* Shift DST right BITS bits noting lost fraction. */
356 static lostFraction
shiftRight(integerPart * dst,unsigned int parts,unsigned int bits)357 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
358 {
359 lostFraction lost_fraction;
360
361 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
362
363 APInt::tcShiftRight(dst, parts, bits);
364
365 return lost_fraction;
366 }
367
368 /* Combine the effect of two lost fractions. */
369 static lostFraction
combineLostFractions(lostFraction moreSignificant,lostFraction lessSignificant)370 combineLostFractions(lostFraction moreSignificant,
371 lostFraction lessSignificant)
372 {
373 if (lessSignificant != lfExactlyZero) {
374 if (moreSignificant == lfExactlyZero)
375 moreSignificant = lfLessThanHalf;
376 else if (moreSignificant == lfExactlyHalf)
377 moreSignificant = lfMoreThanHalf;
378 }
379
380 return moreSignificant;
381 }
382
383 /* The error from the true value, in half-ulps, on multiplying two
384 floating point numbers, which differ from the value they
385 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
386 than the returned value.
387
388 See "How to Read Floating Point Numbers Accurately" by William D
389 Clinger. */
390 static unsigned int
HUerrBound(bool inexactMultiply,unsigned int HUerr1,unsigned int HUerr2)391 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
392 {
393 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
394
395 if (HUerr1 + HUerr2 == 0)
396 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
397 else
398 return inexactMultiply + 2 * (HUerr1 + HUerr2);
399 }
400
401 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
402 when the least significant BITS are truncated. BITS cannot be
403 zero. */
404 static integerPart
ulpsFromBoundary(const integerPart * parts,unsigned int bits,bool isNearest)405 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
406 {
407 unsigned int count, partBits;
408 integerPart part, boundary;
409
410 assert(bits != 0);
411
412 bits--;
413 count = bits / integerPartWidth;
414 partBits = bits % integerPartWidth + 1;
415
416 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
417
418 if (isNearest)
419 boundary = (integerPart) 1 << (partBits - 1);
420 else
421 boundary = 0;
422
423 if (count == 0) {
424 if (part - boundary <= boundary - part)
425 return part - boundary;
426 else
427 return boundary - part;
428 }
429
430 if (part == boundary) {
431 while (--count)
432 if (parts[count])
433 return ~(integerPart) 0; /* A lot. */
434
435 return parts[0];
436 } else if (part == boundary - 1) {
437 while (--count)
438 if (~parts[count])
439 return ~(integerPart) 0; /* A lot. */
440
441 return -parts[0];
442 }
443
444 return ~(integerPart) 0; /* A lot. */
445 }
446
447 /* Place pow(5, power) in DST, and return the number of parts used.
448 DST must be at least one part larger than size of the answer. */
449 static unsigned int
powerOf5(integerPart * dst,unsigned int power)450 powerOf5(integerPart *dst, unsigned int power)
451 {
452 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
453 15625, 78125 };
454 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
455 pow5s[0] = 78125 * 5;
456
457 unsigned int partsCount[16] = { 1 };
458 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
459 unsigned int result;
460 assert(power <= maxExponent);
461
462 p1 = dst;
463 p2 = scratch;
464
465 *p1 = firstEightPowers[power & 7];
466 power >>= 3;
467
468 result = 1;
469 pow5 = pow5s;
470
471 for (unsigned int n = 0; power; power >>= 1, n++) {
472 unsigned int pc;
473
474 pc = partsCount[n];
475
476 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
477 if (pc == 0) {
478 pc = partsCount[n - 1];
479 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
480 pc *= 2;
481 if (pow5[pc - 1] == 0)
482 pc--;
483 partsCount[n] = pc;
484 }
485
486 if (power & 1) {
487 integerPart *tmp;
488
489 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
490 result += pc;
491 if (p2[result - 1] == 0)
492 result--;
493
494 /* Now result is in p1 with partsCount parts and p2 is scratch
495 space. */
496 tmp = p1, p1 = p2, p2 = tmp;
497 }
498
499 pow5 += pc;
500 }
501
502 if (p1 != dst)
503 APInt::tcAssign(dst, p1, result);
504
505 return result;
506 }
507
508 /* Zero at the end to avoid modular arithmetic when adding one; used
509 when rounding up during hexadecimal output. */
510 static const char hexDigitsLower[] = "0123456789abcdef0";
511 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
512 static const char infinityL[] = "infinity";
513 static const char infinityU[] = "INFINITY";
514 static const char NaNL[] = "nan";
515 static const char NaNU[] = "NAN";
516
517 /* Write out an integerPart in hexadecimal, starting with the most
518 significant nibble. Write out exactly COUNT hexdigits, return
519 COUNT. */
520 static unsigned int
partAsHex(char * dst,integerPart part,unsigned int count,const char * hexDigitChars)521 partAsHex (char *dst, integerPart part, unsigned int count,
522 const char *hexDigitChars)
523 {
524 unsigned int result = count;
525
526 assert(count != 0 && count <= integerPartWidth / 4);
527
528 part >>= (integerPartWidth - 4 * count);
529 while (count--) {
530 dst[count] = hexDigitChars[part & 0xf];
531 part >>= 4;
532 }
533
534 return result;
535 }
536
537 /* Write out an unsigned decimal integer. */
538 static char *
writeUnsignedDecimal(char * dst,unsigned int n)539 writeUnsignedDecimal (char *dst, unsigned int n)
540 {
541 char buff[40], *p;
542
543 p = buff;
544 do
545 *p++ = '0' + n % 10;
546 while (n /= 10);
547
548 do
549 *dst++ = *--p;
550 while (p != buff);
551
552 return dst;
553 }
554
555 /* Write out a signed decimal integer. */
556 static char *
writeSignedDecimal(char * dst,int value)557 writeSignedDecimal (char *dst, int value)
558 {
559 if (value < 0) {
560 *dst++ = '-';
561 dst = writeUnsignedDecimal(dst, -(unsigned) value);
562 } else
563 dst = writeUnsignedDecimal(dst, value);
564
565 return dst;
566 }
567
568 /* Constructors. */
569 void
initialize(const fltSemantics * ourSemantics)570 APFloat::initialize(const fltSemantics *ourSemantics)
571 {
572 unsigned int count;
573
574 semantics = ourSemantics;
575 count = partCount();
576 if (count > 1)
577 significand.parts = new integerPart[count];
578 }
579
580 void
freeSignificand()581 APFloat::freeSignificand()
582 {
583 if (partCount() > 1)
584 delete [] significand.parts;
585 }
586
587 void
assign(const APFloat & rhs)588 APFloat::assign(const APFloat &rhs)
589 {
590 assert(semantics == rhs.semantics);
591
592 sign = rhs.sign;
593 category = rhs.category;
594 exponent = rhs.exponent;
595 if (category == fcNormal || category == fcNaN)
596 copySignificand(rhs);
597 }
598
599 void
copySignificand(const APFloat & rhs)600 APFloat::copySignificand(const APFloat &rhs)
601 {
602 assert(category == fcNormal || category == fcNaN);
603 assert(rhs.partCount() >= partCount());
604
605 APInt::tcAssign(significandParts(), rhs.significandParts(),
606 partCount());
607 }
608
609 /* Make this number a NaN, with an arbitrary but deterministic value
610 for the significand. If double or longer, this is a signalling NaN,
611 which may not be ideal. If float, this is QNaN(0). */
makeNaN(bool SNaN,bool Negative,const APInt * fill)612 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
613 {
614 category = fcNaN;
615 sign = Negative;
616
617 integerPart *significand = significandParts();
618 unsigned numParts = partCount();
619
620 // Set the significand bits to the fill.
621 if (!fill || fill->getNumWords() < numParts)
622 APInt::tcSet(significand, 0, numParts);
623 if (fill) {
624 APInt::tcAssign(significand, fill->getRawData(),
625 std::min(fill->getNumWords(), numParts));
626
627 // Zero out the excess bits of the significand.
628 unsigned bitsToPreserve = semantics->precision - 1;
629 unsigned part = bitsToPreserve / 64;
630 bitsToPreserve %= 64;
631 significand[part] &= ((1ULL << bitsToPreserve) - 1);
632 for (part++; part != numParts; ++part)
633 significand[part] = 0;
634 }
635
636 unsigned QNaNBit = semantics->precision - 2;
637
638 if (SNaN) {
639 // We always have to clear the QNaN bit to make it an SNaN.
640 APInt::tcClearBit(significand, QNaNBit);
641
642 // If there are no bits set in the payload, we have to set
643 // *something* to make it a NaN instead of an infinity;
644 // conventionally, this is the next bit down from the QNaN bit.
645 if (APInt::tcIsZero(significand, numParts))
646 APInt::tcSetBit(significand, QNaNBit - 1);
647 } else {
648 // We always have to set the QNaN bit to make it a QNaN.
649 APInt::tcSetBit(significand, QNaNBit);
650 }
651
652 // For x87 extended precision, we want to make a NaN, not a
653 // pseudo-NaN. Maybe we should expose the ability to make
654 // pseudo-NaNs?
655 if (semantics == &APFloat::x87DoubleExtended)
656 APInt::tcSetBit(significand, QNaNBit + 1);
657 }
658
makeNaN(const fltSemantics & Sem,bool SNaN,bool Negative,const APInt * fill)659 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
660 const APInt *fill) {
661 APFloat value(Sem, uninitialized);
662 value.makeNaN(SNaN, Negative, fill);
663 return value;
664 }
665
666 APFloat &
operator =(const APFloat & rhs)667 APFloat::operator=(const APFloat &rhs)
668 {
669 if (this != &rhs) {
670 if (semantics != rhs.semantics) {
671 freeSignificand();
672 initialize(rhs.semantics);
673 }
674 assign(rhs);
675 }
676
677 return *this;
678 }
679
680 bool
isDenormal() const681 APFloat::isDenormal() const {
682 return isNormal() && (exponent == semantics->minExponent) &&
683 (APInt::tcExtractBit(significandParts(),
684 semantics->precision - 1) == 0);
685 }
686
687 bool
bitwiseIsEqual(const APFloat & rhs) const688 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
689 if (this == &rhs)
690 return true;
691 if (semantics != rhs.semantics ||
692 category != rhs.category ||
693 sign != rhs.sign)
694 return false;
695 if (category==fcZero || category==fcInfinity)
696 return true;
697 else if (category==fcNormal && exponent!=rhs.exponent)
698 return false;
699 else {
700 int i= partCount();
701 const integerPart* p=significandParts();
702 const integerPart* q=rhs.significandParts();
703 for (; i>0; i--, p++, q++) {
704 if (*p != *q)
705 return false;
706 }
707 return true;
708 }
709 }
710
APFloat(const fltSemantics & ourSemantics,integerPart value)711 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
712 initialize(&ourSemantics);
713 sign = 0;
714 zeroSignificand();
715 exponent = ourSemantics.precision - 1;
716 significandParts()[0] = value;
717 normalize(rmNearestTiesToEven, lfExactlyZero);
718 }
719
APFloat(const fltSemantics & ourSemantics)720 APFloat::APFloat(const fltSemantics &ourSemantics) {
721 initialize(&ourSemantics);
722 category = fcZero;
723 sign = false;
724 }
725
APFloat(const fltSemantics & ourSemantics,uninitializedTag tag)726 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
727 // Allocates storage if necessary but does not initialize it.
728 initialize(&ourSemantics);
729 }
730
APFloat(const fltSemantics & ourSemantics,fltCategory ourCategory,bool negative)731 APFloat::APFloat(const fltSemantics &ourSemantics,
732 fltCategory ourCategory, bool negative) {
733 initialize(&ourSemantics);
734 category = ourCategory;
735 sign = negative;
736 if (category == fcNormal)
737 category = fcZero;
738 else if (ourCategory == fcNaN)
739 makeNaN();
740 }
741
APFloat(const fltSemantics & ourSemantics,StringRef text)742 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
743 initialize(&ourSemantics);
744 convertFromString(text, rmNearestTiesToEven);
745 }
746
APFloat(const APFloat & rhs)747 APFloat::APFloat(const APFloat &rhs) {
748 initialize(rhs.semantics);
749 assign(rhs);
750 }
751
~APFloat()752 APFloat::~APFloat()
753 {
754 freeSignificand();
755 }
756
757 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
Profile(FoldingSetNodeID & ID) const758 void APFloat::Profile(FoldingSetNodeID& ID) const {
759 ID.Add(bitcastToAPInt());
760 }
761
762 unsigned int
partCount() const763 APFloat::partCount() const
764 {
765 return partCountForBits(semantics->precision + 1);
766 }
767
768 unsigned int
semanticsPrecision(const fltSemantics & semantics)769 APFloat::semanticsPrecision(const fltSemantics &semantics)
770 {
771 return semantics.precision;
772 }
773
774 const integerPart *
significandParts() const775 APFloat::significandParts() const
776 {
777 return const_cast<APFloat *>(this)->significandParts();
778 }
779
780 integerPart *
significandParts()781 APFloat::significandParts()
782 {
783 assert(category == fcNormal || category == fcNaN);
784
785 if (partCount() > 1)
786 return significand.parts;
787 else
788 return &significand.part;
789 }
790
791 void
zeroSignificand()792 APFloat::zeroSignificand()
793 {
794 category = fcNormal;
795 APInt::tcSet(significandParts(), 0, partCount());
796 }
797
798 /* Increment an fcNormal floating point number's significand. */
799 void
incrementSignificand()800 APFloat::incrementSignificand()
801 {
802 integerPart carry;
803
804 carry = APInt::tcIncrement(significandParts(), partCount());
805
806 /* Our callers should never cause us to overflow. */
807 assert(carry == 0);
808 (void)carry;
809 }
810
811 /* Add the significand of the RHS. Returns the carry flag. */
812 integerPart
addSignificand(const APFloat & rhs)813 APFloat::addSignificand(const APFloat &rhs)
814 {
815 integerPart *parts;
816
817 parts = significandParts();
818
819 assert(semantics == rhs.semantics);
820 assert(exponent == rhs.exponent);
821
822 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
823 }
824
825 /* Subtract the significand of the RHS with a borrow flag. Returns
826 the borrow flag. */
827 integerPart
subtractSignificand(const APFloat & rhs,integerPart borrow)828 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
829 {
830 integerPart *parts;
831
832 parts = significandParts();
833
834 assert(semantics == rhs.semantics);
835 assert(exponent == rhs.exponent);
836
837 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
838 partCount());
839 }
840
841 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
842 on to the full-precision result of the multiplication. Returns the
843 lost fraction. */
844 lostFraction
multiplySignificand(const APFloat & rhs,const APFloat * addend)845 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
846 {
847 unsigned int omsb; // One, not zero, based MSB.
848 unsigned int partsCount, newPartsCount, precision;
849 integerPart *lhsSignificand;
850 integerPart scratch[4];
851 integerPart *fullSignificand;
852 lostFraction lost_fraction;
853 bool ignored;
854
855 assert(semantics == rhs.semantics);
856
857 precision = semantics->precision;
858 newPartsCount = partCountForBits(precision * 2);
859
860 if (newPartsCount > 4)
861 fullSignificand = new integerPart[newPartsCount];
862 else
863 fullSignificand = scratch;
864
865 lhsSignificand = significandParts();
866 partsCount = partCount();
867
868 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
869 rhs.significandParts(), partsCount, partsCount);
870
871 lost_fraction = lfExactlyZero;
872 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
873 exponent += rhs.exponent;
874
875 if (addend) {
876 Significand savedSignificand = significand;
877 const fltSemantics *savedSemantics = semantics;
878 fltSemantics extendedSemantics;
879 opStatus status;
880 unsigned int extendedPrecision;
881
882 /* Normalize our MSB. */
883 extendedPrecision = precision + precision - 1;
884 if (omsb != extendedPrecision) {
885 APInt::tcShiftLeft(fullSignificand, newPartsCount,
886 extendedPrecision - omsb);
887 exponent -= extendedPrecision - omsb;
888 }
889
890 /* Create new semantics. */
891 extendedSemantics = *semantics;
892 extendedSemantics.precision = extendedPrecision;
893
894 if (newPartsCount == 1)
895 significand.part = fullSignificand[0];
896 else
897 significand.parts = fullSignificand;
898 semantics = &extendedSemantics;
899
900 APFloat extendedAddend(*addend);
901 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
902 assert(status == opOK);
903 (void)status;
904 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
905
906 /* Restore our state. */
907 if (newPartsCount == 1)
908 fullSignificand[0] = significand.part;
909 significand = savedSignificand;
910 semantics = savedSemantics;
911
912 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
913 }
914
915 exponent -= (precision - 1);
916
917 if (omsb > precision) {
918 unsigned int bits, significantParts;
919 lostFraction lf;
920
921 bits = omsb - precision;
922 significantParts = partCountForBits(omsb);
923 lf = shiftRight(fullSignificand, significantParts, bits);
924 lost_fraction = combineLostFractions(lf, lost_fraction);
925 exponent += bits;
926 }
927
928 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
929
930 if (newPartsCount > 4)
931 delete [] fullSignificand;
932
933 return lost_fraction;
934 }
935
936 /* Multiply the significands of LHS and RHS to DST. */
937 lostFraction
divideSignificand(const APFloat & rhs)938 APFloat::divideSignificand(const APFloat &rhs)
939 {
940 unsigned int bit, i, partsCount;
941 const integerPart *rhsSignificand;
942 integerPart *lhsSignificand, *dividend, *divisor;
943 integerPart scratch[4];
944 lostFraction lost_fraction;
945
946 assert(semantics == rhs.semantics);
947
948 lhsSignificand = significandParts();
949 rhsSignificand = rhs.significandParts();
950 partsCount = partCount();
951
952 if (partsCount > 2)
953 dividend = new integerPart[partsCount * 2];
954 else
955 dividend = scratch;
956
957 divisor = dividend + partsCount;
958
959 /* Copy the dividend and divisor as they will be modified in-place. */
960 for (i = 0; i < partsCount; i++) {
961 dividend[i] = lhsSignificand[i];
962 divisor[i] = rhsSignificand[i];
963 lhsSignificand[i] = 0;
964 }
965
966 exponent -= rhs.exponent;
967
968 unsigned int precision = semantics->precision;
969
970 /* Normalize the divisor. */
971 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
972 if (bit) {
973 exponent += bit;
974 APInt::tcShiftLeft(divisor, partsCount, bit);
975 }
976
977 /* Normalize the dividend. */
978 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
979 if (bit) {
980 exponent -= bit;
981 APInt::tcShiftLeft(dividend, partsCount, bit);
982 }
983
984 /* Ensure the dividend >= divisor initially for the loop below.
985 Incidentally, this means that the division loop below is
986 guaranteed to set the integer bit to one. */
987 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
988 exponent--;
989 APInt::tcShiftLeft(dividend, partsCount, 1);
990 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
991 }
992
993 /* Long division. */
994 for (bit = precision; bit; bit -= 1) {
995 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
996 APInt::tcSubtract(dividend, divisor, 0, partsCount);
997 APInt::tcSetBit(lhsSignificand, bit - 1);
998 }
999
1000 APInt::tcShiftLeft(dividend, partsCount, 1);
1001 }
1002
1003 /* Figure out the lost fraction. */
1004 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1005
1006 if (cmp > 0)
1007 lost_fraction = lfMoreThanHalf;
1008 else if (cmp == 0)
1009 lost_fraction = lfExactlyHalf;
1010 else if (APInt::tcIsZero(dividend, partsCount))
1011 lost_fraction = lfExactlyZero;
1012 else
1013 lost_fraction = lfLessThanHalf;
1014
1015 if (partsCount > 2)
1016 delete [] dividend;
1017
1018 return lost_fraction;
1019 }
1020
1021 unsigned int
significandMSB() const1022 APFloat::significandMSB() const
1023 {
1024 return APInt::tcMSB(significandParts(), partCount());
1025 }
1026
1027 unsigned int
significandLSB() const1028 APFloat::significandLSB() const
1029 {
1030 return APInt::tcLSB(significandParts(), partCount());
1031 }
1032
1033 /* Note that a zero result is NOT normalized to fcZero. */
1034 lostFraction
shiftSignificandRight(unsigned int bits)1035 APFloat::shiftSignificandRight(unsigned int bits)
1036 {
1037 /* Our exponent should not overflow. */
1038 assert((exponent_t) (exponent + bits) >= exponent);
1039
1040 exponent += bits;
1041
1042 return shiftRight(significandParts(), partCount(), bits);
1043 }
1044
1045 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1046 void
shiftSignificandLeft(unsigned int bits)1047 APFloat::shiftSignificandLeft(unsigned int bits)
1048 {
1049 assert(bits < semantics->precision);
1050
1051 if (bits) {
1052 unsigned int partsCount = partCount();
1053
1054 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1055 exponent -= bits;
1056
1057 assert(!APInt::tcIsZero(significandParts(), partsCount));
1058 }
1059 }
1060
1061 APFloat::cmpResult
compareAbsoluteValue(const APFloat & rhs) const1062 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1063 {
1064 int compare;
1065
1066 assert(semantics == rhs.semantics);
1067 assert(category == fcNormal);
1068 assert(rhs.category == fcNormal);
1069
1070 compare = exponent - rhs.exponent;
1071
1072 /* If exponents are equal, do an unsigned bignum comparison of the
1073 significands. */
1074 if (compare == 0)
1075 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1076 partCount());
1077
1078 if (compare > 0)
1079 return cmpGreaterThan;
1080 else if (compare < 0)
1081 return cmpLessThan;
1082 else
1083 return cmpEqual;
1084 }
1085
1086 /* Handle overflow. Sign is preserved. We either become infinity or
1087 the largest finite number. */
1088 APFloat::opStatus
handleOverflow(roundingMode rounding_mode)1089 APFloat::handleOverflow(roundingMode rounding_mode)
1090 {
1091 /* Infinity? */
1092 if (rounding_mode == rmNearestTiesToEven ||
1093 rounding_mode == rmNearestTiesToAway ||
1094 (rounding_mode == rmTowardPositive && !sign) ||
1095 (rounding_mode == rmTowardNegative && sign)) {
1096 category = fcInfinity;
1097 return (opStatus) (opOverflow | opInexact);
1098 }
1099
1100 /* Otherwise we become the largest finite number. */
1101 category = fcNormal;
1102 exponent = semantics->maxExponent;
1103 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1104 semantics->precision);
1105
1106 return opInexact;
1107 }
1108
1109 /* Returns TRUE if, when truncating the current number, with BIT the
1110 new LSB, with the given lost fraction and rounding mode, the result
1111 would need to be rounded away from zero (i.e., by increasing the
1112 signficand). This routine must work for fcZero of both signs, and
1113 fcNormal numbers. */
1114 bool
roundAwayFromZero(roundingMode rounding_mode,lostFraction lost_fraction,unsigned int bit) const1115 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1116 lostFraction lost_fraction,
1117 unsigned int bit) const
1118 {
1119 /* NaNs and infinities should not have lost fractions. */
1120 assert(category == fcNormal || category == fcZero);
1121
1122 /* Current callers never pass this so we don't handle it. */
1123 assert(lost_fraction != lfExactlyZero);
1124
1125 switch (rounding_mode) {
1126 case rmNearestTiesToAway:
1127 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1128
1129 case rmNearestTiesToEven:
1130 if (lost_fraction == lfMoreThanHalf)
1131 return true;
1132
1133 /* Our zeroes don't have a significand to test. */
1134 if (lost_fraction == lfExactlyHalf && category != fcZero)
1135 return APInt::tcExtractBit(significandParts(), bit);
1136
1137 return false;
1138
1139 case rmTowardZero:
1140 return false;
1141
1142 case rmTowardPositive:
1143 return sign == false;
1144
1145 case rmTowardNegative:
1146 return sign == true;
1147 }
1148 llvm_unreachable("Invalid rounding mode found");
1149 }
1150
1151 APFloat::opStatus
normalize(roundingMode rounding_mode,lostFraction lost_fraction)1152 APFloat::normalize(roundingMode rounding_mode,
1153 lostFraction lost_fraction)
1154 {
1155 unsigned int omsb; /* One, not zero, based MSB. */
1156 int exponentChange;
1157
1158 if (category != fcNormal)
1159 return opOK;
1160
1161 /* Before rounding normalize the exponent of fcNormal numbers. */
1162 omsb = significandMSB() + 1;
1163
1164 if (omsb) {
1165 /* OMSB is numbered from 1. We want to place it in the integer
1166 bit numbered PRECISION if possible, with a compensating change in
1167 the exponent. */
1168 exponentChange = omsb - semantics->precision;
1169
1170 /* If the resulting exponent is too high, overflow according to
1171 the rounding mode. */
1172 if (exponent + exponentChange > semantics->maxExponent)
1173 return handleOverflow(rounding_mode);
1174
1175 /* Subnormal numbers have exponent minExponent, and their MSB
1176 is forced based on that. */
1177 if (exponent + exponentChange < semantics->minExponent)
1178 exponentChange = semantics->minExponent - exponent;
1179
1180 /* Shifting left is easy as we don't lose precision. */
1181 if (exponentChange < 0) {
1182 assert(lost_fraction == lfExactlyZero);
1183
1184 shiftSignificandLeft(-exponentChange);
1185
1186 return opOK;
1187 }
1188
1189 if (exponentChange > 0) {
1190 lostFraction lf;
1191
1192 /* Shift right and capture any new lost fraction. */
1193 lf = shiftSignificandRight(exponentChange);
1194
1195 lost_fraction = combineLostFractions(lf, lost_fraction);
1196
1197 /* Keep OMSB up-to-date. */
1198 if (omsb > (unsigned) exponentChange)
1199 omsb -= exponentChange;
1200 else
1201 omsb = 0;
1202 }
1203 }
1204
1205 /* Now round the number according to rounding_mode given the lost
1206 fraction. */
1207
1208 /* As specified in IEEE 754, since we do not trap we do not report
1209 underflow for exact results. */
1210 if (lost_fraction == lfExactlyZero) {
1211 /* Canonicalize zeroes. */
1212 if (omsb == 0)
1213 category = fcZero;
1214
1215 return opOK;
1216 }
1217
1218 /* Increment the significand if we're rounding away from zero. */
1219 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1220 if (omsb == 0)
1221 exponent = semantics->minExponent;
1222
1223 incrementSignificand();
1224 omsb = significandMSB() + 1;
1225
1226 /* Did the significand increment overflow? */
1227 if (omsb == (unsigned) semantics->precision + 1) {
1228 /* Renormalize by incrementing the exponent and shifting our
1229 significand right one. However if we already have the
1230 maximum exponent we overflow to infinity. */
1231 if (exponent == semantics->maxExponent) {
1232 category = fcInfinity;
1233
1234 return (opStatus) (opOverflow | opInexact);
1235 }
1236
1237 shiftSignificandRight(1);
1238
1239 return opInexact;
1240 }
1241 }
1242
1243 /* The normal case - we were and are not denormal, and any
1244 significand increment above didn't overflow. */
1245 if (omsb == semantics->precision)
1246 return opInexact;
1247
1248 /* We have a non-zero denormal. */
1249 assert(omsb < semantics->precision);
1250
1251 /* Canonicalize zeroes. */
1252 if (omsb == 0)
1253 category = fcZero;
1254
1255 /* The fcZero case is a denormal that underflowed to zero. */
1256 return (opStatus) (opUnderflow | opInexact);
1257 }
1258
1259 APFloat::opStatus
addOrSubtractSpecials(const APFloat & rhs,bool subtract)1260 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1261 {
1262 switch (convolve(category, rhs.category)) {
1263 default:
1264 llvm_unreachable(0);
1265
1266 case convolve(fcNaN, fcZero):
1267 case convolve(fcNaN, fcNormal):
1268 case convolve(fcNaN, fcInfinity):
1269 case convolve(fcNaN, fcNaN):
1270 case convolve(fcNormal, fcZero):
1271 case convolve(fcInfinity, fcNormal):
1272 case convolve(fcInfinity, fcZero):
1273 return opOK;
1274
1275 case convolve(fcZero, fcNaN):
1276 case convolve(fcNormal, fcNaN):
1277 case convolve(fcInfinity, fcNaN):
1278 category = fcNaN;
1279 copySignificand(rhs);
1280 return opOK;
1281
1282 case convolve(fcNormal, fcInfinity):
1283 case convolve(fcZero, fcInfinity):
1284 category = fcInfinity;
1285 sign = rhs.sign ^ subtract;
1286 return opOK;
1287
1288 case convolve(fcZero, fcNormal):
1289 assign(rhs);
1290 sign = rhs.sign ^ subtract;
1291 return opOK;
1292
1293 case convolve(fcZero, fcZero):
1294 /* Sign depends on rounding mode; handled by caller. */
1295 return opOK;
1296
1297 case convolve(fcInfinity, fcInfinity):
1298 /* Differently signed infinities can only be validly
1299 subtracted. */
1300 if (((sign ^ rhs.sign)!=0) != subtract) {
1301 makeNaN();
1302 return opInvalidOp;
1303 }
1304
1305 return opOK;
1306
1307 case convolve(fcNormal, fcNormal):
1308 return opDivByZero;
1309 }
1310 }
1311
1312 /* Add or subtract two normal numbers. */
1313 lostFraction
addOrSubtractSignificand(const APFloat & rhs,bool subtract)1314 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1315 {
1316 integerPart carry;
1317 lostFraction lost_fraction;
1318 int bits;
1319
1320 /* Determine if the operation on the absolute values is effectively
1321 an addition or subtraction. */
1322 subtract ^= (sign ^ rhs.sign) ? true : false;
1323
1324 /* Are we bigger exponent-wise than the RHS? */
1325 bits = exponent - rhs.exponent;
1326
1327 /* Subtraction is more subtle than one might naively expect. */
1328 if (subtract) {
1329 APFloat temp_rhs(rhs);
1330 bool reverse;
1331
1332 if (bits == 0) {
1333 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1334 lost_fraction = lfExactlyZero;
1335 } else if (bits > 0) {
1336 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1337 shiftSignificandLeft(1);
1338 reverse = false;
1339 } else {
1340 lost_fraction = shiftSignificandRight(-bits - 1);
1341 temp_rhs.shiftSignificandLeft(1);
1342 reverse = true;
1343 }
1344
1345 if (reverse) {
1346 carry = temp_rhs.subtractSignificand
1347 (*this, lost_fraction != lfExactlyZero);
1348 copySignificand(temp_rhs);
1349 sign = !sign;
1350 } else {
1351 carry = subtractSignificand
1352 (temp_rhs, lost_fraction != lfExactlyZero);
1353 }
1354
1355 /* Invert the lost fraction - it was on the RHS and
1356 subtracted. */
1357 if (lost_fraction == lfLessThanHalf)
1358 lost_fraction = lfMoreThanHalf;
1359 else if (lost_fraction == lfMoreThanHalf)
1360 lost_fraction = lfLessThanHalf;
1361
1362 /* The code above is intended to ensure that no borrow is
1363 necessary. */
1364 assert(!carry);
1365 (void)carry;
1366 } else {
1367 if (bits > 0) {
1368 APFloat temp_rhs(rhs);
1369
1370 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1371 carry = addSignificand(temp_rhs);
1372 } else {
1373 lost_fraction = shiftSignificandRight(-bits);
1374 carry = addSignificand(rhs);
1375 }
1376
1377 /* We have a guard bit; generating a carry cannot happen. */
1378 assert(!carry);
1379 (void)carry;
1380 }
1381
1382 return lost_fraction;
1383 }
1384
1385 APFloat::opStatus
multiplySpecials(const APFloat & rhs)1386 APFloat::multiplySpecials(const APFloat &rhs)
1387 {
1388 switch (convolve(category, rhs.category)) {
1389 default:
1390 llvm_unreachable(0);
1391
1392 case convolve(fcNaN, fcZero):
1393 case convolve(fcNaN, fcNormal):
1394 case convolve(fcNaN, fcInfinity):
1395 case convolve(fcNaN, fcNaN):
1396 return opOK;
1397
1398 case convolve(fcZero, fcNaN):
1399 case convolve(fcNormal, fcNaN):
1400 case convolve(fcInfinity, fcNaN):
1401 category = fcNaN;
1402 copySignificand(rhs);
1403 return opOK;
1404
1405 case convolve(fcNormal, fcInfinity):
1406 case convolve(fcInfinity, fcNormal):
1407 case convolve(fcInfinity, fcInfinity):
1408 category = fcInfinity;
1409 return opOK;
1410
1411 case convolve(fcZero, fcNormal):
1412 case convolve(fcNormal, fcZero):
1413 case convolve(fcZero, fcZero):
1414 category = fcZero;
1415 return opOK;
1416
1417 case convolve(fcZero, fcInfinity):
1418 case convolve(fcInfinity, fcZero):
1419 makeNaN();
1420 return opInvalidOp;
1421
1422 case convolve(fcNormal, fcNormal):
1423 return opOK;
1424 }
1425 }
1426
1427 APFloat::opStatus
divideSpecials(const APFloat & rhs)1428 APFloat::divideSpecials(const APFloat &rhs)
1429 {
1430 switch (convolve(category, rhs.category)) {
1431 default:
1432 llvm_unreachable(0);
1433
1434 case convolve(fcNaN, fcZero):
1435 case convolve(fcNaN, fcNormal):
1436 case convolve(fcNaN, fcInfinity):
1437 case convolve(fcNaN, fcNaN):
1438 case convolve(fcInfinity, fcZero):
1439 case convolve(fcInfinity, fcNormal):
1440 case convolve(fcZero, fcInfinity):
1441 case convolve(fcZero, fcNormal):
1442 return opOK;
1443
1444 case convolve(fcZero, fcNaN):
1445 case convolve(fcNormal, fcNaN):
1446 case convolve(fcInfinity, fcNaN):
1447 category = fcNaN;
1448 copySignificand(rhs);
1449 return opOK;
1450
1451 case convolve(fcNormal, fcInfinity):
1452 category = fcZero;
1453 return opOK;
1454
1455 case convolve(fcNormal, fcZero):
1456 category = fcInfinity;
1457 return opDivByZero;
1458
1459 case convolve(fcInfinity, fcInfinity):
1460 case convolve(fcZero, fcZero):
1461 makeNaN();
1462 return opInvalidOp;
1463
1464 case convolve(fcNormal, fcNormal):
1465 return opOK;
1466 }
1467 }
1468
1469 APFloat::opStatus
modSpecials(const APFloat & rhs)1470 APFloat::modSpecials(const APFloat &rhs)
1471 {
1472 switch (convolve(category, rhs.category)) {
1473 default:
1474 llvm_unreachable(0);
1475
1476 case convolve(fcNaN, fcZero):
1477 case convolve(fcNaN, fcNormal):
1478 case convolve(fcNaN, fcInfinity):
1479 case convolve(fcNaN, fcNaN):
1480 case convolve(fcZero, fcInfinity):
1481 case convolve(fcZero, fcNormal):
1482 case convolve(fcNormal, fcInfinity):
1483 return opOK;
1484
1485 case convolve(fcZero, fcNaN):
1486 case convolve(fcNormal, fcNaN):
1487 case convolve(fcInfinity, fcNaN):
1488 category = fcNaN;
1489 copySignificand(rhs);
1490 return opOK;
1491
1492 case convolve(fcNormal, fcZero):
1493 case convolve(fcInfinity, fcZero):
1494 case convolve(fcInfinity, fcNormal):
1495 case convolve(fcInfinity, fcInfinity):
1496 case convolve(fcZero, fcZero):
1497 makeNaN();
1498 return opInvalidOp;
1499
1500 case convolve(fcNormal, fcNormal):
1501 return opOK;
1502 }
1503 }
1504
1505 /* Change sign. */
1506 void
changeSign()1507 APFloat::changeSign()
1508 {
1509 /* Look mummy, this one's easy. */
1510 sign = !sign;
1511 }
1512
1513 void
clearSign()1514 APFloat::clearSign()
1515 {
1516 /* So is this one. */
1517 sign = 0;
1518 }
1519
1520 void
copySign(const APFloat & rhs)1521 APFloat::copySign(const APFloat &rhs)
1522 {
1523 /* And this one. */
1524 sign = rhs.sign;
1525 }
1526
1527 /* Normalized addition or subtraction. */
1528 APFloat::opStatus
addOrSubtract(const APFloat & rhs,roundingMode rounding_mode,bool subtract)1529 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1530 bool subtract)
1531 {
1532 opStatus fs;
1533
1534 fs = addOrSubtractSpecials(rhs, subtract);
1535
1536 /* This return code means it was not a simple case. */
1537 if (fs == opDivByZero) {
1538 lostFraction lost_fraction;
1539
1540 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1541 fs = normalize(rounding_mode, lost_fraction);
1542
1543 /* Can only be zero if we lost no fraction. */
1544 assert(category != fcZero || lost_fraction == lfExactlyZero);
1545 }
1546
1547 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1548 positive zero unless rounding to minus infinity, except that
1549 adding two like-signed zeroes gives that zero. */
1550 if (category == fcZero) {
1551 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1552 sign = (rounding_mode == rmTowardNegative);
1553 }
1554
1555 return fs;
1556 }
1557
1558 /* Normalized addition. */
1559 APFloat::opStatus
add(const APFloat & rhs,roundingMode rounding_mode)1560 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1561 {
1562 return addOrSubtract(rhs, rounding_mode, false);
1563 }
1564
1565 /* Normalized subtraction. */
1566 APFloat::opStatus
subtract(const APFloat & rhs,roundingMode rounding_mode)1567 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1568 {
1569 return addOrSubtract(rhs, rounding_mode, true);
1570 }
1571
1572 /* Normalized multiply. */
1573 APFloat::opStatus
multiply(const APFloat & rhs,roundingMode rounding_mode)1574 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1575 {
1576 opStatus fs;
1577
1578 sign ^= rhs.sign;
1579 fs = multiplySpecials(rhs);
1580
1581 if (category == fcNormal) {
1582 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1583 fs = normalize(rounding_mode, lost_fraction);
1584 if (lost_fraction != lfExactlyZero)
1585 fs = (opStatus) (fs | opInexact);
1586 }
1587
1588 return fs;
1589 }
1590
1591 /* Normalized divide. */
1592 APFloat::opStatus
divide(const APFloat & rhs,roundingMode rounding_mode)1593 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1594 {
1595 opStatus fs;
1596
1597 sign ^= rhs.sign;
1598 fs = divideSpecials(rhs);
1599
1600 if (category == fcNormal) {
1601 lostFraction lost_fraction = divideSignificand(rhs);
1602 fs = normalize(rounding_mode, lost_fraction);
1603 if (lost_fraction != lfExactlyZero)
1604 fs = (opStatus) (fs | opInexact);
1605 }
1606
1607 return fs;
1608 }
1609
1610 /* Normalized remainder. This is not currently correct in all cases. */
1611 APFloat::opStatus
remainder(const APFloat & rhs)1612 APFloat::remainder(const APFloat &rhs)
1613 {
1614 opStatus fs;
1615 APFloat V = *this;
1616 unsigned int origSign = sign;
1617
1618 fs = V.divide(rhs, rmNearestTiesToEven);
1619 if (fs == opDivByZero)
1620 return fs;
1621
1622 int parts = partCount();
1623 integerPart *x = new integerPart[parts];
1624 bool ignored;
1625 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1626 rmNearestTiesToEven, &ignored);
1627 if (fs==opInvalidOp)
1628 return fs;
1629
1630 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1631 rmNearestTiesToEven);
1632 assert(fs==opOK); // should always work
1633
1634 fs = V.multiply(rhs, rmNearestTiesToEven);
1635 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1636
1637 fs = subtract(V, rmNearestTiesToEven);
1638 assert(fs==opOK || fs==opInexact); // likewise
1639
1640 if (isZero())
1641 sign = origSign; // IEEE754 requires this
1642 delete[] x;
1643 return fs;
1644 }
1645
1646 /* Normalized llvm frem (C fmod).
1647 This is not currently correct in all cases. */
1648 APFloat::opStatus
mod(const APFloat & rhs,roundingMode rounding_mode)1649 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1650 {
1651 opStatus fs;
1652 fs = modSpecials(rhs);
1653
1654 if (category == fcNormal && rhs.category == fcNormal) {
1655 APFloat V = *this;
1656 unsigned int origSign = sign;
1657
1658 fs = V.divide(rhs, rmNearestTiesToEven);
1659 if (fs == opDivByZero)
1660 return fs;
1661
1662 int parts = partCount();
1663 integerPart *x = new integerPart[parts];
1664 bool ignored;
1665 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1666 rmTowardZero, &ignored);
1667 if (fs==opInvalidOp)
1668 return fs;
1669
1670 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1671 rmNearestTiesToEven);
1672 assert(fs==opOK); // should always work
1673
1674 fs = V.multiply(rhs, rounding_mode);
1675 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1676
1677 fs = subtract(V, rounding_mode);
1678 assert(fs==opOK || fs==opInexact); // likewise
1679
1680 if (isZero())
1681 sign = origSign; // IEEE754 requires this
1682 delete[] x;
1683 }
1684 return fs;
1685 }
1686
1687 /* Normalized fused-multiply-add. */
1688 APFloat::opStatus
fusedMultiplyAdd(const APFloat & multiplicand,const APFloat & addend,roundingMode rounding_mode)1689 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1690 const APFloat &addend,
1691 roundingMode rounding_mode)
1692 {
1693 opStatus fs;
1694
1695 /* Post-multiplication sign, before addition. */
1696 sign ^= multiplicand.sign;
1697
1698 /* If and only if all arguments are normal do we need to do an
1699 extended-precision calculation. */
1700 if (category == fcNormal &&
1701 multiplicand.category == fcNormal &&
1702 addend.category == fcNormal) {
1703 lostFraction lost_fraction;
1704
1705 lost_fraction = multiplySignificand(multiplicand, &addend);
1706 fs = normalize(rounding_mode, lost_fraction);
1707 if (lost_fraction != lfExactlyZero)
1708 fs = (opStatus) (fs | opInexact);
1709
1710 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1711 positive zero unless rounding to minus infinity, except that
1712 adding two like-signed zeroes gives that zero. */
1713 if (category == fcZero && sign != addend.sign)
1714 sign = (rounding_mode == rmTowardNegative);
1715 } else {
1716 fs = multiplySpecials(multiplicand);
1717
1718 /* FS can only be opOK or opInvalidOp. There is no more work
1719 to do in the latter case. The IEEE-754R standard says it is
1720 implementation-defined in this case whether, if ADDEND is a
1721 quiet NaN, we raise invalid op; this implementation does so.
1722
1723 If we need to do the addition we can do so with normal
1724 precision. */
1725 if (fs == opOK)
1726 fs = addOrSubtract(addend, rounding_mode, false);
1727 }
1728
1729 return fs;
1730 }
1731
1732 /* Rounding-mode corrrect round to integral value. */
roundToIntegral(roundingMode rounding_mode)1733 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1734 opStatus fs;
1735
1736 // If the exponent is large enough, we know that this value is already
1737 // integral, and the arithmetic below would potentially cause it to saturate
1738 // to +/-Inf. Bail out early instead.
1739 if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics))
1740 return opOK;
1741
1742 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1743 // precision of our format, and then subtract it back off again. The choice
1744 // of rounding modes for the addition/subtraction determines the rounding mode
1745 // for our integral rounding as well.
1746 // NOTE: When the input value is negative, we do subtraction followed by
1747 // addition instead.
1748 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1749 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1750 APFloat MagicConstant(*semantics);
1751 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1752 rmNearestTiesToEven);
1753 MagicConstant.copySign(*this);
1754
1755 if (fs != opOK)
1756 return fs;
1757
1758 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1759 bool inputSign = isNegative();
1760
1761 fs = add(MagicConstant, rounding_mode);
1762 if (fs != opOK && fs != opInexact)
1763 return fs;
1764
1765 fs = subtract(MagicConstant, rounding_mode);
1766
1767 // Restore the input sign.
1768 if (inputSign != isNegative())
1769 changeSign();
1770
1771 return fs;
1772 }
1773
1774
1775 /* Comparison requires normalized numbers. */
1776 APFloat::cmpResult
compare(const APFloat & rhs) const1777 APFloat::compare(const APFloat &rhs) const
1778 {
1779 cmpResult result;
1780
1781 assert(semantics == rhs.semantics);
1782
1783 switch (convolve(category, rhs.category)) {
1784 default:
1785 llvm_unreachable(0);
1786
1787 case convolve(fcNaN, fcZero):
1788 case convolve(fcNaN, fcNormal):
1789 case convolve(fcNaN, fcInfinity):
1790 case convolve(fcNaN, fcNaN):
1791 case convolve(fcZero, fcNaN):
1792 case convolve(fcNormal, fcNaN):
1793 case convolve(fcInfinity, fcNaN):
1794 return cmpUnordered;
1795
1796 case convolve(fcInfinity, fcNormal):
1797 case convolve(fcInfinity, fcZero):
1798 case convolve(fcNormal, fcZero):
1799 if (sign)
1800 return cmpLessThan;
1801 else
1802 return cmpGreaterThan;
1803
1804 case convolve(fcNormal, fcInfinity):
1805 case convolve(fcZero, fcInfinity):
1806 case convolve(fcZero, fcNormal):
1807 if (rhs.sign)
1808 return cmpGreaterThan;
1809 else
1810 return cmpLessThan;
1811
1812 case convolve(fcInfinity, fcInfinity):
1813 if (sign == rhs.sign)
1814 return cmpEqual;
1815 else if (sign)
1816 return cmpLessThan;
1817 else
1818 return cmpGreaterThan;
1819
1820 case convolve(fcZero, fcZero):
1821 return cmpEqual;
1822
1823 case convolve(fcNormal, fcNormal):
1824 break;
1825 }
1826
1827 /* Two normal numbers. Do they have the same sign? */
1828 if (sign != rhs.sign) {
1829 if (sign)
1830 result = cmpLessThan;
1831 else
1832 result = cmpGreaterThan;
1833 } else {
1834 /* Compare absolute values; invert result if negative. */
1835 result = compareAbsoluteValue(rhs);
1836
1837 if (sign) {
1838 if (result == cmpLessThan)
1839 result = cmpGreaterThan;
1840 else if (result == cmpGreaterThan)
1841 result = cmpLessThan;
1842 }
1843 }
1844
1845 return result;
1846 }
1847
1848 /// APFloat::convert - convert a value of one floating point type to another.
1849 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1850 /// records whether the transformation lost information, i.e. whether
1851 /// converting the result back to the original type will produce the
1852 /// original value (this is almost the same as return value==fsOK, but there
1853 /// are edge cases where this is not so).
1854
1855 APFloat::opStatus
convert(const fltSemantics & toSemantics,roundingMode rounding_mode,bool * losesInfo)1856 APFloat::convert(const fltSemantics &toSemantics,
1857 roundingMode rounding_mode, bool *losesInfo)
1858 {
1859 lostFraction lostFraction;
1860 unsigned int newPartCount, oldPartCount;
1861 opStatus fs;
1862 int shift;
1863 const fltSemantics &fromSemantics = *semantics;
1864
1865 lostFraction = lfExactlyZero;
1866 newPartCount = partCountForBits(toSemantics.precision + 1);
1867 oldPartCount = partCount();
1868 shift = toSemantics.precision - fromSemantics.precision;
1869
1870 bool X86SpecialNan = false;
1871 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1872 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1873 (!(*significandParts() & 0x8000000000000000ULL) ||
1874 !(*significandParts() & 0x4000000000000000ULL))) {
1875 // x86 has some unusual NaNs which cannot be represented in any other
1876 // format; note them here.
1877 X86SpecialNan = true;
1878 }
1879
1880 // If this is a truncation, perform the shift before we narrow the storage.
1881 if (shift < 0 && (category==fcNormal || category==fcNaN))
1882 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1883
1884 // Fix the storage so it can hold to new value.
1885 if (newPartCount > oldPartCount) {
1886 // The new type requires more storage; make it available.
1887 integerPart *newParts;
1888 newParts = new integerPart[newPartCount];
1889 APInt::tcSet(newParts, 0, newPartCount);
1890 if (category==fcNormal || category==fcNaN)
1891 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1892 freeSignificand();
1893 significand.parts = newParts;
1894 } else if (newPartCount == 1 && oldPartCount != 1) {
1895 // Switch to built-in storage for a single part.
1896 integerPart newPart = 0;
1897 if (category==fcNormal || category==fcNaN)
1898 newPart = significandParts()[0];
1899 freeSignificand();
1900 significand.part = newPart;
1901 }
1902
1903 // Now that we have the right storage, switch the semantics.
1904 semantics = &toSemantics;
1905
1906 // If this is an extension, perform the shift now that the storage is
1907 // available.
1908 if (shift > 0 && (category==fcNormal || category==fcNaN))
1909 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1910
1911 if (category == fcNormal) {
1912 fs = normalize(rounding_mode, lostFraction);
1913 *losesInfo = (fs != opOK);
1914 } else if (category == fcNaN) {
1915 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
1916
1917 // For x87 extended precision, we want to make a NaN, not a special NaN if
1918 // the input wasn't special either.
1919 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended)
1920 APInt::tcSetBit(significandParts(), semantics->precision - 1);
1921
1922 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1923 // does not give you back the same bits. This is dubious, and we
1924 // don't currently do it. You're really supposed to get
1925 // an invalid operation signal at runtime, but nobody does that.
1926 fs = opOK;
1927 } else {
1928 *losesInfo = false;
1929 fs = opOK;
1930 }
1931
1932 return fs;
1933 }
1934
1935 /* Convert a floating point number to an integer according to the
1936 rounding mode. If the rounded integer value is out of range this
1937 returns an invalid operation exception and the contents of the
1938 destination parts are unspecified. If the rounded value is in
1939 range but the floating point number is not the exact integer, the C
1940 standard doesn't require an inexact exception to be raised. IEEE
1941 854 does require it so we do that.
1942
1943 Note that for conversions to integer type the C standard requires
1944 round-to-zero to always be used. */
1945 APFloat::opStatus
convertToSignExtendedInteger(integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const1946 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1947 bool isSigned,
1948 roundingMode rounding_mode,
1949 bool *isExact) const
1950 {
1951 lostFraction lost_fraction;
1952 const integerPart *src;
1953 unsigned int dstPartsCount, truncatedBits;
1954
1955 *isExact = false;
1956
1957 /* Handle the three special cases first. */
1958 if (category == fcInfinity || category == fcNaN)
1959 return opInvalidOp;
1960
1961 dstPartsCount = partCountForBits(width);
1962
1963 if (category == fcZero) {
1964 APInt::tcSet(parts, 0, dstPartsCount);
1965 // Negative zero can't be represented as an int.
1966 *isExact = !sign;
1967 return opOK;
1968 }
1969
1970 src = significandParts();
1971
1972 /* Step 1: place our absolute value, with any fraction truncated, in
1973 the destination. */
1974 if (exponent < 0) {
1975 /* Our absolute value is less than one; truncate everything. */
1976 APInt::tcSet(parts, 0, dstPartsCount);
1977 /* For exponent -1 the integer bit represents .5, look at that.
1978 For smaller exponents leftmost truncated bit is 0. */
1979 truncatedBits = semantics->precision -1U - exponent;
1980 } else {
1981 /* We want the most significant (exponent + 1) bits; the rest are
1982 truncated. */
1983 unsigned int bits = exponent + 1U;
1984
1985 /* Hopelessly large in magnitude? */
1986 if (bits > width)
1987 return opInvalidOp;
1988
1989 if (bits < semantics->precision) {
1990 /* We truncate (semantics->precision - bits) bits. */
1991 truncatedBits = semantics->precision - bits;
1992 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1993 } else {
1994 /* We want at least as many bits as are available. */
1995 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1996 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1997 truncatedBits = 0;
1998 }
1999 }
2000
2001 /* Step 2: work out any lost fraction, and increment the absolute
2002 value if we would round away from zero. */
2003 if (truncatedBits) {
2004 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2005 truncatedBits);
2006 if (lost_fraction != lfExactlyZero &&
2007 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2008 if (APInt::tcIncrement(parts, dstPartsCount))
2009 return opInvalidOp; /* Overflow. */
2010 }
2011 } else {
2012 lost_fraction = lfExactlyZero;
2013 }
2014
2015 /* Step 3: check if we fit in the destination. */
2016 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2017
2018 if (sign) {
2019 if (!isSigned) {
2020 /* Negative numbers cannot be represented as unsigned. */
2021 if (omsb != 0)
2022 return opInvalidOp;
2023 } else {
2024 /* It takes omsb bits to represent the unsigned integer value.
2025 We lose a bit for the sign, but care is needed as the
2026 maximally negative integer is a special case. */
2027 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2028 return opInvalidOp;
2029
2030 /* This case can happen because of rounding. */
2031 if (omsb > width)
2032 return opInvalidOp;
2033 }
2034
2035 APInt::tcNegate (parts, dstPartsCount);
2036 } else {
2037 if (omsb >= width + !isSigned)
2038 return opInvalidOp;
2039 }
2040
2041 if (lost_fraction == lfExactlyZero) {
2042 *isExact = true;
2043 return opOK;
2044 } else
2045 return opInexact;
2046 }
2047
2048 /* Same as convertToSignExtendedInteger, except we provide
2049 deterministic values in case of an invalid operation exception,
2050 namely zero for NaNs and the minimal or maximal value respectively
2051 for underflow or overflow.
2052 The *isExact output tells whether the result is exact, in the sense
2053 that converting it back to the original floating point type produces
2054 the original value. This is almost equivalent to result==opOK,
2055 except for negative zeroes.
2056 */
2057 APFloat::opStatus
convertToInteger(integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2058 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2059 bool isSigned,
2060 roundingMode rounding_mode, bool *isExact) const
2061 {
2062 opStatus fs;
2063
2064 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2065 isExact);
2066
2067 if (fs == opInvalidOp) {
2068 unsigned int bits, dstPartsCount;
2069
2070 dstPartsCount = partCountForBits(width);
2071
2072 if (category == fcNaN)
2073 bits = 0;
2074 else if (sign)
2075 bits = isSigned;
2076 else
2077 bits = width - isSigned;
2078
2079 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2080 if (sign && isSigned)
2081 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2082 }
2083
2084 return fs;
2085 }
2086
2087 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2088 an APSInt, whose initial bit-width and signed-ness are used to determine the
2089 precision of the conversion.
2090 */
2091 APFloat::opStatus
convertToInteger(APSInt & result,roundingMode rounding_mode,bool * isExact) const2092 APFloat::convertToInteger(APSInt &result,
2093 roundingMode rounding_mode, bool *isExact) const
2094 {
2095 unsigned bitWidth = result.getBitWidth();
2096 SmallVector<uint64_t, 4> parts(result.getNumWords());
2097 opStatus status = convertToInteger(
2098 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2099 // Keeps the original signed-ness.
2100 result = APInt(bitWidth, parts);
2101 return status;
2102 }
2103
2104 /* Convert an unsigned integer SRC to a floating point number,
2105 rounding according to ROUNDING_MODE. The sign of the floating
2106 point number is not modified. */
2107 APFloat::opStatus
convertFromUnsignedParts(const integerPart * src,unsigned int srcCount,roundingMode rounding_mode)2108 APFloat::convertFromUnsignedParts(const integerPart *src,
2109 unsigned int srcCount,
2110 roundingMode rounding_mode)
2111 {
2112 unsigned int omsb, precision, dstCount;
2113 integerPart *dst;
2114 lostFraction lost_fraction;
2115
2116 category = fcNormal;
2117 omsb = APInt::tcMSB(src, srcCount) + 1;
2118 dst = significandParts();
2119 dstCount = partCount();
2120 precision = semantics->precision;
2121
2122 /* We want the most significant PRECISION bits of SRC. There may not
2123 be that many; extract what we can. */
2124 if (precision <= omsb) {
2125 exponent = omsb - 1;
2126 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2127 omsb - precision);
2128 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2129 } else {
2130 exponent = precision - 1;
2131 lost_fraction = lfExactlyZero;
2132 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2133 }
2134
2135 return normalize(rounding_mode, lost_fraction);
2136 }
2137
2138 APFloat::opStatus
convertFromAPInt(const APInt & Val,bool isSigned,roundingMode rounding_mode)2139 APFloat::convertFromAPInt(const APInt &Val,
2140 bool isSigned,
2141 roundingMode rounding_mode)
2142 {
2143 unsigned int partCount = Val.getNumWords();
2144 APInt api = Val;
2145
2146 sign = false;
2147 if (isSigned && api.isNegative()) {
2148 sign = true;
2149 api = -api;
2150 }
2151
2152 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2153 }
2154
2155 /* Convert a two's complement integer SRC to a floating point number,
2156 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2157 integer is signed, in which case it must be sign-extended. */
2158 APFloat::opStatus
convertFromSignExtendedInteger(const integerPart * src,unsigned int srcCount,bool isSigned,roundingMode rounding_mode)2159 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2160 unsigned int srcCount,
2161 bool isSigned,
2162 roundingMode rounding_mode)
2163 {
2164 opStatus status;
2165
2166 if (isSigned &&
2167 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2168 integerPart *copy;
2169
2170 /* If we're signed and negative negate a copy. */
2171 sign = true;
2172 copy = new integerPart[srcCount];
2173 APInt::tcAssign(copy, src, srcCount);
2174 APInt::tcNegate(copy, srcCount);
2175 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2176 delete [] copy;
2177 } else {
2178 sign = false;
2179 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2180 }
2181
2182 return status;
2183 }
2184
2185 /* FIXME: should this just take a const APInt reference? */
2186 APFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode)2187 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2188 unsigned int width, bool isSigned,
2189 roundingMode rounding_mode)
2190 {
2191 unsigned int partCount = partCountForBits(width);
2192 APInt api = APInt(width, makeArrayRef(parts, partCount));
2193
2194 sign = false;
2195 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2196 sign = true;
2197 api = -api;
2198 }
2199
2200 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2201 }
2202
2203 APFloat::opStatus
convertFromHexadecimalString(StringRef s,roundingMode rounding_mode)2204 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2205 {
2206 lostFraction lost_fraction = lfExactlyZero;
2207 integerPart *significand;
2208 unsigned int bitPos, partsCount;
2209 StringRef::iterator dot, firstSignificantDigit;
2210
2211 zeroSignificand();
2212 exponent = 0;
2213 category = fcNormal;
2214
2215 significand = significandParts();
2216 partsCount = partCount();
2217 bitPos = partsCount * integerPartWidth;
2218
2219 /* Skip leading zeroes and any (hexa)decimal point. */
2220 StringRef::iterator begin = s.begin();
2221 StringRef::iterator end = s.end();
2222 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2223 firstSignificantDigit = p;
2224
2225 for (; p != end;) {
2226 integerPart hex_value;
2227
2228 if (*p == '.') {
2229 assert(dot == end && "String contains multiple dots");
2230 dot = p++;
2231 if (p == end) {
2232 break;
2233 }
2234 }
2235
2236 hex_value = hexDigitValue(*p);
2237 if (hex_value == -1U) {
2238 break;
2239 }
2240
2241 p++;
2242
2243 if (p == end) {
2244 break;
2245 } else {
2246 /* Store the number whilst 4-bit nibbles remain. */
2247 if (bitPos) {
2248 bitPos -= 4;
2249 hex_value <<= bitPos % integerPartWidth;
2250 significand[bitPos / integerPartWidth] |= hex_value;
2251 } else {
2252 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2253 while (p != end && hexDigitValue(*p) != -1U)
2254 p++;
2255 break;
2256 }
2257 }
2258 }
2259
2260 /* Hex floats require an exponent but not a hexadecimal point. */
2261 assert(p != end && "Hex strings require an exponent");
2262 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2263 assert(p != begin && "Significand has no digits");
2264 assert((dot == end || p - begin != 1) && "Significand has no digits");
2265
2266 /* Ignore the exponent if we are zero. */
2267 if (p != firstSignificantDigit) {
2268 int expAdjustment;
2269
2270 /* Implicit hexadecimal point? */
2271 if (dot == end)
2272 dot = p;
2273
2274 /* Calculate the exponent adjustment implicit in the number of
2275 significant digits. */
2276 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2277 if (expAdjustment < 0)
2278 expAdjustment++;
2279 expAdjustment = expAdjustment * 4 - 1;
2280
2281 /* Adjust for writing the significand starting at the most
2282 significant nibble. */
2283 expAdjustment += semantics->precision;
2284 expAdjustment -= partsCount * integerPartWidth;
2285
2286 /* Adjust for the given exponent. */
2287 exponent = totalExponent(p + 1, end, expAdjustment);
2288 }
2289
2290 return normalize(rounding_mode, lost_fraction);
2291 }
2292
2293 APFloat::opStatus
roundSignificandWithExponent(const integerPart * decSigParts,unsigned sigPartCount,int exp,roundingMode rounding_mode)2294 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2295 unsigned sigPartCount, int exp,
2296 roundingMode rounding_mode)
2297 {
2298 unsigned int parts, pow5PartCount;
2299 fltSemantics calcSemantics = { 32767, -32767, 0 };
2300 integerPart pow5Parts[maxPowerOfFiveParts];
2301 bool isNearest;
2302
2303 isNearest = (rounding_mode == rmNearestTiesToEven ||
2304 rounding_mode == rmNearestTiesToAway);
2305
2306 parts = partCountForBits(semantics->precision + 11);
2307
2308 /* Calculate pow(5, abs(exp)). */
2309 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2310
2311 for (;; parts *= 2) {
2312 opStatus sigStatus, powStatus;
2313 unsigned int excessPrecision, truncatedBits;
2314
2315 calcSemantics.precision = parts * integerPartWidth - 1;
2316 excessPrecision = calcSemantics.precision - semantics->precision;
2317 truncatedBits = excessPrecision;
2318
2319 APFloat decSig(calcSemantics, fcZero, sign);
2320 APFloat pow5(calcSemantics, fcZero, false);
2321
2322 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2323 rmNearestTiesToEven);
2324 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2325 rmNearestTiesToEven);
2326 /* Add exp, as 10^n = 5^n * 2^n. */
2327 decSig.exponent += exp;
2328
2329 lostFraction calcLostFraction;
2330 integerPart HUerr, HUdistance;
2331 unsigned int powHUerr;
2332
2333 if (exp >= 0) {
2334 /* multiplySignificand leaves the precision-th bit set to 1. */
2335 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2336 powHUerr = powStatus != opOK;
2337 } else {
2338 calcLostFraction = decSig.divideSignificand(pow5);
2339 /* Denormal numbers have less precision. */
2340 if (decSig.exponent < semantics->minExponent) {
2341 excessPrecision += (semantics->minExponent - decSig.exponent);
2342 truncatedBits = excessPrecision;
2343 if (excessPrecision > calcSemantics.precision)
2344 excessPrecision = calcSemantics.precision;
2345 }
2346 /* Extra half-ulp lost in reciprocal of exponent. */
2347 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2348 }
2349
2350 /* Both multiplySignificand and divideSignificand return the
2351 result with the integer bit set. */
2352 assert(APInt::tcExtractBit
2353 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2354
2355 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2356 powHUerr);
2357 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2358 excessPrecision, isNearest);
2359
2360 /* Are we guaranteed to round correctly if we truncate? */
2361 if (HUdistance >= HUerr) {
2362 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2363 calcSemantics.precision - excessPrecision,
2364 excessPrecision);
2365 /* Take the exponent of decSig. If we tcExtract-ed less bits
2366 above we must adjust our exponent to compensate for the
2367 implicit right shift. */
2368 exponent = (decSig.exponent + semantics->precision
2369 - (calcSemantics.precision - excessPrecision));
2370 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2371 decSig.partCount(),
2372 truncatedBits);
2373 return normalize(rounding_mode, calcLostFraction);
2374 }
2375 }
2376 }
2377
2378 APFloat::opStatus
convertFromDecimalString(StringRef str,roundingMode rounding_mode)2379 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2380 {
2381 decimalInfo D;
2382 opStatus fs;
2383
2384 /* Scan the text. */
2385 StringRef::iterator p = str.begin();
2386 interpretDecimal(p, str.end(), &D);
2387
2388 /* Handle the quick cases. First the case of no significant digits,
2389 i.e. zero, and then exponents that are obviously too large or too
2390 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2391 definitely overflows if
2392
2393 (exp - 1) * L >= maxExponent
2394
2395 and definitely underflows to zero where
2396
2397 (exp + 1) * L <= minExponent - precision
2398
2399 With integer arithmetic the tightest bounds for L are
2400
2401 93/28 < L < 196/59 [ numerator <= 256 ]
2402 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2403 */
2404
2405 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2406 category = fcZero;
2407 fs = opOK;
2408
2409 /* Check whether the normalized exponent is high enough to overflow
2410 max during the log-rebasing in the max-exponent check below. */
2411 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2412 fs = handleOverflow(rounding_mode);
2413
2414 /* If it wasn't, then it also wasn't high enough to overflow max
2415 during the log-rebasing in the min-exponent check. Check that it
2416 won't overflow min in either check, then perform the min-exponent
2417 check. */
2418 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2419 (D.normalizedExponent + 1) * 28738 <=
2420 8651 * (semantics->minExponent - (int) semantics->precision)) {
2421 /* Underflow to zero and round. */
2422 zeroSignificand();
2423 fs = normalize(rounding_mode, lfLessThanHalf);
2424
2425 /* We can finally safely perform the max-exponent check. */
2426 } else if ((D.normalizedExponent - 1) * 42039
2427 >= 12655 * semantics->maxExponent) {
2428 /* Overflow and round. */
2429 fs = handleOverflow(rounding_mode);
2430 } else {
2431 integerPart *decSignificand;
2432 unsigned int partCount;
2433
2434 /* A tight upper bound on number of bits required to hold an
2435 N-digit decimal integer is N * 196 / 59. Allocate enough space
2436 to hold the full significand, and an extra part required by
2437 tcMultiplyPart. */
2438 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2439 partCount = partCountForBits(1 + 196 * partCount / 59);
2440 decSignificand = new integerPart[partCount + 1];
2441 partCount = 0;
2442
2443 /* Convert to binary efficiently - we do almost all multiplication
2444 in an integerPart. When this would overflow do we do a single
2445 bignum multiplication, and then revert again to multiplication
2446 in an integerPart. */
2447 do {
2448 integerPart decValue, val, multiplier;
2449
2450 val = 0;
2451 multiplier = 1;
2452
2453 do {
2454 if (*p == '.') {
2455 p++;
2456 if (p == str.end()) {
2457 break;
2458 }
2459 }
2460 decValue = decDigitValue(*p++);
2461 assert(decValue < 10U && "Invalid character in significand");
2462 multiplier *= 10;
2463 val = val * 10 + decValue;
2464 /* The maximum number that can be multiplied by ten with any
2465 digit added without overflowing an integerPart. */
2466 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2467
2468 /* Multiply out the current part. */
2469 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2470 partCount, partCount + 1, false);
2471
2472 /* If we used another part (likely but not guaranteed), increase
2473 the count. */
2474 if (decSignificand[partCount])
2475 partCount++;
2476 } while (p <= D.lastSigDigit);
2477
2478 category = fcNormal;
2479 fs = roundSignificandWithExponent(decSignificand, partCount,
2480 D.exponent, rounding_mode);
2481
2482 delete [] decSignificand;
2483 }
2484
2485 return fs;
2486 }
2487
2488 APFloat::opStatus
convertFromString(StringRef str,roundingMode rounding_mode)2489 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2490 {
2491 assert(!str.empty() && "Invalid string length");
2492
2493 /* Handle a leading minus sign. */
2494 StringRef::iterator p = str.begin();
2495 size_t slen = str.size();
2496 sign = *p == '-' ? 1 : 0;
2497 if (*p == '-' || *p == '+') {
2498 p++;
2499 slen--;
2500 assert(slen && "String has no digits");
2501 }
2502
2503 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2504 assert(slen - 2 && "Invalid string");
2505 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2506 rounding_mode);
2507 }
2508
2509 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2510 }
2511
2512 /* Write out a hexadecimal representation of the floating point value
2513 to DST, which must be of sufficient size, in the C99 form
2514 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2515 excluding the terminating NUL.
2516
2517 If UPPERCASE, the output is in upper case, otherwise in lower case.
2518
2519 HEXDIGITS digits appear altogether, rounding the value if
2520 necessary. If HEXDIGITS is 0, the minimal precision to display the
2521 number precisely is used instead. If nothing would appear after
2522 the decimal point it is suppressed.
2523
2524 The decimal exponent is always printed and has at least one digit.
2525 Zero values display an exponent of zero. Infinities and NaNs
2526 appear as "infinity" or "nan" respectively.
2527
2528 The above rules are as specified by C99. There is ambiguity about
2529 what the leading hexadecimal digit should be. This implementation
2530 uses whatever is necessary so that the exponent is displayed as
2531 stored. This implies the exponent will fall within the IEEE format
2532 range, and the leading hexadecimal digit will be 0 (for denormals),
2533 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2534 any other digits zero).
2535 */
2536 unsigned int
convertToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2537 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2538 bool upperCase, roundingMode rounding_mode) const
2539 {
2540 char *p;
2541
2542 p = dst;
2543 if (sign)
2544 *dst++ = '-';
2545
2546 switch (category) {
2547 case fcInfinity:
2548 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2549 dst += sizeof infinityL - 1;
2550 break;
2551
2552 case fcNaN:
2553 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2554 dst += sizeof NaNU - 1;
2555 break;
2556
2557 case fcZero:
2558 *dst++ = '0';
2559 *dst++ = upperCase ? 'X': 'x';
2560 *dst++ = '0';
2561 if (hexDigits > 1) {
2562 *dst++ = '.';
2563 memset (dst, '0', hexDigits - 1);
2564 dst += hexDigits - 1;
2565 }
2566 *dst++ = upperCase ? 'P': 'p';
2567 *dst++ = '0';
2568 break;
2569
2570 case fcNormal:
2571 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2572 break;
2573 }
2574
2575 *dst = 0;
2576
2577 return static_cast<unsigned int>(dst - p);
2578 }
2579
2580 /* Does the hard work of outputting the correctly rounded hexadecimal
2581 form of a normal floating point number with the specified number of
2582 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2583 digits necessary to print the value precisely is output. */
2584 char *
convertNormalToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2585 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2586 bool upperCase,
2587 roundingMode rounding_mode) const
2588 {
2589 unsigned int count, valueBits, shift, partsCount, outputDigits;
2590 const char *hexDigitChars;
2591 const integerPart *significand;
2592 char *p;
2593 bool roundUp;
2594
2595 *dst++ = '0';
2596 *dst++ = upperCase ? 'X': 'x';
2597
2598 roundUp = false;
2599 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2600
2601 significand = significandParts();
2602 partsCount = partCount();
2603
2604 /* +3 because the first digit only uses the single integer bit, so
2605 we have 3 virtual zero most-significant-bits. */
2606 valueBits = semantics->precision + 3;
2607 shift = integerPartWidth - valueBits % integerPartWidth;
2608
2609 /* The natural number of digits required ignoring trailing
2610 insignificant zeroes. */
2611 outputDigits = (valueBits - significandLSB () + 3) / 4;
2612
2613 /* hexDigits of zero means use the required number for the
2614 precision. Otherwise, see if we are truncating. If we are,
2615 find out if we need to round away from zero. */
2616 if (hexDigits) {
2617 if (hexDigits < outputDigits) {
2618 /* We are dropping non-zero bits, so need to check how to round.
2619 "bits" is the number of dropped bits. */
2620 unsigned int bits;
2621 lostFraction fraction;
2622
2623 bits = valueBits - hexDigits * 4;
2624 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2625 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2626 }
2627 outputDigits = hexDigits;
2628 }
2629
2630 /* Write the digits consecutively, and start writing in the location
2631 of the hexadecimal point. We move the most significant digit
2632 left and add the hexadecimal point later. */
2633 p = ++dst;
2634
2635 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2636
2637 while (outputDigits && count) {
2638 integerPart part;
2639
2640 /* Put the most significant integerPartWidth bits in "part". */
2641 if (--count == partsCount)
2642 part = 0; /* An imaginary higher zero part. */
2643 else
2644 part = significand[count] << shift;
2645
2646 if (count && shift)
2647 part |= significand[count - 1] >> (integerPartWidth - shift);
2648
2649 /* Convert as much of "part" to hexdigits as we can. */
2650 unsigned int curDigits = integerPartWidth / 4;
2651
2652 if (curDigits > outputDigits)
2653 curDigits = outputDigits;
2654 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2655 outputDigits -= curDigits;
2656 }
2657
2658 if (roundUp) {
2659 char *q = dst;
2660
2661 /* Note that hexDigitChars has a trailing '0'. */
2662 do {
2663 q--;
2664 *q = hexDigitChars[hexDigitValue (*q) + 1];
2665 } while (*q == '0');
2666 assert(q >= p);
2667 } else {
2668 /* Add trailing zeroes. */
2669 memset (dst, '0', outputDigits);
2670 dst += outputDigits;
2671 }
2672
2673 /* Move the most significant digit to before the point, and if there
2674 is something after the decimal point add it. This must come
2675 after rounding above. */
2676 p[-1] = p[0];
2677 if (dst -1 == p)
2678 dst--;
2679 else
2680 p[0] = '.';
2681
2682 /* Finally output the exponent. */
2683 *dst++ = upperCase ? 'P': 'p';
2684
2685 return writeSignedDecimal (dst, exponent);
2686 }
2687
hash_value(const APFloat & Arg)2688 hash_code llvm::hash_value(const APFloat &Arg) {
2689 if (Arg.category != APFloat::fcNormal)
2690 return hash_combine((uint8_t)Arg.category,
2691 // NaN has no sign, fix it at zero.
2692 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2693 Arg.semantics->precision);
2694
2695 // Normal floats need their exponent and significand hashed.
2696 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2697 Arg.semantics->precision, Arg.exponent,
2698 hash_combine_range(
2699 Arg.significandParts(),
2700 Arg.significandParts() + Arg.partCount()));
2701 }
2702
2703 // Conversion from APFloat to/from host float/double. It may eventually be
2704 // possible to eliminate these and have everybody deal with APFloats, but that
2705 // will take a while. This approach will not easily extend to long double.
2706 // Current implementation requires integerPartWidth==64, which is correct at
2707 // the moment but could be made more general.
2708
2709 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2710 // the actual IEEE respresentations. We compensate for that here.
2711
2712 APInt
convertF80LongDoubleAPFloatToAPInt() const2713 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2714 {
2715 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2716 assert(partCount()==2);
2717
2718 uint64_t myexponent, mysignificand;
2719
2720 if (category==fcNormal) {
2721 myexponent = exponent+16383; //bias
2722 mysignificand = significandParts()[0];
2723 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2724 myexponent = 0; // denormal
2725 } else if (category==fcZero) {
2726 myexponent = 0;
2727 mysignificand = 0;
2728 } else if (category==fcInfinity) {
2729 myexponent = 0x7fff;
2730 mysignificand = 0x8000000000000000ULL;
2731 } else {
2732 assert(category == fcNaN && "Unknown category");
2733 myexponent = 0x7fff;
2734 mysignificand = significandParts()[0];
2735 }
2736
2737 uint64_t words[2];
2738 words[0] = mysignificand;
2739 words[1] = ((uint64_t)(sign & 1) << 15) |
2740 (myexponent & 0x7fffLL);
2741 return APInt(80, words);
2742 }
2743
2744 APInt
convertPPCDoubleDoubleAPFloatToAPInt() const2745 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2746 {
2747 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2748 assert(partCount()==2);
2749
2750 uint64_t words[2];
2751 opStatus fs;
2752 bool losesInfo;
2753
2754 // Convert number to double. To avoid spurious underflows, we re-
2755 // normalize against the "double" minExponent first, and only *then*
2756 // truncate the mantissa. The result of that second conversion
2757 // may be inexact, but should never underflow.
2758 // Declare fltSemantics before APFloat that uses it (and
2759 // saves pointer to it) to ensure correct destruction order.
2760 fltSemantics extendedSemantics = *semantics;
2761 extendedSemantics.minExponent = IEEEdouble.minExponent;
2762 APFloat extended(*this);
2763 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2764 assert(fs == opOK && !losesInfo);
2765 (void)fs;
2766
2767 APFloat u(extended);
2768 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2769 assert(fs == opOK || fs == opInexact);
2770 (void)fs;
2771 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2772
2773 // If conversion was exact or resulted in a special case, we're done;
2774 // just set the second double to zero. Otherwise, re-convert back to
2775 // the extended format and compute the difference. This now should
2776 // convert exactly to double.
2777 if (u.category == fcNormal && losesInfo) {
2778 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2779 assert(fs == opOK && !losesInfo);
2780 (void)fs;
2781
2782 APFloat v(extended);
2783 v.subtract(u, rmNearestTiesToEven);
2784 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2785 assert(fs == opOK && !losesInfo);
2786 (void)fs;
2787 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2788 } else {
2789 words[1] = 0;
2790 }
2791
2792 return APInt(128, words);
2793 }
2794
2795 APInt
convertQuadrupleAPFloatToAPInt() const2796 APFloat::convertQuadrupleAPFloatToAPInt() const
2797 {
2798 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2799 assert(partCount()==2);
2800
2801 uint64_t myexponent, mysignificand, mysignificand2;
2802
2803 if (category==fcNormal) {
2804 myexponent = exponent+16383; //bias
2805 mysignificand = significandParts()[0];
2806 mysignificand2 = significandParts()[1];
2807 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2808 myexponent = 0; // denormal
2809 } else if (category==fcZero) {
2810 myexponent = 0;
2811 mysignificand = mysignificand2 = 0;
2812 } else if (category==fcInfinity) {
2813 myexponent = 0x7fff;
2814 mysignificand = mysignificand2 = 0;
2815 } else {
2816 assert(category == fcNaN && "Unknown category!");
2817 myexponent = 0x7fff;
2818 mysignificand = significandParts()[0];
2819 mysignificand2 = significandParts()[1];
2820 }
2821
2822 uint64_t words[2];
2823 words[0] = mysignificand;
2824 words[1] = ((uint64_t)(sign & 1) << 63) |
2825 ((myexponent & 0x7fff) << 48) |
2826 (mysignificand2 & 0xffffffffffffLL);
2827
2828 return APInt(128, words);
2829 }
2830
2831 APInt
convertDoubleAPFloatToAPInt() const2832 APFloat::convertDoubleAPFloatToAPInt() const
2833 {
2834 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2835 assert(partCount()==1);
2836
2837 uint64_t myexponent, mysignificand;
2838
2839 if (category==fcNormal) {
2840 myexponent = exponent+1023; //bias
2841 mysignificand = *significandParts();
2842 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2843 myexponent = 0; // denormal
2844 } else if (category==fcZero) {
2845 myexponent = 0;
2846 mysignificand = 0;
2847 } else if (category==fcInfinity) {
2848 myexponent = 0x7ff;
2849 mysignificand = 0;
2850 } else {
2851 assert(category == fcNaN && "Unknown category!");
2852 myexponent = 0x7ff;
2853 mysignificand = *significandParts();
2854 }
2855
2856 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2857 ((myexponent & 0x7ff) << 52) |
2858 (mysignificand & 0xfffffffffffffLL))));
2859 }
2860
2861 APInt
convertFloatAPFloatToAPInt() const2862 APFloat::convertFloatAPFloatToAPInt() const
2863 {
2864 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2865 assert(partCount()==1);
2866
2867 uint32_t myexponent, mysignificand;
2868
2869 if (category==fcNormal) {
2870 myexponent = exponent+127; //bias
2871 mysignificand = (uint32_t)*significandParts();
2872 if (myexponent == 1 && !(mysignificand & 0x800000))
2873 myexponent = 0; // denormal
2874 } else if (category==fcZero) {
2875 myexponent = 0;
2876 mysignificand = 0;
2877 } else if (category==fcInfinity) {
2878 myexponent = 0xff;
2879 mysignificand = 0;
2880 } else {
2881 assert(category == fcNaN && "Unknown category!");
2882 myexponent = 0xff;
2883 mysignificand = (uint32_t)*significandParts();
2884 }
2885
2886 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2887 (mysignificand & 0x7fffff)));
2888 }
2889
2890 APInt
convertHalfAPFloatToAPInt() const2891 APFloat::convertHalfAPFloatToAPInt() const
2892 {
2893 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2894 assert(partCount()==1);
2895
2896 uint32_t myexponent, mysignificand;
2897
2898 if (category==fcNormal) {
2899 myexponent = exponent+15; //bias
2900 mysignificand = (uint32_t)*significandParts();
2901 if (myexponent == 1 && !(mysignificand & 0x400))
2902 myexponent = 0; // denormal
2903 } else if (category==fcZero) {
2904 myexponent = 0;
2905 mysignificand = 0;
2906 } else if (category==fcInfinity) {
2907 myexponent = 0x1f;
2908 mysignificand = 0;
2909 } else {
2910 assert(category == fcNaN && "Unknown category!");
2911 myexponent = 0x1f;
2912 mysignificand = (uint32_t)*significandParts();
2913 }
2914
2915 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2916 (mysignificand & 0x3ff)));
2917 }
2918
2919 // This function creates an APInt that is just a bit map of the floating
2920 // point constant as it would appear in memory. It is not a conversion,
2921 // and treating the result as a normal integer is unlikely to be useful.
2922
2923 APInt
bitcastToAPInt() const2924 APFloat::bitcastToAPInt() const
2925 {
2926 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2927 return convertHalfAPFloatToAPInt();
2928
2929 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2930 return convertFloatAPFloatToAPInt();
2931
2932 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2933 return convertDoubleAPFloatToAPInt();
2934
2935 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2936 return convertQuadrupleAPFloatToAPInt();
2937
2938 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2939 return convertPPCDoubleDoubleAPFloatToAPInt();
2940
2941 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2942 "unknown format!");
2943 return convertF80LongDoubleAPFloatToAPInt();
2944 }
2945
2946 float
convertToFloat() const2947 APFloat::convertToFloat() const
2948 {
2949 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2950 "Float semantics are not IEEEsingle");
2951 APInt api = bitcastToAPInt();
2952 return api.bitsToFloat();
2953 }
2954
2955 double
convertToDouble() const2956 APFloat::convertToDouble() const
2957 {
2958 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2959 "Float semantics are not IEEEdouble");
2960 APInt api = bitcastToAPInt();
2961 return api.bitsToDouble();
2962 }
2963
2964 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2965 /// does not support these bit patterns:
2966 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2967 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2968 /// exponent = 0, integer bit 1 ("pseudodenormal")
2969 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2970 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2971 void
initFromF80LongDoubleAPInt(const APInt & api)2972 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2973 {
2974 assert(api.getBitWidth()==80);
2975 uint64_t i1 = api.getRawData()[0];
2976 uint64_t i2 = api.getRawData()[1];
2977 uint64_t myexponent = (i2 & 0x7fff);
2978 uint64_t mysignificand = i1;
2979
2980 initialize(&APFloat::x87DoubleExtended);
2981 assert(partCount()==2);
2982
2983 sign = static_cast<unsigned int>(i2>>15);
2984 if (myexponent==0 && mysignificand==0) {
2985 // exponent, significand meaningless
2986 category = fcZero;
2987 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2988 // exponent, significand meaningless
2989 category = fcInfinity;
2990 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2991 // exponent meaningless
2992 category = fcNaN;
2993 significandParts()[0] = mysignificand;
2994 significandParts()[1] = 0;
2995 } else {
2996 category = fcNormal;
2997 exponent = myexponent - 16383;
2998 significandParts()[0] = mysignificand;
2999 significandParts()[1] = 0;
3000 if (myexponent==0) // denormal
3001 exponent = -16382;
3002 }
3003 }
3004
3005 void
initFromPPCDoubleDoubleAPInt(const APInt & api)3006 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3007 {
3008 assert(api.getBitWidth()==128);
3009 uint64_t i1 = api.getRawData()[0];
3010 uint64_t i2 = api.getRawData()[1];
3011 opStatus fs;
3012 bool losesInfo;
3013
3014 // Get the first double and convert to our format.
3015 initFromDoubleAPInt(APInt(64, i1));
3016 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3017 assert(fs == opOK && !losesInfo);
3018 (void)fs;
3019
3020 // Unless we have a special case, add in second double.
3021 if (category == fcNormal) {
3022 APFloat v(IEEEdouble, APInt(64, i2));
3023 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3024 assert(fs == opOK && !losesInfo);
3025 (void)fs;
3026
3027 add(v, rmNearestTiesToEven);
3028 }
3029 }
3030
3031 void
initFromQuadrupleAPInt(const APInt & api)3032 APFloat::initFromQuadrupleAPInt(const APInt &api)
3033 {
3034 assert(api.getBitWidth()==128);
3035 uint64_t i1 = api.getRawData()[0];
3036 uint64_t i2 = api.getRawData()[1];
3037 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3038 uint64_t mysignificand = i1;
3039 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3040
3041 initialize(&APFloat::IEEEquad);
3042 assert(partCount()==2);
3043
3044 sign = static_cast<unsigned int>(i2>>63);
3045 if (myexponent==0 &&
3046 (mysignificand==0 && mysignificand2==0)) {
3047 // exponent, significand meaningless
3048 category = fcZero;
3049 } else if (myexponent==0x7fff &&
3050 (mysignificand==0 && mysignificand2==0)) {
3051 // exponent, significand meaningless
3052 category = fcInfinity;
3053 } else if (myexponent==0x7fff &&
3054 (mysignificand!=0 || mysignificand2 !=0)) {
3055 // exponent meaningless
3056 category = fcNaN;
3057 significandParts()[0] = mysignificand;
3058 significandParts()[1] = mysignificand2;
3059 } else {
3060 category = fcNormal;
3061 exponent = myexponent - 16383;
3062 significandParts()[0] = mysignificand;
3063 significandParts()[1] = mysignificand2;
3064 if (myexponent==0) // denormal
3065 exponent = -16382;
3066 else
3067 significandParts()[1] |= 0x1000000000000LL; // integer bit
3068 }
3069 }
3070
3071 void
initFromDoubleAPInt(const APInt & api)3072 APFloat::initFromDoubleAPInt(const APInt &api)
3073 {
3074 assert(api.getBitWidth()==64);
3075 uint64_t i = *api.getRawData();
3076 uint64_t myexponent = (i >> 52) & 0x7ff;
3077 uint64_t mysignificand = i & 0xfffffffffffffLL;
3078
3079 initialize(&APFloat::IEEEdouble);
3080 assert(partCount()==1);
3081
3082 sign = static_cast<unsigned int>(i>>63);
3083 if (myexponent==0 && mysignificand==0) {
3084 // exponent, significand meaningless
3085 category = fcZero;
3086 } else if (myexponent==0x7ff && mysignificand==0) {
3087 // exponent, significand meaningless
3088 category = fcInfinity;
3089 } else if (myexponent==0x7ff && mysignificand!=0) {
3090 // exponent meaningless
3091 category = fcNaN;
3092 *significandParts() = mysignificand;
3093 } else {
3094 category = fcNormal;
3095 exponent = myexponent - 1023;
3096 *significandParts() = mysignificand;
3097 if (myexponent==0) // denormal
3098 exponent = -1022;
3099 else
3100 *significandParts() |= 0x10000000000000LL; // integer bit
3101 }
3102 }
3103
3104 void
initFromFloatAPInt(const APInt & api)3105 APFloat::initFromFloatAPInt(const APInt & api)
3106 {
3107 assert(api.getBitWidth()==32);
3108 uint32_t i = (uint32_t)*api.getRawData();
3109 uint32_t myexponent = (i >> 23) & 0xff;
3110 uint32_t mysignificand = i & 0x7fffff;
3111
3112 initialize(&APFloat::IEEEsingle);
3113 assert(partCount()==1);
3114
3115 sign = i >> 31;
3116 if (myexponent==0 && mysignificand==0) {
3117 // exponent, significand meaningless
3118 category = fcZero;
3119 } else if (myexponent==0xff && mysignificand==0) {
3120 // exponent, significand meaningless
3121 category = fcInfinity;
3122 } else if (myexponent==0xff && mysignificand!=0) {
3123 // sign, exponent, significand meaningless
3124 category = fcNaN;
3125 *significandParts() = mysignificand;
3126 } else {
3127 category = fcNormal;
3128 exponent = myexponent - 127; //bias
3129 *significandParts() = mysignificand;
3130 if (myexponent==0) // denormal
3131 exponent = -126;
3132 else
3133 *significandParts() |= 0x800000; // integer bit
3134 }
3135 }
3136
3137 void
initFromHalfAPInt(const APInt & api)3138 APFloat::initFromHalfAPInt(const APInt & api)
3139 {
3140 assert(api.getBitWidth()==16);
3141 uint32_t i = (uint32_t)*api.getRawData();
3142 uint32_t myexponent = (i >> 10) & 0x1f;
3143 uint32_t mysignificand = i & 0x3ff;
3144
3145 initialize(&APFloat::IEEEhalf);
3146 assert(partCount()==1);
3147
3148 sign = i >> 15;
3149 if (myexponent==0 && mysignificand==0) {
3150 // exponent, significand meaningless
3151 category = fcZero;
3152 } else if (myexponent==0x1f && mysignificand==0) {
3153 // exponent, significand meaningless
3154 category = fcInfinity;
3155 } else if (myexponent==0x1f && mysignificand!=0) {
3156 // sign, exponent, significand meaningless
3157 category = fcNaN;
3158 *significandParts() = mysignificand;
3159 } else {
3160 category = fcNormal;
3161 exponent = myexponent - 15; //bias
3162 *significandParts() = mysignificand;
3163 if (myexponent==0) // denormal
3164 exponent = -14;
3165 else
3166 *significandParts() |= 0x400; // integer bit
3167 }
3168 }
3169
3170 /// Treat api as containing the bits of a floating point number. Currently
3171 /// we infer the floating point type from the size of the APInt. The
3172 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3173 /// when the size is anything else).
3174 void
initFromAPInt(const fltSemantics * Sem,const APInt & api)3175 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
3176 {
3177 if (Sem == &IEEEhalf)
3178 return initFromHalfAPInt(api);
3179 if (Sem == &IEEEsingle)
3180 return initFromFloatAPInt(api);
3181 if (Sem == &IEEEdouble)
3182 return initFromDoubleAPInt(api);
3183 if (Sem == &x87DoubleExtended)
3184 return initFromF80LongDoubleAPInt(api);
3185 if (Sem == &IEEEquad)
3186 return initFromQuadrupleAPInt(api);
3187 if (Sem == &PPCDoubleDouble)
3188 return initFromPPCDoubleDoubleAPInt(api);
3189
3190 llvm_unreachable(0);
3191 }
3192
3193 APFloat
getAllOnesValue(unsigned BitWidth,bool isIEEE)3194 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3195 {
3196 switch (BitWidth) {
3197 case 16:
3198 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth));
3199 case 32:
3200 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth));
3201 case 64:
3202 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth));
3203 case 80:
3204 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth));
3205 case 128:
3206 if (isIEEE)
3207 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth));
3208 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
3209 default:
3210 llvm_unreachable("Unknown floating bit width");
3211 }
3212 }
3213
getLargest(const fltSemantics & Sem,bool Negative)3214 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3215 APFloat Val(Sem, fcNormal, Negative);
3216
3217 // We want (in interchange format):
3218 // sign = {Negative}
3219 // exponent = 1..10
3220 // significand = 1..1
3221
3222 Val.exponent = Sem.maxExponent; // unbiased
3223
3224 // 1-initialize all bits....
3225 Val.zeroSignificand();
3226 integerPart *significand = Val.significandParts();
3227 unsigned N = partCountForBits(Sem.precision);
3228 for (unsigned i = 0; i != N; ++i)
3229 significand[i] = ~((integerPart) 0);
3230
3231 // ...and then clear the top bits for internal consistency.
3232 if (Sem.precision % integerPartWidth != 0)
3233 significand[N-1] &=
3234 (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
3235
3236 return Val;
3237 }
3238
getSmallest(const fltSemantics & Sem,bool Negative)3239 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3240 APFloat Val(Sem, fcNormal, Negative);
3241
3242 // We want (in interchange format):
3243 // sign = {Negative}
3244 // exponent = 0..0
3245 // significand = 0..01
3246
3247 Val.exponent = Sem.minExponent; // unbiased
3248 Val.zeroSignificand();
3249 Val.significandParts()[0] = 1;
3250 return Val;
3251 }
3252
getSmallestNormalized(const fltSemantics & Sem,bool Negative)3253 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3254 APFloat Val(Sem, fcNormal, Negative);
3255
3256 // We want (in interchange format):
3257 // sign = {Negative}
3258 // exponent = 0..0
3259 // significand = 10..0
3260
3261 Val.exponent = Sem.minExponent;
3262 Val.zeroSignificand();
3263 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3264 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3265
3266 return Val;
3267 }
3268
APFloat(const fltSemantics & Sem,const APInt & API)3269 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) {
3270 initFromAPInt(&Sem, API);
3271 }
3272
APFloat(float f)3273 APFloat::APFloat(float f) {
3274 initFromAPInt(&IEEEsingle, APInt::floatToBits(f));
3275 }
3276
APFloat(double d)3277 APFloat::APFloat(double d) {
3278 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d));
3279 }
3280
3281 namespace {
append(SmallVectorImpl<char> & Buffer,StringRef Str)3282 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3283 Buffer.append(Str.begin(), Str.end());
3284 }
3285
3286 /// Removes data from the given significand until it is no more
3287 /// precise than is required for the desired precision.
AdjustToPrecision(APInt & significand,int & exp,unsigned FormatPrecision)3288 void AdjustToPrecision(APInt &significand,
3289 int &exp, unsigned FormatPrecision) {
3290 unsigned bits = significand.getActiveBits();
3291
3292 // 196/59 is a very slight overestimate of lg_2(10).
3293 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3294
3295 if (bits <= bitsRequired) return;
3296
3297 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3298 if (!tensRemovable) return;
3299
3300 exp += tensRemovable;
3301
3302 APInt divisor(significand.getBitWidth(), 1);
3303 APInt powten(significand.getBitWidth(), 10);
3304 while (true) {
3305 if (tensRemovable & 1)
3306 divisor *= powten;
3307 tensRemovable >>= 1;
3308 if (!tensRemovable) break;
3309 powten *= powten;
3310 }
3311
3312 significand = significand.udiv(divisor);
3313
3314 // Truncate the significand down to its active bit count, but
3315 // don't try to drop below 32.
3316 unsigned newPrecision = std::max(32U, significand.getActiveBits());
3317 significand = significand.trunc(newPrecision);
3318 }
3319
3320
AdjustToPrecision(SmallVectorImpl<char> & buffer,int & exp,unsigned FormatPrecision)3321 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3322 int &exp, unsigned FormatPrecision) {
3323 unsigned N = buffer.size();
3324 if (N <= FormatPrecision) return;
3325
3326 // The most significant figures are the last ones in the buffer.
3327 unsigned FirstSignificant = N - FormatPrecision;
3328
3329 // Round.
3330 // FIXME: this probably shouldn't use 'round half up'.
3331
3332 // Rounding down is just a truncation, except we also want to drop
3333 // trailing zeros from the new result.
3334 if (buffer[FirstSignificant - 1] < '5') {
3335 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3336 FirstSignificant++;
3337
3338 exp += FirstSignificant;
3339 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3340 return;
3341 }
3342
3343 // Rounding up requires a decimal add-with-carry. If we continue
3344 // the carry, the newly-introduced zeros will just be truncated.
3345 for (unsigned I = FirstSignificant; I != N; ++I) {
3346 if (buffer[I] == '9') {
3347 FirstSignificant++;
3348 } else {
3349 buffer[I]++;
3350 break;
3351 }
3352 }
3353
3354 // If we carried through, we have exactly one digit of precision.
3355 if (FirstSignificant == N) {
3356 exp += FirstSignificant;
3357 buffer.clear();
3358 buffer.push_back('1');
3359 return;
3360 }
3361
3362 exp += FirstSignificant;
3363 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3364 }
3365 }
3366
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding) const3367 void APFloat::toString(SmallVectorImpl<char> &Str,
3368 unsigned FormatPrecision,
3369 unsigned FormatMaxPadding) const {
3370 switch (category) {
3371 case fcInfinity:
3372 if (isNegative())
3373 return append(Str, "-Inf");
3374 else
3375 return append(Str, "+Inf");
3376
3377 case fcNaN: return append(Str, "NaN");
3378
3379 case fcZero:
3380 if (isNegative())
3381 Str.push_back('-');
3382
3383 if (!FormatMaxPadding)
3384 append(Str, "0.0E+0");
3385 else
3386 Str.push_back('0');
3387 return;
3388
3389 case fcNormal:
3390 break;
3391 }
3392
3393 if (isNegative())
3394 Str.push_back('-');
3395
3396 // Decompose the number into an APInt and an exponent.
3397 int exp = exponent - ((int) semantics->precision - 1);
3398 APInt significand(semantics->precision,
3399 makeArrayRef(significandParts(),
3400 partCountForBits(semantics->precision)));
3401
3402 // Set FormatPrecision if zero. We want to do this before we
3403 // truncate trailing zeros, as those are part of the precision.
3404 if (!FormatPrecision) {
3405 // It's an interesting question whether to use the nominal
3406 // precision or the active precision here for denormals.
3407
3408 // FormatPrecision = ceil(significandBits / lg_2(10))
3409 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3410 }
3411
3412 // Ignore trailing binary zeros.
3413 int trailingZeros = significand.countTrailingZeros();
3414 exp += trailingZeros;
3415 significand = significand.lshr(trailingZeros);
3416
3417 // Change the exponent from 2^e to 10^e.
3418 if (exp == 0) {
3419 // Nothing to do.
3420 } else if (exp > 0) {
3421 // Just shift left.
3422 significand = significand.zext(semantics->precision + exp);
3423 significand <<= exp;
3424 exp = 0;
3425 } else { /* exp < 0 */
3426 int texp = -exp;
3427
3428 // We transform this using the identity:
3429 // (N)(2^-e) == (N)(5^e)(10^-e)
3430 // This means we have to multiply N (the significand) by 5^e.
3431 // To avoid overflow, we have to operate on numbers large
3432 // enough to store N * 5^e:
3433 // log2(N * 5^e) == log2(N) + e * log2(5)
3434 // <= semantics->precision + e * 137 / 59
3435 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3436
3437 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3438
3439 // Multiply significand by 5^e.
3440 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3441 significand = significand.zext(precision);
3442 APInt five_to_the_i(precision, 5);
3443 while (true) {
3444 if (texp & 1) significand *= five_to_the_i;
3445
3446 texp >>= 1;
3447 if (!texp) break;
3448 five_to_the_i *= five_to_the_i;
3449 }
3450 }
3451
3452 AdjustToPrecision(significand, exp, FormatPrecision);
3453
3454 SmallVector<char, 256> buffer;
3455
3456 // Fill the buffer.
3457 unsigned precision = significand.getBitWidth();
3458 APInt ten(precision, 10);
3459 APInt digit(precision, 0);
3460
3461 bool inTrail = true;
3462 while (significand != 0) {
3463 // digit <- significand % 10
3464 // significand <- significand / 10
3465 APInt::udivrem(significand, ten, significand, digit);
3466
3467 unsigned d = digit.getZExtValue();
3468
3469 // Drop trailing zeros.
3470 if (inTrail && !d) exp++;
3471 else {
3472 buffer.push_back((char) ('0' + d));
3473 inTrail = false;
3474 }
3475 }
3476
3477 assert(!buffer.empty() && "no characters in buffer!");
3478
3479 // Drop down to FormatPrecision.
3480 // TODO: don't do more precise calculations above than are required.
3481 AdjustToPrecision(buffer, exp, FormatPrecision);
3482
3483 unsigned NDigits = buffer.size();
3484
3485 // Check whether we should use scientific notation.
3486 bool FormatScientific;
3487 if (!FormatMaxPadding)
3488 FormatScientific = true;
3489 else {
3490 if (exp >= 0) {
3491 // 765e3 --> 765000
3492 // ^^^
3493 // But we shouldn't make the number look more precise than it is.
3494 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3495 NDigits + (unsigned) exp > FormatPrecision);
3496 } else {
3497 // Power of the most significant digit.
3498 int MSD = exp + (int) (NDigits - 1);
3499 if (MSD >= 0) {
3500 // 765e-2 == 7.65
3501 FormatScientific = false;
3502 } else {
3503 // 765e-5 == 0.00765
3504 // ^ ^^
3505 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3506 }
3507 }
3508 }
3509
3510 // Scientific formatting is pretty straightforward.
3511 if (FormatScientific) {
3512 exp += (NDigits - 1);
3513
3514 Str.push_back(buffer[NDigits-1]);
3515 Str.push_back('.');
3516 if (NDigits == 1)
3517 Str.push_back('0');
3518 else
3519 for (unsigned I = 1; I != NDigits; ++I)
3520 Str.push_back(buffer[NDigits-1-I]);
3521 Str.push_back('E');
3522
3523 Str.push_back(exp >= 0 ? '+' : '-');
3524 if (exp < 0) exp = -exp;
3525 SmallVector<char, 6> expbuf;
3526 do {
3527 expbuf.push_back((char) ('0' + (exp % 10)));
3528 exp /= 10;
3529 } while (exp);
3530 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3531 Str.push_back(expbuf[E-1-I]);
3532 return;
3533 }
3534
3535 // Non-scientific, positive exponents.
3536 if (exp >= 0) {
3537 for (unsigned I = 0; I != NDigits; ++I)
3538 Str.push_back(buffer[NDigits-1-I]);
3539 for (unsigned I = 0; I != (unsigned) exp; ++I)
3540 Str.push_back('0');
3541 return;
3542 }
3543
3544 // Non-scientific, negative exponents.
3545
3546 // The number of digits to the left of the decimal point.
3547 int NWholeDigits = exp + (int) NDigits;
3548
3549 unsigned I = 0;
3550 if (NWholeDigits > 0) {
3551 for (; I != (unsigned) NWholeDigits; ++I)
3552 Str.push_back(buffer[NDigits-I-1]);
3553 Str.push_back('.');
3554 } else {
3555 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3556
3557 Str.push_back('0');
3558 Str.push_back('.');
3559 for (unsigned Z = 1; Z != NZeros; ++Z)
3560 Str.push_back('0');
3561 }
3562
3563 for (; I != NDigits; ++I)
3564 Str.push_back(buffer[NDigits-I-1]);
3565 }
3566
getExactInverse(APFloat * inv) const3567 bool APFloat::getExactInverse(APFloat *inv) const {
3568 // Special floats and denormals have no exact inverse.
3569 if (category != fcNormal)
3570 return false;
3571
3572 // Check that the number is a power of two by making sure that only the
3573 // integer bit is set in the significand.
3574 if (significandLSB() != semantics->precision - 1)
3575 return false;
3576
3577 // Get the inverse.
3578 APFloat reciprocal(*semantics, 1ULL);
3579 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3580 return false;
3581
3582 // Avoid multiplication with a denormal, it is not safe on all platforms and
3583 // may be slower than a normal division.
3584 if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
3585 return false;
3586
3587 assert(reciprocal.category == fcNormal &&
3588 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3589
3590 if (inv)
3591 *inv = reciprocal;
3592
3593 return true;
3594 }
3595