1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/MathExtras.h"
21 #include <limits.h>
22 #include <cstring>
23
24 using namespace llvm;
25
/* Combine two small values into a single integer: LHS scaled by four,
   plus RHS. */
#define convolve(lhs, rhs) ((rhs) + 4 * (lhs))
27
28 /* Assumed in hexadecimal significand parsing, and conversion to
29 hexadecimal strings. */
30 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
31 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
32
33 namespace llvm {
34
35 /* Represents floating point arithmetic semantics. */
36 struct fltSemantics {
37 /* The largest E such that 2^E is representable; this matches the
38 definition of IEEE 754. */
39 exponent_t maxExponent;
40
41 /* The smallest E such that 2^E is a normalized number; this
42 matches the definition of IEEE 754. */
43 exponent_t minExponent;
44
45 /* Number of bits in the significand. This includes the integer
46 bit. */
47 unsigned int precision;
48
49 /* True if arithmetic is supported. */
50 unsigned int arithmeticOK;
51 };
52
53 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, true };
54 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
55 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
56 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
57 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
58 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
59
60 // The PowerPC format consists of two doubles. It does not map cleanly
61 // onto the usual format above. For now only storage of constants of
62 // this type is supported, no arithmetic.
63 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
64
65 /* A tight upper bound on number of parts required to hold the value
66 pow(5, power) is
67
68 power * 815 / (351 * integerPartWidth) + 1
69
70 However, whilst the result may require only this many parts,
71 because we are multiplying two values to get it, the
72 multiplication may require an extra part with the excess part
73 being zero (consider the trivial case of 1 * 1, tcFullMultiply
74 requires two parts to hold the single-part result). So we add an
75 extra one to guarantee enough space whilst multiplying. */
76 const unsigned int maxExponent = 16383;
77 const unsigned int maxPrecision = 113;
78 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
79 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
80 / (351 * integerPartWidth));
81 }
82
83 /* A bunch of private, handy routines. */
84
85 static inline unsigned int
partCountForBits(unsigned int bits)86 partCountForBits(unsigned int bits)
87 {
88 return ((bits) + integerPartWidth - 1) / integerPartWidth;
89 }
90
/* Maps a decimal digit character to its value 0U-9U.  Any other
   character yields a result >= 10U, because the unsigned subtraction
   wraps for characters below '0'. */
static inline unsigned int
decDigitValue(unsigned int c)
{
  const unsigned int zero = '0';

  return c - zero;
}
97
/* Returns the value 0U-15U of the hexadecimal digit C, accepting
   letters in either case.  Returns -1U if C is not a hexadecimal
   digit. */
static unsigned int
hexDigitValue(unsigned int c)
{
  /* One row per run of digit characters: the first character of the
     run, the offset of its last character, and the value of its first
     character. */
  static const unsigned int runs[3][3] = {
    { '0', 9, 0 },
    { 'A', 5, 10 },
    { 'a', 5, 10 }
  };

  for (unsigned int i = 0; i != 3; ++i) {
    /* Characters below the start of the run wrap around to a huge
       offset, so one comparison checks both ends of the run. */
    const unsigned int offset = c - runs[i][0];
    if (offset <= runs[i][1])
      return offset + runs[i][2];
  }

  return -1U;
}
117
118 static inline void
assertArithmeticOK(const llvm::fltSemantics & semantics)119 assertArithmeticOK(const llvm::fltSemantics &semantics) {
120 assert(semantics.arithmeticOK &&
121 "Compile-time arithmetic does not support these semantics");
122 }
123
124 /* Return the value of a decimal exponent of the form
125 [+-]ddddddd.
126
127 If the exponent overflows, returns a large exponent with the
128 appropriate sign. */
129 static int
readExponent(StringRef::iterator begin,StringRef::iterator end)130 readExponent(StringRef::iterator begin, StringRef::iterator end)
131 {
132 bool isNegative;
133 unsigned int absExponent;
134 const unsigned int overlargeExponent = 24000; /* FIXME. */
135 StringRef::iterator p = begin;
136
137 assert(p != end && "Exponent has no digits");
138
139 isNegative = (*p == '-');
140 if (*p == '-' || *p == '+') {
141 p++;
142 assert(p != end && "Exponent has no digits");
143 }
144
145 absExponent = decDigitValue(*p++);
146 assert(absExponent < 10U && "Invalid character in exponent");
147
148 for (; p != end; ++p) {
149 unsigned int value;
150
151 value = decDigitValue(*p);
152 assert(value < 10U && "Invalid character in exponent");
153
154 value += absExponent * 10;
155 if (absExponent >= overlargeExponent) {
156 absExponent = overlargeExponent;
157 p = end; /* outwit assert below */
158 break;
159 }
160 absExponent = value;
161 }
162
163 assert(p == end && "Invalid exponent in exponent");
164
165 if (isNegative)
166 return -(int) absExponent;
167 else
168 return (int) absExponent;
169 }
170
171 /* This is ugly and needs cleaning up, but I don't immediately see
172 how whilst remaining safe. */
173 static int
totalExponent(StringRef::iterator p,StringRef::iterator end,int exponentAdjustment)174 totalExponent(StringRef::iterator p, StringRef::iterator end,
175 int exponentAdjustment)
176 {
177 int unsignedExponent;
178 bool negative, overflow;
179 int exponent = 0;
180
181 assert(p != end && "Exponent has no digits");
182
183 negative = *p == '-';
184 if (*p == '-' || *p == '+') {
185 p++;
186 assert(p != end && "Exponent has no digits");
187 }
188
189 unsignedExponent = 0;
190 overflow = false;
191 for (; p != end; ++p) {
192 unsigned int value;
193
194 value = decDigitValue(*p);
195 assert(value < 10U && "Invalid character in exponent");
196
197 unsignedExponent = unsignedExponent * 10 + value;
198 if (unsignedExponent > 32767)
199 overflow = true;
200 }
201
202 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
203 overflow = true;
204
205 if (!overflow) {
206 exponent = unsignedExponent;
207 if (negative)
208 exponent = -exponent;
209 exponent += exponentAdjustment;
210 if (exponent > 32767 || exponent < -32768)
211 overflow = true;
212 }
213
214 if (overflow)
215 exponent = negative ? -32768: 32767;
216
217 return exponent;
218 }
219
220 static StringRef::iterator
skipLeadingZeroesAndAnyDot(StringRef::iterator begin,StringRef::iterator end,StringRef::iterator * dot)221 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
222 StringRef::iterator *dot)
223 {
224 StringRef::iterator p = begin;
225 *dot = end;
226 while (*p == '0' && p != end)
227 p++;
228
229 if (*p == '.') {
230 *dot = p++;
231
232 assert(end - begin != 1 && "Significand has no digits");
233
234 while (*p == '0' && p != end)
235 p++;
236 }
237
238 return p;
239 }
240
/* Description of a normal decimal floating point number of the form

     dddd.dddd[eE][+-]ddd

   in which both the decimal point and the exponent are optional.  It
   is filled out by interpretDecimal.

   If the value is zero, firstSigDigit points to a non-digit and the
   exponents are zero.  */
struct decimalInfo {
  /* First digit left once leading zeroes have been skipped.  */
  const char *firstSigDigit;

  /* Last digit left once trailing zeroes have been dropped.  */
  const char *lastSigDigit;

  /* Exponent that applies when the significand is read as an
     integer.  */
  int exponent;

  /* Exponent that applies when the decimal point is taken to follow a
     single leading non-zero digit.  */
  int normalizedExponent;
};
260
261 static void
interpretDecimal(StringRef::iterator begin,StringRef::iterator end,decimalInfo * D)262 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
263 decimalInfo *D)
264 {
265 StringRef::iterator dot = end;
266 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
267
268 D->firstSigDigit = p;
269 D->exponent = 0;
270 D->normalizedExponent = 0;
271
272 for (; p != end; ++p) {
273 if (*p == '.') {
274 assert(dot == end && "String contains multiple dots");
275 dot = p++;
276 if (p == end)
277 break;
278 }
279 if (decDigitValue(*p) >= 10U)
280 break;
281 }
282
283 if (p != end) {
284 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
285 assert(p != begin && "Significand has no digits");
286 assert((dot == end || p - begin != 1) && "Significand has no digits");
287
288 /* p points to the first non-digit in the string */
289 D->exponent = readExponent(p + 1, end);
290
291 /* Implied decimal point? */
292 if (dot == end)
293 dot = p;
294 }
295
296 /* If number is all zeroes accept any exponent. */
297 if (p != D->firstSigDigit) {
298 /* Drop insignificant trailing zeroes. */
299 if (p != begin) {
300 do
301 do
302 p--;
303 while (p != begin && *p == '0');
304 while (p != begin && *p == '.');
305 }
306
307 /* Adjust the exponents for any decimal point. */
308 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
309 D->normalizedExponent = (D->exponent +
310 static_cast<exponent_t>((p - D->firstSigDigit)
311 - (dot > D->firstSigDigit && dot < p)));
312 }
313
314 D->lastSigDigit = p;
315 }
316
317 /* Return the trailing fraction of a hexadecimal number.
318 DIGITVALUE is the first hex digit of the fraction, P points to
319 the next digit. */
320 static lostFraction
trailingHexadecimalFraction(StringRef::iterator p,StringRef::iterator end,unsigned int digitValue)321 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
322 unsigned int digitValue)
323 {
324 unsigned int hexDigit;
325
326 /* If the first trailing digit isn't 0 or 8 we can work out the
327 fraction immediately. */
328 if (digitValue > 8)
329 return lfMoreThanHalf;
330 else if (digitValue < 8 && digitValue > 0)
331 return lfLessThanHalf;
332
333 /* Otherwise we need to find the first non-zero digit. */
334 while (*p == '0')
335 p++;
336
337 assert(p != end && "Invalid trailing hexadecimal fraction!");
338
339 hexDigit = hexDigitValue(*p);
340
341 /* If we ran off the end it is exactly zero or one-half, otherwise
342 a little more. */
343 if (hexDigit == -1U)
344 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
345 else
346 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
347 }
348
349 /* Return the fraction lost were a bignum truncated losing the least
350 significant BITS bits. */
351 static lostFraction
lostFractionThroughTruncation(const integerPart * parts,unsigned int partCount,unsigned int bits)352 lostFractionThroughTruncation(const integerPart *parts,
353 unsigned int partCount,
354 unsigned int bits)
355 {
356 unsigned int lsb;
357
358 lsb = APInt::tcLSB(parts, partCount);
359
360 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
361 if (bits <= lsb)
362 return lfExactlyZero;
363 if (bits == lsb + 1)
364 return lfExactlyHalf;
365 if (bits <= partCount * integerPartWidth &&
366 APInt::tcExtractBit(parts, bits - 1))
367 return lfMoreThanHalf;
368
369 return lfLessThanHalf;
370 }
371
372 /* Shift DST right BITS bits noting lost fraction. */
373 static lostFraction
shiftRight(integerPart * dst,unsigned int parts,unsigned int bits)374 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
375 {
376 lostFraction lost_fraction;
377
378 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
379
380 APInt::tcShiftRight(dst, parts, bits);
381
382 return lost_fraction;
383 }
384
385 /* Combine the effect of two lost fractions. */
386 static lostFraction
combineLostFractions(lostFraction moreSignificant,lostFraction lessSignificant)387 combineLostFractions(lostFraction moreSignificant,
388 lostFraction lessSignificant)
389 {
390 if (lessSignificant != lfExactlyZero) {
391 if (moreSignificant == lfExactlyZero)
392 moreSignificant = lfLessThanHalf;
393 else if (moreSignificant == lfExactlyHalf)
394 moreSignificant = lfMoreThanHalf;
395 }
396
397 return moreSignificant;
398 }
399
/* Bounds the error of a product.  When two floating point numbers
   that differ from the values they approximate by at most HUERR1 and
   HUERR2 half-ulps are multiplied, the error of the result from the
   true value, in half-ulps, is strictly less than the value returned.
   INEXACTMULTIPLY says whether the multiplication itself was inexact.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  const unsigned int inputError = HUerr1 + HUerr2;
  const unsigned int multiplyError = inexactMultiply ? 1U : 0U;

  assert(HUerr1 < 2 || HUerr2 < 2 || (inputError < 8));

  /* Exact inputs: <= inexactMultiply half-ulps.  */
  return inputError == 0 ? multiplyError * 2
                         : multiplyError + 2 * inputError;
}
417
418 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
419 when the least significant BITS are truncated. BITS cannot be
420 zero. */
421 static integerPart
ulpsFromBoundary(const integerPart * parts,unsigned int bits,bool isNearest)422 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
423 {
424 unsigned int count, partBits;
425 integerPart part, boundary;
426
427 assert(bits != 0);
428
429 bits--;
430 count = bits / integerPartWidth;
431 partBits = bits % integerPartWidth + 1;
432
433 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
434
435 if (isNearest)
436 boundary = (integerPart) 1 << (partBits - 1);
437 else
438 boundary = 0;
439
440 if (count == 0) {
441 if (part - boundary <= boundary - part)
442 return part - boundary;
443 else
444 return boundary - part;
445 }
446
447 if (part == boundary) {
448 while (--count)
449 if (parts[count])
450 return ~(integerPart) 0; /* A lot. */
451
452 return parts[0];
453 } else if (part == boundary - 1) {
454 while (--count)
455 if (~parts[count])
456 return ~(integerPart) 0; /* A lot. */
457
458 return -parts[0];
459 }
460
461 return ~(integerPart) 0; /* A lot. */
462 }
463
464 /* Place pow(5, power) in DST, and return the number of parts used.
465 DST must be at least one part larger than size of the answer. */
466 static unsigned int
powerOf5(integerPart * dst,unsigned int power)467 powerOf5(integerPart *dst, unsigned int power)
468 {
469 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
470 15625, 78125 };
471 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
472 pow5s[0] = 78125 * 5;
473
474 unsigned int partsCount[16] = { 1 };
475 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
476 unsigned int result;
477 assert(power <= maxExponent);
478
479 p1 = dst;
480 p2 = scratch;
481
482 *p1 = firstEightPowers[power & 7];
483 power >>= 3;
484
485 result = 1;
486 pow5 = pow5s;
487
488 for (unsigned int n = 0; power; power >>= 1, n++) {
489 unsigned int pc;
490
491 pc = partsCount[n];
492
493 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
494 if (pc == 0) {
495 pc = partsCount[n - 1];
496 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
497 pc *= 2;
498 if (pow5[pc - 1] == 0)
499 pc--;
500 partsCount[n] = pc;
501 }
502
503 if (power & 1) {
504 integerPart *tmp;
505
506 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
507 result += pc;
508 if (p2[result - 1] == 0)
509 result--;
510
511 /* Now result is in p1 with partsCount parts and p2 is scratch
512 space. */
513 tmp = p1, p1 = p2, p2 = tmp;
514 }
515
516 pow5 += pc;
517 }
518
519 if (p1 != dst)
520 APInt::tcAssign(dst, p1, result);
521
522 return result;
523 }
524
/* Spellings of the special values, in lower and upper case.  */
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";

/* Hexadecimal digits, in lower and upper case.  The extra zero in the
   seventeenth position means that adding one to a digit, as is done
   when rounding up during hexadecimal output, needs no modular
   arithmetic.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
533
534 /* Write out an integerPart in hexadecimal, starting with the most
535 significant nibble. Write out exactly COUNT hexdigits, return
536 COUNT. */
537 static unsigned int
partAsHex(char * dst,integerPart part,unsigned int count,const char * hexDigitChars)538 partAsHex (char *dst, integerPart part, unsigned int count,
539 const char *hexDigitChars)
540 {
541 unsigned int result = count;
542
543 assert(count != 0 && count <= integerPartWidth / 4);
544
545 part >>= (integerPartWidth - 4 * count);
546 while (count--) {
547 dst[count] = hexDigitChars[part & 0xf];
548 part >>= 4;
549 }
550
551 return result;
552 }
553
/* Writes N in decimal to DST, without a terminator, and returns the
   position just past the last character written.  */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char reversed[40];
  unsigned int length = 0;

  /* Digits come out least significant first; a zero still yields one
     digit.  */
  do {
    reversed[length++] = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  /* Copy them out in the opposite order.  */
  while (length != 0)
    *dst++ = reversed[--length];

  return dst;
}
571
572 /* Write out a signed decimal integer. */
573 static char *
writeSignedDecimal(char * dst,int value)574 writeSignedDecimal (char *dst, int value)
575 {
576 if (value < 0) {
577 *dst++ = '-';
578 dst = writeUnsignedDecimal(dst, -(unsigned) value);
579 } else
580 dst = writeUnsignedDecimal(dst, value);
581
582 return dst;
583 }
584
585 /* Constructors. */
586 void
initialize(const fltSemantics * ourSemantics)587 APFloat::initialize(const fltSemantics *ourSemantics)
588 {
589 unsigned int count;
590
591 semantics = ourSemantics;
592 count = partCount();
593 if (count > 1)
594 significand.parts = new integerPart[count];
595 }
596
597 void
freeSignificand()598 APFloat::freeSignificand()
599 {
600 if (partCount() > 1)
601 delete [] significand.parts;
602 }
603
604 void
assign(const APFloat & rhs)605 APFloat::assign(const APFloat &rhs)
606 {
607 assert(semantics == rhs.semantics);
608
609 sign = rhs.sign;
610 category = rhs.category;
611 exponent = rhs.exponent;
612 sign2 = rhs.sign2;
613 exponent2 = rhs.exponent2;
614 if (category == fcNormal || category == fcNaN)
615 copySignificand(rhs);
616 }
617
618 void
copySignificand(const APFloat & rhs)619 APFloat::copySignificand(const APFloat &rhs)
620 {
621 assert(category == fcNormal || category == fcNaN);
622 assert(rhs.partCount() >= partCount());
623
624 APInt::tcAssign(significandParts(), rhs.significandParts(),
625 partCount());
626 }
627
628 /* Make this number a NaN, with an arbitrary but deterministic value
629 for the significand. If double or longer, this is a signalling NaN,
630 which may not be ideal. If float, this is QNaN(0). */
makeNaN(bool SNaN,bool Negative,const APInt * fill)631 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
632 {
633 category = fcNaN;
634 sign = Negative;
635
636 integerPart *significand = significandParts();
637 unsigned numParts = partCount();
638
639 // Set the significand bits to the fill.
640 if (!fill || fill->getNumWords() < numParts)
641 APInt::tcSet(significand, 0, numParts);
642 if (fill) {
643 APInt::tcAssign(significand, fill->getRawData(),
644 std::min(fill->getNumWords(), numParts));
645
646 // Zero out the excess bits of the significand.
647 unsigned bitsToPreserve = semantics->precision - 1;
648 unsigned part = bitsToPreserve / 64;
649 bitsToPreserve %= 64;
650 significand[part] &= ((1ULL << bitsToPreserve) - 1);
651 for (part++; part != numParts; ++part)
652 significand[part] = 0;
653 }
654
655 unsigned QNaNBit = semantics->precision - 2;
656
657 if (SNaN) {
658 // We always have to clear the QNaN bit to make it an SNaN.
659 APInt::tcClearBit(significand, QNaNBit);
660
661 // If there are no bits set in the payload, we have to set
662 // *something* to make it a NaN instead of an infinity;
663 // conventionally, this is the next bit down from the QNaN bit.
664 if (APInt::tcIsZero(significand, numParts))
665 APInt::tcSetBit(significand, QNaNBit - 1);
666 } else {
667 // We always have to set the QNaN bit to make it a QNaN.
668 APInt::tcSetBit(significand, QNaNBit);
669 }
670
671 // For x87 extended precision, we want to make a NaN, not a
672 // pseudo-NaN. Maybe we should expose the ability to make
673 // pseudo-NaNs?
674 if (semantics == &APFloat::x87DoubleExtended)
675 APInt::tcSetBit(significand, QNaNBit + 1);
676 }
677
makeNaN(const fltSemantics & Sem,bool SNaN,bool Negative,const APInt * fill)678 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
679 const APInt *fill) {
680 APFloat value(Sem, uninitialized);
681 value.makeNaN(SNaN, Negative, fill);
682 return value;
683 }
684
685 APFloat &
operator =(const APFloat & rhs)686 APFloat::operator=(const APFloat &rhs)
687 {
688 if (this != &rhs) {
689 if (semantics != rhs.semantics) {
690 freeSignificand();
691 initialize(rhs.semantics);
692 }
693 assign(rhs);
694 }
695
696 return *this;
697 }
698
699 bool
bitwiseIsEqual(const APFloat & rhs) const700 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
701 if (this == &rhs)
702 return true;
703 if (semantics != rhs.semantics ||
704 category != rhs.category ||
705 sign != rhs.sign)
706 return false;
707 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
708 sign2 != rhs.sign2)
709 return false;
710 if (category==fcZero || category==fcInfinity)
711 return true;
712 else if (category==fcNormal && exponent!=rhs.exponent)
713 return false;
714 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
715 exponent2!=rhs.exponent2)
716 return false;
717 else {
718 int i= partCount();
719 const integerPart* p=significandParts();
720 const integerPart* q=rhs.significandParts();
721 for (; i>0; i--, p++, q++) {
722 if (*p != *q)
723 return false;
724 }
725 return true;
726 }
727 }
728
APFloat(const fltSemantics & ourSemantics,integerPart value)729 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
730 : exponent2(0), sign2(0) {
731 assertArithmeticOK(ourSemantics);
732 initialize(&ourSemantics);
733 sign = 0;
734 zeroSignificand();
735 exponent = ourSemantics.precision - 1;
736 significandParts()[0] = value;
737 normalize(rmNearestTiesToEven, lfExactlyZero);
738 }
739
APFloat(const fltSemantics & ourSemantics)740 APFloat::APFloat(const fltSemantics &ourSemantics) : exponent2(0), sign2(0) {
741 assertArithmeticOK(ourSemantics);
742 initialize(&ourSemantics);
743 category = fcZero;
744 sign = false;
745 }
746
APFloat(const fltSemantics & ourSemantics,uninitializedTag tag)747 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
748 : exponent2(0), sign2(0) {
749 assertArithmeticOK(ourSemantics);
750 // Allocates storage if necessary but does not initialize it.
751 initialize(&ourSemantics);
752 }
753
APFloat(const fltSemantics & ourSemantics,fltCategory ourCategory,bool negative)754 APFloat::APFloat(const fltSemantics &ourSemantics,
755 fltCategory ourCategory, bool negative)
756 : exponent2(0), sign2(0) {
757 assertArithmeticOK(ourSemantics);
758 initialize(&ourSemantics);
759 category = ourCategory;
760 sign = negative;
761 if (category == fcNormal)
762 category = fcZero;
763 else if (ourCategory == fcNaN)
764 makeNaN();
765 }
766
APFloat(const fltSemantics & ourSemantics,StringRef text)767 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text)
768 : exponent2(0), sign2(0) {
769 assertArithmeticOK(ourSemantics);
770 initialize(&ourSemantics);
771 convertFromString(text, rmNearestTiesToEven);
772 }
773
/* Copy constructor: sets up storage for the semantics of RHS and then
   copies its value.  */
APFloat::APFloat(const APFloat &rhs) : exponent2(0), sign2(0) {
  const fltSemantics *const sourceSemantics = rhs.semantics;

  initialize(sourceSemantics);
  assign(rhs);
}
778
~APFloat()779 APFloat::~APFloat()
780 {
781 freeSignificand();
782 }
783
784 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
Profile(FoldingSetNodeID & ID) const785 void APFloat::Profile(FoldingSetNodeID& ID) const {
786 ID.Add(bitcastToAPInt());
787 }
788
789 unsigned int
partCount() const790 APFloat::partCount() const
791 {
792 return partCountForBits(semantics->precision + 1);
793 }
794
795 unsigned int
semanticsPrecision(const fltSemantics & semantics)796 APFloat::semanticsPrecision(const fltSemantics &semantics)
797 {
798 return semantics.precision;
799 }
800
801 const integerPart *
significandParts() const802 APFloat::significandParts() const
803 {
804 return const_cast<APFloat *>(this)->significandParts();
805 }
806
807 integerPart *
significandParts()808 APFloat::significandParts()
809 {
810 assert(category == fcNormal || category == fcNaN);
811
812 if (partCount() > 1)
813 return significand.parts;
814 else
815 return &significand.part;
816 }
817
818 void
zeroSignificand()819 APFloat::zeroSignificand()
820 {
821 category = fcNormal;
822 APInt::tcSet(significandParts(), 0, partCount());
823 }
824
825 /* Increment an fcNormal floating point number's significand. */
826 void
incrementSignificand()827 APFloat::incrementSignificand()
828 {
829 integerPart carry;
830
831 carry = APInt::tcIncrement(significandParts(), partCount());
832
833 /* Our callers should never cause us to overflow. */
834 assert(carry == 0);
835 (void)carry;
836 }
837
838 /* Add the significand of the RHS. Returns the carry flag. */
839 integerPart
addSignificand(const APFloat & rhs)840 APFloat::addSignificand(const APFloat &rhs)
841 {
842 integerPart *parts;
843
844 parts = significandParts();
845
846 assert(semantics == rhs.semantics);
847 assert(exponent == rhs.exponent);
848
849 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
850 }
851
852 /* Subtract the significand of the RHS with a borrow flag. Returns
853 the borrow flag. */
854 integerPart
subtractSignificand(const APFloat & rhs,integerPart borrow)855 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
856 {
857 integerPart *parts;
858
859 parts = significandParts();
860
861 assert(semantics == rhs.semantics);
862 assert(exponent == rhs.exponent);
863
864 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
865 partCount());
866 }
867
868 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
869 on to the full-precision result of the multiplication. Returns the
870 lost fraction. */
871 lostFraction
multiplySignificand(const APFloat & rhs,const APFloat * addend)872 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
873 {
874 unsigned int omsb; // One, not zero, based MSB.
875 unsigned int partsCount, newPartsCount, precision;
876 integerPart *lhsSignificand;
877 integerPart scratch[4];
878 integerPart *fullSignificand;
879 lostFraction lost_fraction;
880 bool ignored;
881
882 assert(semantics == rhs.semantics);
883
884 precision = semantics->precision;
885 newPartsCount = partCountForBits(precision * 2);
886
887 if (newPartsCount > 4)
888 fullSignificand = new integerPart[newPartsCount];
889 else
890 fullSignificand = scratch;
891
892 lhsSignificand = significandParts();
893 partsCount = partCount();
894
895 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
896 rhs.significandParts(), partsCount, partsCount);
897
898 lost_fraction = lfExactlyZero;
899 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
900 exponent += rhs.exponent;
901
902 if (addend) {
903 Significand savedSignificand = significand;
904 const fltSemantics *savedSemantics = semantics;
905 fltSemantics extendedSemantics;
906 opStatus status;
907 unsigned int extendedPrecision;
908
909 /* Normalize our MSB. */
910 extendedPrecision = precision + precision - 1;
911 if (omsb != extendedPrecision) {
912 APInt::tcShiftLeft(fullSignificand, newPartsCount,
913 extendedPrecision - omsb);
914 exponent -= extendedPrecision - omsb;
915 }
916
917 /* Create new semantics. */
918 extendedSemantics = *semantics;
919 extendedSemantics.precision = extendedPrecision;
920
921 if (newPartsCount == 1)
922 significand.part = fullSignificand[0];
923 else
924 significand.parts = fullSignificand;
925 semantics = &extendedSemantics;
926
927 APFloat extendedAddend(*addend);
928 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
929 assert(status == opOK);
930 (void)status;
931 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
932
933 /* Restore our state. */
934 if (newPartsCount == 1)
935 fullSignificand[0] = significand.part;
936 significand = savedSignificand;
937 semantics = savedSemantics;
938
939 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
940 }
941
942 exponent -= (precision - 1);
943
944 if (omsb > precision) {
945 unsigned int bits, significantParts;
946 lostFraction lf;
947
948 bits = omsb - precision;
949 significantParts = partCountForBits(omsb);
950 lf = shiftRight(fullSignificand, significantParts, bits);
951 lost_fraction = combineLostFractions(lf, lost_fraction);
952 exponent += bits;
953 }
954
955 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
956
957 if (newPartsCount > 4)
958 delete [] fullSignificand;
959
960 return lost_fraction;
961 }
962
963 /* Multiply the significands of LHS and RHS to DST. */
964 lostFraction
divideSignificand(const APFloat & rhs)965 APFloat::divideSignificand(const APFloat &rhs)
966 {
967 unsigned int bit, i, partsCount;
968 const integerPart *rhsSignificand;
969 integerPart *lhsSignificand, *dividend, *divisor;
970 integerPart scratch[4];
971 lostFraction lost_fraction;
972
973 assert(semantics == rhs.semantics);
974
975 lhsSignificand = significandParts();
976 rhsSignificand = rhs.significandParts();
977 partsCount = partCount();
978
979 if (partsCount > 2)
980 dividend = new integerPart[partsCount * 2];
981 else
982 dividend = scratch;
983
984 divisor = dividend + partsCount;
985
986 /* Copy the dividend and divisor as they will be modified in-place. */
987 for (i = 0; i < partsCount; i++) {
988 dividend[i] = lhsSignificand[i];
989 divisor[i] = rhsSignificand[i];
990 lhsSignificand[i] = 0;
991 }
992
993 exponent -= rhs.exponent;
994
995 unsigned int precision = semantics->precision;
996
997 /* Normalize the divisor. */
998 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
999 if (bit) {
1000 exponent += bit;
1001 APInt::tcShiftLeft(divisor, partsCount, bit);
1002 }
1003
1004 /* Normalize the dividend. */
1005 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1006 if (bit) {
1007 exponent -= bit;
1008 APInt::tcShiftLeft(dividend, partsCount, bit);
1009 }
1010
1011 /* Ensure the dividend >= divisor initially for the loop below.
1012 Incidentally, this means that the division loop below is
1013 guaranteed to set the integer bit to one. */
1014 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1015 exponent--;
1016 APInt::tcShiftLeft(dividend, partsCount, 1);
1017 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1018 }
1019
1020 /* Long division. */
1021 for (bit = precision; bit; bit -= 1) {
1022 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1023 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1024 APInt::tcSetBit(lhsSignificand, bit - 1);
1025 }
1026
1027 APInt::tcShiftLeft(dividend, partsCount, 1);
1028 }
1029
1030 /* Figure out the lost fraction. */
1031 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1032
1033 if (cmp > 0)
1034 lost_fraction = lfMoreThanHalf;
1035 else if (cmp == 0)
1036 lost_fraction = lfExactlyHalf;
1037 else if (APInt::tcIsZero(dividend, partsCount))
1038 lost_fraction = lfExactlyZero;
1039 else
1040 lost_fraction = lfLessThanHalf;
1041
1042 if (partsCount > 2)
1043 delete [] dividend;
1044
1045 return lost_fraction;
1046 }
1047
1048 unsigned int
significandMSB() const1049 APFloat::significandMSB() const
1050 {
1051 return APInt::tcMSB(significandParts(), partCount());
1052 }
1053
1054 unsigned int
significandLSB() const1055 APFloat::significandLSB() const
1056 {
1057 return APInt::tcLSB(significandParts(), partCount());
1058 }
1059
1060 /* Note that a zero result is NOT normalized to fcZero. */
1061 lostFraction
shiftSignificandRight(unsigned int bits)1062 APFloat::shiftSignificandRight(unsigned int bits)
1063 {
1064 /* Our exponent should not overflow. */
1065 assert((exponent_t) (exponent + bits) >= exponent);
1066
1067 exponent += bits;
1068
1069 return shiftRight(significandParts(), partCount(), bits);
1070 }
1071
1072 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1073 void
shiftSignificandLeft(unsigned int bits)1074 APFloat::shiftSignificandLeft(unsigned int bits)
1075 {
1076 assert(bits < semantics->precision);
1077
1078 if (bits) {
1079 unsigned int partsCount = partCount();
1080
1081 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1082 exponent -= bits;
1083
1084 assert(!APInt::tcIsZero(significandParts(), partsCount));
1085 }
1086 }
1087
1088 APFloat::cmpResult
compareAbsoluteValue(const APFloat & rhs) const1089 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1090 {
1091 int compare;
1092
1093 assert(semantics == rhs.semantics);
1094 assert(category == fcNormal);
1095 assert(rhs.category == fcNormal);
1096
1097 compare = exponent - rhs.exponent;
1098
1099 /* If exponents are equal, do an unsigned bignum comparison of the
1100 significands. */
1101 if (compare == 0)
1102 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1103 partCount());
1104
1105 if (compare > 0)
1106 return cmpGreaterThan;
1107 else if (compare < 0)
1108 return cmpLessThan;
1109 else
1110 return cmpEqual;
1111 }
1112
1113 /* Handle overflow. Sign is preserved. We either become infinity or
1114 the largest finite number. */
1115 APFloat::opStatus
handleOverflow(roundingMode rounding_mode)1116 APFloat::handleOverflow(roundingMode rounding_mode)
1117 {
1118 /* Infinity? */
1119 if (rounding_mode == rmNearestTiesToEven ||
1120 rounding_mode == rmNearestTiesToAway ||
1121 (rounding_mode == rmTowardPositive && !sign) ||
1122 (rounding_mode == rmTowardNegative && sign)) {
1123 category = fcInfinity;
1124 return (opStatus) (opOverflow | opInexact);
1125 }
1126
1127 /* Otherwise we become the largest finite number. */
1128 category = fcNormal;
1129 exponent = semantics->maxExponent;
1130 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1131 semantics->precision);
1132
1133 return opInexact;
1134 }
1135
1136 /* Returns TRUE if, when truncating the current number, with BIT the
1137 new LSB, with the given lost fraction and rounding mode, the result
1138 would need to be rounded away from zero (i.e., by increasing the
1139 signficand). This routine must work for fcZero of both signs, and
1140 fcNormal numbers. */
1141 bool
roundAwayFromZero(roundingMode rounding_mode,lostFraction lost_fraction,unsigned int bit) const1142 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1143 lostFraction lost_fraction,
1144 unsigned int bit) const
1145 {
1146 /* NaNs and infinities should not have lost fractions. */
1147 assert(category == fcNormal || category == fcZero);
1148
1149 /* Current callers never pass this so we don't handle it. */
1150 assert(lost_fraction != lfExactlyZero);
1151
1152 switch (rounding_mode) {
1153 default:
1154 llvm_unreachable(0);
1155
1156 case rmNearestTiesToAway:
1157 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1158
1159 case rmNearestTiesToEven:
1160 if (lost_fraction == lfMoreThanHalf)
1161 return true;
1162
1163 /* Our zeroes don't have a significand to test. */
1164 if (lost_fraction == lfExactlyHalf && category != fcZero)
1165 return APInt::tcExtractBit(significandParts(), bit);
1166
1167 return false;
1168
1169 case rmTowardZero:
1170 return false;
1171
1172 case rmTowardPositive:
1173 return sign == false;
1174
1175 case rmTowardNegative:
1176 return sign == true;
1177 }
1178 }
1179
/* Bring an fcNormal number into canonical form and round it.

   LOST_FRACTION describes the bits already discarded below the
   significand's LSB; ROUNDING_MODE selects how they, together with any
   bits shifted out here, are rounded.  Values of any other category are
   returned untouched with opOK.

   Returns opOK for an exact result, opInexact when rounding took place,
   opOverflow | opInexact or the result of handleOverflow on overflow,
   and opUnderflow | opInexact for an inexact denormal or a result that
   rounded to zero.  */
APFloat::opStatus
APFloat::normalize(roundingMode rounding_mode,
                   lostFraction lost_fraction)
{
  unsigned int omsb;                /* One, not zero, based MSB.  */
  int exponentChange;

  if (category != fcNormal)
    return opOK;

  /* Before rounding normalize the exponent of fcNormal numbers.  */
  omsb = significandMSB() + 1;

  /* OMSB is zero when the significand has no bit set; in that case
     there is nothing to shift.  */
  if (omsb) {
    /* OMSB is numbered from 1.  We want to place it in the integer
       bit numbered PRECISION if possible, with a compensating change in
       the exponent.  */
    exponentChange = omsb - semantics->precision;

    /* If the resulting exponent is too high, overflow according to
       the rounding mode.  */
    if (exponent + exponentChange > semantics->maxExponent)
      return handleOverflow(rounding_mode);

    /* Subnormal numbers have exponent minExponent, and their MSB
       is forced based on that.  */
    if (exponent + exponentChange < semantics->minExponent)
      exponentChange = semantics->minExponent - exponent;

    /* Shifting left is easy as we don't lose precision.  */
    if (exponentChange < 0) {
      assert(lost_fraction == lfExactlyZero);

      shiftSignificandLeft(-exponentChange);

      return opOK;
    }

    if (exponentChange > 0) {
      lostFraction lf;

      /* Shift right and capture any new lost fraction.  */
      lf = shiftSignificandRight(exponentChange);

      /* Merge the freshly shifted-out bits with the fraction that was
         already lost before this call.  */
      lost_fraction = combineLostFractions(lf, lost_fraction);

      /* Keep OMSB up-to-date.  */
      if (omsb > (unsigned) exponentChange)
        omsb -= exponentChange;
      else
        omsb = 0;
    }
  }

  /* Now round the number according to rounding_mode given the lost
     fraction.  */

  /* As specified in IEEE 754, since we do not trap we do not report
     underflow for exact results.  */
  if (lost_fraction == lfExactlyZero) {
    /* Canonicalize zeroes.  */
    if (omsb == 0)
      category = fcZero;

    return opOK;
  }

  /* Increment the significand if we're rounding away from zero.  */
  if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
    /* An all-zero significand is about to become the smallest
       denormal, so give it the minimum exponent.  */
    if (omsb == 0)
      exponent = semantics->minExponent;

    incrementSignificand();
    omsb = significandMSB() + 1;

    /* Did the significand increment overflow?  */
    if (omsb == (unsigned) semantics->precision + 1) {
      /* Renormalize by incrementing the exponent and shifting our
         significand right one.  However if we already have the
         maximum exponent we overflow to infinity.  */
      if (exponent == semantics->maxExponent) {
        category = fcInfinity;

        return (opStatus) (opOverflow | opInexact);
      }

      shiftSignificandRight(1);

      return opInexact;
    }
  }

  /* The normal case - we were and are not denormal, and any
     significand increment above didn't overflow.  */
  if (omsb == semantics->precision)
    return opInexact;

  /* We have a non-zero denormal.  */
  assert(omsb < semantics->precision);

  /* Canonicalize zeroes.  */
  if (omsb == 0)
    category = fcZero;

  /* The fcZero case is a denormal that underflowed to zero.  */
  return (opStatus) (opUnderflow | opInexact);
}
1287
1288 APFloat::opStatus
addOrSubtractSpecials(const APFloat & rhs,bool subtract)1289 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1290 {
1291 switch (convolve(category, rhs.category)) {
1292 default:
1293 llvm_unreachable(0);
1294
1295 case convolve(fcNaN, fcZero):
1296 case convolve(fcNaN, fcNormal):
1297 case convolve(fcNaN, fcInfinity):
1298 case convolve(fcNaN, fcNaN):
1299 case convolve(fcNormal, fcZero):
1300 case convolve(fcInfinity, fcNormal):
1301 case convolve(fcInfinity, fcZero):
1302 return opOK;
1303
1304 case convolve(fcZero, fcNaN):
1305 case convolve(fcNormal, fcNaN):
1306 case convolve(fcInfinity, fcNaN):
1307 category = fcNaN;
1308 copySignificand(rhs);
1309 return opOK;
1310
1311 case convolve(fcNormal, fcInfinity):
1312 case convolve(fcZero, fcInfinity):
1313 category = fcInfinity;
1314 sign = rhs.sign ^ subtract;
1315 return opOK;
1316
1317 case convolve(fcZero, fcNormal):
1318 assign(rhs);
1319 sign = rhs.sign ^ subtract;
1320 return opOK;
1321
1322 case convolve(fcZero, fcZero):
1323 /* Sign depends on rounding mode; handled by caller. */
1324 return opOK;
1325
1326 case convolve(fcInfinity, fcInfinity):
1327 /* Differently signed infinities can only be validly
1328 subtracted. */
1329 if (((sign ^ rhs.sign)!=0) != subtract) {
1330 makeNaN();
1331 return opInvalidOp;
1332 }
1333
1334 return opOK;
1335
1336 case convolve(fcNormal, fcNormal):
1337 return opDivByZero;
1338 }
1339 }
1340
/* Add or subtract two normal numbers.

   Combines the significand of RHS into this number's significand,
   aligning exponents by shifting first.  SUBTRACT selects subtraction
   before the operand signs are taken into account.  The sign of this
   number is flipped when the operands had to be swapped.  Returns the
   fraction lost through the alignment shift; the caller is expected to
   normalize afterwards.  */
lostFraction
APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
{
  integerPart carry;
  lostFraction lost_fraction;
  int bits;

  /* Determine if the operation on the absolute values is effectively
     an addition or subtraction.  */
  subtract ^= (sign ^ rhs.sign) ? true : false;

  /* Are we bigger exponent-wise than the RHS?  */
  bits = exponent - rhs.exponent;

  /* Subtraction is more subtle than one might naively expect.  */
  if (subtract) {
    /* Work on a copy so that RHS itself is never shifted.  */
    APFloat temp_rhs(rhs);
    bool reverse;

    if (bits == 0) {
      /* Equal exponents: subtract the smaller magnitude from the
         larger one.  */
      reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
      lost_fraction = lfExactlyZero;
    } else if (bits > 0) {
      /* We have the larger exponent.  Shift the copy of RHS right by
         one bit less than the difference and ourselves left by one.  */
      lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
      shiftSignificandLeft(1);
      reverse = false;
    } else {
      /* RHS has the larger exponent; the mirror image of the case
         above.  */
      lost_fraction = shiftSignificandRight(-bits - 1);
      temp_rhs.shiftSignificandLeft(1);
      reverse = true;
    }

    /* A non-zero lost fraction is passed on as the second argument of
       subtractSignificand.  */
    if (reverse) {
      carry = temp_rhs.subtractSignificand
        (*this, lost_fraction != lfExactlyZero);
      copySignificand(temp_rhs);
      sign = !sign;
    } else {
      carry = subtractSignificand
        (temp_rhs, lost_fraction != lfExactlyZero);
    }

    /* Invert the lost fraction - it was on the RHS and
       subtracted.  */
    if (lost_fraction == lfLessThanHalf)
      lost_fraction = lfMoreThanHalf;
    else if (lost_fraction == lfMoreThanHalf)
      lost_fraction = lfLessThanHalf;

    /* The code above is intended to ensure that no borrow is
       necessary.  */
    assert(!carry);
    (void)carry;
  } else {
    if (bits > 0) {
      /* We have the larger exponent: align a copy of RHS to us.  */
      APFloat temp_rhs(rhs);

      lost_fraction = temp_rhs.shiftSignificandRight(bits);
      carry = addSignificand(temp_rhs);
    } else {
      /* Otherwise align ourselves to RHS (a shift by zero when the
         exponents are equal).  */
      lost_fraction = shiftSignificandRight(-bits);
      carry = addSignificand(rhs);
    }

    /* We have a guard bit; generating a carry cannot happen.  */
    assert(!carry);
    (void)carry;
  }

  return lost_fraction;
}
1413
1414 APFloat::opStatus
multiplySpecials(const APFloat & rhs)1415 APFloat::multiplySpecials(const APFloat &rhs)
1416 {
1417 switch (convolve(category, rhs.category)) {
1418 default:
1419 llvm_unreachable(0);
1420
1421 case convolve(fcNaN, fcZero):
1422 case convolve(fcNaN, fcNormal):
1423 case convolve(fcNaN, fcInfinity):
1424 case convolve(fcNaN, fcNaN):
1425 return opOK;
1426
1427 case convolve(fcZero, fcNaN):
1428 case convolve(fcNormal, fcNaN):
1429 case convolve(fcInfinity, fcNaN):
1430 category = fcNaN;
1431 copySignificand(rhs);
1432 return opOK;
1433
1434 case convolve(fcNormal, fcInfinity):
1435 case convolve(fcInfinity, fcNormal):
1436 case convolve(fcInfinity, fcInfinity):
1437 category = fcInfinity;
1438 return opOK;
1439
1440 case convolve(fcZero, fcNormal):
1441 case convolve(fcNormal, fcZero):
1442 case convolve(fcZero, fcZero):
1443 category = fcZero;
1444 return opOK;
1445
1446 case convolve(fcZero, fcInfinity):
1447 case convolve(fcInfinity, fcZero):
1448 makeNaN();
1449 return opInvalidOp;
1450
1451 case convolve(fcNormal, fcNormal):
1452 return opOK;
1453 }
1454 }
1455
1456 APFloat::opStatus
divideSpecials(const APFloat & rhs)1457 APFloat::divideSpecials(const APFloat &rhs)
1458 {
1459 switch (convolve(category, rhs.category)) {
1460 default:
1461 llvm_unreachable(0);
1462
1463 case convolve(fcNaN, fcZero):
1464 case convolve(fcNaN, fcNormal):
1465 case convolve(fcNaN, fcInfinity):
1466 case convolve(fcNaN, fcNaN):
1467 case convolve(fcInfinity, fcZero):
1468 case convolve(fcInfinity, fcNormal):
1469 case convolve(fcZero, fcInfinity):
1470 case convolve(fcZero, fcNormal):
1471 return opOK;
1472
1473 case convolve(fcZero, fcNaN):
1474 case convolve(fcNormal, fcNaN):
1475 case convolve(fcInfinity, fcNaN):
1476 category = fcNaN;
1477 copySignificand(rhs);
1478 return opOK;
1479
1480 case convolve(fcNormal, fcInfinity):
1481 category = fcZero;
1482 return opOK;
1483
1484 case convolve(fcNormal, fcZero):
1485 category = fcInfinity;
1486 return opDivByZero;
1487
1488 case convolve(fcInfinity, fcInfinity):
1489 case convolve(fcZero, fcZero):
1490 makeNaN();
1491 return opInvalidOp;
1492
1493 case convolve(fcNormal, fcNormal):
1494 return opOK;
1495 }
1496 }
1497
1498 APFloat::opStatus
modSpecials(const APFloat & rhs)1499 APFloat::modSpecials(const APFloat &rhs)
1500 {
1501 switch (convolve(category, rhs.category)) {
1502 default:
1503 llvm_unreachable(0);
1504
1505 case convolve(fcNaN, fcZero):
1506 case convolve(fcNaN, fcNormal):
1507 case convolve(fcNaN, fcInfinity):
1508 case convolve(fcNaN, fcNaN):
1509 case convolve(fcZero, fcInfinity):
1510 case convolve(fcZero, fcNormal):
1511 case convolve(fcNormal, fcInfinity):
1512 return opOK;
1513
1514 case convolve(fcZero, fcNaN):
1515 case convolve(fcNormal, fcNaN):
1516 case convolve(fcInfinity, fcNaN):
1517 category = fcNaN;
1518 copySignificand(rhs);
1519 return opOK;
1520
1521 case convolve(fcNormal, fcZero):
1522 case convolve(fcInfinity, fcZero):
1523 case convolve(fcInfinity, fcNormal):
1524 case convolve(fcInfinity, fcInfinity):
1525 case convolve(fcZero, fcZero):
1526 makeNaN();
1527 return opInvalidOp;
1528
1529 case convolve(fcNormal, fcNormal):
1530 return opOK;
1531 }
1532 }
1533
1534 /* Change sign. */
1535 void
changeSign()1536 APFloat::changeSign()
1537 {
1538 /* Look mummy, this one's easy. */
1539 sign = !sign;
1540 }
1541
1542 void
clearSign()1543 APFloat::clearSign()
1544 {
1545 /* So is this one. */
1546 sign = 0;
1547 }
1548
1549 void
copySign(const APFloat & rhs)1550 APFloat::copySign(const APFloat &rhs)
1551 {
1552 /* And this one. */
1553 sign = rhs.sign;
1554 }
1555
1556 /* Normalized addition or subtraction. */
1557 APFloat::opStatus
addOrSubtract(const APFloat & rhs,roundingMode rounding_mode,bool subtract)1558 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1559 bool subtract)
1560 {
1561 opStatus fs;
1562
1563 assertArithmeticOK(*semantics);
1564
1565 fs = addOrSubtractSpecials(rhs, subtract);
1566
1567 /* This return code means it was not a simple case. */
1568 if (fs == opDivByZero) {
1569 lostFraction lost_fraction;
1570
1571 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1572 fs = normalize(rounding_mode, lost_fraction);
1573
1574 /* Can only be zero if we lost no fraction. */
1575 assert(category != fcZero || lost_fraction == lfExactlyZero);
1576 }
1577
1578 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1579 positive zero unless rounding to minus infinity, except that
1580 adding two like-signed zeroes gives that zero. */
1581 if (category == fcZero) {
1582 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1583 sign = (rounding_mode == rmTowardNegative);
1584 }
1585
1586 return fs;
1587 }
1588
1589 /* Normalized addition. */
1590 APFloat::opStatus
add(const APFloat & rhs,roundingMode rounding_mode)1591 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1592 {
1593 return addOrSubtract(rhs, rounding_mode, false);
1594 }
1595
1596 /* Normalized subtraction. */
1597 APFloat::opStatus
subtract(const APFloat & rhs,roundingMode rounding_mode)1598 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1599 {
1600 return addOrSubtract(rhs, rounding_mode, true);
1601 }
1602
1603 /* Normalized multiply. */
1604 APFloat::opStatus
multiply(const APFloat & rhs,roundingMode rounding_mode)1605 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1606 {
1607 opStatus fs;
1608
1609 assertArithmeticOK(*semantics);
1610 sign ^= rhs.sign;
1611 fs = multiplySpecials(rhs);
1612
1613 if (category == fcNormal) {
1614 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1615 fs = normalize(rounding_mode, lost_fraction);
1616 if (lost_fraction != lfExactlyZero)
1617 fs = (opStatus) (fs | opInexact);
1618 }
1619
1620 return fs;
1621 }
1622
1623 /* Normalized divide. */
1624 APFloat::opStatus
divide(const APFloat & rhs,roundingMode rounding_mode)1625 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1626 {
1627 opStatus fs;
1628
1629 assertArithmeticOK(*semantics);
1630 sign ^= rhs.sign;
1631 fs = divideSpecials(rhs);
1632
1633 if (category == fcNormal) {
1634 lostFraction lost_fraction = divideSignificand(rhs);
1635 fs = normalize(rounding_mode, lost_fraction);
1636 if (lost_fraction != lfExactlyZero)
1637 fs = (opStatus) (fs | opInexact);
1638 }
1639
1640 return fs;
1641 }
1642
1643 /* Normalized remainder. This is not currently correct in all cases. */
1644 APFloat::opStatus
remainder(const APFloat & rhs)1645 APFloat::remainder(const APFloat &rhs)
1646 {
1647 opStatus fs;
1648 APFloat V = *this;
1649 unsigned int origSign = sign;
1650
1651 assertArithmeticOK(*semantics);
1652 fs = V.divide(rhs, rmNearestTiesToEven);
1653 if (fs == opDivByZero)
1654 return fs;
1655
1656 int parts = partCount();
1657 integerPart *x = new integerPart[parts];
1658 bool ignored;
1659 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1660 rmNearestTiesToEven, &ignored);
1661 if (fs==opInvalidOp)
1662 return fs;
1663
1664 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1665 rmNearestTiesToEven);
1666 assert(fs==opOK); // should always work
1667
1668 fs = V.multiply(rhs, rmNearestTiesToEven);
1669 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1670
1671 fs = subtract(V, rmNearestTiesToEven);
1672 assert(fs==opOK || fs==opInexact); // likewise
1673
1674 if (isZero())
1675 sign = origSign; // IEEE754 requires this
1676 delete[] x;
1677 return fs;
1678 }
1679
1680 /* Normalized llvm frem (C fmod).
1681 This is not currently correct in all cases. */
1682 APFloat::opStatus
mod(const APFloat & rhs,roundingMode rounding_mode)1683 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1684 {
1685 opStatus fs;
1686 assertArithmeticOK(*semantics);
1687 fs = modSpecials(rhs);
1688
1689 if (category == fcNormal && rhs.category == fcNormal) {
1690 APFloat V = *this;
1691 unsigned int origSign = sign;
1692
1693 fs = V.divide(rhs, rmNearestTiesToEven);
1694 if (fs == opDivByZero)
1695 return fs;
1696
1697 int parts = partCount();
1698 integerPart *x = new integerPart[parts];
1699 bool ignored;
1700 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1701 rmTowardZero, &ignored);
1702 if (fs==opInvalidOp)
1703 return fs;
1704
1705 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1706 rmNearestTiesToEven);
1707 assert(fs==opOK); // should always work
1708
1709 fs = V.multiply(rhs, rounding_mode);
1710 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1711
1712 fs = subtract(V, rounding_mode);
1713 assert(fs==opOK || fs==opInexact); // likewise
1714
1715 if (isZero())
1716 sign = origSign; // IEEE754 requires this
1717 delete[] x;
1718 }
1719 return fs;
1720 }
1721
1722 /* Normalized fused-multiply-add. */
1723 APFloat::opStatus
fusedMultiplyAdd(const APFloat & multiplicand,const APFloat & addend,roundingMode rounding_mode)1724 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1725 const APFloat &addend,
1726 roundingMode rounding_mode)
1727 {
1728 opStatus fs;
1729
1730 assertArithmeticOK(*semantics);
1731
1732 /* Post-multiplication sign, before addition. */
1733 sign ^= multiplicand.sign;
1734
1735 /* If and only if all arguments are normal do we need to do an
1736 extended-precision calculation. */
1737 if (category == fcNormal &&
1738 multiplicand.category == fcNormal &&
1739 addend.category == fcNormal) {
1740 lostFraction lost_fraction;
1741
1742 lost_fraction = multiplySignificand(multiplicand, &addend);
1743 fs = normalize(rounding_mode, lost_fraction);
1744 if (lost_fraction != lfExactlyZero)
1745 fs = (opStatus) (fs | opInexact);
1746
1747 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1748 positive zero unless rounding to minus infinity, except that
1749 adding two like-signed zeroes gives that zero. */
1750 if (category == fcZero && sign != addend.sign)
1751 sign = (rounding_mode == rmTowardNegative);
1752 } else {
1753 fs = multiplySpecials(multiplicand);
1754
1755 /* FS can only be opOK or opInvalidOp. There is no more work
1756 to do in the latter case. The IEEE-754R standard says it is
1757 implementation-defined in this case whether, if ADDEND is a
1758 quiet NaN, we raise invalid op; this implementation does so.
1759
1760 If we need to do the addition we can do so with normal
1761 precision. */
1762 if (fs == opOK)
1763 fs = addOrSubtract(addend, rounding_mode, false);
1764 }
1765
1766 return fs;
1767 }
1768
1769 /* Comparison requires normalized numbers. */
1770 APFloat::cmpResult
compare(const APFloat & rhs) const1771 APFloat::compare(const APFloat &rhs) const
1772 {
1773 cmpResult result;
1774
1775 assertArithmeticOK(*semantics);
1776 assert(semantics == rhs.semantics);
1777
1778 switch (convolve(category, rhs.category)) {
1779 default:
1780 llvm_unreachable(0);
1781
1782 case convolve(fcNaN, fcZero):
1783 case convolve(fcNaN, fcNormal):
1784 case convolve(fcNaN, fcInfinity):
1785 case convolve(fcNaN, fcNaN):
1786 case convolve(fcZero, fcNaN):
1787 case convolve(fcNormal, fcNaN):
1788 case convolve(fcInfinity, fcNaN):
1789 return cmpUnordered;
1790
1791 case convolve(fcInfinity, fcNormal):
1792 case convolve(fcInfinity, fcZero):
1793 case convolve(fcNormal, fcZero):
1794 if (sign)
1795 return cmpLessThan;
1796 else
1797 return cmpGreaterThan;
1798
1799 case convolve(fcNormal, fcInfinity):
1800 case convolve(fcZero, fcInfinity):
1801 case convolve(fcZero, fcNormal):
1802 if (rhs.sign)
1803 return cmpGreaterThan;
1804 else
1805 return cmpLessThan;
1806
1807 case convolve(fcInfinity, fcInfinity):
1808 if (sign == rhs.sign)
1809 return cmpEqual;
1810 else if (sign)
1811 return cmpLessThan;
1812 else
1813 return cmpGreaterThan;
1814
1815 case convolve(fcZero, fcZero):
1816 return cmpEqual;
1817
1818 case convolve(fcNormal, fcNormal):
1819 break;
1820 }
1821
1822 /* Two normal numbers. Do they have the same sign? */
1823 if (sign != rhs.sign) {
1824 if (sign)
1825 result = cmpLessThan;
1826 else
1827 result = cmpGreaterThan;
1828 } else {
1829 /* Compare absolute values; invert result if negative. */
1830 result = compareAbsoluteValue(rhs);
1831
1832 if (sign) {
1833 if (result == cmpLessThan)
1834 result = cmpGreaterThan;
1835 else if (result == cmpGreaterThan)
1836 result = cmpLessThan;
1837 }
1838 }
1839
1840 return result;
1841 }
1842
/// APFloat::convert - convert a value of one floating point type to another.
/// The return value corresponds to the IEEE754 exceptions.  *losesInfo
/// records whether the transformation lost information, i.e. whether
/// converting the result back to the original type will produce the
/// original value (this is almost the same as return value==opOK, but there
/// are edge cases where this is not so).
///
/// On return this number uses toSemantics.  Normal numbers are re-normalized
/// and rounded according to rounding_mode; NaN payloads are shifted into
/// place without rounding; zeroes and infinities only switch semantics.

APFloat::opStatus
APFloat::convert(const fltSemantics &toSemantics,
                 roundingMode rounding_mode, bool *losesInfo)
{
  lostFraction lostFraction;
  unsigned int newPartCount, oldPartCount;
  opStatus fs;

  assertArithmeticOK(*semantics);
  assertArithmeticOK(toSemantics);
  lostFraction = lfExactlyZero;
  newPartCount = partCountForBits(toSemantics.precision + 1);
  oldPartCount = partCount();

  /* Handle storage complications.  If our new form is wider,
     re-allocate our bit pattern into wider storage.  If it is
     narrower, we ignore the excess parts, but if narrowing to a
     single part we need to free the old storage.
     Be careful not to reference significandParts for zeroes
     and infinities, since it aborts.  */
  if (newPartCount > oldPartCount) {
    integerPart *newParts;
    newParts = new integerPart[newPartCount];
    APInt::tcSet(newParts, 0, newPartCount);
    if (category==fcNormal || category==fcNaN)
      APInt::tcAssign(newParts, significandParts(), oldPartCount);
    freeSignificand();
    significand.parts = newParts;
  } else if (newPartCount < oldPartCount) {
    /* Capture any lost fraction through truncation of parts so we get
       correct rounding whilst normalizing.  */
    if (category==fcNormal)
      lostFraction = lostFractionThroughTruncation
        (significandParts(), oldPartCount, toSemantics.precision);
    if (newPartCount == 1) {
      /* Save the low part before the old storage is released.  */
      integerPart newPart = 0;
      if (category==fcNormal || category==fcNaN)
        newPart = significandParts()[0];
      freeSignificand();
      significand.part = newPart;
    }
  }

  if (category == fcNormal) {
    /* Re-interpret our bit-pattern.  */
    exponent += toSemantics.precision - semantics->precision;
    semantics = &toSemantics;
    fs = normalize(rounding_mode, lostFraction);
    *losesInfo = (fs != opOK);
  } else if (category == fcNaN) {
    int shift = toSemantics.precision - semantics->precision;
    // Do this now so significandParts gets the right answer
    const fltSemantics *oldSemantics = semantics;
    semantics = &toSemantics;
    *losesInfo = false;
    // No normalization here, just truncate
    if (shift>0)
      APInt::tcShiftLeft(significandParts(), newPartCount, shift);
    else if (shift < 0) {
      unsigned ushift = -shift;
      // Figure out if we are losing information.  This happens
      // if we are shifting out something other than 0s, or if the x87 long
      // double input did not have its integer bit set (pseudo-NaN), or if the
      // x87 long double input did not have its QNan bit set (because the x87
      // hardware sets this bit when converting a lower-precision NaN to
      // x87 long double).
      if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
        *losesInfo = true;
      if (oldSemantics == &APFloat::x87DoubleExtended &&
          (!(*significandParts() & 0x8000000000000000ULL) ||
           !(*significandParts() & 0x4000000000000000ULL)))
        *losesInfo = true;
      APInt::tcShiftRight(significandParts(), newPartCount, ushift);
    }
    // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
    // does not give you back the same bits.  This is dubious, and we
    // don't currently do it.  You're really supposed to get
    // an invalid operation signal at runtime, but nobody does that.
    fs = opOK;
  } else {
    /* Zeroes and infinities: only the semantics change.  */
    semantics = &toSemantics;
    fs = opOK;
    *losesInfo = false;
  }

  return fs;
}
1937
/* Convert a floating point number to an integer according to the
   rounding mode.  If the rounded integer value is out of range this
   returns an invalid operation exception and the contents of the
   destination parts are unspecified.  If the rounded value is in
   range but the floating point number is not the exact integer, the C
   standard doesn't require an inexact exception to be raised.  IEEE
   854 does require it so we do that.

   Note that for conversions to integer type the C standard requires
   round-to-zero to always be used.

   PARTS receives the result and is written as partCountForBits(WIDTH)
   parts.  WIDTH is the width of the destination integer in bits and
   ISSIGNED tells whether it is signed.  *ISEXACT is set to true only
   when the value converted without any loss; a negative zero counts as
   inexact even though opOK is returned for it.  */
APFloat::opStatus
APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
                                      bool isSigned,
                                      roundingMode rounding_mode,
                                      bool *isExact) const
{
  lostFraction lost_fraction;
  const integerPart *src;
  unsigned int dstPartsCount, truncatedBits;

  assertArithmeticOK(*semantics);

  *isExact = false;

  /* Handle the three special cases first.  */
  if (category == fcInfinity || category == fcNaN)
    return opInvalidOp;

  dstPartsCount = partCountForBits(width);

  if (category == fcZero) {
    APInt::tcSet(parts, 0, dstPartsCount);
    // Negative zero can't be represented as an int.
    *isExact = !sign;
    return opOK;
  }

  src = significandParts();

  /* Step 1: place our absolute value, with any fraction truncated, in
     the destination.  */
  if (exponent < 0) {
    /* Our absolute value is less than one; truncate everything.  */
    APInt::tcSet(parts, 0, dstPartsCount);
    /* For exponent -1 the integer bit represents .5, look at that.
       For smaller exponents leftmost truncated bit is 0. */
    truncatedBits = semantics->precision -1U - exponent;
  } else {
    /* We want the most significant (exponent + 1) bits; the rest are
       truncated.  */
    unsigned int bits = exponent + 1U;

    /* Hopelessly large in magnitude?  */
    if (bits > width)
      return opInvalidOp;

    if (bits < semantics->precision) {
      /* We truncate (semantics->precision - bits) bits.  */
      truncatedBits = semantics->precision - bits;
      APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
    } else {
      /* We want at least as many bits as are available.  */
      APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
      APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
      truncatedBits = 0;
    }
  }

  /* Step 2: work out any lost fraction, and increment the absolute
     value if we would round away from zero.  */
  if (truncatedBits) {
    lost_fraction = lostFractionThroughTruncation(src, partCount(),
                                                  truncatedBits);
    if (lost_fraction != lfExactlyZero &&
        roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
      if (APInt::tcIncrement(parts, dstPartsCount))
        return opInvalidOp;     /* Overflow.  */
    }
  } else {
    lost_fraction = lfExactlyZero;
  }

  /* Step 3: check if we fit in the destination.  */
  unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;

  if (sign) {
    if (!isSigned) {
      /* Negative numbers cannot be represented as unsigned.  */
      if (omsb != 0)
        return opInvalidOp;
    } else {
      /* It takes omsb bits to represent the unsigned integer value.
         We lose a bit for the sign, but care is needed as the
         maximally negative integer is a special case.  */
      if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
        return opInvalidOp;

      /* This case can happen because of rounding.  */
      if (omsb > width)
        return opInvalidOp;
    }

    /* The destination holds the absolute value so far; negate it.  */
    APInt::tcNegate (parts, dstPartsCount);
  } else {
    /* A non-negative value may use WIDTH bits when unsigned, but only
       WIDTH - 1 bits when signed.  */
    if (omsb >= width + !isSigned)
      return opInvalidOp;
  }

  if (lost_fraction == lfExactlyZero) {
    *isExact = true;
    return opOK;
  } else
    return opInexact;
}
2052
2053 /* Same as convertToSignExtendedInteger, except we provide
2054 deterministic values in case of an invalid operation exception,
2055 namely zero for NaNs and the minimal or maximal value respectively
2056 for underflow or overflow.
2057 The *isExact output tells whether the result is exact, in the sense
2058 that converting it back to the original floating point type produces
2059 the original value. This is almost equivalent to result==opOK,
2060 except for negative zeroes.
2061 */
2062 APFloat::opStatus
convertToInteger(integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2063 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2064 bool isSigned,
2065 roundingMode rounding_mode, bool *isExact) const
2066 {
2067 opStatus fs;
2068
2069 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2070 isExact);
2071
2072 if (fs == opInvalidOp) {
2073 unsigned int bits, dstPartsCount;
2074
2075 dstPartsCount = partCountForBits(width);
2076
2077 if (category == fcNaN)
2078 bits = 0;
2079 else if (sign)
2080 bits = isSigned;
2081 else
2082 bits = width - isSigned;
2083
2084 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2085 if (sign && isSigned)
2086 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2087 }
2088
2089 return fs;
2090 }
2091
2092 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2093 an APSInt, whose initial bit-width and signed-ness are used to determine the
2094 precision of the conversion.
2095 */
2096 APFloat::opStatus
convertToInteger(APSInt & result,roundingMode rounding_mode,bool * isExact) const2097 APFloat::convertToInteger(APSInt &result,
2098 roundingMode rounding_mode, bool *isExact) const
2099 {
2100 unsigned bitWidth = result.getBitWidth();
2101 SmallVector<uint64_t, 4> parts(result.getNumWords());
2102 opStatus status = convertToInteger(
2103 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2104 // Keeps the original signed-ness.
2105 result = APInt(bitWidth, parts);
2106 return status;
2107 }
2108
2109 /* Convert an unsigned integer SRC to a floating point number,
2110 rounding according to ROUNDING_MODE. The sign of the floating
2111 point number is not modified. */
2112 APFloat::opStatus
convertFromUnsignedParts(const integerPart * src,unsigned int srcCount,roundingMode rounding_mode)2113 APFloat::convertFromUnsignedParts(const integerPart *src,
2114 unsigned int srcCount,
2115 roundingMode rounding_mode)
2116 {
2117 unsigned int omsb, precision, dstCount;
2118 integerPart *dst;
2119 lostFraction lost_fraction;
2120
2121 assertArithmeticOK(*semantics);
2122 category = fcNormal;
2123 omsb = APInt::tcMSB(src, srcCount) + 1;
2124 dst = significandParts();
2125 dstCount = partCount();
2126 precision = semantics->precision;
2127
2128 /* We want the most significant PRECISION bits of SRC. There may not
2129 be that many; extract what we can. */
2130 if (precision <= omsb) {
2131 exponent = omsb - 1;
2132 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2133 omsb - precision);
2134 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2135 } else {
2136 exponent = precision - 1;
2137 lost_fraction = lfExactlyZero;
2138 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2139 }
2140
2141 return normalize(rounding_mode, lost_fraction);
2142 }
2143
2144 APFloat::opStatus
convertFromAPInt(const APInt & Val,bool isSigned,roundingMode rounding_mode)2145 APFloat::convertFromAPInt(const APInt &Val,
2146 bool isSigned,
2147 roundingMode rounding_mode)
2148 {
2149 unsigned int partCount = Val.getNumWords();
2150 APInt api = Val;
2151
2152 sign = false;
2153 if (isSigned && api.isNegative()) {
2154 sign = true;
2155 api = -api;
2156 }
2157
2158 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2159 }
2160
2161 /* Convert a two's complement integer SRC to a floating point number,
2162 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2163 integer is signed, in which case it must be sign-extended. */
2164 APFloat::opStatus
convertFromSignExtendedInteger(const integerPart * src,unsigned int srcCount,bool isSigned,roundingMode rounding_mode)2165 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2166 unsigned int srcCount,
2167 bool isSigned,
2168 roundingMode rounding_mode)
2169 {
2170 opStatus status;
2171
2172 assertArithmeticOK(*semantics);
2173 if (isSigned &&
2174 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2175 integerPart *copy;
2176
2177 /* If we're signed and negative negate a copy. */
2178 sign = true;
2179 copy = new integerPart[srcCount];
2180 APInt::tcAssign(copy, src, srcCount);
2181 APInt::tcNegate(copy, srcCount);
2182 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2183 delete [] copy;
2184 } else {
2185 sign = false;
2186 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2187 }
2188
2189 return status;
2190 }
2191
2192 /* FIXME: should this just take a const APInt reference? */
2193 APFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode)2194 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2195 unsigned int width, bool isSigned,
2196 roundingMode rounding_mode)
2197 {
2198 unsigned int partCount = partCountForBits(width);
2199 APInt api = APInt(width, makeArrayRef(parts, partCount));
2200
2201 sign = false;
2202 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2203 sign = true;
2204 api = -api;
2205 }
2206
2207 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2208 }
2209
2210 APFloat::opStatus
convertFromHexadecimalString(StringRef s,roundingMode rounding_mode)2211 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2212 {
2213 lostFraction lost_fraction = lfExactlyZero;
2214 integerPart *significand;
2215 unsigned int bitPos, partsCount;
2216 StringRef::iterator dot, firstSignificantDigit;
2217
2218 zeroSignificand();
2219 exponent = 0;
2220 category = fcNormal;
2221
2222 significand = significandParts();
2223 partsCount = partCount();
2224 bitPos = partsCount * integerPartWidth;
2225
2226 /* Skip leading zeroes and any (hexa)decimal point. */
2227 StringRef::iterator begin = s.begin();
2228 StringRef::iterator end = s.end();
2229 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2230 firstSignificantDigit = p;
2231
2232 for (; p != end;) {
2233 integerPart hex_value;
2234
2235 if (*p == '.') {
2236 assert(dot == end && "String contains multiple dots");
2237 dot = p++;
2238 if (p == end) {
2239 break;
2240 }
2241 }
2242
2243 hex_value = hexDigitValue(*p);
2244 if (hex_value == -1U) {
2245 break;
2246 }
2247
2248 p++;
2249
2250 if (p == end) {
2251 break;
2252 } else {
2253 /* Store the number whilst 4-bit nibbles remain. */
2254 if (bitPos) {
2255 bitPos -= 4;
2256 hex_value <<= bitPos % integerPartWidth;
2257 significand[bitPos / integerPartWidth] |= hex_value;
2258 } else {
2259 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2260 while (p != end && hexDigitValue(*p) != -1U)
2261 p++;
2262 break;
2263 }
2264 }
2265 }
2266
2267 /* Hex floats require an exponent but not a hexadecimal point. */
2268 assert(p != end && "Hex strings require an exponent");
2269 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2270 assert(p != begin && "Significand has no digits");
2271 assert((dot == end || p - begin != 1) && "Significand has no digits");
2272
2273 /* Ignore the exponent if we are zero. */
2274 if (p != firstSignificantDigit) {
2275 int expAdjustment;
2276
2277 /* Implicit hexadecimal point? */
2278 if (dot == end)
2279 dot = p;
2280
2281 /* Calculate the exponent adjustment implicit in the number of
2282 significant digits. */
2283 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2284 if (expAdjustment < 0)
2285 expAdjustment++;
2286 expAdjustment = expAdjustment * 4 - 1;
2287
2288 /* Adjust for writing the significand starting at the most
2289 significant nibble. */
2290 expAdjustment += semantics->precision;
2291 expAdjustment -= partsCount * integerPartWidth;
2292
2293 /* Adjust for the given exponent. */
2294 exponent = totalExponent(p + 1, end, expAdjustment);
2295 }
2296
2297 return normalize(rounding_mode, lost_fraction);
2298 }
2299
2300 APFloat::opStatus
roundSignificandWithExponent(const integerPart * decSigParts,unsigned sigPartCount,int exp,roundingMode rounding_mode)2301 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2302 unsigned sigPartCount, int exp,
2303 roundingMode rounding_mode)
2304 {
2305 unsigned int parts, pow5PartCount;
2306 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2307 integerPart pow5Parts[maxPowerOfFiveParts];
2308 bool isNearest;
2309
2310 isNearest = (rounding_mode == rmNearestTiesToEven ||
2311 rounding_mode == rmNearestTiesToAway);
2312
2313 parts = partCountForBits(semantics->precision + 11);
2314
2315 /* Calculate pow(5, abs(exp)). */
2316 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2317
2318 for (;; parts *= 2) {
2319 opStatus sigStatus, powStatus;
2320 unsigned int excessPrecision, truncatedBits;
2321
2322 calcSemantics.precision = parts * integerPartWidth - 1;
2323 excessPrecision = calcSemantics.precision - semantics->precision;
2324 truncatedBits = excessPrecision;
2325
2326 APFloat decSig(calcSemantics, fcZero, sign);
2327 APFloat pow5(calcSemantics, fcZero, false);
2328
2329 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2330 rmNearestTiesToEven);
2331 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2332 rmNearestTiesToEven);
2333 /* Add exp, as 10^n = 5^n * 2^n. */
2334 decSig.exponent += exp;
2335
2336 lostFraction calcLostFraction;
2337 integerPart HUerr, HUdistance;
2338 unsigned int powHUerr;
2339
2340 if (exp >= 0) {
2341 /* multiplySignificand leaves the precision-th bit set to 1. */
2342 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2343 powHUerr = powStatus != opOK;
2344 } else {
2345 calcLostFraction = decSig.divideSignificand(pow5);
2346 /* Denormal numbers have less precision. */
2347 if (decSig.exponent < semantics->minExponent) {
2348 excessPrecision += (semantics->minExponent - decSig.exponent);
2349 truncatedBits = excessPrecision;
2350 if (excessPrecision > calcSemantics.precision)
2351 excessPrecision = calcSemantics.precision;
2352 }
2353 /* Extra half-ulp lost in reciprocal of exponent. */
2354 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2355 }
2356
2357 /* Both multiplySignificand and divideSignificand return the
2358 result with the integer bit set. */
2359 assert(APInt::tcExtractBit
2360 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2361
2362 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2363 powHUerr);
2364 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2365 excessPrecision, isNearest);
2366
2367 /* Are we guaranteed to round correctly if we truncate? */
2368 if (HUdistance >= HUerr) {
2369 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2370 calcSemantics.precision - excessPrecision,
2371 excessPrecision);
2372 /* Take the exponent of decSig. If we tcExtract-ed less bits
2373 above we must adjust our exponent to compensate for the
2374 implicit right shift. */
2375 exponent = (decSig.exponent + semantics->precision
2376 - (calcSemantics.precision - excessPrecision));
2377 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2378 decSig.partCount(),
2379 truncatedBits);
2380 return normalize(rounding_mode, calcLostFraction);
2381 }
2382 }
2383 }
2384
2385 APFloat::opStatus
convertFromDecimalString(StringRef str,roundingMode rounding_mode)2386 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2387 {
2388 decimalInfo D;
2389 opStatus fs;
2390
2391 /* Scan the text. */
2392 StringRef::iterator p = str.begin();
2393 interpretDecimal(p, str.end(), &D);
2394
2395 /* Handle the quick cases. First the case of no significant digits,
2396 i.e. zero, and then exponents that are obviously too large or too
2397 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2398 definitely overflows if
2399
2400 (exp - 1) * L >= maxExponent
2401
2402 and definitely underflows to zero where
2403
2404 (exp + 1) * L <= minExponent - precision
2405
2406 With integer arithmetic the tightest bounds for L are
2407
2408 93/28 < L < 196/59 [ numerator <= 256 ]
2409 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2410 */
2411
2412 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2413 category = fcZero;
2414 fs = opOK;
2415
2416 /* Check whether the normalized exponent is high enough to overflow
2417 max during the log-rebasing in the max-exponent check below. */
2418 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2419 fs = handleOverflow(rounding_mode);
2420
2421 /* If it wasn't, then it also wasn't high enough to overflow max
2422 during the log-rebasing in the min-exponent check. Check that it
2423 won't overflow min in either check, then perform the min-exponent
2424 check. */
2425 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2426 (D.normalizedExponent + 1) * 28738 <=
2427 8651 * (semantics->minExponent - (int) semantics->precision)) {
2428 /* Underflow to zero and round. */
2429 zeroSignificand();
2430 fs = normalize(rounding_mode, lfLessThanHalf);
2431
2432 /* We can finally safely perform the max-exponent check. */
2433 } else if ((D.normalizedExponent - 1) * 42039
2434 >= 12655 * semantics->maxExponent) {
2435 /* Overflow and round. */
2436 fs = handleOverflow(rounding_mode);
2437 } else {
2438 integerPart *decSignificand;
2439 unsigned int partCount;
2440
2441 /* A tight upper bound on number of bits required to hold an
2442 N-digit decimal integer is N * 196 / 59. Allocate enough space
2443 to hold the full significand, and an extra part required by
2444 tcMultiplyPart. */
2445 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2446 partCount = partCountForBits(1 + 196 * partCount / 59);
2447 decSignificand = new integerPart[partCount + 1];
2448 partCount = 0;
2449
2450 /* Convert to binary efficiently - we do almost all multiplication
2451 in an integerPart. When this would overflow do we do a single
2452 bignum multiplication, and then revert again to multiplication
2453 in an integerPart. */
2454 do {
2455 integerPart decValue, val, multiplier;
2456
2457 val = 0;
2458 multiplier = 1;
2459
2460 do {
2461 if (*p == '.') {
2462 p++;
2463 if (p == str.end()) {
2464 break;
2465 }
2466 }
2467 decValue = decDigitValue(*p++);
2468 assert(decValue < 10U && "Invalid character in significand");
2469 multiplier *= 10;
2470 val = val * 10 + decValue;
2471 /* The maximum number that can be multiplied by ten with any
2472 digit added without overflowing an integerPart. */
2473 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2474
2475 /* Multiply out the current part. */
2476 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2477 partCount, partCount + 1, false);
2478
2479 /* If we used another part (likely but not guaranteed), increase
2480 the count. */
2481 if (decSignificand[partCount])
2482 partCount++;
2483 } while (p <= D.lastSigDigit);
2484
2485 category = fcNormal;
2486 fs = roundSignificandWithExponent(decSignificand, partCount,
2487 D.exponent, rounding_mode);
2488
2489 delete [] decSignificand;
2490 }
2491
2492 return fs;
2493 }
2494
2495 APFloat::opStatus
convertFromString(StringRef str,roundingMode rounding_mode)2496 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2497 {
2498 assertArithmeticOK(*semantics);
2499 assert(!str.empty() && "Invalid string length");
2500
2501 /* Handle a leading minus sign. */
2502 StringRef::iterator p = str.begin();
2503 size_t slen = str.size();
2504 sign = *p == '-' ? 1 : 0;
2505 if (*p == '-' || *p == '+') {
2506 p++;
2507 slen--;
2508 assert(slen && "String has no digits");
2509 }
2510
2511 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2512 assert(slen - 2 && "Invalid string");
2513 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2514 rounding_mode);
2515 }
2516
2517 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2518 }
2519
2520 /* Write out a hexadecimal representation of the floating point value
2521 to DST, which must be of sufficient size, in the C99 form
2522 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2523 excluding the terminating NUL.
2524
2525 If UPPERCASE, the output is in upper case, otherwise in lower case.
2526
2527 HEXDIGITS digits appear altogether, rounding the value if
2528 necessary. If HEXDIGITS is 0, the minimal precision to display the
2529 number precisely is used instead. If nothing would appear after
2530 the decimal point it is suppressed.
2531
2532 The decimal exponent is always printed and has at least one digit.
2533 Zero values display an exponent of zero. Infinities and NaNs
2534 appear as "infinity" or "nan" respectively.
2535
2536 The above rules are as specified by C99. There is ambiguity about
2537 what the leading hexadecimal digit should be. This implementation
2538 uses whatever is necessary so that the exponent is displayed as
2539 stored. This implies the exponent will fall within the IEEE format
2540 range, and the leading hexadecimal digit will be 0 (for denormals),
2541 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2542 any other digits zero).
2543 */
2544 unsigned int
convertToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2545 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2546 bool upperCase, roundingMode rounding_mode) const
2547 {
2548 char *p;
2549
2550 assertArithmeticOK(*semantics);
2551
2552 p = dst;
2553 if (sign)
2554 *dst++ = '-';
2555
2556 switch (category) {
2557 case fcInfinity:
2558 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2559 dst += sizeof infinityL - 1;
2560 break;
2561
2562 case fcNaN:
2563 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2564 dst += sizeof NaNU - 1;
2565 break;
2566
2567 case fcZero:
2568 *dst++ = '0';
2569 *dst++ = upperCase ? 'X': 'x';
2570 *dst++ = '0';
2571 if (hexDigits > 1) {
2572 *dst++ = '.';
2573 memset (dst, '0', hexDigits - 1);
2574 dst += hexDigits - 1;
2575 }
2576 *dst++ = upperCase ? 'P': 'p';
2577 *dst++ = '0';
2578 break;
2579
2580 case fcNormal:
2581 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2582 break;
2583 }
2584
2585 *dst = 0;
2586
2587 return static_cast<unsigned int>(dst - p);
2588 }
2589
2590 /* Does the hard work of outputting the correctly rounded hexadecimal
2591 form of a normal floating point number with the specified number of
2592 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2593 digits necessary to print the value precisely is output. */
2594 char *
convertNormalToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2595 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2596 bool upperCase,
2597 roundingMode rounding_mode) const
2598 {
2599 unsigned int count, valueBits, shift, partsCount, outputDigits;
2600 const char *hexDigitChars;
2601 const integerPart *significand;
2602 char *p;
2603 bool roundUp;
2604
2605 *dst++ = '0';
2606 *dst++ = upperCase ? 'X': 'x';
2607
2608 roundUp = false;
2609 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2610
2611 significand = significandParts();
2612 partsCount = partCount();
2613
2614 /* +3 because the first digit only uses the single integer bit, so
2615 we have 3 virtual zero most-significant-bits. */
2616 valueBits = semantics->precision + 3;
2617 shift = integerPartWidth - valueBits % integerPartWidth;
2618
2619 /* The natural number of digits required ignoring trailing
2620 insignificant zeroes. */
2621 outputDigits = (valueBits - significandLSB () + 3) / 4;
2622
2623 /* hexDigits of zero means use the required number for the
2624 precision. Otherwise, see if we are truncating. If we are,
2625 find out if we need to round away from zero. */
2626 if (hexDigits) {
2627 if (hexDigits < outputDigits) {
2628 /* We are dropping non-zero bits, so need to check how to round.
2629 "bits" is the number of dropped bits. */
2630 unsigned int bits;
2631 lostFraction fraction;
2632
2633 bits = valueBits - hexDigits * 4;
2634 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2635 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2636 }
2637 outputDigits = hexDigits;
2638 }
2639
2640 /* Write the digits consecutively, and start writing in the location
2641 of the hexadecimal point. We move the most significant digit
2642 left and add the hexadecimal point later. */
2643 p = ++dst;
2644
2645 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2646
2647 while (outputDigits && count) {
2648 integerPart part;
2649
2650 /* Put the most significant integerPartWidth bits in "part". */
2651 if (--count == partsCount)
2652 part = 0; /* An imaginary higher zero part. */
2653 else
2654 part = significand[count] << shift;
2655
2656 if (count && shift)
2657 part |= significand[count - 1] >> (integerPartWidth - shift);
2658
2659 /* Convert as much of "part" to hexdigits as we can. */
2660 unsigned int curDigits = integerPartWidth / 4;
2661
2662 if (curDigits > outputDigits)
2663 curDigits = outputDigits;
2664 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2665 outputDigits -= curDigits;
2666 }
2667
2668 if (roundUp) {
2669 char *q = dst;
2670
2671 /* Note that hexDigitChars has a trailing '0'. */
2672 do {
2673 q--;
2674 *q = hexDigitChars[hexDigitValue (*q) + 1];
2675 } while (*q == '0');
2676 assert(q >= p);
2677 } else {
2678 /* Add trailing zeroes. */
2679 memset (dst, '0', outputDigits);
2680 dst += outputDigits;
2681 }
2682
2683 /* Move the most significant digit to before the point, and if there
2684 is something after the decimal point add it. This must come
2685 after rounding above. */
2686 p[-1] = p[0];
2687 if (dst -1 == p)
2688 dst--;
2689 else
2690 p[0] = '.';
2691
2692 /* Finally output the exponent. */
2693 *dst++ = upperCase ? 'P': 'p';
2694
2695 return writeSignedDecimal (dst, exponent);
2696 }
2697
2698 // For good performance it is desirable for different APFloats
2699 // to produce different integers.
2700 uint32_t
getHashValue() const2701 APFloat::getHashValue() const
2702 {
2703 if (category==fcZero) return sign<<8 | semantics->precision ;
2704 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2705 else if (category==fcNaN) return 1<<10 | semantics->precision;
2706 else {
2707 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2708 const integerPart* p = significandParts();
2709 for (int i=partCount(); i>0; i--, p++)
2710 hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
2711 return hash;
2712 }
2713 }
2714
2715 // Conversion from APFloat to/from host float/double. It may eventually be
2716 // possible to eliminate these and have everybody deal with APFloats, but that
2717 // will take a while. This approach will not easily extend to long double.
2718 // Current implementation requires integerPartWidth==64, which is correct at
2719 // the moment but could be made more general.
2720
2721 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
2723
2724 APInt
convertF80LongDoubleAPFloatToAPInt() const2725 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2726 {
2727 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2728 assert(partCount()==2);
2729
2730 uint64_t myexponent, mysignificand;
2731
2732 if (category==fcNormal) {
2733 myexponent = exponent+16383; //bias
2734 mysignificand = significandParts()[0];
2735 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2736 myexponent = 0; // denormal
2737 } else if (category==fcZero) {
2738 myexponent = 0;
2739 mysignificand = 0;
2740 } else if (category==fcInfinity) {
2741 myexponent = 0x7fff;
2742 mysignificand = 0x8000000000000000ULL;
2743 } else {
2744 assert(category == fcNaN && "Unknown category");
2745 myexponent = 0x7fff;
2746 mysignificand = significandParts()[0];
2747 }
2748
2749 uint64_t words[2];
2750 words[0] = mysignificand;
2751 words[1] = ((uint64_t)(sign & 1) << 15) |
2752 (myexponent & 0x7fffLL);
2753 return APInt(80, words);
2754 }
2755
2756 APInt
convertPPCDoubleDoubleAPFloatToAPInt() const2757 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2758 {
2759 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2760 assert(partCount()==2);
2761
2762 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2763
2764 if (category==fcNormal) {
2765 myexponent = exponent + 1023; //bias
2766 myexponent2 = exponent2 + 1023;
2767 mysignificand = significandParts()[0];
2768 mysignificand2 = significandParts()[1];
2769 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2770 myexponent = 0; // denormal
2771 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2772 myexponent2 = 0; // denormal
2773 } else if (category==fcZero) {
2774 myexponent = 0;
2775 mysignificand = 0;
2776 myexponent2 = 0;
2777 mysignificand2 = 0;
2778 } else if (category==fcInfinity) {
2779 myexponent = 0x7ff;
2780 myexponent2 = 0;
2781 mysignificand = 0;
2782 mysignificand2 = 0;
2783 } else {
2784 assert(category == fcNaN && "Unknown category");
2785 myexponent = 0x7ff;
2786 mysignificand = significandParts()[0];
2787 myexponent2 = exponent2;
2788 mysignificand2 = significandParts()[1];
2789 }
2790
2791 uint64_t words[2];
2792 words[0] = ((uint64_t)(sign & 1) << 63) |
2793 ((myexponent & 0x7ff) << 52) |
2794 (mysignificand & 0xfffffffffffffLL);
2795 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2796 ((myexponent2 & 0x7ff) << 52) |
2797 (mysignificand2 & 0xfffffffffffffLL);
2798 return APInt(128, words);
2799 }
2800
2801 APInt
convertQuadrupleAPFloatToAPInt() const2802 APFloat::convertQuadrupleAPFloatToAPInt() const
2803 {
2804 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2805 assert(partCount()==2);
2806
2807 uint64_t myexponent, mysignificand, mysignificand2;
2808
2809 if (category==fcNormal) {
2810 myexponent = exponent+16383; //bias
2811 mysignificand = significandParts()[0];
2812 mysignificand2 = significandParts()[1];
2813 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2814 myexponent = 0; // denormal
2815 } else if (category==fcZero) {
2816 myexponent = 0;
2817 mysignificand = mysignificand2 = 0;
2818 } else if (category==fcInfinity) {
2819 myexponent = 0x7fff;
2820 mysignificand = mysignificand2 = 0;
2821 } else {
2822 assert(category == fcNaN && "Unknown category!");
2823 myexponent = 0x7fff;
2824 mysignificand = significandParts()[0];
2825 mysignificand2 = significandParts()[1];
2826 }
2827
2828 uint64_t words[2];
2829 words[0] = mysignificand;
2830 words[1] = ((uint64_t)(sign & 1) << 63) |
2831 ((myexponent & 0x7fff) << 48) |
2832 (mysignificand2 & 0xffffffffffffLL);
2833
2834 return APInt(128, words);
2835 }
2836
2837 APInt
convertDoubleAPFloatToAPInt() const2838 APFloat::convertDoubleAPFloatToAPInt() const
2839 {
2840 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2841 assert(partCount()==1);
2842
2843 uint64_t myexponent, mysignificand;
2844
2845 if (category==fcNormal) {
2846 myexponent = exponent+1023; //bias
2847 mysignificand = *significandParts();
2848 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2849 myexponent = 0; // denormal
2850 } else if (category==fcZero) {
2851 myexponent = 0;
2852 mysignificand = 0;
2853 } else if (category==fcInfinity) {
2854 myexponent = 0x7ff;
2855 mysignificand = 0;
2856 } else {
2857 assert(category == fcNaN && "Unknown category!");
2858 myexponent = 0x7ff;
2859 mysignificand = *significandParts();
2860 }
2861
2862 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2863 ((myexponent & 0x7ff) << 52) |
2864 (mysignificand & 0xfffffffffffffLL))));
2865 }
2866
2867 APInt
convertFloatAPFloatToAPInt() const2868 APFloat::convertFloatAPFloatToAPInt() const
2869 {
2870 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2871 assert(partCount()==1);
2872
2873 uint32_t myexponent, mysignificand;
2874
2875 if (category==fcNormal) {
2876 myexponent = exponent+127; //bias
2877 mysignificand = (uint32_t)*significandParts();
2878 if (myexponent == 1 && !(mysignificand & 0x800000))
2879 myexponent = 0; // denormal
2880 } else if (category==fcZero) {
2881 myexponent = 0;
2882 mysignificand = 0;
2883 } else if (category==fcInfinity) {
2884 myexponent = 0xff;
2885 mysignificand = 0;
2886 } else {
2887 assert(category == fcNaN && "Unknown category!");
2888 myexponent = 0xff;
2889 mysignificand = (uint32_t)*significandParts();
2890 }
2891
2892 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2893 (mysignificand & 0x7fffff)));
2894 }
2895
2896 APInt
convertHalfAPFloatToAPInt() const2897 APFloat::convertHalfAPFloatToAPInt() const
2898 {
2899 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2900 assert(partCount()==1);
2901
2902 uint32_t myexponent, mysignificand;
2903
2904 if (category==fcNormal) {
2905 myexponent = exponent+15; //bias
2906 mysignificand = (uint32_t)*significandParts();
2907 if (myexponent == 1 && !(mysignificand & 0x400))
2908 myexponent = 0; // denormal
2909 } else if (category==fcZero) {
2910 myexponent = 0;
2911 mysignificand = 0;
2912 } else if (category==fcInfinity) {
2913 myexponent = 0x1f;
2914 mysignificand = 0;
2915 } else {
2916 assert(category == fcNaN && "Unknown category!");
2917 myexponent = 0x1f;
2918 mysignificand = (uint32_t)*significandParts();
2919 }
2920
2921 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2922 (mysignificand & 0x3ff)));
2923 }
2924
2925 // This function creates an APInt that is just a bit map of the floating
2926 // point constant as it would appear in memory. It is not a conversion,
2927 // and treating the result as a normal integer is unlikely to be useful.
2928
2929 APInt
bitcastToAPInt() const2930 APFloat::bitcastToAPInt() const
2931 {
2932 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2933 return convertHalfAPFloatToAPInt();
2934
2935 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2936 return convertFloatAPFloatToAPInt();
2937
2938 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2939 return convertDoubleAPFloatToAPInt();
2940
2941 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2942 return convertQuadrupleAPFloatToAPInt();
2943
2944 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2945 return convertPPCDoubleDoubleAPFloatToAPInt();
2946
2947 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2948 "unknown format!");
2949 return convertF80LongDoubleAPFloatToAPInt();
2950 }
2951
2952 float
convertToFloat() const2953 APFloat::convertToFloat() const
2954 {
2955 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2956 "Float semantics are not IEEEsingle");
2957 APInt api = bitcastToAPInt();
2958 return api.bitsToFloat();
2959 }
2960
2961 double
convertToDouble() const2962 APFloat::convertToDouble() const
2963 {
2964 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2965 "Float semantics are not IEEEdouble");
2966 APInt api = bitcastToAPInt();
2967 return api.bitsToDouble();
2968 }
2969
2970 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2971 /// does not support these bit patterns:
2972 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2973 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2974 /// exponent = 0, integer bit 1 ("pseudodenormal")
2975 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2976 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2977 void
initFromF80LongDoubleAPInt(const APInt & api)2978 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2979 {
2980 assert(api.getBitWidth()==80);
2981 uint64_t i1 = api.getRawData()[0];
2982 uint64_t i2 = api.getRawData()[1];
2983 uint64_t myexponent = (i2 & 0x7fff);
2984 uint64_t mysignificand = i1;
2985
2986 initialize(&APFloat::x87DoubleExtended);
2987 assert(partCount()==2);
2988
2989 sign = static_cast<unsigned int>(i2>>15);
2990 if (myexponent==0 && mysignificand==0) {
2991 // exponent, significand meaningless
2992 category = fcZero;
2993 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2994 // exponent, significand meaningless
2995 category = fcInfinity;
2996 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2997 // exponent meaningless
2998 category = fcNaN;
2999 significandParts()[0] = mysignificand;
3000 significandParts()[1] = 0;
3001 } else {
3002 category = fcNormal;
3003 exponent = myexponent - 16383;
3004 significandParts()[0] = mysignificand;
3005 significandParts()[1] = 0;
3006 if (myexponent==0) // denormal
3007 exponent = -16382;
3008 }
3009 }
3010
3011 void
initFromPPCDoubleDoubleAPInt(const APInt & api)3012 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3013 {
3014 assert(api.getBitWidth()==128);
3015 uint64_t i1 = api.getRawData()[0];
3016 uint64_t i2 = api.getRawData()[1];
3017 uint64_t myexponent = (i1 >> 52) & 0x7ff;
3018 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
3019 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
3020 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
3021
3022 initialize(&APFloat::PPCDoubleDouble);
3023 assert(partCount()==2);
3024
3025 sign = static_cast<unsigned int>(i1>>63);
3026 sign2 = static_cast<unsigned int>(i2>>63);
3027 if (myexponent==0 && mysignificand==0) {
3028 // exponent, significand meaningless
3029 // exponent2 and significand2 are required to be 0; we don't check
3030 category = fcZero;
3031 } else if (myexponent==0x7ff && mysignificand==0) {
3032 // exponent, significand meaningless
3033 // exponent2 and significand2 are required to be 0; we don't check
3034 category = fcInfinity;
3035 } else if (myexponent==0x7ff && mysignificand!=0) {
3036 // exponent meaningless. So is the whole second word, but keep it
3037 // for determinism.
3038 category = fcNaN;
3039 exponent2 = myexponent2;
3040 significandParts()[0] = mysignificand;
3041 significandParts()[1] = mysignificand2;
3042 } else {
3043 category = fcNormal;
3044 // Note there is no category2; the second word is treated as if it is
3045 // fcNormal, although it might be something else considered by itself.
3046 exponent = myexponent - 1023;
3047 exponent2 = myexponent2 - 1023;
3048 significandParts()[0] = mysignificand;
3049 significandParts()[1] = mysignificand2;
3050 if (myexponent==0) // denormal
3051 exponent = -1022;
3052 else
3053 significandParts()[0] |= 0x10000000000000LL; // integer bit
3054 if (myexponent2==0)
3055 exponent2 = -1022;
3056 else
3057 significandParts()[1] |= 0x10000000000000LL; // integer bit
3058 }
3059 }
3060
3061 void
initFromQuadrupleAPInt(const APInt & api)3062 APFloat::initFromQuadrupleAPInt(const APInt &api)
3063 {
3064 assert(api.getBitWidth()==128);
3065 uint64_t i1 = api.getRawData()[0];
3066 uint64_t i2 = api.getRawData()[1];
3067 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3068 uint64_t mysignificand = i1;
3069 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3070
3071 initialize(&APFloat::IEEEquad);
3072 assert(partCount()==2);
3073
3074 sign = static_cast<unsigned int>(i2>>63);
3075 if (myexponent==0 &&
3076 (mysignificand==0 && mysignificand2==0)) {
3077 // exponent, significand meaningless
3078 category = fcZero;
3079 } else if (myexponent==0x7fff &&
3080 (mysignificand==0 && mysignificand2==0)) {
3081 // exponent, significand meaningless
3082 category = fcInfinity;
3083 } else if (myexponent==0x7fff &&
3084 (mysignificand!=0 || mysignificand2 !=0)) {
3085 // exponent meaningless
3086 category = fcNaN;
3087 significandParts()[0] = mysignificand;
3088 significandParts()[1] = mysignificand2;
3089 } else {
3090 category = fcNormal;
3091 exponent = myexponent - 16383;
3092 significandParts()[0] = mysignificand;
3093 significandParts()[1] = mysignificand2;
3094 if (myexponent==0) // denormal
3095 exponent = -16382;
3096 else
3097 significandParts()[1] |= 0x1000000000000LL; // integer bit
3098 }
3099 }
3100
3101 void
initFromDoubleAPInt(const APInt & api)3102 APFloat::initFromDoubleAPInt(const APInt &api)
3103 {
3104 assert(api.getBitWidth()==64);
3105 uint64_t i = *api.getRawData();
3106 uint64_t myexponent = (i >> 52) & 0x7ff;
3107 uint64_t mysignificand = i & 0xfffffffffffffLL;
3108
3109 initialize(&APFloat::IEEEdouble);
3110 assert(partCount()==1);
3111
3112 sign = static_cast<unsigned int>(i>>63);
3113 if (myexponent==0 && mysignificand==0) {
3114 // exponent, significand meaningless
3115 category = fcZero;
3116 } else if (myexponent==0x7ff && mysignificand==0) {
3117 // exponent, significand meaningless
3118 category = fcInfinity;
3119 } else if (myexponent==0x7ff && mysignificand!=0) {
3120 // exponent meaningless
3121 category = fcNaN;
3122 *significandParts() = mysignificand;
3123 } else {
3124 category = fcNormal;
3125 exponent = myexponent - 1023;
3126 *significandParts() = mysignificand;
3127 if (myexponent==0) // denormal
3128 exponent = -1022;
3129 else
3130 *significandParts() |= 0x10000000000000LL; // integer bit
3131 }
3132 }
3133
3134 void
initFromFloatAPInt(const APInt & api)3135 APFloat::initFromFloatAPInt(const APInt & api)
3136 {
3137 assert(api.getBitWidth()==32);
3138 uint32_t i = (uint32_t)*api.getRawData();
3139 uint32_t myexponent = (i >> 23) & 0xff;
3140 uint32_t mysignificand = i & 0x7fffff;
3141
3142 initialize(&APFloat::IEEEsingle);
3143 assert(partCount()==1);
3144
3145 sign = i >> 31;
3146 if (myexponent==0 && mysignificand==0) {
3147 // exponent, significand meaningless
3148 category = fcZero;
3149 } else if (myexponent==0xff && mysignificand==0) {
3150 // exponent, significand meaningless
3151 category = fcInfinity;
3152 } else if (myexponent==0xff && mysignificand!=0) {
3153 // sign, exponent, significand meaningless
3154 category = fcNaN;
3155 *significandParts() = mysignificand;
3156 } else {
3157 category = fcNormal;
3158 exponent = myexponent - 127; //bias
3159 *significandParts() = mysignificand;
3160 if (myexponent==0) // denormal
3161 exponent = -126;
3162 else
3163 *significandParts() |= 0x800000; // integer bit
3164 }
3165 }
3166
3167 void
initFromHalfAPInt(const APInt & api)3168 APFloat::initFromHalfAPInt(const APInt & api)
3169 {
3170 assert(api.getBitWidth()==16);
3171 uint32_t i = (uint32_t)*api.getRawData();
3172 uint32_t myexponent = (i >> 10) & 0x1f;
3173 uint32_t mysignificand = i & 0x3ff;
3174
3175 initialize(&APFloat::IEEEhalf);
3176 assert(partCount()==1);
3177
3178 sign = i >> 15;
3179 if (myexponent==0 && mysignificand==0) {
3180 // exponent, significand meaningless
3181 category = fcZero;
3182 } else if (myexponent==0x1f && mysignificand==0) {
3183 // exponent, significand meaningless
3184 category = fcInfinity;
3185 } else if (myexponent==0x1f && mysignificand!=0) {
3186 // sign, exponent, significand meaningless
3187 category = fcNaN;
3188 *significandParts() = mysignificand;
3189 } else {
3190 category = fcNormal;
3191 exponent = myexponent - 15; //bias
3192 *significandParts() = mysignificand;
3193 if (myexponent==0) // denormal
3194 exponent = -14;
3195 else
3196 *significandParts() |= 0x400; // integer bit
3197 }
3198 }
3199
3200 /// Treat api as containing the bits of a floating point number. Currently
3201 /// we infer the floating point type from the size of the APInt. The
3202 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3203 /// when the size is anything else).
3204 void
initFromAPInt(const APInt & api,bool isIEEE)3205 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
3206 {
3207 if (api.getBitWidth() == 16)
3208 return initFromHalfAPInt(api);
3209 else if (api.getBitWidth() == 32)
3210 return initFromFloatAPInt(api);
3211 else if (api.getBitWidth()==64)
3212 return initFromDoubleAPInt(api);
3213 else if (api.getBitWidth()==80)
3214 return initFromF80LongDoubleAPInt(api);
3215 else if (api.getBitWidth()==128)
3216 return (isIEEE ?
3217 initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
3218 else
3219 llvm_unreachable(0);
3220 }
3221
3222 APFloat
getAllOnesValue(unsigned BitWidth,bool isIEEE)3223 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3224 {
3225 return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE);
3226 }
3227
getLargest(const fltSemantics & Sem,bool Negative)3228 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3229 APFloat Val(Sem, fcNormal, Negative);
3230
3231 // We want (in interchange format):
3232 // sign = {Negative}
3233 // exponent = 1..10
3234 // significand = 1..1
3235
3236 Val.exponent = Sem.maxExponent; // unbiased
3237
3238 // 1-initialize all bits....
3239 Val.zeroSignificand();
3240 integerPart *significand = Val.significandParts();
3241 unsigned N = partCountForBits(Sem.precision);
3242 for (unsigned i = 0; i != N; ++i)
3243 significand[i] = ~((integerPart) 0);
3244
3245 // ...and then clear the top bits for internal consistency.
3246 if (Sem.precision % integerPartWidth != 0)
3247 significand[N-1] &=
3248 (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
3249
3250 return Val;
3251 }
3252
getSmallest(const fltSemantics & Sem,bool Negative)3253 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3254 APFloat Val(Sem, fcNormal, Negative);
3255
3256 // We want (in interchange format):
3257 // sign = {Negative}
3258 // exponent = 0..0
3259 // significand = 0..01
3260
3261 Val.exponent = Sem.minExponent; // unbiased
3262 Val.zeroSignificand();
3263 Val.significandParts()[0] = 1;
3264 return Val;
3265 }
3266
getSmallestNormalized(const fltSemantics & Sem,bool Negative)3267 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3268 APFloat Val(Sem, fcNormal, Negative);
3269
3270 // We want (in interchange format):
3271 // sign = {Negative}
3272 // exponent = 0..0
3273 // significand = 10..0
3274
3275 Val.exponent = Sem.minExponent;
3276 Val.zeroSignificand();
3277 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3278 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3279
3280 return Val;
3281 }
3282
APFloat(const APInt & api,bool isIEEE)3283 APFloat::APFloat(const APInt& api, bool isIEEE) : exponent2(0), sign2(0) {
3284 initFromAPInt(api, isIEEE);
3285 }
3286
APFloat(float f)3287 APFloat::APFloat(float f) : exponent2(0), sign2(0) {
3288 initFromAPInt(APInt::floatToBits(f));
3289 }
3290
APFloat(double d)3291 APFloat::APFloat(double d) : exponent2(0), sign2(0) {
3292 initFromAPInt(APInt::doubleToBits(d));
3293 }
3294
3295 namespace {
append(SmallVectorImpl<char> & Buffer,unsigned N,const char * Str)3296 static void append(SmallVectorImpl<char> &Buffer,
3297 unsigned N, const char *Str) {
3298 unsigned Start = Buffer.size();
3299 Buffer.set_size(Start + N);
3300 memcpy(&Buffer[Start], Str, N);
3301 }
3302
3303 template <unsigned N>
append(SmallVectorImpl<char> & Buffer,const char (& Str)[N])3304 void append(SmallVectorImpl<char> &Buffer, const char (&Str)[N]) {
3305 append(Buffer, N, Str);
3306 }
3307
3308 /// Removes data from the given significand until it is no more
3309 /// precise than is required for the desired precision.
AdjustToPrecision(APInt & significand,int & exp,unsigned FormatPrecision)3310 void AdjustToPrecision(APInt &significand,
3311 int &exp, unsigned FormatPrecision) {
3312 unsigned bits = significand.getActiveBits();
3313
3314 // 196/59 is a very slight overestimate of lg_2(10).
3315 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3316
3317 if (bits <= bitsRequired) return;
3318
3319 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3320 if (!tensRemovable) return;
3321
3322 exp += tensRemovable;
3323
3324 APInt divisor(significand.getBitWidth(), 1);
3325 APInt powten(significand.getBitWidth(), 10);
3326 while (true) {
3327 if (tensRemovable & 1)
3328 divisor *= powten;
3329 tensRemovable >>= 1;
3330 if (!tensRemovable) break;
3331 powten *= powten;
3332 }
3333
3334 significand = significand.udiv(divisor);
3335
3336 // Truncate the significand down to its active bit count, but
3337 // don't try to drop below 32.
3338 unsigned newPrecision = std::max(32U, significand.getActiveBits());
3339 significand = significand.trunc(newPrecision);
3340 }
3341
3342
AdjustToPrecision(SmallVectorImpl<char> & buffer,int & exp,unsigned FormatPrecision)3343 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3344 int &exp, unsigned FormatPrecision) {
3345 unsigned N = buffer.size();
3346 if (N <= FormatPrecision) return;
3347
3348 // The most significant figures are the last ones in the buffer.
3349 unsigned FirstSignificant = N - FormatPrecision;
3350
3351 // Round.
3352 // FIXME: this probably shouldn't use 'round half up'.
3353
3354 // Rounding down is just a truncation, except we also want to drop
3355 // trailing zeros from the new result.
3356 if (buffer[FirstSignificant - 1] < '5') {
3357 while (buffer[FirstSignificant] == '0')
3358 FirstSignificant++;
3359
3360 exp += FirstSignificant;
3361 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3362 return;
3363 }
3364
3365 // Rounding up requires a decimal add-with-carry. If we continue
3366 // the carry, the newly-introduced zeros will just be truncated.
3367 for (unsigned I = FirstSignificant; I != N; ++I) {
3368 if (buffer[I] == '9') {
3369 FirstSignificant++;
3370 } else {
3371 buffer[I]++;
3372 break;
3373 }
3374 }
3375
3376 // If we carried through, we have exactly one digit of precision.
3377 if (FirstSignificant == N) {
3378 exp += FirstSignificant;
3379 buffer.clear();
3380 buffer.push_back('1');
3381 return;
3382 }
3383
3384 exp += FirstSignificant;
3385 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3386 }
3387 }
3388
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding) const3389 void APFloat::toString(SmallVectorImpl<char> &Str,
3390 unsigned FormatPrecision,
3391 unsigned FormatMaxPadding) const {
3392 switch (category) {
3393 case fcInfinity:
3394 if (isNegative())
3395 return append(Str, "-Inf");
3396 else
3397 return append(Str, "+Inf");
3398
3399 case fcNaN: return append(Str, "NaN");
3400
3401 case fcZero:
3402 if (isNegative())
3403 Str.push_back('-');
3404
3405 if (!FormatMaxPadding)
3406 append(Str, "0.0E+0");
3407 else
3408 Str.push_back('0');
3409 return;
3410
3411 case fcNormal:
3412 break;
3413 }
3414
3415 if (isNegative())
3416 Str.push_back('-');
3417
3418 // Decompose the number into an APInt and an exponent.
3419 int exp = exponent - ((int) semantics->precision - 1);
3420 APInt significand(semantics->precision,
3421 makeArrayRef(significandParts(),
3422 partCountForBits(semantics->precision)));
3423
3424 // Set FormatPrecision if zero. We want to do this before we
3425 // truncate trailing zeros, as those are part of the precision.
3426 if (!FormatPrecision) {
3427 // It's an interesting question whether to use the nominal
3428 // precision or the active precision here for denormals.
3429
3430 // FormatPrecision = ceil(significandBits / lg_2(10))
3431 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3432 }
3433
3434 // Ignore trailing binary zeros.
3435 int trailingZeros = significand.countTrailingZeros();
3436 exp += trailingZeros;
3437 significand = significand.lshr(trailingZeros);
3438
3439 // Change the exponent from 2^e to 10^e.
3440 if (exp == 0) {
3441 // Nothing to do.
3442 } else if (exp > 0) {
3443 // Just shift left.
3444 significand = significand.zext(semantics->precision + exp);
3445 significand <<= exp;
3446 exp = 0;
3447 } else { /* exp < 0 */
3448 int texp = -exp;
3449
3450 // We transform this using the identity:
3451 // (N)(2^-e) == (N)(5^e)(10^-e)
3452 // This means we have to multiply N (the significand) by 5^e.
3453 // To avoid overflow, we have to operate on numbers large
3454 // enough to store N * 5^e:
3455 // log2(N * 5^e) == log2(N) + e * log2(5)
3456 // <= semantics->precision + e * 137 / 59
3457 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3458
3459 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3460
3461 // Multiply significand by 5^e.
3462 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3463 significand = significand.zext(precision);
3464 APInt five_to_the_i(precision, 5);
3465 while (true) {
3466 if (texp & 1) significand *= five_to_the_i;
3467
3468 texp >>= 1;
3469 if (!texp) break;
3470 five_to_the_i *= five_to_the_i;
3471 }
3472 }
3473
3474 AdjustToPrecision(significand, exp, FormatPrecision);
3475
3476 llvm::SmallVector<char, 256> buffer;
3477
3478 // Fill the buffer.
3479 unsigned precision = significand.getBitWidth();
3480 APInt ten(precision, 10);
3481 APInt digit(precision, 0);
3482
3483 bool inTrail = true;
3484 while (significand != 0) {
3485 // digit <- significand % 10
3486 // significand <- significand / 10
3487 APInt::udivrem(significand, ten, significand, digit);
3488
3489 unsigned d = digit.getZExtValue();
3490
3491 // Drop trailing zeros.
3492 if (inTrail && !d) exp++;
3493 else {
3494 buffer.push_back((char) ('0' + d));
3495 inTrail = false;
3496 }
3497 }
3498
3499 assert(!buffer.empty() && "no characters in buffer!");
3500
3501 // Drop down to FormatPrecision.
3502 // TODO: don't do more precise calculations above than are required.
3503 AdjustToPrecision(buffer, exp, FormatPrecision);
3504
3505 unsigned NDigits = buffer.size();
3506
3507 // Check whether we should use scientific notation.
3508 bool FormatScientific;
3509 if (!FormatMaxPadding)
3510 FormatScientific = true;
3511 else {
3512 if (exp >= 0) {
3513 // 765e3 --> 765000
3514 // ^^^
3515 // But we shouldn't make the number look more precise than it is.
3516 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3517 NDigits + (unsigned) exp > FormatPrecision);
3518 } else {
3519 // Power of the most significant digit.
3520 int MSD = exp + (int) (NDigits - 1);
3521 if (MSD >= 0) {
3522 // 765e-2 == 7.65
3523 FormatScientific = false;
3524 } else {
3525 // 765e-5 == 0.00765
3526 // ^ ^^
3527 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3528 }
3529 }
3530 }
3531
3532 // Scientific formatting is pretty straightforward.
3533 if (FormatScientific) {
3534 exp += (NDigits - 1);
3535
3536 Str.push_back(buffer[NDigits-1]);
3537 Str.push_back('.');
3538 if (NDigits == 1)
3539 Str.push_back('0');
3540 else
3541 for (unsigned I = 1; I != NDigits; ++I)
3542 Str.push_back(buffer[NDigits-1-I]);
3543 Str.push_back('E');
3544
3545 Str.push_back(exp >= 0 ? '+' : '-');
3546 if (exp < 0) exp = -exp;
3547 SmallVector<char, 6> expbuf;
3548 do {
3549 expbuf.push_back((char) ('0' + (exp % 10)));
3550 exp /= 10;
3551 } while (exp);
3552 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3553 Str.push_back(expbuf[E-1-I]);
3554 return;
3555 }
3556
3557 // Non-scientific, positive exponents.
3558 if (exp >= 0) {
3559 for (unsigned I = 0; I != NDigits; ++I)
3560 Str.push_back(buffer[NDigits-1-I]);
3561 for (unsigned I = 0; I != (unsigned) exp; ++I)
3562 Str.push_back('0');
3563 return;
3564 }
3565
3566 // Non-scientific, negative exponents.
3567
3568 // The number of digits to the left of the decimal point.
3569 int NWholeDigits = exp + (int) NDigits;
3570
3571 unsigned I = 0;
3572 if (NWholeDigits > 0) {
3573 for (; I != (unsigned) NWholeDigits; ++I)
3574 Str.push_back(buffer[NDigits-I-1]);
3575 Str.push_back('.');
3576 } else {
3577 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3578
3579 Str.push_back('0');
3580 Str.push_back('.');
3581 for (unsigned Z = 1; Z != NZeros; ++Z)
3582 Str.push_back('0');
3583 }
3584
3585 for (; I != NDigits; ++I)
3586 Str.push_back(buffer[NDigits-I-1]);
3587 }
3588
getExactInverse(APFloat * inv) const3589 bool APFloat::getExactInverse(APFloat *inv) const {
3590 // We can only guarantee the existence of an exact inverse for IEEE floats.
3591 if (semantics != &IEEEhalf && semantics != &IEEEsingle &&
3592 semantics != &IEEEdouble && semantics != &IEEEquad)
3593 return false;
3594
3595 // Special floats and denormals have no exact inverse.
3596 if (category != fcNormal)
3597 return false;
3598
3599 // Check that the number is a power of two by making sure that only the
3600 // integer bit is set in the significand.
3601 if (significandLSB() != semantics->precision - 1)
3602 return false;
3603
3604 // Get the inverse.
3605 APFloat reciprocal(*semantics, 1ULL);
3606 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3607 return false;
3608
3609 // Avoid multiplication with a denormal, it is not safe on all platforms and
3610 // may be slower than a normal division.
3611 if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
3612 return false;
3613
3614 assert(reciprocal.category == fcNormal &&
3615 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3616
3617 if (inv)
3618 *inv = reciprocal;
3619
3620 return true;
3621 }
3622