1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <cstring>
27 #include <limits.h>
28
29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
30 do { \
31 if (usesLayout<IEEEFloat>(getSemantics())) \
32 return U.IEEE.METHOD_CALL; \
33 if (usesLayout<DoubleAPFloat>(getSemantics())) \
34 return U.Double.METHOD_CALL; \
35 llvm_unreachable("Unexpected semantics"); \
36 } while (false)
37
38 using namespace llvm;
39
40 /// A macro used to combine two fcCategory enums into one key which can be used
41 /// in a switch statement to classify how the interaction of two APFloat's
42 /// categories affects an operation.
43 ///
44 /// TODO: If clang source code is ever allowed to use constexpr in its own
45 /// codebase, change this into a static inline function.
46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
47
48 /* Assumed in hexadecimal significand parsing, and conversion to
49 hexadecimal strings. */
50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
51
52 namespace llvm {
53 /* Represents floating point arithmetic semantics. */
54 struct fltSemantics {
55 /* The largest E such that 2^E is representable; this matches the
56 definition of IEEE 754. */
57 APFloatBase::ExponentType maxExponent;
58
59 /* The smallest E such that 2^E is a normalized number; this
60 matches the definition of IEEE 754. */
61 APFloatBase::ExponentType minExponent;
62
63 /* Number of bits in the significand. This includes the integer
64 bit. */
65 unsigned int precision;
66
67 /* Number of bits actually used in the semantics. */
68 unsigned int sizeInBits;
69 };
70
71 static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
72 static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
73 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
74 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
75 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
76 static const fltSemantics semBogus = {0, 0, 0, 0};
77
78 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
79 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
80 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
81 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
82 to each other, and two 11-bit exponents.
83
84 Note: we need to make the value different from semBogus as otherwise
85 an unsafe optimization may collapse both values to a single address,
86 and we heavily rely on them having distinct addresses. */
87 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
88
89 /* These are legacy semantics for the fallback, inaccrurate implementation of
90 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
91 operation. It's equivalent to having an IEEE number with consecutive 106
92 bits of mantissa and 11 bits of exponent.
93
94 It's not equivalent to IBM double-double. For example, a legit IBM
95 double-double, 1 + epsilon:
96
97 1 + epsilon = 1 + (1 >> 1076)
98
99 is not representable by a consecutive 106 bits of mantissa.
100
101 Currently, these semantics are used in the following way:
102
103 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
104 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
105 semPPCDoubleDoubleLegacy -> IEEE operations
106
107 We use bitcastToAPInt() to get the bit representation (in APInt) of the
108 underlying IEEEdouble, then use the APInt constructor to construct the
109 legacy IEEE float.
110
111 TODO: Implement all operations in semPPCDoubleDouble, and delete these
112 semantics. */
113 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
114 53 + 53, 128};
115
EnumToSemantics(Semantics S)116 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
117 switch (S) {
118 case S_IEEEhalf:
119 return IEEEhalf();
120 case S_IEEEsingle:
121 return IEEEsingle();
122 case S_IEEEdouble:
123 return IEEEdouble();
124 case S_x87DoubleExtended:
125 return x87DoubleExtended();
126 case S_IEEEquad:
127 return IEEEquad();
128 case S_PPCDoubleDouble:
129 return PPCDoubleDouble();
130 }
131 llvm_unreachable("Unrecognised floating semantics");
132 }
133
134 APFloatBase::Semantics
SemanticsToEnum(const llvm::fltSemantics & Sem)135 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
136 if (&Sem == &llvm::APFloat::IEEEhalf())
137 return S_IEEEhalf;
138 else if (&Sem == &llvm::APFloat::IEEEsingle())
139 return S_IEEEsingle;
140 else if (&Sem == &llvm::APFloat::IEEEdouble())
141 return S_IEEEdouble;
142 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
143 return S_x87DoubleExtended;
144 else if (&Sem == &llvm::APFloat::IEEEquad())
145 return S_IEEEquad;
146 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
147 return S_PPCDoubleDouble;
148 else
149 llvm_unreachable("Unknown floating semantics");
150 }
151
IEEEhalf()152 const fltSemantics &APFloatBase::IEEEhalf() {
153 return semIEEEhalf;
154 }
IEEEsingle()155 const fltSemantics &APFloatBase::IEEEsingle() {
156 return semIEEEsingle;
157 }
IEEEdouble()158 const fltSemantics &APFloatBase::IEEEdouble() {
159 return semIEEEdouble;
160 }
IEEEquad()161 const fltSemantics &APFloatBase::IEEEquad() {
162 return semIEEEquad;
163 }
x87DoubleExtended()164 const fltSemantics &APFloatBase::x87DoubleExtended() {
165 return semX87DoubleExtended;
166 }
Bogus()167 const fltSemantics &APFloatBase::Bogus() {
168 return semBogus;
169 }
PPCDoubleDouble()170 const fltSemantics &APFloatBase::PPCDoubleDouble() {
171 return semPPCDoubleDouble;
172 }
173
174 /* A tight upper bound on number of parts required to hold the value
175 pow(5, power) is
176
177 power * 815 / (351 * integerPartWidth) + 1
178
179 However, whilst the result may require only this many parts,
180 because we are multiplying two values to get it, the
181 multiplication may require an extra part with the excess part
182 being zero (consider the trivial case of 1 * 1, tcFullMultiply
183 requires two parts to hold the single-part result). So we add an
184 extra one to guarantee enough space whilst multiplying. */
185 const unsigned int maxExponent = 16383;
186 const unsigned int maxPrecision = 113;
187 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
188 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
189
semanticsPrecision(const fltSemantics & semantics)190 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
191 return semantics.precision;
192 }
193 APFloatBase::ExponentType
semanticsMaxExponent(const fltSemantics & semantics)194 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
195 return semantics.maxExponent;
196 }
197 APFloatBase::ExponentType
semanticsMinExponent(const fltSemantics & semantics)198 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
199 return semantics.minExponent;
200 }
semanticsSizeInBits(const fltSemantics & semantics)201 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
202 return semantics.sizeInBits;
203 }
204
getSizeInBits(const fltSemantics & Sem)205 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
206 return Sem.sizeInBits;
207 }
208
209 /* A bunch of private, handy routines. */
210
createError(const Twine & Err)211 static inline Error createError(const Twine &Err) {
212 return make_error<StringError>(Err, inconvertibleErrorCode());
213 }
214
215 static inline unsigned int
partCountForBits(unsigned int bits)216 partCountForBits(unsigned int bits)
217 {
218 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
219 }
220
221 /* Returns 0U-9U. Return values >= 10U are not digits. */
222 static inline unsigned int
decDigitValue(unsigned int c)223 decDigitValue(unsigned int c)
224 {
225 return c - '0';
226 }
227
228 /* Return the value of a decimal exponent of the form
229 [+-]ddddddd.
230
231 If the exponent overflows, returns a large exponent with the
232 appropriate sign. */
readExponent(StringRef::iterator begin,StringRef::iterator end)233 static Expected<int> readExponent(StringRef::iterator begin,
234 StringRef::iterator end) {
235 bool isNegative;
236 unsigned int absExponent;
237 const unsigned int overlargeExponent = 24000; /* FIXME. */
238 StringRef::iterator p = begin;
239
240 // Treat no exponent as 0 to match binutils
241 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
242 return 0;
243 }
244
245 isNegative = (*p == '-');
246 if (*p == '-' || *p == '+') {
247 p++;
248 if (p == end)
249 return createError("Exponent has no digits");
250 }
251
252 absExponent = decDigitValue(*p++);
253 if (absExponent >= 10U)
254 return createError("Invalid character in exponent");
255
256 for (; p != end; ++p) {
257 unsigned int value;
258
259 value = decDigitValue(*p);
260 if (value >= 10U)
261 return createError("Invalid character in exponent");
262
263 absExponent = absExponent * 10U + value;
264 if (absExponent >= overlargeExponent) {
265 absExponent = overlargeExponent;
266 break;
267 }
268 }
269
270 if (isNegative)
271 return -(int) absExponent;
272 else
273 return (int) absExponent;
274 }
275
276 /* This is ugly and needs cleaning up, but I don't immediately see
277 how whilst remaining safe. */
totalExponent(StringRef::iterator p,StringRef::iterator end,int exponentAdjustment)278 static Expected<int> totalExponent(StringRef::iterator p,
279 StringRef::iterator end,
280 int exponentAdjustment) {
281 int unsignedExponent;
282 bool negative, overflow;
283 int exponent = 0;
284
285 if (p == end)
286 return createError("Exponent has no digits");
287
288 negative = *p == '-';
289 if (*p == '-' || *p == '+') {
290 p++;
291 if (p == end)
292 return createError("Exponent has no digits");
293 }
294
295 unsignedExponent = 0;
296 overflow = false;
297 for (; p != end; ++p) {
298 unsigned int value;
299
300 value = decDigitValue(*p);
301 if (value >= 10U)
302 return createError("Invalid character in exponent");
303
304 unsignedExponent = unsignedExponent * 10 + value;
305 if (unsignedExponent > 32767) {
306 overflow = true;
307 break;
308 }
309 }
310
311 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
312 overflow = true;
313
314 if (!overflow) {
315 exponent = unsignedExponent;
316 if (negative)
317 exponent = -exponent;
318 exponent += exponentAdjustment;
319 if (exponent > 32767 || exponent < -32768)
320 overflow = true;
321 }
322
323 if (overflow)
324 exponent = negative ? -32768: 32767;
325
326 return exponent;
327 }
328
329 static Expected<StringRef::iterator>
skipLeadingZeroesAndAnyDot(StringRef::iterator begin,StringRef::iterator end,StringRef::iterator * dot)330 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
331 StringRef::iterator *dot) {
332 StringRef::iterator p = begin;
333 *dot = end;
334 while (p != end && *p == '0')
335 p++;
336
337 if (p != end && *p == '.') {
338 *dot = p++;
339
340 if (end - begin == 1)
341 return createError("Significand has no digits");
342
343 while (p != end && *p == '0')
344 p++;
345 }
346
347 return p;
348 }
349
350 /* Given a normal decimal floating point number of the form
351
352 dddd.dddd[eE][+-]ddd
353
354 where the decimal point and exponent are optional, fill out the
355 structure D. Exponent is appropriate if the significand is
356 treated as an integer, and normalizedExponent if the significand
357 is taken to have the decimal point after a single leading
358 non-zero digit.
359
360 If the value is zero, V->firstSigDigit points to a non-digit, and
361 the return exponent is zero.
362 */
363 struct decimalInfo {
364 const char *firstSigDigit;
365 const char *lastSigDigit;
366 int exponent;
367 int normalizedExponent;
368 };
369
interpretDecimal(StringRef::iterator begin,StringRef::iterator end,decimalInfo * D)370 static Error interpretDecimal(StringRef::iterator begin,
371 StringRef::iterator end, decimalInfo *D) {
372 StringRef::iterator dot = end;
373
374 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
375 if (!PtrOrErr)
376 return PtrOrErr.takeError();
377 StringRef::iterator p = *PtrOrErr;
378
379 D->firstSigDigit = p;
380 D->exponent = 0;
381 D->normalizedExponent = 0;
382
383 for (; p != end; ++p) {
384 if (*p == '.') {
385 if (dot != end)
386 return createError("String contains multiple dots");
387 dot = p++;
388 if (p == end)
389 break;
390 }
391 if (decDigitValue(*p) >= 10U)
392 break;
393 }
394
395 if (p != end) {
396 if (*p != 'e' && *p != 'E')
397 return createError("Invalid character in significand");
398 if (p == begin)
399 return createError("Significand has no digits");
400 if (dot != end && p - begin == 1)
401 return createError("Significand has no digits");
402
403 /* p points to the first non-digit in the string */
404 auto ExpOrErr = readExponent(p + 1, end);
405 if (!ExpOrErr)
406 return ExpOrErr.takeError();
407 D->exponent = *ExpOrErr;
408
409 /* Implied decimal point? */
410 if (dot == end)
411 dot = p;
412 }
413
414 /* If number is all zeroes accept any exponent. */
415 if (p != D->firstSigDigit) {
416 /* Drop insignificant trailing zeroes. */
417 if (p != begin) {
418 do
419 do
420 p--;
421 while (p != begin && *p == '0');
422 while (p != begin && *p == '.');
423 }
424
425 /* Adjust the exponents for any decimal point. */
426 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
427 D->normalizedExponent = (D->exponent +
428 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
429 - (dot > D->firstSigDigit && dot < p)));
430 }
431
432 D->lastSigDigit = p;
433 return Error::success();
434 }
435
436 /* Return the trailing fraction of a hexadecimal number.
437 DIGITVALUE is the first hex digit of the fraction, P points to
438 the next digit. */
439 static Expected<lostFraction>
trailingHexadecimalFraction(StringRef::iterator p,StringRef::iterator end,unsigned int digitValue)440 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
441 unsigned int digitValue) {
442 unsigned int hexDigit;
443
444 /* If the first trailing digit isn't 0 or 8 we can work out the
445 fraction immediately. */
446 if (digitValue > 8)
447 return lfMoreThanHalf;
448 else if (digitValue < 8 && digitValue > 0)
449 return lfLessThanHalf;
450
451 // Otherwise we need to find the first non-zero digit.
452 while (p != end && (*p == '0' || *p == '.'))
453 p++;
454
455 if (p == end)
456 return createError("Invalid trailing hexadecimal fraction!");
457
458 hexDigit = hexDigitValue(*p);
459
460 /* If we ran off the end it is exactly zero or one-half, otherwise
461 a little more. */
462 if (hexDigit == -1U)
463 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
464 else
465 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
466 }
467
468 /* Return the fraction lost were a bignum truncated losing the least
469 significant BITS bits. */
470 static lostFraction
lostFractionThroughTruncation(const APFloatBase::integerPart * parts,unsigned int partCount,unsigned int bits)471 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
472 unsigned int partCount,
473 unsigned int bits)
474 {
475 unsigned int lsb;
476
477 lsb = APInt::tcLSB(parts, partCount);
478
479 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
480 if (bits <= lsb)
481 return lfExactlyZero;
482 if (bits == lsb + 1)
483 return lfExactlyHalf;
484 if (bits <= partCount * APFloatBase::integerPartWidth &&
485 APInt::tcExtractBit(parts, bits - 1))
486 return lfMoreThanHalf;
487
488 return lfLessThanHalf;
489 }
490
491 /* Shift DST right BITS bits noting lost fraction. */
492 static lostFraction
shiftRight(APFloatBase::integerPart * dst,unsigned int parts,unsigned int bits)493 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
494 {
495 lostFraction lost_fraction;
496
497 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
498
499 APInt::tcShiftRight(dst, parts, bits);
500
501 return lost_fraction;
502 }
503
504 /* Combine the effect of two lost fractions. */
505 static lostFraction
combineLostFractions(lostFraction moreSignificant,lostFraction lessSignificant)506 combineLostFractions(lostFraction moreSignificant,
507 lostFraction lessSignificant)
508 {
509 if (lessSignificant != lfExactlyZero) {
510 if (moreSignificant == lfExactlyZero)
511 moreSignificant = lfLessThanHalf;
512 else if (moreSignificant == lfExactlyHalf)
513 moreSignificant = lfMoreThanHalf;
514 }
515
516 return moreSignificant;
517 }
518
519 /* The error from the true value, in half-ulps, on multiplying two
520 floating point numbers, which differ from the value they
521 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
522 than the returned value.
523
524 See "How to Read Floating Point Numbers Accurately" by William D
525 Clinger. */
526 static unsigned int
HUerrBound(bool inexactMultiply,unsigned int HUerr1,unsigned int HUerr2)527 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
528 {
529 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
530
531 if (HUerr1 + HUerr2 == 0)
532 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
533 else
534 return inexactMultiply + 2 * (HUerr1 + HUerr2);
535 }
536
537 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
538 when the least significant BITS are truncated. BITS cannot be
539 zero. */
540 static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart * parts,unsigned int bits,bool isNearest)541 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
542 bool isNearest) {
543 unsigned int count, partBits;
544 APFloatBase::integerPart part, boundary;
545
546 assert(bits != 0);
547
548 bits--;
549 count = bits / APFloatBase::integerPartWidth;
550 partBits = bits % APFloatBase::integerPartWidth + 1;
551
552 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
553
554 if (isNearest)
555 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
556 else
557 boundary = 0;
558
559 if (count == 0) {
560 if (part - boundary <= boundary - part)
561 return part - boundary;
562 else
563 return boundary - part;
564 }
565
566 if (part == boundary) {
567 while (--count)
568 if (parts[count])
569 return ~(APFloatBase::integerPart) 0; /* A lot. */
570
571 return parts[0];
572 } else if (part == boundary - 1) {
573 while (--count)
574 if (~parts[count])
575 return ~(APFloatBase::integerPart) 0; /* A lot. */
576
577 return -parts[0];
578 }
579
580 return ~(APFloatBase::integerPart) 0; /* A lot. */
581 }
582
583 /* Place pow(5, power) in DST, and return the number of parts used.
584 DST must be at least one part larger than size of the answer. */
585 static unsigned int
powerOf5(APFloatBase::integerPart * dst,unsigned int power)586 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
587 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
588 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
589 pow5s[0] = 78125 * 5;
590
591 unsigned int partsCount[16] = { 1 };
592 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
593 unsigned int result;
594 assert(power <= maxExponent);
595
596 p1 = dst;
597 p2 = scratch;
598
599 *p1 = firstEightPowers[power & 7];
600 power >>= 3;
601
602 result = 1;
603 pow5 = pow5s;
604
605 for (unsigned int n = 0; power; power >>= 1, n++) {
606 unsigned int pc;
607
608 pc = partsCount[n];
609
610 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
611 if (pc == 0) {
612 pc = partsCount[n - 1];
613 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
614 pc *= 2;
615 if (pow5[pc - 1] == 0)
616 pc--;
617 partsCount[n] = pc;
618 }
619
620 if (power & 1) {
621 APFloatBase::integerPart *tmp;
622
623 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
624 result += pc;
625 if (p2[result - 1] == 0)
626 result--;
627
628 /* Now result is in p1 with partsCount parts and p2 is scratch
629 space. */
630 tmp = p1;
631 p1 = p2;
632 p2 = tmp;
633 }
634
635 pow5 += pc;
636 }
637
638 if (p1 != dst)
639 APInt::tcAssign(dst, p1, result);
640
641 return result;
642 }
643
644 /* Zero at the end to avoid modular arithmetic when adding one; used
645 when rounding up during hexadecimal output. */
646 static const char hexDigitsLower[] = "0123456789abcdef0";
647 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
648 static const char infinityL[] = "infinity";
649 static const char infinityU[] = "INFINITY";
650 static const char NaNL[] = "nan";
651 static const char NaNU[] = "NAN";
652
653 /* Write out an integerPart in hexadecimal, starting with the most
654 significant nibble. Write out exactly COUNT hexdigits, return
655 COUNT. */
656 static unsigned int
partAsHex(char * dst,APFloatBase::integerPart part,unsigned int count,const char * hexDigitChars)657 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
658 const char *hexDigitChars)
659 {
660 unsigned int result = count;
661
662 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
663
664 part >>= (APFloatBase::integerPartWidth - 4 * count);
665 while (count--) {
666 dst[count] = hexDigitChars[part & 0xf];
667 part >>= 4;
668 }
669
670 return result;
671 }
672
673 /* Write out an unsigned decimal integer. */
674 static char *
writeUnsignedDecimal(char * dst,unsigned int n)675 writeUnsignedDecimal (char *dst, unsigned int n)
676 {
677 char buff[40], *p;
678
679 p = buff;
680 do
681 *p++ = '0' + n % 10;
682 while (n /= 10);
683
684 do
685 *dst++ = *--p;
686 while (p != buff);
687
688 return dst;
689 }
690
691 /* Write out a signed decimal integer. */
692 static char *
writeSignedDecimal(char * dst,int value)693 writeSignedDecimal (char *dst, int value)
694 {
695 if (value < 0) {
696 *dst++ = '-';
697 dst = writeUnsignedDecimal(dst, -(unsigned) value);
698 } else
699 dst = writeUnsignedDecimal(dst, value);
700
701 return dst;
702 }
703
704 namespace detail {
705 /* Constructors. */
initialize(const fltSemantics * ourSemantics)706 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
707 unsigned int count;
708
709 semantics = ourSemantics;
710 count = partCount();
711 if (count > 1)
712 significand.parts = new integerPart[count];
713 }
714
freeSignificand()715 void IEEEFloat::freeSignificand() {
716 if (needsCleanup())
717 delete [] significand.parts;
718 }
719
assign(const IEEEFloat & rhs)720 void IEEEFloat::assign(const IEEEFloat &rhs) {
721 assert(semantics == rhs.semantics);
722
723 sign = rhs.sign;
724 category = rhs.category;
725 exponent = rhs.exponent;
726 if (isFiniteNonZero() || category == fcNaN)
727 copySignificand(rhs);
728 }
729
copySignificand(const IEEEFloat & rhs)730 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
731 assert(isFiniteNonZero() || category == fcNaN);
732 assert(rhs.partCount() >= partCount());
733
734 APInt::tcAssign(significandParts(), rhs.significandParts(),
735 partCount());
736 }
737
738 /* Make this number a NaN, with an arbitrary but deterministic value
739 for the significand. If double or longer, this is a signalling NaN,
740 which may not be ideal. If float, this is QNaN(0). */
makeNaN(bool SNaN,bool Negative,const APInt * fill)741 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
742 category = fcNaN;
743 sign = Negative;
744
745 integerPart *significand = significandParts();
746 unsigned numParts = partCount();
747
748 // Set the significand bits to the fill.
749 if (!fill || fill->getNumWords() < numParts)
750 APInt::tcSet(significand, 0, numParts);
751 if (fill) {
752 APInt::tcAssign(significand, fill->getRawData(),
753 std::min(fill->getNumWords(), numParts));
754
755 // Zero out the excess bits of the significand.
756 unsigned bitsToPreserve = semantics->precision - 1;
757 unsigned part = bitsToPreserve / 64;
758 bitsToPreserve %= 64;
759 significand[part] &= ((1ULL << bitsToPreserve) - 1);
760 for (part++; part != numParts; ++part)
761 significand[part] = 0;
762 }
763
764 unsigned QNaNBit = semantics->precision - 2;
765
766 if (SNaN) {
767 // We always have to clear the QNaN bit to make it an SNaN.
768 APInt::tcClearBit(significand, QNaNBit);
769
770 // If there are no bits set in the payload, we have to set
771 // *something* to make it a NaN instead of an infinity;
772 // conventionally, this is the next bit down from the QNaN bit.
773 if (APInt::tcIsZero(significand, numParts))
774 APInt::tcSetBit(significand, QNaNBit - 1);
775 } else {
776 // We always have to set the QNaN bit to make it a QNaN.
777 APInt::tcSetBit(significand, QNaNBit);
778 }
779
780 // For x87 extended precision, we want to make a NaN, not a
781 // pseudo-NaN. Maybe we should expose the ability to make
782 // pseudo-NaNs?
783 if (semantics == &semX87DoubleExtended)
784 APInt::tcSetBit(significand, QNaNBit + 1);
785 }
786
operator =(const IEEEFloat & rhs)787 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
788 if (this != &rhs) {
789 if (semantics != rhs.semantics) {
790 freeSignificand();
791 initialize(rhs.semantics);
792 }
793 assign(rhs);
794 }
795
796 return *this;
797 }
798
operator =(IEEEFloat && rhs)799 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
800 freeSignificand();
801
802 semantics = rhs.semantics;
803 significand = rhs.significand;
804 exponent = rhs.exponent;
805 category = rhs.category;
806 sign = rhs.sign;
807
808 rhs.semantics = &semBogus;
809 return *this;
810 }
811
isDenormal() const812 bool IEEEFloat::isDenormal() const {
813 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
814 (APInt::tcExtractBit(significandParts(),
815 semantics->precision - 1) == 0);
816 }
817
isSmallest() const818 bool IEEEFloat::isSmallest() const {
819 // The smallest number by magnitude in our format will be the smallest
820 // denormal, i.e. the floating point number with exponent being minimum
821 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
822 return isFiniteNonZero() && exponent == semantics->minExponent &&
823 significandMSB() == 0;
824 }
825
isSignificandAllOnes() const826 bool IEEEFloat::isSignificandAllOnes() const {
827 // Test if the significand excluding the integral bit is all ones. This allows
828 // us to test for binade boundaries.
829 const integerPart *Parts = significandParts();
830 const unsigned PartCount = partCount();
831 for (unsigned i = 0; i < PartCount - 1; i++)
832 if (~Parts[i])
833 return false;
834
835 // Set the unused high bits to all ones when we compare.
836 const unsigned NumHighBits =
837 PartCount*integerPartWidth - semantics->precision + 1;
838 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
839 "fill than integerPartWidth");
840 const integerPart HighBitFill =
841 ~integerPart(0) << (integerPartWidth - NumHighBits);
842 if (~(Parts[PartCount - 1] | HighBitFill))
843 return false;
844
845 return true;
846 }
847
isSignificandAllZeros() const848 bool IEEEFloat::isSignificandAllZeros() const {
849 // Test if the significand excluding the integral bit is all zeros. This
850 // allows us to test for binade boundaries.
851 const integerPart *Parts = significandParts();
852 const unsigned PartCount = partCount();
853
854 for (unsigned i = 0; i < PartCount - 1; i++)
855 if (Parts[i])
856 return false;
857
858 const unsigned NumHighBits =
859 PartCount*integerPartWidth - semantics->precision + 1;
860 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
861 "clear than integerPartWidth");
862 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
863
864 if (Parts[PartCount - 1] & HighBitMask)
865 return false;
866
867 return true;
868 }
869
isLargest() const870 bool IEEEFloat::isLargest() const {
871 // The largest number by magnitude in our format will be the floating point
872 // number with maximum exponent and with significand that is all ones.
873 return isFiniteNonZero() && exponent == semantics->maxExponent
874 && isSignificandAllOnes();
875 }
876
isInteger() const877 bool IEEEFloat::isInteger() const {
878 // This could be made more efficient; I'm going for obviously correct.
879 if (!isFinite()) return false;
880 IEEEFloat truncated = *this;
881 truncated.roundToIntegral(rmTowardZero);
882 return compare(truncated) == cmpEqual;
883 }
884
bitwiseIsEqual(const IEEEFloat & rhs) const885 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
886 if (this == &rhs)
887 return true;
888 if (semantics != rhs.semantics ||
889 category != rhs.category ||
890 sign != rhs.sign)
891 return false;
892 if (category==fcZero || category==fcInfinity)
893 return true;
894
895 if (isFiniteNonZero() && exponent != rhs.exponent)
896 return false;
897
898 return std::equal(significandParts(), significandParts() + partCount(),
899 rhs.significandParts());
900 }
901
IEEEFloat(const fltSemantics & ourSemantics,integerPart value)902 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
903 initialize(&ourSemantics);
904 sign = 0;
905 category = fcNormal;
906 zeroSignificand();
907 exponent = ourSemantics.precision - 1;
908 significandParts()[0] = value;
909 normalize(rmNearestTiesToEven, lfExactlyZero);
910 }
911
IEEEFloat(const fltSemantics & ourSemantics)912 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
913 initialize(&ourSemantics);
914 category = fcZero;
915 sign = false;
916 }
917
918 // Delegate to the previous constructor, because later copy constructor may
919 // actually inspects category, which can't be garbage.
IEEEFloat(const fltSemantics & ourSemantics,uninitializedTag tag)920 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
921 : IEEEFloat(ourSemantics) {}
922
IEEEFloat(const IEEEFloat & rhs)923 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
924 initialize(rhs.semantics);
925 assign(rhs);
926 }
927
IEEEFloat(IEEEFloat && rhs)928 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
929 *this = std::move(rhs);
930 }
931
~IEEEFloat()932 IEEEFloat::~IEEEFloat() { freeSignificand(); }
933
partCount() const934 unsigned int IEEEFloat::partCount() const {
935 return partCountForBits(semantics->precision + 1);
936 }
937
significandParts() const938 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
939 return const_cast<IEEEFloat *>(this)->significandParts();
940 }
941
significandParts()942 IEEEFloat::integerPart *IEEEFloat::significandParts() {
943 if (partCount() > 1)
944 return significand.parts;
945 else
946 return &significand.part;
947 }
948
zeroSignificand()949 void IEEEFloat::zeroSignificand() {
950 APInt::tcSet(significandParts(), 0, partCount());
951 }
952
953 /* Increment an fcNormal floating point number's significand. */
incrementSignificand()954 void IEEEFloat::incrementSignificand() {
955 integerPart carry;
956
957 carry = APInt::tcIncrement(significandParts(), partCount());
958
959 /* Our callers should never cause us to overflow. */
960 assert(carry == 0);
961 (void)carry;
962 }
963
964 /* Add the significand of the RHS. Returns the carry flag. */
addSignificand(const IEEEFloat & rhs)965 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
966 integerPart *parts;
967
968 parts = significandParts();
969
970 assert(semantics == rhs.semantics);
971 assert(exponent == rhs.exponent);
972
973 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
974 }
975
976 /* Subtract the significand of the RHS with a borrow flag. Returns
977 the borrow flag. */
subtractSignificand(const IEEEFloat & rhs,integerPart borrow)978 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
979 integerPart borrow) {
980 integerPart *parts;
981
982 parts = significandParts();
983
984 assert(semantics == rhs.semantics);
985 assert(exponent == rhs.exponent);
986
987 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
988 partCount());
989 }
990
991 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
992 on to the full-precision result of the multiplication. Returns the
993 lost fraction. */
multiplySignificand(const IEEEFloat & rhs,IEEEFloat addend)994 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
995 IEEEFloat addend) {
996 unsigned int omsb; // One, not zero, based MSB.
997 unsigned int partsCount, newPartsCount, precision;
998 integerPart *lhsSignificand;
999 integerPart scratch[4];
1000 integerPart *fullSignificand;
1001 lostFraction lost_fraction;
1002 bool ignored;
1003
1004 assert(semantics == rhs.semantics);
1005
1006 precision = semantics->precision;
1007
1008 // Allocate space for twice as many bits as the original significand, plus one
1009 // extra bit for the addition to overflow into.
1010 newPartsCount = partCountForBits(precision * 2 + 1);
1011
1012 if (newPartsCount > 4)
1013 fullSignificand = new integerPart[newPartsCount];
1014 else
1015 fullSignificand = scratch;
1016
1017 lhsSignificand = significandParts();
1018 partsCount = partCount();
1019
1020 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1021 rhs.significandParts(), partsCount, partsCount);
1022
1023 lost_fraction = lfExactlyZero;
1024 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1025 exponent += rhs.exponent;
1026
1027 // Assume the operands involved in the multiplication are single-precision
1028 // FP, and the two multiplicants are:
1029 // *this = a23 . a22 ... a0 * 2^e1
1030 // rhs = b23 . b22 ... b0 * 2^e2
1031 // the result of multiplication is:
1032 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1033 // Note that there are three significant bits at the left-hand side of the
1034 // radix point: two for the multiplication, and an overflow bit for the
1035 // addition (that will always be zero at this point). Move the radix point
1036 // toward left by two bits, and adjust exponent accordingly.
1037 exponent += 2;
1038
1039 if (addend.isNonZero()) {
1040 // The intermediate result of the multiplication has "2 * precision"
1041 // signicant bit; adjust the addend to be consistent with mul result.
1042 //
1043 Significand savedSignificand = significand;
1044 const fltSemantics *savedSemantics = semantics;
1045 fltSemantics extendedSemantics;
1046 opStatus status;
1047 unsigned int extendedPrecision;
1048
1049 // Normalize our MSB to one below the top bit to allow for overflow.
1050 extendedPrecision = 2 * precision + 1;
1051 if (omsb != extendedPrecision - 1) {
1052 assert(extendedPrecision > omsb);
1053 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1054 (extendedPrecision - 1) - omsb);
1055 exponent -= (extendedPrecision - 1) - omsb;
1056 }
1057
1058 /* Create new semantics. */
1059 extendedSemantics = *semantics;
1060 extendedSemantics.precision = extendedPrecision;
1061
1062 if (newPartsCount == 1)
1063 significand.part = fullSignificand[0];
1064 else
1065 significand.parts = fullSignificand;
1066 semantics = &extendedSemantics;
1067
1068 // Make a copy so we can convert it to the extended semantics.
1069 // Note that we cannot convert the addend directly, as the extendedSemantics
1070 // is a local variable (which we take a reference to).
1071 IEEEFloat extendedAddend(addend);
1072 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1073 assert(status == opOK);
1074 (void)status;
1075
1076 // Shift the significand of the addend right by one bit. This guarantees
1077 // that the high bit of the significand is zero (same as fullSignificand),
1078 // so the addition will overflow (if it does overflow at all) into the top bit.
1079 lost_fraction = extendedAddend.shiftSignificandRight(1);
1080 assert(lost_fraction == lfExactlyZero &&
1081 "Lost precision while shifting addend for fused-multiply-add.");
1082
1083 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1084
1085 /* Restore our state. */
1086 if (newPartsCount == 1)
1087 fullSignificand[0] = significand.part;
1088 significand = savedSignificand;
1089 semantics = savedSemantics;
1090
1091 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1092 }
1093
1094 // Convert the result having "2 * precision" significant-bits back to the one
1095 // having "precision" significant-bits. First, move the radix point from
1096 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1097 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1098 exponent -= precision + 1;
1099
1100 // In case MSB resides at the left-hand side of radix point, shift the
1101 // mantissa right by some amount to make sure the MSB reside right before
1102 // the radix point (i.e. "MSB . rest-significant-bits").
1103 //
1104 // Note that the result is not normalized when "omsb < precision". So, the
1105 // caller needs to call IEEEFloat::normalize() if normalized value is
1106 // expected.
1107 if (omsb > precision) {
1108 unsigned int bits, significantParts;
1109 lostFraction lf;
1110
1111 bits = omsb - precision;
1112 significantParts = partCountForBits(omsb);
1113 lf = shiftRight(fullSignificand, significantParts, bits);
1114 lost_fraction = combineLostFractions(lf, lost_fraction);
1115 exponent += bits;
1116 }
1117
1118 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1119
1120 if (newPartsCount > 4)
1121 delete [] fullSignificand;
1122
1123 return lost_fraction;
1124 }
1125
multiplySignificand(const IEEEFloat & rhs)1126 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1127 return multiplySignificand(rhs, IEEEFloat(*semantics));
1128 }
1129
1130 /* Multiply the significands of LHS and RHS to DST. */
divideSignificand(const IEEEFloat & rhs)1131 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1132 unsigned int bit, i, partsCount;
1133 const integerPart *rhsSignificand;
1134 integerPart *lhsSignificand, *dividend, *divisor;
1135 integerPart scratch[4];
1136 lostFraction lost_fraction;
1137
1138 assert(semantics == rhs.semantics);
1139
1140 lhsSignificand = significandParts();
1141 rhsSignificand = rhs.significandParts();
1142 partsCount = partCount();
1143
1144 if (partsCount > 2)
1145 dividend = new integerPart[partsCount * 2];
1146 else
1147 dividend = scratch;
1148
1149 divisor = dividend + partsCount;
1150
1151 /* Copy the dividend and divisor as they will be modified in-place. */
1152 for (i = 0; i < partsCount; i++) {
1153 dividend[i] = lhsSignificand[i];
1154 divisor[i] = rhsSignificand[i];
1155 lhsSignificand[i] = 0;
1156 }
1157
1158 exponent -= rhs.exponent;
1159
1160 unsigned int precision = semantics->precision;
1161
1162 /* Normalize the divisor. */
1163 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1164 if (bit) {
1165 exponent += bit;
1166 APInt::tcShiftLeft(divisor, partsCount, bit);
1167 }
1168
1169 /* Normalize the dividend. */
1170 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1171 if (bit) {
1172 exponent -= bit;
1173 APInt::tcShiftLeft(dividend, partsCount, bit);
1174 }
1175
1176 /* Ensure the dividend >= divisor initially for the loop below.
1177 Incidentally, this means that the division loop below is
1178 guaranteed to set the integer bit to one. */
1179 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1180 exponent--;
1181 APInt::tcShiftLeft(dividend, partsCount, 1);
1182 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1183 }
1184
1185 /* Long division. */
1186 for (bit = precision; bit; bit -= 1) {
1187 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1188 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1189 APInt::tcSetBit(lhsSignificand, bit - 1);
1190 }
1191
1192 APInt::tcShiftLeft(dividend, partsCount, 1);
1193 }
1194
1195 /* Figure out the lost fraction. */
1196 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1197
1198 if (cmp > 0)
1199 lost_fraction = lfMoreThanHalf;
1200 else if (cmp == 0)
1201 lost_fraction = lfExactlyHalf;
1202 else if (APInt::tcIsZero(dividend, partsCount))
1203 lost_fraction = lfExactlyZero;
1204 else
1205 lost_fraction = lfLessThanHalf;
1206
1207 if (partsCount > 2)
1208 delete [] dividend;
1209
1210 return lost_fraction;
1211 }
1212
significandMSB() const1213 unsigned int IEEEFloat::significandMSB() const {
1214 return APInt::tcMSB(significandParts(), partCount());
1215 }
1216
significandLSB() const1217 unsigned int IEEEFloat::significandLSB() const {
1218 return APInt::tcLSB(significandParts(), partCount());
1219 }
1220
1221 /* Note that a zero result is NOT normalized to fcZero. */
shiftSignificandRight(unsigned int bits)1222 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1223 /* Our exponent should not overflow. */
1224 assert((ExponentType) (exponent + bits) >= exponent);
1225
1226 exponent += bits;
1227
1228 return shiftRight(significandParts(), partCount(), bits);
1229 }
1230
1231 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
shiftSignificandLeft(unsigned int bits)1232 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1233 assert(bits < semantics->precision);
1234
1235 if (bits) {
1236 unsigned int partsCount = partCount();
1237
1238 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1239 exponent -= bits;
1240
1241 assert(!APInt::tcIsZero(significandParts(), partsCount));
1242 }
1243 }
1244
1245 IEEEFloat::cmpResult
compareAbsoluteValue(const IEEEFloat & rhs) const1246 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1247 int compare;
1248
1249 assert(semantics == rhs.semantics);
1250 assert(isFiniteNonZero());
1251 assert(rhs.isFiniteNonZero());
1252
1253 compare = exponent - rhs.exponent;
1254
1255 /* If exponents are equal, do an unsigned bignum comparison of the
1256 significands. */
1257 if (compare == 0)
1258 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1259 partCount());
1260
1261 if (compare > 0)
1262 return cmpGreaterThan;
1263 else if (compare < 0)
1264 return cmpLessThan;
1265 else
1266 return cmpEqual;
1267 }
1268
1269 /* Handle overflow. Sign is preserved. We either become infinity or
1270 the largest finite number. */
handleOverflow(roundingMode rounding_mode)1271 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1272 /* Infinity? */
1273 if (rounding_mode == rmNearestTiesToEven ||
1274 rounding_mode == rmNearestTiesToAway ||
1275 (rounding_mode == rmTowardPositive && !sign) ||
1276 (rounding_mode == rmTowardNegative && sign)) {
1277 category = fcInfinity;
1278 return (opStatus) (opOverflow | opInexact);
1279 }
1280
1281 /* Otherwise we become the largest finite number. */
1282 category = fcNormal;
1283 exponent = semantics->maxExponent;
1284 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1285 semantics->precision);
1286
1287 return opInexact;
1288 }
1289
1290 /* Returns TRUE if, when truncating the current number, with BIT the
1291 new LSB, with the given lost fraction and rounding mode, the result
1292 would need to be rounded away from zero (i.e., by increasing the
1293 signficand). This routine must work for fcZero of both signs, and
1294 fcNormal numbers. */
roundAwayFromZero(roundingMode rounding_mode,lostFraction lost_fraction,unsigned int bit) const1295 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1296 lostFraction lost_fraction,
1297 unsigned int bit) const {
1298 /* NaNs and infinities should not have lost fractions. */
1299 assert(isFiniteNonZero() || category == fcZero);
1300
1301 /* Current callers never pass this so we don't handle it. */
1302 assert(lost_fraction != lfExactlyZero);
1303
1304 switch (rounding_mode) {
1305 case rmNearestTiesToAway:
1306 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1307
1308 case rmNearestTiesToEven:
1309 if (lost_fraction == lfMoreThanHalf)
1310 return true;
1311
1312 /* Our zeroes don't have a significand to test. */
1313 if (lost_fraction == lfExactlyHalf && category != fcZero)
1314 return APInt::tcExtractBit(significandParts(), bit);
1315
1316 return false;
1317
1318 case rmTowardZero:
1319 return false;
1320
1321 case rmTowardPositive:
1322 return !sign;
1323
1324 case rmTowardNegative:
1325 return sign;
1326 }
1327 llvm_unreachable("Invalid rounding mode found");
1328 }
1329
normalize(roundingMode rounding_mode,lostFraction lost_fraction)1330 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1331 lostFraction lost_fraction) {
1332 unsigned int omsb; /* One, not zero, based MSB. */
1333 int exponentChange;
1334
1335 if (!isFiniteNonZero())
1336 return opOK;
1337
1338 /* Before rounding normalize the exponent of fcNormal numbers. */
1339 omsb = significandMSB() + 1;
1340
1341 if (omsb) {
1342 /* OMSB is numbered from 1. We want to place it in the integer
1343 bit numbered PRECISION if possible, with a compensating change in
1344 the exponent. */
1345 exponentChange = omsb - semantics->precision;
1346
1347 /* If the resulting exponent is too high, overflow according to
1348 the rounding mode. */
1349 if (exponent + exponentChange > semantics->maxExponent)
1350 return handleOverflow(rounding_mode);
1351
1352 /* Subnormal numbers have exponent minExponent, and their MSB
1353 is forced based on that. */
1354 if (exponent + exponentChange < semantics->minExponent)
1355 exponentChange = semantics->minExponent - exponent;
1356
1357 /* Shifting left is easy as we don't lose precision. */
1358 if (exponentChange < 0) {
1359 assert(lost_fraction == lfExactlyZero);
1360
1361 shiftSignificandLeft(-exponentChange);
1362
1363 return opOK;
1364 }
1365
1366 if (exponentChange > 0) {
1367 lostFraction lf;
1368
1369 /* Shift right and capture any new lost fraction. */
1370 lf = shiftSignificandRight(exponentChange);
1371
1372 lost_fraction = combineLostFractions(lf, lost_fraction);
1373
1374 /* Keep OMSB up-to-date. */
1375 if (omsb > (unsigned) exponentChange)
1376 omsb -= exponentChange;
1377 else
1378 omsb = 0;
1379 }
1380 }
1381
1382 /* Now round the number according to rounding_mode given the lost
1383 fraction. */
1384
1385 /* As specified in IEEE 754, since we do not trap we do not report
1386 underflow for exact results. */
1387 if (lost_fraction == lfExactlyZero) {
1388 /* Canonicalize zeroes. */
1389 if (omsb == 0)
1390 category = fcZero;
1391
1392 return opOK;
1393 }
1394
1395 /* Increment the significand if we're rounding away from zero. */
1396 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1397 if (omsb == 0)
1398 exponent = semantics->minExponent;
1399
1400 incrementSignificand();
1401 omsb = significandMSB() + 1;
1402
1403 /* Did the significand increment overflow? */
1404 if (omsb == (unsigned) semantics->precision + 1) {
1405 /* Renormalize by incrementing the exponent and shifting our
1406 significand right one. However if we already have the
1407 maximum exponent we overflow to infinity. */
1408 if (exponent == semantics->maxExponent) {
1409 category = fcInfinity;
1410
1411 return (opStatus) (opOverflow | opInexact);
1412 }
1413
1414 shiftSignificandRight(1);
1415
1416 return opInexact;
1417 }
1418 }
1419
1420 /* The normal case - we were and are not denormal, and any
1421 significand increment above didn't overflow. */
1422 if (omsb == semantics->precision)
1423 return opInexact;
1424
1425 /* We have a non-zero denormal. */
1426 assert(omsb < semantics->precision);
1427
1428 /* Canonicalize zeroes. */
1429 if (omsb == 0)
1430 category = fcZero;
1431
1432 /* The fcZero case is a denormal that underflowed to zero. */
1433 return (opStatus) (opUnderflow | opInexact);
1434 }
1435
addOrSubtractSpecials(const IEEEFloat & rhs,bool subtract)1436 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1437 bool subtract) {
1438 switch (PackCategoriesIntoKey(category, rhs.category)) {
1439 default:
1440 llvm_unreachable(nullptr);
1441
1442 case PackCategoriesIntoKey(fcNaN, fcZero):
1443 case PackCategoriesIntoKey(fcNaN, fcNormal):
1444 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1445 case PackCategoriesIntoKey(fcNaN, fcNaN):
1446 case PackCategoriesIntoKey(fcNormal, fcZero):
1447 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1448 case PackCategoriesIntoKey(fcInfinity, fcZero):
1449 return opOK;
1450
1451 case PackCategoriesIntoKey(fcZero, fcNaN):
1452 case PackCategoriesIntoKey(fcNormal, fcNaN):
1453 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1454 // We need to be sure to flip the sign here for subtraction because we
1455 // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1456 sign = rhs.sign ^ subtract;
1457 category = fcNaN;
1458 copySignificand(rhs);
1459 return opOK;
1460
1461 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1462 case PackCategoriesIntoKey(fcZero, fcInfinity):
1463 category = fcInfinity;
1464 sign = rhs.sign ^ subtract;
1465 return opOK;
1466
1467 case PackCategoriesIntoKey(fcZero, fcNormal):
1468 assign(rhs);
1469 sign = rhs.sign ^ subtract;
1470 return opOK;
1471
1472 case PackCategoriesIntoKey(fcZero, fcZero):
1473 /* Sign depends on rounding mode; handled by caller. */
1474 return opOK;
1475
1476 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1477 /* Differently signed infinities can only be validly
1478 subtracted. */
1479 if (((sign ^ rhs.sign)!=0) != subtract) {
1480 makeNaN();
1481 return opInvalidOp;
1482 }
1483
1484 return opOK;
1485
1486 case PackCategoriesIntoKey(fcNormal, fcNormal):
1487 return opDivByZero;
1488 }
1489 }
1490
1491 /* Add or subtract two normal numbers. */
addOrSubtractSignificand(const IEEEFloat & rhs,bool subtract)1492 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1493 bool subtract) {
1494 integerPart carry;
1495 lostFraction lost_fraction;
1496 int bits;
1497
1498 /* Determine if the operation on the absolute values is effectively
1499 an addition or subtraction. */
1500 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1501
1502 /* Are we bigger exponent-wise than the RHS? */
1503 bits = exponent - rhs.exponent;
1504
1505 /* Subtraction is more subtle than one might naively expect. */
1506 if (subtract) {
1507 IEEEFloat temp_rhs(rhs);
1508
1509 if (bits == 0)
1510 lost_fraction = lfExactlyZero;
1511 else if (bits > 0) {
1512 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1513 shiftSignificandLeft(1);
1514 } else {
1515 lost_fraction = shiftSignificandRight(-bits - 1);
1516 temp_rhs.shiftSignificandLeft(1);
1517 }
1518
1519 // Should we reverse the subtraction.
1520 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1521 carry = temp_rhs.subtractSignificand
1522 (*this, lost_fraction != lfExactlyZero);
1523 copySignificand(temp_rhs);
1524 sign = !sign;
1525 } else {
1526 carry = subtractSignificand
1527 (temp_rhs, lost_fraction != lfExactlyZero);
1528 }
1529
1530 /* Invert the lost fraction - it was on the RHS and
1531 subtracted. */
1532 if (lost_fraction == lfLessThanHalf)
1533 lost_fraction = lfMoreThanHalf;
1534 else if (lost_fraction == lfMoreThanHalf)
1535 lost_fraction = lfLessThanHalf;
1536
1537 /* The code above is intended to ensure that no borrow is
1538 necessary. */
1539 assert(!carry);
1540 (void)carry;
1541 } else {
1542 if (bits > 0) {
1543 IEEEFloat temp_rhs(rhs);
1544
1545 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1546 carry = addSignificand(temp_rhs);
1547 } else {
1548 lost_fraction = shiftSignificandRight(-bits);
1549 carry = addSignificand(rhs);
1550 }
1551
1552 /* We have a guard bit; generating a carry cannot happen. */
1553 assert(!carry);
1554 (void)carry;
1555 }
1556
1557 return lost_fraction;
1558 }
1559
multiplySpecials(const IEEEFloat & rhs)1560 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1561 switch (PackCategoriesIntoKey(category, rhs.category)) {
1562 default:
1563 llvm_unreachable(nullptr);
1564
1565 case PackCategoriesIntoKey(fcNaN, fcZero):
1566 case PackCategoriesIntoKey(fcNaN, fcNormal):
1567 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1568 case PackCategoriesIntoKey(fcNaN, fcNaN):
1569 sign = false;
1570 return opOK;
1571
1572 case PackCategoriesIntoKey(fcZero, fcNaN):
1573 case PackCategoriesIntoKey(fcNormal, fcNaN):
1574 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1575 sign = false;
1576 category = fcNaN;
1577 copySignificand(rhs);
1578 return opOK;
1579
1580 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1581 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1582 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1583 category = fcInfinity;
1584 return opOK;
1585
1586 case PackCategoriesIntoKey(fcZero, fcNormal):
1587 case PackCategoriesIntoKey(fcNormal, fcZero):
1588 case PackCategoriesIntoKey(fcZero, fcZero):
1589 category = fcZero;
1590 return opOK;
1591
1592 case PackCategoriesIntoKey(fcZero, fcInfinity):
1593 case PackCategoriesIntoKey(fcInfinity, fcZero):
1594 makeNaN();
1595 return opInvalidOp;
1596
1597 case PackCategoriesIntoKey(fcNormal, fcNormal):
1598 return opOK;
1599 }
1600 }
1601
divideSpecials(const IEEEFloat & rhs)1602 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1603 switch (PackCategoriesIntoKey(category, rhs.category)) {
1604 default:
1605 llvm_unreachable(nullptr);
1606
1607 case PackCategoriesIntoKey(fcZero, fcNaN):
1608 case PackCategoriesIntoKey(fcNormal, fcNaN):
1609 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1610 category = fcNaN;
1611 copySignificand(rhs);
1612 LLVM_FALLTHROUGH;
1613 case PackCategoriesIntoKey(fcNaN, fcZero):
1614 case PackCategoriesIntoKey(fcNaN, fcNormal):
1615 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1616 case PackCategoriesIntoKey(fcNaN, fcNaN):
1617 sign = false;
1618 LLVM_FALLTHROUGH;
1619 case PackCategoriesIntoKey(fcInfinity, fcZero):
1620 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1621 case PackCategoriesIntoKey(fcZero, fcInfinity):
1622 case PackCategoriesIntoKey(fcZero, fcNormal):
1623 return opOK;
1624
1625 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1626 category = fcZero;
1627 return opOK;
1628
1629 case PackCategoriesIntoKey(fcNormal, fcZero):
1630 category = fcInfinity;
1631 return opDivByZero;
1632
1633 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1634 case PackCategoriesIntoKey(fcZero, fcZero):
1635 makeNaN();
1636 return opInvalidOp;
1637
1638 case PackCategoriesIntoKey(fcNormal, fcNormal):
1639 return opOK;
1640 }
1641 }
1642
modSpecials(const IEEEFloat & rhs)1643 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1644 switch (PackCategoriesIntoKey(category, rhs.category)) {
1645 default:
1646 llvm_unreachable(nullptr);
1647
1648 case PackCategoriesIntoKey(fcNaN, fcZero):
1649 case PackCategoriesIntoKey(fcNaN, fcNormal):
1650 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1651 case PackCategoriesIntoKey(fcNaN, fcNaN):
1652 case PackCategoriesIntoKey(fcZero, fcInfinity):
1653 case PackCategoriesIntoKey(fcZero, fcNormal):
1654 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1655 return opOK;
1656
1657 case PackCategoriesIntoKey(fcZero, fcNaN):
1658 case PackCategoriesIntoKey(fcNormal, fcNaN):
1659 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1660 sign = false;
1661 category = fcNaN;
1662 copySignificand(rhs);
1663 return opOK;
1664
1665 case PackCategoriesIntoKey(fcNormal, fcZero):
1666 case PackCategoriesIntoKey(fcInfinity, fcZero):
1667 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1668 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1669 case PackCategoriesIntoKey(fcZero, fcZero):
1670 makeNaN();
1671 return opInvalidOp;
1672
1673 case PackCategoriesIntoKey(fcNormal, fcNormal):
1674 return opOK;
1675 }
1676 }
1677
1678 /* Change sign. */
changeSign()1679 void IEEEFloat::changeSign() {
1680 /* Look mummy, this one's easy. */
1681 sign = !sign;
1682 }
1683
1684 /* Normalized addition or subtraction. */
addOrSubtract(const IEEEFloat & rhs,roundingMode rounding_mode,bool subtract)1685 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1686 roundingMode rounding_mode,
1687 bool subtract) {
1688 opStatus fs;
1689
1690 fs = addOrSubtractSpecials(rhs, subtract);
1691
1692 /* This return code means it was not a simple case. */
1693 if (fs == opDivByZero) {
1694 lostFraction lost_fraction;
1695
1696 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1697 fs = normalize(rounding_mode, lost_fraction);
1698
1699 /* Can only be zero if we lost no fraction. */
1700 assert(category != fcZero || lost_fraction == lfExactlyZero);
1701 }
1702
1703 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1704 positive zero unless rounding to minus infinity, except that
1705 adding two like-signed zeroes gives that zero. */
1706 if (category == fcZero) {
1707 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1708 sign = (rounding_mode == rmTowardNegative);
1709 }
1710
1711 return fs;
1712 }
1713
1714 /* Normalized addition. */
add(const IEEEFloat & rhs,roundingMode rounding_mode)1715 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1716 roundingMode rounding_mode) {
1717 return addOrSubtract(rhs, rounding_mode, false);
1718 }
1719
1720 /* Normalized subtraction. */
subtract(const IEEEFloat & rhs,roundingMode rounding_mode)1721 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1722 roundingMode rounding_mode) {
1723 return addOrSubtract(rhs, rounding_mode, true);
1724 }
1725
1726 /* Normalized multiply. */
multiply(const IEEEFloat & rhs,roundingMode rounding_mode)1727 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1728 roundingMode rounding_mode) {
1729 opStatus fs;
1730
1731 sign ^= rhs.sign;
1732 fs = multiplySpecials(rhs);
1733
1734 if (isFiniteNonZero()) {
1735 lostFraction lost_fraction = multiplySignificand(rhs);
1736 fs = normalize(rounding_mode, lost_fraction);
1737 if (lost_fraction != lfExactlyZero)
1738 fs = (opStatus) (fs | opInexact);
1739 }
1740
1741 return fs;
1742 }
1743
1744 /* Normalized divide. */
divide(const IEEEFloat & rhs,roundingMode rounding_mode)1745 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1746 roundingMode rounding_mode) {
1747 opStatus fs;
1748
1749 sign ^= rhs.sign;
1750 fs = divideSpecials(rhs);
1751
1752 if (isFiniteNonZero()) {
1753 lostFraction lost_fraction = divideSignificand(rhs);
1754 fs = normalize(rounding_mode, lost_fraction);
1755 if (lost_fraction != lfExactlyZero)
1756 fs = (opStatus) (fs | opInexact);
1757 }
1758
1759 return fs;
1760 }
1761
1762 /* Normalized remainder. This is not currently correct in all cases. */
remainder(const IEEEFloat & rhs)1763 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
1764 opStatus fs;
1765 IEEEFloat V = *this;
1766 unsigned int origSign = sign;
1767
1768 fs = V.divide(rhs, rmNearestTiesToEven);
1769 if (fs == opDivByZero)
1770 return fs;
1771
1772 int parts = partCount();
1773 integerPart *x = new integerPart[parts];
1774 bool ignored;
1775 fs = V.convertToInteger(makeMutableArrayRef(x, parts),
1776 parts * integerPartWidth, true, rmNearestTiesToEven,
1777 &ignored);
1778 if (fs == opInvalidOp) {
1779 delete[] x;
1780 return fs;
1781 }
1782
1783 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1784 rmNearestTiesToEven);
1785 assert(fs==opOK); // should always work
1786
1787 fs = V.multiply(rhs, rmNearestTiesToEven);
1788 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1789
1790 fs = subtract(V, rmNearestTiesToEven);
1791 assert(fs==opOK || fs==opInexact); // likewise
1792
1793 if (isZero())
1794 sign = origSign; // IEEE754 requires this
1795 delete[] x;
1796 return fs;
1797 }
1798
1799 /* Normalized llvm frem (C fmod). */
mod(const IEEEFloat & rhs)1800 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
1801 opStatus fs;
1802 fs = modSpecials(rhs);
1803 unsigned int origSign = sign;
1804
1805 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
1806 compareAbsoluteValue(rhs) != cmpLessThan) {
1807 IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven);
1808 if (compareAbsoluteValue(V) == cmpLessThan)
1809 V = scalbn(V, -1, rmNearestTiesToEven);
1810 V.sign = sign;
1811
1812 fs = subtract(V, rmNearestTiesToEven);
1813 assert(fs==opOK);
1814 }
1815 if (isZero())
1816 sign = origSign; // fmod requires this
1817 return fs;
1818 }
1819
1820 /* Normalized fused-multiply-add. */
fusedMultiplyAdd(const IEEEFloat & multiplicand,const IEEEFloat & addend,roundingMode rounding_mode)1821 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
1822 const IEEEFloat &addend,
1823 roundingMode rounding_mode) {
1824 opStatus fs;
1825
1826 /* Post-multiplication sign, before addition. */
1827 sign ^= multiplicand.sign;
1828
1829 /* If and only if all arguments are normal do we need to do an
1830 extended-precision calculation. */
1831 if (isFiniteNonZero() &&
1832 multiplicand.isFiniteNonZero() &&
1833 addend.isFinite()) {
1834 lostFraction lost_fraction;
1835
1836 lost_fraction = multiplySignificand(multiplicand, addend);
1837 fs = normalize(rounding_mode, lost_fraction);
1838 if (lost_fraction != lfExactlyZero)
1839 fs = (opStatus) (fs | opInexact);
1840
1841 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1842 positive zero unless rounding to minus infinity, except that
1843 adding two like-signed zeroes gives that zero. */
1844 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1845 sign = (rounding_mode == rmTowardNegative);
1846 } else {
1847 fs = multiplySpecials(multiplicand);
1848
1849 /* FS can only be opOK or opInvalidOp. There is no more work
1850 to do in the latter case. The IEEE-754R standard says it is
1851 implementation-defined in this case whether, if ADDEND is a
1852 quiet NaN, we raise invalid op; this implementation does so.
1853
1854 If we need to do the addition we can do so with normal
1855 precision. */
1856 if (fs == opOK)
1857 fs = addOrSubtract(addend, rounding_mode, false);
1858 }
1859
1860 return fs;
1861 }
1862
1863 /* Rounding-mode corrrect round to integral value. */
roundToIntegral(roundingMode rounding_mode)1864 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
1865 opStatus fs;
1866
1867 // If the exponent is large enough, we know that this value is already
1868 // integral, and the arithmetic below would potentially cause it to saturate
1869 // to +/-Inf. Bail out early instead.
1870 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1871 return opOK;
1872
1873 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1874 // precision of our format, and then subtract it back off again. The choice
1875 // of rounding modes for the addition/subtraction determines the rounding mode
1876 // for our integral rounding as well.
1877 // NOTE: When the input value is negative, we do subtraction followed by
1878 // addition instead.
1879 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1880 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1881 IEEEFloat MagicConstant(*semantics);
1882 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1883 rmNearestTiesToEven);
1884 MagicConstant.sign = sign;
1885
1886 if (fs != opOK)
1887 return fs;
1888
1889 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1890 bool inputSign = isNegative();
1891
1892 fs = add(MagicConstant, rounding_mode);
1893 if (fs != opOK && fs != opInexact)
1894 return fs;
1895
1896 fs = subtract(MagicConstant, rounding_mode);
1897
1898 // Restore the input sign.
1899 if (inputSign != isNegative())
1900 changeSign();
1901
1902 return fs;
1903 }
1904
1905
1906 /* Comparison requires normalized numbers. */
compare(const IEEEFloat & rhs) const1907 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
1908 cmpResult result;
1909
1910 assert(semantics == rhs.semantics);
1911
1912 switch (PackCategoriesIntoKey(category, rhs.category)) {
1913 default:
1914 llvm_unreachable(nullptr);
1915
1916 case PackCategoriesIntoKey(fcNaN, fcZero):
1917 case PackCategoriesIntoKey(fcNaN, fcNormal):
1918 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1919 case PackCategoriesIntoKey(fcNaN, fcNaN):
1920 case PackCategoriesIntoKey(fcZero, fcNaN):
1921 case PackCategoriesIntoKey(fcNormal, fcNaN):
1922 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1923 return cmpUnordered;
1924
1925 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1926 case PackCategoriesIntoKey(fcInfinity, fcZero):
1927 case PackCategoriesIntoKey(fcNormal, fcZero):
1928 if (sign)
1929 return cmpLessThan;
1930 else
1931 return cmpGreaterThan;
1932
1933 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1934 case PackCategoriesIntoKey(fcZero, fcInfinity):
1935 case PackCategoriesIntoKey(fcZero, fcNormal):
1936 if (rhs.sign)
1937 return cmpGreaterThan;
1938 else
1939 return cmpLessThan;
1940
1941 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1942 if (sign == rhs.sign)
1943 return cmpEqual;
1944 else if (sign)
1945 return cmpLessThan;
1946 else
1947 return cmpGreaterThan;
1948
1949 case PackCategoriesIntoKey(fcZero, fcZero):
1950 return cmpEqual;
1951
1952 case PackCategoriesIntoKey(fcNormal, fcNormal):
1953 break;
1954 }
1955
1956 /* Two normal numbers. Do they have the same sign? */
1957 if (sign != rhs.sign) {
1958 if (sign)
1959 result = cmpLessThan;
1960 else
1961 result = cmpGreaterThan;
1962 } else {
1963 /* Compare absolute values; invert result if negative. */
1964 result = compareAbsoluteValue(rhs);
1965
1966 if (sign) {
1967 if (result == cmpLessThan)
1968 result = cmpGreaterThan;
1969 else if (result == cmpGreaterThan)
1970 result = cmpLessThan;
1971 }
1972 }
1973
1974 return result;
1975 }
1976
1977 /// IEEEFloat::convert - convert a value of one floating point type to another.
1978 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1979 /// records whether the transformation lost information, i.e. whether
1980 /// converting the result back to the original type will produce the
1981 /// original value (this is almost the same as return value==fsOK, but there
1982 /// are edge cases where this is not so).
1983
convert(const fltSemantics & toSemantics,roundingMode rounding_mode,bool * losesInfo)1984 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
1985 roundingMode rounding_mode,
1986 bool *losesInfo) {
1987 lostFraction lostFraction;
1988 unsigned int newPartCount, oldPartCount;
1989 opStatus fs;
1990 int shift;
1991 const fltSemantics &fromSemantics = *semantics;
1992
1993 lostFraction = lfExactlyZero;
1994 newPartCount = partCountForBits(toSemantics.precision + 1);
1995 oldPartCount = partCount();
1996 shift = toSemantics.precision - fromSemantics.precision;
1997
1998 bool X86SpecialNan = false;
1999 if (&fromSemantics == &semX87DoubleExtended &&
2000 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2001 (!(*significandParts() & 0x8000000000000000ULL) ||
2002 !(*significandParts() & 0x4000000000000000ULL))) {
2003 // x86 has some unusual NaNs which cannot be represented in any other
2004 // format; note them here.
2005 X86SpecialNan = true;
2006 }
2007
2008 // If this is a truncation of a denormal number, and the target semantics
2009 // has larger exponent range than the source semantics (this can happen
2010 // when truncating from PowerPC double-double to double format), the
2011 // right shift could lose result mantissa bits. Adjust exponent instead
2012 // of performing excessive shift.
2013 if (shift < 0 && isFiniteNonZero()) {
2014 int exponentChange = significandMSB() + 1 - fromSemantics.precision;
2015 if (exponent + exponentChange < toSemantics.minExponent)
2016 exponentChange = toSemantics.minExponent - exponent;
2017 if (exponentChange < shift)
2018 exponentChange = shift;
2019 if (exponentChange < 0) {
2020 shift -= exponentChange;
2021 exponent += exponentChange;
2022 }
2023 }
2024
2025 // If this is a truncation, perform the shift before we narrow the storage.
2026 if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
2027 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2028
2029 // Fix the storage so it can hold to new value.
2030 if (newPartCount > oldPartCount) {
2031 // The new type requires more storage; make it available.
2032 integerPart *newParts;
2033 newParts = new integerPart[newPartCount];
2034 APInt::tcSet(newParts, 0, newPartCount);
2035 if (isFiniteNonZero() || category==fcNaN)
2036 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2037 freeSignificand();
2038 significand.parts = newParts;
2039 } else if (newPartCount == 1 && oldPartCount != 1) {
2040 // Switch to built-in storage for a single part.
2041 integerPart newPart = 0;
2042 if (isFiniteNonZero() || category==fcNaN)
2043 newPart = significandParts()[0];
2044 freeSignificand();
2045 significand.part = newPart;
2046 }
2047
2048 // Now that we have the right storage, switch the semantics.
2049 semantics = &toSemantics;
2050
2051 // If this is an extension, perform the shift now that the storage is
2052 // available.
2053 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2054 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2055
2056 if (isFiniteNonZero()) {
2057 fs = normalize(rounding_mode, lostFraction);
2058 *losesInfo = (fs != opOK);
2059 } else if (category == fcNaN) {
2060 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2061
2062 // For x87 extended precision, we want to make a NaN, not a special NaN if
2063 // the input wasn't special either.
2064 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2065 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2066
2067 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2068 // does not give you back the same bits. This is dubious, and we
2069 // don't currently do it. You're really supposed to get
2070 // an invalid operation signal at runtime, but nobody does that.
2071 fs = opOK;
2072 } else {
2073 *losesInfo = false;
2074 fs = opOK;
2075 }
2076
2077 return fs;
2078 }
2079
2080 /* Convert a floating point number to an integer according to the
2081 rounding mode. If the rounded integer value is out of range this
2082 returns an invalid operation exception and the contents of the
2083 destination parts are unspecified. If the rounded value is in
2084 range but the floating point number is not the exact integer, the C
2085 standard doesn't require an inexact exception to be raised. IEEE
2086 854 does require it so we do that.
2087
2088 Note that for conversions to integer type the C standard requires
2089 round-to-zero to always be used. */
convertToSignExtendedInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2090 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2091 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2092 roundingMode rounding_mode, bool *isExact) const {
2093 lostFraction lost_fraction;
2094 const integerPart *src;
2095 unsigned int dstPartsCount, truncatedBits;
2096
2097 *isExact = false;
2098
2099 /* Handle the three special cases first. */
2100 if (category == fcInfinity || category == fcNaN)
2101 return opInvalidOp;
2102
2103 dstPartsCount = partCountForBits(width);
2104 assert(dstPartsCount <= parts.size() && "Integer too big");
2105
2106 if (category == fcZero) {
2107 APInt::tcSet(parts.data(), 0, dstPartsCount);
2108 // Negative zero can't be represented as an int.
2109 *isExact = !sign;
2110 return opOK;
2111 }
2112
2113 src = significandParts();
2114
2115 /* Step 1: place our absolute value, with any fraction truncated, in
2116 the destination. */
2117 if (exponent < 0) {
2118 /* Our absolute value is less than one; truncate everything. */
2119 APInt::tcSet(parts.data(), 0, dstPartsCount);
2120 /* For exponent -1 the integer bit represents .5, look at that.
2121 For smaller exponents leftmost truncated bit is 0. */
2122 truncatedBits = semantics->precision -1U - exponent;
2123 } else {
2124 /* We want the most significant (exponent + 1) bits; the rest are
2125 truncated. */
2126 unsigned int bits = exponent + 1U;
2127
2128 /* Hopelessly large in magnitude? */
2129 if (bits > width)
2130 return opInvalidOp;
2131
2132 if (bits < semantics->precision) {
2133 /* We truncate (semantics->precision - bits) bits. */
2134 truncatedBits = semantics->precision - bits;
2135 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2136 } else {
2137 /* We want at least as many bits as are available. */
2138 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2139 0);
2140 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2141 bits - semantics->precision);
2142 truncatedBits = 0;
2143 }
2144 }
2145
2146 /* Step 2: work out any lost fraction, and increment the absolute
2147 value if we would round away from zero. */
2148 if (truncatedBits) {
2149 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2150 truncatedBits);
2151 if (lost_fraction != lfExactlyZero &&
2152 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2153 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2154 return opInvalidOp; /* Overflow. */
2155 }
2156 } else {
2157 lost_fraction = lfExactlyZero;
2158 }
2159
2160 /* Step 3: check if we fit in the destination. */
2161 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2162
2163 if (sign) {
2164 if (!isSigned) {
2165 /* Negative numbers cannot be represented as unsigned. */
2166 if (omsb != 0)
2167 return opInvalidOp;
2168 } else {
2169 /* It takes omsb bits to represent the unsigned integer value.
2170 We lose a bit for the sign, but care is needed as the
2171 maximally negative integer is a special case. */
2172 if (omsb == width &&
2173 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2174 return opInvalidOp;
2175
2176 /* This case can happen because of rounding. */
2177 if (omsb > width)
2178 return opInvalidOp;
2179 }
2180
2181 APInt::tcNegate (parts.data(), dstPartsCount);
2182 } else {
2183 if (omsb >= width + !isSigned)
2184 return opInvalidOp;
2185 }
2186
2187 if (lost_fraction == lfExactlyZero) {
2188 *isExact = true;
2189 return opOK;
2190 } else
2191 return opInexact;
2192 }
2193
2194 /* Same as convertToSignExtendedInteger, except we provide
2195 deterministic values in case of an invalid operation exception,
2196 namely zero for NaNs and the minimal or maximal value respectively
2197 for underflow or overflow.
2198 The *isExact output tells whether the result is exact, in the sense
2199 that converting it back to the original floating point type produces
2200 the original value. This is almost equivalent to result==opOK,
2201 except for negative zeroes.
2202 */
2203 IEEEFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2204 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2205 unsigned int width, bool isSigned,
2206 roundingMode rounding_mode, bool *isExact) const {
2207 opStatus fs;
2208
2209 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2210 isExact);
2211
2212 if (fs == opInvalidOp) {
2213 unsigned int bits, dstPartsCount;
2214
2215 dstPartsCount = partCountForBits(width);
2216 assert(dstPartsCount <= parts.size() && "Integer too big");
2217
2218 if (category == fcNaN)
2219 bits = 0;
2220 else if (sign)
2221 bits = isSigned;
2222 else
2223 bits = width - isSigned;
2224
2225 APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2226 if (sign && isSigned)
2227 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2228 }
2229
2230 return fs;
2231 }
2232
2233 /* Convert an unsigned integer SRC to a floating point number,
2234 rounding according to ROUNDING_MODE. The sign of the floating
2235 point number is not modified. */
convertFromUnsignedParts(const integerPart * src,unsigned int srcCount,roundingMode rounding_mode)2236 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2237 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2238 unsigned int omsb, precision, dstCount;
2239 integerPart *dst;
2240 lostFraction lost_fraction;
2241
2242 category = fcNormal;
2243 omsb = APInt::tcMSB(src, srcCount) + 1;
2244 dst = significandParts();
2245 dstCount = partCount();
2246 precision = semantics->precision;
2247
2248 /* We want the most significant PRECISION bits of SRC. There may not
2249 be that many; extract what we can. */
2250 if (precision <= omsb) {
2251 exponent = omsb - 1;
2252 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2253 omsb - precision);
2254 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2255 } else {
2256 exponent = precision - 1;
2257 lost_fraction = lfExactlyZero;
2258 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2259 }
2260
2261 return normalize(rounding_mode, lost_fraction);
2262 }
2263
convertFromAPInt(const APInt & Val,bool isSigned,roundingMode rounding_mode)2264 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2265 roundingMode rounding_mode) {
2266 unsigned int partCount = Val.getNumWords();
2267 APInt api = Val;
2268
2269 sign = false;
2270 if (isSigned && api.isNegative()) {
2271 sign = true;
2272 api = -api;
2273 }
2274
2275 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2276 }
2277
2278 /* Convert a two's complement integer SRC to a floating point number,
2279 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2280 integer is signed, in which case it must be sign-extended. */
2281 IEEEFloat::opStatus
convertFromSignExtendedInteger(const integerPart * src,unsigned int srcCount,bool isSigned,roundingMode rounding_mode)2282 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2283 unsigned int srcCount, bool isSigned,
2284 roundingMode rounding_mode) {
2285 opStatus status;
2286
2287 if (isSigned &&
2288 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2289 integerPart *copy;
2290
2291 /* If we're signed and negative negate a copy. */
2292 sign = true;
2293 copy = new integerPart[srcCount];
2294 APInt::tcAssign(copy, src, srcCount);
2295 APInt::tcNegate(copy, srcCount);
2296 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2297 delete [] copy;
2298 } else {
2299 sign = false;
2300 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2301 }
2302
2303 return status;
2304 }
2305
2306 /* FIXME: should this just take a const APInt reference? */
2307 IEEEFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode)2308 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2309 unsigned int width, bool isSigned,
2310 roundingMode rounding_mode) {
2311 unsigned int partCount = partCountForBits(width);
2312 APInt api = APInt(width, makeArrayRef(parts, partCount));
2313
2314 sign = false;
2315 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2316 sign = true;
2317 api = -api;
2318 }
2319
2320 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2321 }
2322
2323 Expected<IEEEFloat::opStatus>
convertFromHexadecimalString(StringRef s,roundingMode rounding_mode)2324 IEEEFloat::convertFromHexadecimalString(StringRef s,
2325 roundingMode rounding_mode) {
2326 lostFraction lost_fraction = lfExactlyZero;
2327
2328 category = fcNormal;
2329 zeroSignificand();
2330 exponent = 0;
2331
2332 integerPart *significand = significandParts();
2333 unsigned partsCount = partCount();
2334 unsigned bitPos = partsCount * integerPartWidth;
2335 bool computedTrailingFraction = false;
2336
2337 // Skip leading zeroes and any (hexa)decimal point.
2338 StringRef::iterator begin = s.begin();
2339 StringRef::iterator end = s.end();
2340 StringRef::iterator dot;
2341 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2342 if (!PtrOrErr)
2343 return PtrOrErr.takeError();
2344 StringRef::iterator p = *PtrOrErr;
2345 StringRef::iterator firstSignificantDigit = p;
2346
2347 while (p != end) {
2348 integerPart hex_value;
2349
2350 if (*p == '.') {
2351 if (dot != end)
2352 return createError("String contains multiple dots");
2353 dot = p++;
2354 continue;
2355 }
2356
2357 hex_value = hexDigitValue(*p);
2358 if (hex_value == -1U)
2359 break;
2360
2361 p++;
2362
2363 // Store the number while we have space.
2364 if (bitPos) {
2365 bitPos -= 4;
2366 hex_value <<= bitPos % integerPartWidth;
2367 significand[bitPos / integerPartWidth] |= hex_value;
2368 } else if (!computedTrailingFraction) {
2369 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2370 if (!FractOrErr)
2371 return FractOrErr.takeError();
2372 lost_fraction = *FractOrErr;
2373 computedTrailingFraction = true;
2374 }
2375 }
2376
2377 /* Hex floats require an exponent but not a hexadecimal point. */
2378 if (p == end)
2379 return createError("Hex strings require an exponent");
2380 if (*p != 'p' && *p != 'P')
2381 return createError("Invalid character in significand");
2382 if (p == begin)
2383 return createError("Significand has no digits");
2384 if (dot != end && p - begin == 1)
2385 return createError("Significand has no digits");
2386
2387 /* Ignore the exponent if we are zero. */
2388 if (p != firstSignificantDigit) {
2389 int expAdjustment;
2390
2391 /* Implicit hexadecimal point? */
2392 if (dot == end)
2393 dot = p;
2394
2395 /* Calculate the exponent adjustment implicit in the number of
2396 significant digits. */
2397 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2398 if (expAdjustment < 0)
2399 expAdjustment++;
2400 expAdjustment = expAdjustment * 4 - 1;
2401
2402 /* Adjust for writing the significand starting at the most
2403 significant nibble. */
2404 expAdjustment += semantics->precision;
2405 expAdjustment -= partsCount * integerPartWidth;
2406
2407 /* Adjust for the given exponent. */
2408 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2409 if (!ExpOrErr)
2410 return ExpOrErr.takeError();
2411 exponent = *ExpOrErr;
2412 }
2413
2414 return normalize(rounding_mode, lost_fraction);
2415 }
2416
2417 IEEEFloat::opStatus
roundSignificandWithExponent(const integerPart * decSigParts,unsigned sigPartCount,int exp,roundingMode rounding_mode)2418 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2419 unsigned sigPartCount, int exp,
2420 roundingMode rounding_mode) {
2421 unsigned int parts, pow5PartCount;
2422 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2423 integerPart pow5Parts[maxPowerOfFiveParts];
2424 bool isNearest;
2425
2426 isNearest = (rounding_mode == rmNearestTiesToEven ||
2427 rounding_mode == rmNearestTiesToAway);
2428
2429 parts = partCountForBits(semantics->precision + 11);
2430
2431 /* Calculate pow(5, abs(exp)). */
2432 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2433
2434 for (;; parts *= 2) {
2435 opStatus sigStatus, powStatus;
2436 unsigned int excessPrecision, truncatedBits;
2437
2438 calcSemantics.precision = parts * integerPartWidth - 1;
2439 excessPrecision = calcSemantics.precision - semantics->precision;
2440 truncatedBits = excessPrecision;
2441
2442 IEEEFloat decSig(calcSemantics, uninitialized);
2443 decSig.makeZero(sign);
2444 IEEEFloat pow5(calcSemantics);
2445
2446 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2447 rmNearestTiesToEven);
2448 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2449 rmNearestTiesToEven);
2450 /* Add exp, as 10^n = 5^n * 2^n. */
2451 decSig.exponent += exp;
2452
2453 lostFraction calcLostFraction;
2454 integerPart HUerr, HUdistance;
2455 unsigned int powHUerr;
2456
2457 if (exp >= 0) {
2458 /* multiplySignificand leaves the precision-th bit set to 1. */
2459 calcLostFraction = decSig.multiplySignificand(pow5);
2460 powHUerr = powStatus != opOK;
2461 } else {
2462 calcLostFraction = decSig.divideSignificand(pow5);
2463 /* Denormal numbers have less precision. */
2464 if (decSig.exponent < semantics->minExponent) {
2465 excessPrecision += (semantics->minExponent - decSig.exponent);
2466 truncatedBits = excessPrecision;
2467 if (excessPrecision > calcSemantics.precision)
2468 excessPrecision = calcSemantics.precision;
2469 }
2470 /* Extra half-ulp lost in reciprocal of exponent. */
2471 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2472 }
2473
2474 /* Both multiplySignificand and divideSignificand return the
2475 result with the integer bit set. */
2476 assert(APInt::tcExtractBit
2477 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2478
2479 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2480 powHUerr);
2481 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2482 excessPrecision, isNearest);
2483
2484 /* Are we guaranteed to round correctly if we truncate? */
2485 if (HUdistance >= HUerr) {
2486 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2487 calcSemantics.precision - excessPrecision,
2488 excessPrecision);
2489 /* Take the exponent of decSig. If we tcExtract-ed less bits
2490 above we must adjust our exponent to compensate for the
2491 implicit right shift. */
2492 exponent = (decSig.exponent + semantics->precision
2493 - (calcSemantics.precision - excessPrecision));
2494 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2495 decSig.partCount(),
2496 truncatedBits);
2497 return normalize(rounding_mode, calcLostFraction);
2498 }
2499 }
2500 }
2501
2502 Expected<IEEEFloat::opStatus>
convertFromDecimalString(StringRef str,roundingMode rounding_mode)2503 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2504 decimalInfo D;
2505 opStatus fs;
2506
2507 /* Scan the text. */
2508 StringRef::iterator p = str.begin();
2509 if (Error Err = interpretDecimal(p, str.end(), &D))
2510 return std::move(Err);
2511
2512 /* Handle the quick cases. First the case of no significant digits,
2513 i.e. zero, and then exponents that are obviously too large or too
2514 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2515 definitely overflows if
2516
2517 (exp - 1) * L >= maxExponent
2518
2519 and definitely underflows to zero where
2520
2521 (exp + 1) * L <= minExponent - precision
2522
2523 With integer arithmetic the tightest bounds for L are
2524
2525 93/28 < L < 196/59 [ numerator <= 256 ]
2526 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2527 */
2528
2529 // Test if we have a zero number allowing for strings with no null terminators
2530 // and zero decimals with non-zero exponents.
2531 //
2532 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2533 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2534 // be at most one dot. On the other hand, if we have a zero with a non-zero
2535 // exponent, then we know that D.firstSigDigit will be non-numeric.
2536 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2537 category = fcZero;
2538 fs = opOK;
2539
2540 /* Check whether the normalized exponent is high enough to overflow
2541 max during the log-rebasing in the max-exponent check below. */
2542 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2543 fs = handleOverflow(rounding_mode);
2544
2545 /* If it wasn't, then it also wasn't high enough to overflow max
2546 during the log-rebasing in the min-exponent check. Check that it
2547 won't overflow min in either check, then perform the min-exponent
2548 check. */
2549 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2550 (D.normalizedExponent + 1) * 28738 <=
2551 8651 * (semantics->minExponent - (int) semantics->precision)) {
2552 /* Underflow to zero and round. */
2553 category = fcNormal;
2554 zeroSignificand();
2555 fs = normalize(rounding_mode, lfLessThanHalf);
2556
2557 /* We can finally safely perform the max-exponent check. */
2558 } else if ((D.normalizedExponent - 1) * 42039
2559 >= 12655 * semantics->maxExponent) {
2560 /* Overflow and round. */
2561 fs = handleOverflow(rounding_mode);
2562 } else {
2563 integerPart *decSignificand;
2564 unsigned int partCount;
2565
2566 /* A tight upper bound on number of bits required to hold an
2567 N-digit decimal integer is N * 196 / 59. Allocate enough space
2568 to hold the full significand, and an extra part required by
2569 tcMultiplyPart. */
2570 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2571 partCount = partCountForBits(1 + 196 * partCount / 59);
2572 decSignificand = new integerPart[partCount + 1];
2573 partCount = 0;
2574
2575 /* Convert to binary efficiently - we do almost all multiplication
2576 in an integerPart. When this would overflow do we do a single
2577 bignum multiplication, and then revert again to multiplication
2578 in an integerPart. */
2579 do {
2580 integerPart decValue, val, multiplier;
2581
2582 val = 0;
2583 multiplier = 1;
2584
2585 do {
2586 if (*p == '.') {
2587 p++;
2588 if (p == str.end()) {
2589 break;
2590 }
2591 }
2592 decValue = decDigitValue(*p++);
2593 if (decValue >= 10U) {
2594 delete[] decSignificand;
2595 return createError("Invalid character in significand");
2596 }
2597 multiplier *= 10;
2598 val = val * 10 + decValue;
2599 /* The maximum number that can be multiplied by ten with any
2600 digit added without overflowing an integerPart. */
2601 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2602
2603 /* Multiply out the current part. */
2604 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2605 partCount, partCount + 1, false);
2606
2607 /* If we used another part (likely but not guaranteed), increase
2608 the count. */
2609 if (decSignificand[partCount])
2610 partCount++;
2611 } while (p <= D.lastSigDigit);
2612
2613 category = fcNormal;
2614 fs = roundSignificandWithExponent(decSignificand, partCount,
2615 D.exponent, rounding_mode);
2616
2617 delete [] decSignificand;
2618 }
2619
2620 return fs;
2621 }
2622
convertFromStringSpecials(StringRef str)2623 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2624 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
2625 makeInf(false);
2626 return true;
2627 }
2628
2629 if (str.equals("-inf") || str.equals("-INFINITY") || str.equals("-Inf")) {
2630 makeInf(true);
2631 return true;
2632 }
2633
2634 if (str.equals("nan") || str.equals("NaN")) {
2635 makeNaN(false, false);
2636 return true;
2637 }
2638
2639 if (str.equals("-nan") || str.equals("-NaN")) {
2640 makeNaN(false, true);
2641 return true;
2642 }
2643
2644 return false;
2645 }
2646
2647 Expected<IEEEFloat::opStatus>
convertFromString(StringRef str,roundingMode rounding_mode)2648 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
2649 if (str.empty())
2650 return createError("Invalid string length");
2651
2652 // Handle special cases.
2653 if (convertFromStringSpecials(str))
2654 return opOK;
2655
2656 /* Handle a leading minus sign. */
2657 StringRef::iterator p = str.begin();
2658 size_t slen = str.size();
2659 sign = *p == '-' ? 1 : 0;
2660 if (*p == '-' || *p == '+') {
2661 p++;
2662 slen--;
2663 if (!slen)
2664 return createError("String has no digits");
2665 }
2666
2667 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2668 if (slen == 2)
2669 return createError("Invalid string");
2670 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2671 rounding_mode);
2672 }
2673
2674 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2675 }
2676
2677 /* Write out a hexadecimal representation of the floating point value
2678 to DST, which must be of sufficient size, in the C99 form
2679 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2680 excluding the terminating NUL.
2681
2682 If UPPERCASE, the output is in upper case, otherwise in lower case.
2683
2684 HEXDIGITS digits appear altogether, rounding the value if
2685 necessary. If HEXDIGITS is 0, the minimal precision to display the
2686 number precisely is used instead. If nothing would appear after
2687 the decimal point it is suppressed.
2688
2689 The decimal exponent is always printed and has at least one digit.
2690 Zero values display an exponent of zero. Infinities and NaNs
2691 appear as "infinity" or "nan" respectively.
2692
2693 The above rules are as specified by C99. There is ambiguity about
2694 what the leading hexadecimal digit should be. This implementation
2695 uses whatever is necessary so that the exponent is displayed as
2696 stored. This implies the exponent will fall within the IEEE format
2697 range, and the leading hexadecimal digit will be 0 (for denormals),
2698 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2699 any other digits zero).
2700 */
convertToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2701 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
2702 bool upperCase,
2703 roundingMode rounding_mode) const {
2704 char *p;
2705
2706 p = dst;
2707 if (sign)
2708 *dst++ = '-';
2709
2710 switch (category) {
2711 case fcInfinity:
2712 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2713 dst += sizeof infinityL - 1;
2714 break;
2715
2716 case fcNaN:
2717 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2718 dst += sizeof NaNU - 1;
2719 break;
2720
2721 case fcZero:
2722 *dst++ = '0';
2723 *dst++ = upperCase ? 'X': 'x';
2724 *dst++ = '0';
2725 if (hexDigits > 1) {
2726 *dst++ = '.';
2727 memset (dst, '0', hexDigits - 1);
2728 dst += hexDigits - 1;
2729 }
2730 *dst++ = upperCase ? 'P': 'p';
2731 *dst++ = '0';
2732 break;
2733
2734 case fcNormal:
2735 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2736 break;
2737 }
2738
2739 *dst = 0;
2740
2741 return static_cast<unsigned int>(dst - p);
2742 }
2743
2744 /* Does the hard work of outputting the correctly rounded hexadecimal
2745 form of a normal floating point number with the specified number of
2746 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2747 digits necessary to print the value precisely is output. */
convertNormalToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2748 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2749 bool upperCase,
2750 roundingMode rounding_mode) const {
2751 unsigned int count, valueBits, shift, partsCount, outputDigits;
2752 const char *hexDigitChars;
2753 const integerPart *significand;
2754 char *p;
2755 bool roundUp;
2756
2757 *dst++ = '0';
2758 *dst++ = upperCase ? 'X': 'x';
2759
2760 roundUp = false;
2761 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2762
2763 significand = significandParts();
2764 partsCount = partCount();
2765
2766 /* +3 because the first digit only uses the single integer bit, so
2767 we have 3 virtual zero most-significant-bits. */
2768 valueBits = semantics->precision + 3;
2769 shift = integerPartWidth - valueBits % integerPartWidth;
2770
2771 /* The natural number of digits required ignoring trailing
2772 insignificant zeroes. */
2773 outputDigits = (valueBits - significandLSB () + 3) / 4;
2774
2775 /* hexDigits of zero means use the required number for the
2776 precision. Otherwise, see if we are truncating. If we are,
2777 find out if we need to round away from zero. */
2778 if (hexDigits) {
2779 if (hexDigits < outputDigits) {
2780 /* We are dropping non-zero bits, so need to check how to round.
2781 "bits" is the number of dropped bits. */
2782 unsigned int bits;
2783 lostFraction fraction;
2784
2785 bits = valueBits - hexDigits * 4;
2786 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2787 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2788 }
2789 outputDigits = hexDigits;
2790 }
2791
2792 /* Write the digits consecutively, and start writing in the location
2793 of the hexadecimal point. We move the most significant digit
2794 left and add the hexadecimal point later. */
2795 p = ++dst;
2796
2797 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2798
2799 while (outputDigits && count) {
2800 integerPart part;
2801
2802 /* Put the most significant integerPartWidth bits in "part". */
2803 if (--count == partsCount)
2804 part = 0; /* An imaginary higher zero part. */
2805 else
2806 part = significand[count] << shift;
2807
2808 if (count && shift)
2809 part |= significand[count - 1] >> (integerPartWidth - shift);
2810
2811 /* Convert as much of "part" to hexdigits as we can. */
2812 unsigned int curDigits = integerPartWidth / 4;
2813
2814 if (curDigits > outputDigits)
2815 curDigits = outputDigits;
2816 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2817 outputDigits -= curDigits;
2818 }
2819
2820 if (roundUp) {
2821 char *q = dst;
2822
2823 /* Note that hexDigitChars has a trailing '0'. */
2824 do {
2825 q--;
2826 *q = hexDigitChars[hexDigitValue (*q) + 1];
2827 } while (*q == '0');
2828 assert(q >= p);
2829 } else {
2830 /* Add trailing zeroes. */
2831 memset (dst, '0', outputDigits);
2832 dst += outputDigits;
2833 }
2834
2835 /* Move the most significant digit to before the point, and if there
2836 is something after the decimal point add it. This must come
2837 after rounding above. */
2838 p[-1] = p[0];
2839 if (dst -1 == p)
2840 dst--;
2841 else
2842 p[0] = '.';
2843
2844 /* Finally output the exponent. */
2845 *dst++ = upperCase ? 'P': 'p';
2846
2847 return writeSignedDecimal (dst, exponent);
2848 }
2849
hash_value(const IEEEFloat & Arg)2850 hash_code hash_value(const IEEEFloat &Arg) {
2851 if (!Arg.isFiniteNonZero())
2852 return hash_combine((uint8_t)Arg.category,
2853 // NaN has no sign, fix it at zero.
2854 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2855 Arg.semantics->precision);
2856
2857 // Normal floats need their exponent and significand hashed.
2858 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2859 Arg.semantics->precision, Arg.exponent,
2860 hash_combine_range(
2861 Arg.significandParts(),
2862 Arg.significandParts() + Arg.partCount()));
2863 }
2864
2865 // Conversion from APFloat to/from host float/double. It may eventually be
2866 // possible to eliminate these and have everybody deal with APFloats, but that
2867 // will take a while. This approach will not easily extend to long double.
2868 // Current implementation requires integerPartWidth==64, which is correct at
2869 // the moment but could be made more general.
2870
2871 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2872 // the actual IEEE respresentations. We compensate for that here.
2873
convertF80LongDoubleAPFloatToAPInt() const2874 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
2875 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
2876 assert(partCount()==2);
2877
2878 uint64_t myexponent, mysignificand;
2879
2880 if (isFiniteNonZero()) {
2881 myexponent = exponent+16383; //bias
2882 mysignificand = significandParts()[0];
2883 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2884 myexponent = 0; // denormal
2885 } else if (category==fcZero) {
2886 myexponent = 0;
2887 mysignificand = 0;
2888 } else if (category==fcInfinity) {
2889 myexponent = 0x7fff;
2890 mysignificand = 0x8000000000000000ULL;
2891 } else {
2892 assert(category == fcNaN && "Unknown category");
2893 myexponent = 0x7fff;
2894 mysignificand = significandParts()[0];
2895 }
2896
2897 uint64_t words[2];
2898 words[0] = mysignificand;
2899 words[1] = ((uint64_t)(sign & 1) << 15) |
2900 (myexponent & 0x7fffLL);
2901 return APInt(80, words);
2902 }
2903
convertPPCDoubleDoubleAPFloatToAPInt() const2904 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
2905 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
2906 assert(partCount()==2);
2907
2908 uint64_t words[2];
2909 opStatus fs;
2910 bool losesInfo;
2911
2912 // Convert number to double. To avoid spurious underflows, we re-
2913 // normalize against the "double" minExponent first, and only *then*
2914 // truncate the mantissa. The result of that second conversion
2915 // may be inexact, but should never underflow.
2916 // Declare fltSemantics before APFloat that uses it (and
2917 // saves pointer to it) to ensure correct destruction order.
2918 fltSemantics extendedSemantics = *semantics;
2919 extendedSemantics.minExponent = semIEEEdouble.minExponent;
2920 IEEEFloat extended(*this);
2921 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2922 assert(fs == opOK && !losesInfo);
2923 (void)fs;
2924
2925 IEEEFloat u(extended);
2926 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2927 assert(fs == opOK || fs == opInexact);
2928 (void)fs;
2929 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2930
2931 // If conversion was exact or resulted in a special case, we're done;
2932 // just set the second double to zero. Otherwise, re-convert back to
2933 // the extended format and compute the difference. This now should
2934 // convert exactly to double.
2935 if (u.isFiniteNonZero() && losesInfo) {
2936 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2937 assert(fs == opOK && !losesInfo);
2938 (void)fs;
2939
2940 IEEEFloat v(extended);
2941 v.subtract(u, rmNearestTiesToEven);
2942 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2943 assert(fs == opOK && !losesInfo);
2944 (void)fs;
2945 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2946 } else {
2947 words[1] = 0;
2948 }
2949
2950 return APInt(128, words);
2951 }
2952
convertQuadrupleAPFloatToAPInt() const2953 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
2954 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
2955 assert(partCount()==2);
2956
2957 uint64_t myexponent, mysignificand, mysignificand2;
2958
2959 if (isFiniteNonZero()) {
2960 myexponent = exponent+16383; //bias
2961 mysignificand = significandParts()[0];
2962 mysignificand2 = significandParts()[1];
2963 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2964 myexponent = 0; // denormal
2965 } else if (category==fcZero) {
2966 myexponent = 0;
2967 mysignificand = mysignificand2 = 0;
2968 } else if (category==fcInfinity) {
2969 myexponent = 0x7fff;
2970 mysignificand = mysignificand2 = 0;
2971 } else {
2972 assert(category == fcNaN && "Unknown category!");
2973 myexponent = 0x7fff;
2974 mysignificand = significandParts()[0];
2975 mysignificand2 = significandParts()[1];
2976 }
2977
2978 uint64_t words[2];
2979 words[0] = mysignificand;
2980 words[1] = ((uint64_t)(sign & 1) << 63) |
2981 ((myexponent & 0x7fff) << 48) |
2982 (mysignificand2 & 0xffffffffffffLL);
2983
2984 return APInt(128, words);
2985 }
2986
convertDoubleAPFloatToAPInt() const2987 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
2988 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
2989 assert(partCount()==1);
2990
2991 uint64_t myexponent, mysignificand;
2992
2993 if (isFiniteNonZero()) {
2994 myexponent = exponent+1023; //bias
2995 mysignificand = *significandParts();
2996 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2997 myexponent = 0; // denormal
2998 } else if (category==fcZero) {
2999 myexponent = 0;
3000 mysignificand = 0;
3001 } else if (category==fcInfinity) {
3002 myexponent = 0x7ff;
3003 mysignificand = 0;
3004 } else {
3005 assert(category == fcNaN && "Unknown category!");
3006 myexponent = 0x7ff;
3007 mysignificand = *significandParts();
3008 }
3009
3010 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
3011 ((myexponent & 0x7ff) << 52) |
3012 (mysignificand & 0xfffffffffffffLL))));
3013 }
3014
convertFloatAPFloatToAPInt() const3015 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3016 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
3017 assert(partCount()==1);
3018
3019 uint32_t myexponent, mysignificand;
3020
3021 if (isFiniteNonZero()) {
3022 myexponent = exponent+127; //bias
3023 mysignificand = (uint32_t)*significandParts();
3024 if (myexponent == 1 && !(mysignificand & 0x800000))
3025 myexponent = 0; // denormal
3026 } else if (category==fcZero) {
3027 myexponent = 0;
3028 mysignificand = 0;
3029 } else if (category==fcInfinity) {
3030 myexponent = 0xff;
3031 mysignificand = 0;
3032 } else {
3033 assert(category == fcNaN && "Unknown category!");
3034 myexponent = 0xff;
3035 mysignificand = (uint32_t)*significandParts();
3036 }
3037
3038 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3039 (mysignificand & 0x7fffff)));
3040 }
3041
convertHalfAPFloatToAPInt() const3042 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3043 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
3044 assert(partCount()==1);
3045
3046 uint32_t myexponent, mysignificand;
3047
3048 if (isFiniteNonZero()) {
3049 myexponent = exponent+15; //bias
3050 mysignificand = (uint32_t)*significandParts();
3051 if (myexponent == 1 && !(mysignificand & 0x400))
3052 myexponent = 0; // denormal
3053 } else if (category==fcZero) {
3054 myexponent = 0;
3055 mysignificand = 0;
3056 } else if (category==fcInfinity) {
3057 myexponent = 0x1f;
3058 mysignificand = 0;
3059 } else {
3060 assert(category == fcNaN && "Unknown category!");
3061 myexponent = 0x1f;
3062 mysignificand = (uint32_t)*significandParts();
3063 }
3064
3065 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3066 (mysignificand & 0x3ff)));
3067 }
3068
3069 // This function creates an APInt that is just a bit map of the floating
3070 // point constant as it would appear in memory. It is not a conversion,
3071 // and treating the result as a normal integer is unlikely to be useful.
3072
bitcastToAPInt() const3073 APInt IEEEFloat::bitcastToAPInt() const {
3074 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3075 return convertHalfAPFloatToAPInt();
3076
3077 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3078 return convertFloatAPFloatToAPInt();
3079
3080 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3081 return convertDoubleAPFloatToAPInt();
3082
3083 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3084 return convertQuadrupleAPFloatToAPInt();
3085
3086 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3087 return convertPPCDoubleDoubleAPFloatToAPInt();
3088
3089 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3090 "unknown format!");
3091 return convertF80LongDoubleAPFloatToAPInt();
3092 }
3093
convertToFloat() const3094 float IEEEFloat::convertToFloat() const {
3095 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3096 "Float semantics are not IEEEsingle");
3097 APInt api = bitcastToAPInt();
3098 return api.bitsToFloat();
3099 }
3100
convertToDouble() const3101 double IEEEFloat::convertToDouble() const {
3102 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3103 "Float semantics are not IEEEdouble");
3104 APInt api = bitcastToAPInt();
3105 return api.bitsToDouble();
3106 }
3107
3108 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3109 /// does not support these bit patterns:
3110 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3111 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3112 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3113 /// exponent = 0, integer bit 1 ("pseudodenormal")
3114 /// At the moment, the first three are treated as NaNs, the last one as Normal.
initFromF80LongDoubleAPInt(const APInt & api)3115 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3116 assert(api.getBitWidth()==80);
3117 uint64_t i1 = api.getRawData()[0];
3118 uint64_t i2 = api.getRawData()[1];
3119 uint64_t myexponent = (i2 & 0x7fff);
3120 uint64_t mysignificand = i1;
3121 uint8_t myintegerbit = mysignificand >> 63;
3122
3123 initialize(&semX87DoubleExtended);
3124 assert(partCount()==2);
3125
3126 sign = static_cast<unsigned int>(i2>>15);
3127 if (myexponent == 0 && mysignificand == 0) {
3128 // exponent, significand meaningless
3129 category = fcZero;
3130 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3131 // exponent, significand meaningless
3132 category = fcInfinity;
3133 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3134 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3135 // exponent meaningless
3136 category = fcNaN;
3137 significandParts()[0] = mysignificand;
3138 significandParts()[1] = 0;
3139 } else {
3140 category = fcNormal;
3141 exponent = myexponent - 16383;
3142 significandParts()[0] = mysignificand;
3143 significandParts()[1] = 0;
3144 if (myexponent==0) // denormal
3145 exponent = -16382;
3146 }
3147 }
3148
initFromPPCDoubleDoubleAPInt(const APInt & api)3149 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3150 assert(api.getBitWidth()==128);
3151 uint64_t i1 = api.getRawData()[0];
3152 uint64_t i2 = api.getRawData()[1];
3153 opStatus fs;
3154 bool losesInfo;
3155
3156 // Get the first double and convert to our format.
3157 initFromDoubleAPInt(APInt(64, i1));
3158 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3159 assert(fs == opOK && !losesInfo);
3160 (void)fs;
3161
3162 // Unless we have a special case, add in second double.
3163 if (isFiniteNonZero()) {
3164 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3165 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3166 assert(fs == opOK && !losesInfo);
3167 (void)fs;
3168
3169 add(v, rmNearestTiesToEven);
3170 }
3171 }
3172
initFromQuadrupleAPInt(const APInt & api)3173 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3174 assert(api.getBitWidth()==128);
3175 uint64_t i1 = api.getRawData()[0];
3176 uint64_t i2 = api.getRawData()[1];
3177 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3178 uint64_t mysignificand = i1;
3179 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3180
3181 initialize(&semIEEEquad);
3182 assert(partCount()==2);
3183
3184 sign = static_cast<unsigned int>(i2>>63);
3185 if (myexponent==0 &&
3186 (mysignificand==0 && mysignificand2==0)) {
3187 // exponent, significand meaningless
3188 category = fcZero;
3189 } else if (myexponent==0x7fff &&
3190 (mysignificand==0 && mysignificand2==0)) {
3191 // exponent, significand meaningless
3192 category = fcInfinity;
3193 } else if (myexponent==0x7fff &&
3194 (mysignificand!=0 || mysignificand2 !=0)) {
3195 // exponent meaningless
3196 category = fcNaN;
3197 significandParts()[0] = mysignificand;
3198 significandParts()[1] = mysignificand2;
3199 } else {
3200 category = fcNormal;
3201 exponent = myexponent - 16383;
3202 significandParts()[0] = mysignificand;
3203 significandParts()[1] = mysignificand2;
3204 if (myexponent==0) // denormal
3205 exponent = -16382;
3206 else
3207 significandParts()[1] |= 0x1000000000000LL; // integer bit
3208 }
3209 }
3210
initFromDoubleAPInt(const APInt & api)3211 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3212 assert(api.getBitWidth()==64);
3213 uint64_t i = *api.getRawData();
3214 uint64_t myexponent = (i >> 52) & 0x7ff;
3215 uint64_t mysignificand = i & 0xfffffffffffffLL;
3216
3217 initialize(&semIEEEdouble);
3218 assert(partCount()==1);
3219
3220 sign = static_cast<unsigned int>(i>>63);
3221 if (myexponent==0 && mysignificand==0) {
3222 // exponent, significand meaningless
3223 category = fcZero;
3224 } else if (myexponent==0x7ff && mysignificand==0) {
3225 // exponent, significand meaningless
3226 category = fcInfinity;
3227 } else if (myexponent==0x7ff && mysignificand!=0) {
3228 // exponent meaningless
3229 category = fcNaN;
3230 *significandParts() = mysignificand;
3231 } else {
3232 category = fcNormal;
3233 exponent = myexponent - 1023;
3234 *significandParts() = mysignificand;
3235 if (myexponent==0) // denormal
3236 exponent = -1022;
3237 else
3238 *significandParts() |= 0x10000000000000LL; // integer bit
3239 }
3240 }
3241
initFromFloatAPInt(const APInt & api)3242 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3243 assert(api.getBitWidth()==32);
3244 uint32_t i = (uint32_t)*api.getRawData();
3245 uint32_t myexponent = (i >> 23) & 0xff;
3246 uint32_t mysignificand = i & 0x7fffff;
3247
3248 initialize(&semIEEEsingle);
3249 assert(partCount()==1);
3250
3251 sign = i >> 31;
3252 if (myexponent==0 && mysignificand==0) {
3253 // exponent, significand meaningless
3254 category = fcZero;
3255 } else if (myexponent==0xff && mysignificand==0) {
3256 // exponent, significand meaningless
3257 category = fcInfinity;
3258 } else if (myexponent==0xff && mysignificand!=0) {
3259 // sign, exponent, significand meaningless
3260 category = fcNaN;
3261 *significandParts() = mysignificand;
3262 } else {
3263 category = fcNormal;
3264 exponent = myexponent - 127; //bias
3265 *significandParts() = mysignificand;
3266 if (myexponent==0) // denormal
3267 exponent = -126;
3268 else
3269 *significandParts() |= 0x800000; // integer bit
3270 }
3271 }
3272
initFromHalfAPInt(const APInt & api)3273 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3274 assert(api.getBitWidth()==16);
3275 uint32_t i = (uint32_t)*api.getRawData();
3276 uint32_t myexponent = (i >> 10) & 0x1f;
3277 uint32_t mysignificand = i & 0x3ff;
3278
3279 initialize(&semIEEEhalf);
3280 assert(partCount()==1);
3281
3282 sign = i >> 15;
3283 if (myexponent==0 && mysignificand==0) {
3284 // exponent, significand meaningless
3285 category = fcZero;
3286 } else if (myexponent==0x1f && mysignificand==0) {
3287 // exponent, significand meaningless
3288 category = fcInfinity;
3289 } else if (myexponent==0x1f && mysignificand!=0) {
3290 // sign, exponent, significand meaningless
3291 category = fcNaN;
3292 *significandParts() = mysignificand;
3293 } else {
3294 category = fcNormal;
3295 exponent = myexponent - 15; //bias
3296 *significandParts() = mysignificand;
3297 if (myexponent==0) // denormal
3298 exponent = -14;
3299 else
3300 *significandParts() |= 0x400; // integer bit
3301 }
3302 }
3303
3304 /// Treat api as containing the bits of a floating point number. Currently
3305 /// we infer the floating point type from the size of the APInt. The
3306 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3307 /// when the size is anything else).
initFromAPInt(const fltSemantics * Sem,const APInt & api)3308 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3309 if (Sem == &semIEEEhalf)
3310 return initFromHalfAPInt(api);
3311 if (Sem == &semIEEEsingle)
3312 return initFromFloatAPInt(api);
3313 if (Sem == &semIEEEdouble)
3314 return initFromDoubleAPInt(api);
3315 if (Sem == &semX87DoubleExtended)
3316 return initFromF80LongDoubleAPInt(api);
3317 if (Sem == &semIEEEquad)
3318 return initFromQuadrupleAPInt(api);
3319 if (Sem == &semPPCDoubleDoubleLegacy)
3320 return initFromPPCDoubleDoubleAPInt(api);
3321
3322 llvm_unreachable(nullptr);
3323 }
3324
3325 /// Make this number the largest magnitude normal number in the given
3326 /// semantics.
makeLargest(bool Negative)3327 void IEEEFloat::makeLargest(bool Negative) {
3328 // We want (in interchange format):
3329 // sign = {Negative}
3330 // exponent = 1..10
3331 // significand = 1..1
3332 category = fcNormal;
3333 sign = Negative;
3334 exponent = semantics->maxExponent;
3335
3336 // Use memset to set all but the highest integerPart to all ones.
3337 integerPart *significand = significandParts();
3338 unsigned PartCount = partCount();
3339 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3340
3341 // Set the high integerPart especially setting all unused top bits for
3342 // internal consistency.
3343 const unsigned NumUnusedHighBits =
3344 PartCount*integerPartWidth - semantics->precision;
3345 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3346 ? (~integerPart(0) >> NumUnusedHighBits)
3347 : 0;
3348 }
3349
3350 /// Make this number the smallest magnitude denormal number in the given
3351 /// semantics.
makeSmallest(bool Negative)3352 void IEEEFloat::makeSmallest(bool Negative) {
3353 // We want (in interchange format):
3354 // sign = {Negative}
3355 // exponent = 0..0
3356 // significand = 0..01
3357 category = fcNormal;
3358 sign = Negative;
3359 exponent = semantics->minExponent;
3360 APInt::tcSet(significandParts(), 1, partCount());
3361 }
3362
makeSmallestNormalized(bool Negative)3363 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3364 // We want (in interchange format):
3365 // sign = {Negative}
3366 // exponent = 0..0
3367 // significand = 10..0
3368
3369 category = fcNormal;
3370 zeroSignificand();
3371 sign = Negative;
3372 exponent = semantics->minExponent;
3373 significandParts()[partCountForBits(semantics->precision) - 1] |=
3374 (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth));
3375 }
3376
IEEEFloat(const fltSemantics & Sem,const APInt & API)3377 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3378 initFromAPInt(&Sem, API);
3379 }
3380
IEEEFloat(float f)3381 IEEEFloat::IEEEFloat(float f) {
3382 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3383 }
3384
IEEEFloat(double d)3385 IEEEFloat::IEEEFloat(double d) {
3386 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3387 }
3388
3389 namespace {
append(SmallVectorImpl<char> & Buffer,StringRef Str)3390 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3391 Buffer.append(Str.begin(), Str.end());
3392 }
3393
3394 /// Removes data from the given significand until it is no more
3395 /// precise than is required for the desired precision.
AdjustToPrecision(APInt & significand,int & exp,unsigned FormatPrecision)3396 void AdjustToPrecision(APInt &significand,
3397 int &exp, unsigned FormatPrecision) {
3398 unsigned bits = significand.getActiveBits();
3399
3400 // 196/59 is a very slight overestimate of lg_2(10).
3401 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3402
3403 if (bits <= bitsRequired) return;
3404
3405 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3406 if (!tensRemovable) return;
3407
3408 exp += tensRemovable;
3409
3410 APInt divisor(significand.getBitWidth(), 1);
3411 APInt powten(significand.getBitWidth(), 10);
3412 while (true) {
3413 if (tensRemovable & 1)
3414 divisor *= powten;
3415 tensRemovable >>= 1;
3416 if (!tensRemovable) break;
3417 powten *= powten;
3418 }
3419
3420 significand = significand.udiv(divisor);
3421
3422 // Truncate the significand down to its active bit count.
3423 significand = significand.trunc(significand.getActiveBits());
3424 }
3425
3426
AdjustToPrecision(SmallVectorImpl<char> & buffer,int & exp,unsigned FormatPrecision)3427 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3428 int &exp, unsigned FormatPrecision) {
3429 unsigned N = buffer.size();
3430 if (N <= FormatPrecision) return;
3431
3432 // The most significant figures are the last ones in the buffer.
3433 unsigned FirstSignificant = N - FormatPrecision;
3434
3435 // Round.
3436 // FIXME: this probably shouldn't use 'round half up'.
3437
3438 // Rounding down is just a truncation, except we also want to drop
3439 // trailing zeros from the new result.
3440 if (buffer[FirstSignificant - 1] < '5') {
3441 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3442 FirstSignificant++;
3443
3444 exp += FirstSignificant;
3445 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3446 return;
3447 }
3448
3449 // Rounding up requires a decimal add-with-carry. If we continue
3450 // the carry, the newly-introduced zeros will just be truncated.
3451 for (unsigned I = FirstSignificant; I != N; ++I) {
3452 if (buffer[I] == '9') {
3453 FirstSignificant++;
3454 } else {
3455 buffer[I]++;
3456 break;
3457 }
3458 }
3459
3460 // If we carried through, we have exactly one digit of precision.
3461 if (FirstSignificant == N) {
3462 exp += FirstSignificant;
3463 buffer.clear();
3464 buffer.push_back('1');
3465 return;
3466 }
3467
3468 exp += FirstSignificant;
3469 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3470 }
3471 }
3472
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const3473 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
3474 unsigned FormatMaxPadding, bool TruncateZero) const {
3475 switch (category) {
3476 case fcInfinity:
3477 if (isNegative())
3478 return append(Str, "-Inf");
3479 else
3480 return append(Str, "+Inf");
3481
3482 case fcNaN: return append(Str, "NaN");
3483
3484 case fcZero:
3485 if (isNegative())
3486 Str.push_back('-');
3487
3488 if (!FormatMaxPadding) {
3489 if (TruncateZero)
3490 append(Str, "0.0E+0");
3491 else {
3492 append(Str, "0.0");
3493 if (FormatPrecision > 1)
3494 Str.append(FormatPrecision - 1, '0');
3495 append(Str, "e+00");
3496 }
3497 } else
3498 Str.push_back('0');
3499 return;
3500
3501 case fcNormal:
3502 break;
3503 }
3504
3505 if (isNegative())
3506 Str.push_back('-');
3507
3508 // Decompose the number into an APInt and an exponent.
3509 int exp = exponent - ((int) semantics->precision - 1);
3510 APInt significand(semantics->precision,
3511 makeArrayRef(significandParts(),
3512 partCountForBits(semantics->precision)));
3513
3514 // Set FormatPrecision if zero. We want to do this before we
3515 // truncate trailing zeros, as those are part of the precision.
3516 if (!FormatPrecision) {
3517 // We use enough digits so the number can be round-tripped back to an
3518 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3519 // Accurately" by Steele and White.
3520 // FIXME: Using a formula based purely on the precision is conservative;
3521 // we can print fewer digits depending on the actual value being printed.
3522
3523 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3524 FormatPrecision = 2 + semantics->precision * 59 / 196;
3525 }
3526
3527 // Ignore trailing binary zeros.
3528 int trailingZeros = significand.countTrailingZeros();
3529 exp += trailingZeros;
3530 significand.lshrInPlace(trailingZeros);
3531
3532 // Change the exponent from 2^e to 10^e.
3533 if (exp == 0) {
3534 // Nothing to do.
3535 } else if (exp > 0) {
3536 // Just shift left.
3537 significand = significand.zext(semantics->precision + exp);
3538 significand <<= exp;
3539 exp = 0;
3540 } else { /* exp < 0 */
3541 int texp = -exp;
3542
3543 // We transform this using the identity:
3544 // (N)(2^-e) == (N)(5^e)(10^-e)
3545 // This means we have to multiply N (the significand) by 5^e.
3546 // To avoid overflow, we have to operate on numbers large
3547 // enough to store N * 5^e:
3548 // log2(N * 5^e) == log2(N) + e * log2(5)
3549 // <= semantics->precision + e * 137 / 59
3550 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3551
3552 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3553
3554 // Multiply significand by 5^e.
3555 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3556 significand = significand.zext(precision);
3557 APInt five_to_the_i(precision, 5);
3558 while (true) {
3559 if (texp & 1) significand *= five_to_the_i;
3560
3561 texp >>= 1;
3562 if (!texp) break;
3563 five_to_the_i *= five_to_the_i;
3564 }
3565 }
3566
3567 AdjustToPrecision(significand, exp, FormatPrecision);
3568
3569 SmallVector<char, 256> buffer;
3570
3571 // Fill the buffer.
3572 unsigned precision = significand.getBitWidth();
3573 APInt ten(precision, 10);
3574 APInt digit(precision, 0);
3575
3576 bool inTrail = true;
3577 while (significand != 0) {
3578 // digit <- significand % 10
3579 // significand <- significand / 10
3580 APInt::udivrem(significand, ten, significand, digit);
3581
3582 unsigned d = digit.getZExtValue();
3583
3584 // Drop trailing zeros.
3585 if (inTrail && !d) exp++;
3586 else {
3587 buffer.push_back((char) ('0' + d));
3588 inTrail = false;
3589 }
3590 }
3591
3592 assert(!buffer.empty() && "no characters in buffer!");
3593
3594 // Drop down to FormatPrecision.
3595 // TODO: don't do more precise calculations above than are required.
3596 AdjustToPrecision(buffer, exp, FormatPrecision);
3597
3598 unsigned NDigits = buffer.size();
3599
3600 // Check whether we should use scientific notation.
3601 bool FormatScientific;
3602 if (!FormatMaxPadding)
3603 FormatScientific = true;
3604 else {
3605 if (exp >= 0) {
3606 // 765e3 --> 765000
3607 // ^^^
3608 // But we shouldn't make the number look more precise than it is.
3609 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3610 NDigits + (unsigned) exp > FormatPrecision);
3611 } else {
3612 // Power of the most significant digit.
3613 int MSD = exp + (int) (NDigits - 1);
3614 if (MSD >= 0) {
3615 // 765e-2 == 7.65
3616 FormatScientific = false;
3617 } else {
3618 // 765e-5 == 0.00765
3619 // ^ ^^
3620 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3621 }
3622 }
3623 }
3624
3625 // Scientific formatting is pretty straightforward.
3626 if (FormatScientific) {
3627 exp += (NDigits - 1);
3628
3629 Str.push_back(buffer[NDigits-1]);
3630 Str.push_back('.');
3631 if (NDigits == 1 && TruncateZero)
3632 Str.push_back('0');
3633 else
3634 for (unsigned I = 1; I != NDigits; ++I)
3635 Str.push_back(buffer[NDigits-1-I]);
3636 // Fill with zeros up to FormatPrecision.
3637 if (!TruncateZero && FormatPrecision > NDigits - 1)
3638 Str.append(FormatPrecision - NDigits + 1, '0');
3639 // For !TruncateZero we use lower 'e'.
3640 Str.push_back(TruncateZero ? 'E' : 'e');
3641
3642 Str.push_back(exp >= 0 ? '+' : '-');
3643 if (exp < 0) exp = -exp;
3644 SmallVector<char, 6> expbuf;
3645 do {
3646 expbuf.push_back((char) ('0' + (exp % 10)));
3647 exp /= 10;
3648 } while (exp);
3649 // Exponent always at least two digits if we do not truncate zeros.
3650 if (!TruncateZero && expbuf.size() < 2)
3651 expbuf.push_back('0');
3652 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3653 Str.push_back(expbuf[E-1-I]);
3654 return;
3655 }
3656
3657 // Non-scientific, positive exponents.
3658 if (exp >= 0) {
3659 for (unsigned I = 0; I != NDigits; ++I)
3660 Str.push_back(buffer[NDigits-1-I]);
3661 for (unsigned I = 0; I != (unsigned) exp; ++I)
3662 Str.push_back('0');
3663 return;
3664 }
3665
3666 // Non-scientific, negative exponents.
3667
3668 // The number of digits to the left of the decimal point.
3669 int NWholeDigits = exp + (int) NDigits;
3670
3671 unsigned I = 0;
3672 if (NWholeDigits > 0) {
3673 for (; I != (unsigned) NWholeDigits; ++I)
3674 Str.push_back(buffer[NDigits-I-1]);
3675 Str.push_back('.');
3676 } else {
3677 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3678
3679 Str.push_back('0');
3680 Str.push_back('.');
3681 for (unsigned Z = 1; Z != NZeros; ++Z)
3682 Str.push_back('0');
3683 }
3684
3685 for (; I != NDigits; ++I)
3686 Str.push_back(buffer[NDigits-I-1]);
3687 }
3688
getExactInverse(APFloat * inv) const3689 bool IEEEFloat::getExactInverse(APFloat *inv) const {
3690 // Special floats and denormals have no exact inverse.
3691 if (!isFiniteNonZero())
3692 return false;
3693
3694 // Check that the number is a power of two by making sure that only the
3695 // integer bit is set in the significand.
3696 if (significandLSB() != semantics->precision - 1)
3697 return false;
3698
3699 // Get the inverse.
3700 IEEEFloat reciprocal(*semantics, 1ULL);
3701 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3702 return false;
3703
3704 // Avoid multiplication with a denormal, it is not safe on all platforms and
3705 // may be slower than a normal division.
3706 if (reciprocal.isDenormal())
3707 return false;
3708
3709 assert(reciprocal.isFiniteNonZero() &&
3710 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3711
3712 if (inv)
3713 *inv = APFloat(reciprocal, *semantics);
3714
3715 return true;
3716 }
3717
isSignaling() const3718 bool IEEEFloat::isSignaling() const {
3719 if (!isNaN())
3720 return false;
3721
3722 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3723 // first bit of the trailing significand being 0.
3724 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3725 }
3726
3727 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3728 ///
3729 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3730 /// appropriate sign switching before/after the computation.
next(bool nextDown)3731 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
3732 // If we are performing nextDown, swap sign so we have -x.
3733 if (nextDown)
3734 changeSign();
3735
3736 // Compute nextUp(x)
3737 opStatus result = opOK;
3738
3739 // Handle each float category separately.
3740 switch (category) {
3741 case fcInfinity:
3742 // nextUp(+inf) = +inf
3743 if (!isNegative())
3744 break;
3745 // nextUp(-inf) = -getLargest()
3746 makeLargest(true);
3747 break;
3748 case fcNaN:
3749 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3750 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3751 // change the payload.
3752 if (isSignaling()) {
3753 result = opInvalidOp;
3754 // For consistency, propagate the sign of the sNaN to the qNaN.
3755 makeNaN(false, isNegative(), nullptr);
3756 }
3757 break;
3758 case fcZero:
3759 // nextUp(pm 0) = +getSmallest()
3760 makeSmallest(false);
3761 break;
3762 case fcNormal:
3763 // nextUp(-getSmallest()) = -0
3764 if (isSmallest() && isNegative()) {
3765 APInt::tcSet(significandParts(), 0, partCount());
3766 category = fcZero;
3767 exponent = 0;
3768 break;
3769 }
3770
3771 // nextUp(getLargest()) == INFINITY
3772 if (isLargest() && !isNegative()) {
3773 APInt::tcSet(significandParts(), 0, partCount());
3774 category = fcInfinity;
3775 exponent = semantics->maxExponent + 1;
3776 break;
3777 }
3778
3779 // nextUp(normal) == normal + inc.
3780 if (isNegative()) {
3781 // If we are negative, we need to decrement the significand.
3782
3783 // We only cross a binade boundary that requires adjusting the exponent
3784 // if:
3785 // 1. exponent != semantics->minExponent. This implies we are not in the
3786 // smallest binade or are dealing with denormals.
3787 // 2. Our significand excluding the integral bit is all zeros.
3788 bool WillCrossBinadeBoundary =
3789 exponent != semantics->minExponent && isSignificandAllZeros();
3790
3791 // Decrement the significand.
3792 //
3793 // We always do this since:
3794 // 1. If we are dealing with a non-binade decrement, by definition we
3795 // just decrement the significand.
3796 // 2. If we are dealing with a normal -> normal binade decrement, since
3797 // we have an explicit integral bit the fact that all bits but the
3798 // integral bit are zero implies that subtracting one will yield a
3799 // significand with 0 integral bit and 1 in all other spots. Thus we
3800 // must just adjust the exponent and set the integral bit to 1.
3801 // 3. If we are dealing with a normal -> denormal binade decrement,
3802 // since we set the integral bit to 0 when we represent denormals, we
3803 // just decrement the significand.
3804 integerPart *Parts = significandParts();
3805 APInt::tcDecrement(Parts, partCount());
3806
3807 if (WillCrossBinadeBoundary) {
3808 // Our result is a normal number. Do the following:
3809 // 1. Set the integral bit to 1.
3810 // 2. Decrement the exponent.
3811 APInt::tcSetBit(Parts, semantics->precision - 1);
3812 exponent--;
3813 }
3814 } else {
3815 // If we are positive, we need to increment the significand.
3816
3817 // We only cross a binade boundary that requires adjusting the exponent if
3818 // the input is not a denormal and all of said input's significand bits
3819 // are set. If all of said conditions are true: clear the significand, set
3820 // the integral bit to 1, and increment the exponent. If we have a
3821 // denormal always increment since moving denormals and the numbers in the
3822 // smallest normal binade have the same exponent in our representation.
3823 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3824
3825 if (WillCrossBinadeBoundary) {
3826 integerPart *Parts = significandParts();
3827 APInt::tcSet(Parts, 0, partCount());
3828 APInt::tcSetBit(Parts, semantics->precision - 1);
3829 assert(exponent != semantics->maxExponent &&
3830 "We can not increment an exponent beyond the maxExponent allowed"
3831 " by the given floating point semantics.");
3832 exponent++;
3833 } else {
3834 incrementSignificand();
3835 }
3836 }
3837 break;
3838 }
3839
3840 // If we are performing nextDown, swap sign so we have -nextUp(-x)
3841 if (nextDown)
3842 changeSign();
3843
3844 return result;
3845 }
3846
makeInf(bool Negative)3847 void IEEEFloat::makeInf(bool Negative) {
3848 category = fcInfinity;
3849 sign = Negative;
3850 exponent = semantics->maxExponent + 1;
3851 APInt::tcSet(significandParts(), 0, partCount());
3852 }
3853
makeZero(bool Negative)3854 void IEEEFloat::makeZero(bool Negative) {
3855 category = fcZero;
3856 sign = Negative;
3857 exponent = semantics->minExponent-1;
3858 APInt::tcSet(significandParts(), 0, partCount());
3859 }
3860
makeQuiet()3861 void IEEEFloat::makeQuiet() {
3862 assert(isNaN());
3863 APInt::tcSetBit(significandParts(), semantics->precision - 2);
3864 }
3865
ilogb(const IEEEFloat & Arg)3866 int ilogb(const IEEEFloat &Arg) {
3867 if (Arg.isNaN())
3868 return IEEEFloat::IEK_NaN;
3869 if (Arg.isZero())
3870 return IEEEFloat::IEK_Zero;
3871 if (Arg.isInfinity())
3872 return IEEEFloat::IEK_Inf;
3873 if (!Arg.isDenormal())
3874 return Arg.exponent;
3875
3876 IEEEFloat Normalized(Arg);
3877 int SignificandBits = Arg.getSemantics().precision - 1;
3878
3879 Normalized.exponent += SignificandBits;
3880 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
3881 return Normalized.exponent - SignificandBits;
3882 }
3883
scalbn(IEEEFloat X,int Exp,IEEEFloat::roundingMode RoundingMode)3884 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
3885 auto MaxExp = X.getSemantics().maxExponent;
3886 auto MinExp = X.getSemantics().minExponent;
3887
3888 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
3889 // overflow; clamp it to a safe range before adding, but ensure that the range
3890 // is large enough that the clamp does not change the result. The range we
3891 // need to support is the difference between the largest possible exponent and
3892 // the normalized exponent of half the smallest denormal.
3893
3894 int SignificandBits = X.getSemantics().precision - 1;
3895 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
3896
3897 // Clamp to one past the range ends to let normalize handle overlflow.
3898 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
3899 X.normalize(RoundingMode, lfExactlyZero);
3900 if (X.isNaN())
3901 X.makeQuiet();
3902 return X;
3903 }
3904
frexp(const IEEEFloat & Val,int & Exp,IEEEFloat::roundingMode RM)3905 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
3906 Exp = ilogb(Val);
3907
3908 // Quiet signalling nans.
3909 if (Exp == IEEEFloat::IEK_NaN) {
3910 IEEEFloat Quiet(Val);
3911 Quiet.makeQuiet();
3912 return Quiet;
3913 }
3914
3915 if (Exp == IEEEFloat::IEK_Inf)
3916 return Val;
3917
3918 // 1 is added because frexp is defined to return a normalized fraction in
3919 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
3920 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
3921 return scalbn(Val, -Exp, RM);
3922 }
3923
DoubleAPFloat(const fltSemantics & S)3924 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
3925 : Semantics(&S),
3926 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
3927 assert(Semantics == &semPPCDoubleDouble);
3928 }
3929
DoubleAPFloat(const fltSemantics & S,uninitializedTag)3930 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
3931 : Semantics(&S),
3932 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
3933 APFloat(semIEEEdouble, uninitialized)}) {
3934 assert(Semantics == &semPPCDoubleDouble);
3935 }
3936
DoubleAPFloat(const fltSemantics & S,integerPart I)3937 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
3938 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
3939 APFloat(semIEEEdouble)}) {
3940 assert(Semantics == &semPPCDoubleDouble);
3941 }
3942
DoubleAPFloat(const fltSemantics & S,const APInt & I)3943 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
3944 : Semantics(&S),
3945 Floats(new APFloat[2]{
3946 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
3947 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
3948 assert(Semantics == &semPPCDoubleDouble);
3949 }
3950
DoubleAPFloat(const fltSemantics & S,APFloat && First,APFloat && Second)3951 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
3952 APFloat &&Second)
3953 : Semantics(&S),
3954 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
3955 assert(Semantics == &semPPCDoubleDouble);
3956 assert(&Floats[0].getSemantics() == &semIEEEdouble);
3957 assert(&Floats[1].getSemantics() == &semIEEEdouble);
3958 }
3959
DoubleAPFloat(const DoubleAPFloat & RHS)3960 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
3961 : Semantics(RHS.Semantics),
3962 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
3963 APFloat(RHS.Floats[1])}
3964 : nullptr) {
3965 assert(Semantics == &semPPCDoubleDouble);
3966 }
3967
DoubleAPFloat(DoubleAPFloat && RHS)3968 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
3969 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
3970 RHS.Semantics = &semBogus;
3971 assert(Semantics == &semPPCDoubleDouble);
3972 }
3973
operator =(const DoubleAPFloat & RHS)3974 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
3975 if (Semantics == RHS.Semantics && RHS.Floats) {
3976 Floats[0] = RHS.Floats[0];
3977 Floats[1] = RHS.Floats[1];
3978 } else if (this != &RHS) {
3979 this->~DoubleAPFloat();
3980 new (this) DoubleAPFloat(RHS);
3981 }
3982 return *this;
3983 }
3984
3985 // Implement addition, subtraction, multiplication and division based on:
3986 // "Software for Doubled-Precision Floating-Point Computations",
3987 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
addImpl(const APFloat & a,const APFloat & aa,const APFloat & c,const APFloat & cc,roundingMode RM)3988 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
3989 const APFloat &c, const APFloat &cc,
3990 roundingMode RM) {
3991 int Status = opOK;
3992 APFloat z = a;
3993 Status |= z.add(c, RM);
3994 if (!z.isFinite()) {
3995 if (!z.isInfinity()) {
3996 Floats[0] = std::move(z);
3997 Floats[1].makeZero(/* Neg = */ false);
3998 return (opStatus)Status;
3999 }
4000 Status = opOK;
4001 auto AComparedToC = a.compareAbsoluteValue(c);
4002 z = cc;
4003 Status |= z.add(aa, RM);
4004 if (AComparedToC == APFloat::cmpGreaterThan) {
4005 // z = cc + aa + c + a;
4006 Status |= z.add(c, RM);
4007 Status |= z.add(a, RM);
4008 } else {
4009 // z = cc + aa + a + c;
4010 Status |= z.add(a, RM);
4011 Status |= z.add(c, RM);
4012 }
4013 if (!z.isFinite()) {
4014 Floats[0] = std::move(z);
4015 Floats[1].makeZero(/* Neg = */ false);
4016 return (opStatus)Status;
4017 }
4018 Floats[0] = z;
4019 APFloat zz = aa;
4020 Status |= zz.add(cc, RM);
4021 if (AComparedToC == APFloat::cmpGreaterThan) {
4022 // Floats[1] = a - z + c + zz;
4023 Floats[1] = a;
4024 Status |= Floats[1].subtract(z, RM);
4025 Status |= Floats[1].add(c, RM);
4026 Status |= Floats[1].add(zz, RM);
4027 } else {
4028 // Floats[1] = c - z + a + zz;
4029 Floats[1] = c;
4030 Status |= Floats[1].subtract(z, RM);
4031 Status |= Floats[1].add(a, RM);
4032 Status |= Floats[1].add(zz, RM);
4033 }
4034 } else {
4035 // q = a - z;
4036 APFloat q = a;
4037 Status |= q.subtract(z, RM);
4038
4039 // zz = q + c + (a - (q + z)) + aa + cc;
4040 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4041 auto zz = q;
4042 Status |= zz.add(c, RM);
4043 Status |= q.add(z, RM);
4044 Status |= q.subtract(a, RM);
4045 q.changeSign();
4046 Status |= zz.add(q, RM);
4047 Status |= zz.add(aa, RM);
4048 Status |= zz.add(cc, RM);
4049 if (zz.isZero() && !zz.isNegative()) {
4050 Floats[0] = std::move(z);
4051 Floats[1].makeZero(/* Neg = */ false);
4052 return opOK;
4053 }
4054 Floats[0] = z;
4055 Status |= Floats[0].add(zz, RM);
4056 if (!Floats[0].isFinite()) {
4057 Floats[1].makeZero(/* Neg = */ false);
4058 return (opStatus)Status;
4059 }
4060 Floats[1] = std::move(z);
4061 Status |= Floats[1].subtract(Floats[0], RM);
4062 Status |= Floats[1].add(zz, RM);
4063 }
4064 return (opStatus)Status;
4065 }
4066
addWithSpecial(const DoubleAPFloat & LHS,const DoubleAPFloat & RHS,DoubleAPFloat & Out,roundingMode RM)4067 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4068 const DoubleAPFloat &RHS,
4069 DoubleAPFloat &Out,
4070 roundingMode RM) {
4071 if (LHS.getCategory() == fcNaN) {
4072 Out = LHS;
4073 return opOK;
4074 }
4075 if (RHS.getCategory() == fcNaN) {
4076 Out = RHS;
4077 return opOK;
4078 }
4079 if (LHS.getCategory() == fcZero) {
4080 Out = RHS;
4081 return opOK;
4082 }
4083 if (RHS.getCategory() == fcZero) {
4084 Out = LHS;
4085 return opOK;
4086 }
4087 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4088 LHS.isNegative() != RHS.isNegative()) {
4089 Out.makeNaN(false, Out.isNegative(), nullptr);
4090 return opInvalidOp;
4091 }
4092 if (LHS.getCategory() == fcInfinity) {
4093 Out = LHS;
4094 return opOK;
4095 }
4096 if (RHS.getCategory() == fcInfinity) {
4097 Out = RHS;
4098 return opOK;
4099 }
4100 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4101
4102 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4103 CC(RHS.Floats[1]);
4104 assert(&A.getSemantics() == &semIEEEdouble);
4105 assert(&AA.getSemantics() == &semIEEEdouble);
4106 assert(&C.getSemantics() == &semIEEEdouble);
4107 assert(&CC.getSemantics() == &semIEEEdouble);
4108 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4109 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4110 return Out.addImpl(A, AA, C, CC, RM);
4111 }
4112
add(const DoubleAPFloat & RHS,roundingMode RM)4113 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4114 roundingMode RM) {
4115 return addWithSpecial(*this, RHS, *this, RM);
4116 }
4117
subtract(const DoubleAPFloat & RHS,roundingMode RM)4118 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4119 roundingMode RM) {
4120 changeSign();
4121 auto Ret = add(RHS, RM);
4122 changeSign();
4123 return Ret;
4124 }
4125
multiply(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4126 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4127 APFloat::roundingMode RM) {
4128 const auto &LHS = *this;
4129 auto &Out = *this;
4130 /* Interesting observation: For special categories, finding the lowest
4131 common ancestor of the following layered graph gives the correct
4132 return category:
4133
4134 NaN
4135 / \
4136 Zero Inf
4137 \ /
4138 Normal
4139
4140 e.g. NaN * NaN = NaN
4141 Zero * Inf = NaN
4142 Normal * Zero = Zero
4143 Normal * Inf = Inf
4144 */
4145 if (LHS.getCategory() == fcNaN) {
4146 Out = LHS;
4147 return opOK;
4148 }
4149 if (RHS.getCategory() == fcNaN) {
4150 Out = RHS;
4151 return opOK;
4152 }
4153 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4154 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4155 Out.makeNaN(false, false, nullptr);
4156 return opOK;
4157 }
4158 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4159 Out = LHS;
4160 return opOK;
4161 }
4162 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4163 Out = RHS;
4164 return opOK;
4165 }
4166 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4167 "Special cases not handled exhaustively");
4168
4169 int Status = opOK;
4170 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4171 // t = a * c
4172 APFloat T = A;
4173 Status |= T.multiply(C, RM);
4174 if (!T.isFiniteNonZero()) {
4175 Floats[0] = T;
4176 Floats[1].makeZero(/* Neg = */ false);
4177 return (opStatus)Status;
4178 }
4179
4180 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4181 APFloat Tau = A;
4182 T.changeSign();
4183 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4184 T.changeSign();
4185 {
4186 // v = a * d
4187 APFloat V = A;
4188 Status |= V.multiply(D, RM);
4189 // w = b * c
4190 APFloat W = B;
4191 Status |= W.multiply(C, RM);
4192 Status |= V.add(W, RM);
4193 // tau += v + w
4194 Status |= Tau.add(V, RM);
4195 }
4196 // u = t + tau
4197 APFloat U = T;
4198 Status |= U.add(Tau, RM);
4199
4200 Floats[0] = U;
4201 if (!U.isFinite()) {
4202 Floats[1].makeZero(/* Neg = */ false);
4203 } else {
4204 // Floats[1] = (t - u) + tau
4205 Status |= T.subtract(U, RM);
4206 Status |= T.add(Tau, RM);
4207 Floats[1] = T;
4208 }
4209 return (opStatus)Status;
4210 }
4211
divide(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4212 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4213 APFloat::roundingMode RM) {
4214 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4215 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4216 auto Ret =
4217 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4218 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4219 return Ret;
4220 }
4221
remainder(const DoubleAPFloat & RHS)4222 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4223 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4224 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4225 auto Ret =
4226 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4227 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4228 return Ret;
4229 }
4230
mod(const DoubleAPFloat & RHS)4231 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4232 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4233 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4234 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4235 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4236 return Ret;
4237 }
4238
4239 APFloat::opStatus
fusedMultiplyAdd(const DoubleAPFloat & Multiplicand,const DoubleAPFloat & Addend,APFloat::roundingMode RM)4240 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4241 const DoubleAPFloat &Addend,
4242 APFloat::roundingMode RM) {
4243 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4244 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4245 auto Ret = Tmp.fusedMultiplyAdd(
4246 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4247 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4248 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4249 return Ret;
4250 }
4251
roundToIntegral(APFloat::roundingMode RM)4252 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4253 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4254 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4255 auto Ret = Tmp.roundToIntegral(RM);
4256 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4257 return Ret;
4258 }
4259
changeSign()4260 void DoubleAPFloat::changeSign() {
4261 Floats[0].changeSign();
4262 Floats[1].changeSign();
4263 }
4264
4265 APFloat::cmpResult
compareAbsoluteValue(const DoubleAPFloat & RHS) const4266 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4267 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4268 if (Result != cmpEqual)
4269 return Result;
4270 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4271 if (Result == cmpLessThan || Result == cmpGreaterThan) {
4272 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4273 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4274 if (Against && !RHSAgainst)
4275 return cmpLessThan;
4276 if (!Against && RHSAgainst)
4277 return cmpGreaterThan;
4278 if (!Against && !RHSAgainst)
4279 return Result;
4280 if (Against && RHSAgainst)
4281 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4282 }
4283 return Result;
4284 }
4285
getCategory() const4286 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4287 return Floats[0].getCategory();
4288 }
4289
isNegative() const4290 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4291
makeInf(bool Neg)4292 void DoubleAPFloat::makeInf(bool Neg) {
4293 Floats[0].makeInf(Neg);
4294 Floats[1].makeZero(/* Neg = */ false);
4295 }
4296
makeZero(bool Neg)4297 void DoubleAPFloat::makeZero(bool Neg) {
4298 Floats[0].makeZero(Neg);
4299 Floats[1].makeZero(/* Neg = */ false);
4300 }
4301
makeLargest(bool Neg)4302 void DoubleAPFloat::makeLargest(bool Neg) {
4303 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4304 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4305 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4306 if (Neg)
4307 changeSign();
4308 }
4309
makeSmallest(bool Neg)4310 void DoubleAPFloat::makeSmallest(bool Neg) {
4311 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4312 Floats[0].makeSmallest(Neg);
4313 Floats[1].makeZero(/* Neg = */ false);
4314 }
4315
makeSmallestNormalized(bool Neg)4316 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4317 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4318 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4319 if (Neg)
4320 Floats[0].changeSign();
4321 Floats[1].makeZero(/* Neg = */ false);
4322 }
4323
makeNaN(bool SNaN,bool Neg,const APInt * fill)4324 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4325 Floats[0].makeNaN(SNaN, Neg, fill);
4326 Floats[1].makeZero(/* Neg = */ false);
4327 }
4328
compare(const DoubleAPFloat & RHS) const4329 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4330 auto Result = Floats[0].compare(RHS.Floats[0]);
4331 // |Float[0]| > |Float[1]|
4332 if (Result == APFloat::cmpEqual)
4333 return Floats[1].compare(RHS.Floats[1]);
4334 return Result;
4335 }
4336
bitwiseIsEqual(const DoubleAPFloat & RHS) const4337 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4338 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4339 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4340 }
4341
hash_value(const DoubleAPFloat & Arg)4342 hash_code hash_value(const DoubleAPFloat &Arg) {
4343 if (Arg.Floats)
4344 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4345 return hash_combine(Arg.Semantics);
4346 }
4347
bitcastToAPInt() const4348 APInt DoubleAPFloat::bitcastToAPInt() const {
4349 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4350 uint64_t Data[] = {
4351 Floats[0].bitcastToAPInt().getRawData()[0],
4352 Floats[1].bitcastToAPInt().getRawData()[0],
4353 };
4354 return APInt(128, 2, Data);
4355 }
4356
convertFromString(StringRef S,roundingMode RM)4357 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
4358 roundingMode RM) {
4359 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4360 APFloat Tmp(semPPCDoubleDoubleLegacy);
4361 auto Ret = Tmp.convertFromString(S, RM);
4362 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4363 return Ret;
4364 }
4365
next(bool nextDown)4366 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
4367 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4368 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4369 auto Ret = Tmp.next(nextDown);
4370 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4371 return Ret;
4372 }
4373
4374 APFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> Input,unsigned int Width,bool IsSigned,roundingMode RM,bool * IsExact) const4375 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
4376 unsigned int Width, bool IsSigned,
4377 roundingMode RM, bool *IsExact) const {
4378 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4379 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4380 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
4381 }
4382
convertFromAPInt(const APInt & Input,bool IsSigned,roundingMode RM)4383 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
4384 bool IsSigned,
4385 roundingMode RM) {
4386 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4387 APFloat Tmp(semPPCDoubleDoubleLegacy);
4388 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4389 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4390 return Ret;
4391 }
4392
4393 APFloat::opStatus
convertFromSignExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)4394 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
4395 unsigned int InputSize,
4396 bool IsSigned, roundingMode RM) {
4397 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4398 APFloat Tmp(semPPCDoubleDoubleLegacy);
4399 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4400 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4401 return Ret;
4402 }
4403
4404 APFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)4405 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
4406 unsigned int InputSize,
4407 bool IsSigned, roundingMode RM) {
4408 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4409 APFloat Tmp(semPPCDoubleDoubleLegacy);
4410 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
4411 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4412 return Ret;
4413 }
4414
convertToHexString(char * DST,unsigned int HexDigits,bool UpperCase,roundingMode RM) const4415 unsigned int DoubleAPFloat::convertToHexString(char *DST,
4416 unsigned int HexDigits,
4417 bool UpperCase,
4418 roundingMode RM) const {
4419 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4420 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4421 .convertToHexString(DST, HexDigits, UpperCase, RM);
4422 }
4423
isDenormal() const4424 bool DoubleAPFloat::isDenormal() const {
4425 return getCategory() == fcNormal &&
4426 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
4427 // (double)(Hi + Lo) == Hi defines a normal number.
4428 Floats[0].compare(Floats[0] + Floats[1]) != cmpEqual);
4429 }
4430
isSmallest() const4431 bool DoubleAPFloat::isSmallest() const {
4432 if (getCategory() != fcNormal)
4433 return false;
4434 DoubleAPFloat Tmp(*this);
4435 Tmp.makeSmallest(this->isNegative());
4436 return Tmp.compare(*this) == cmpEqual;
4437 }
4438
isLargest() const4439 bool DoubleAPFloat::isLargest() const {
4440 if (getCategory() != fcNormal)
4441 return false;
4442 DoubleAPFloat Tmp(*this);
4443 Tmp.makeLargest(this->isNegative());
4444 return Tmp.compare(*this) == cmpEqual;
4445 }
4446
isInteger() const4447 bool DoubleAPFloat::isInteger() const {
4448 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4449 return Floats[0].isInteger() && Floats[1].isInteger();
4450 }
4451
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const4452 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
4453 unsigned FormatPrecision,
4454 unsigned FormatMaxPadding,
4455 bool TruncateZero) const {
4456 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4457 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4458 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
4459 }
4460
getExactInverse(APFloat * inv) const4461 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
4462 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4463 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4464 if (!inv)
4465 return Tmp.getExactInverse(nullptr);
4466 APFloat Inv(semPPCDoubleDoubleLegacy);
4467 auto Ret = Tmp.getExactInverse(&Inv);
4468 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
4469 return Ret;
4470 }
4471
scalbn(DoubleAPFloat Arg,int Exp,APFloat::roundingMode RM)4472 DoubleAPFloat scalbn(DoubleAPFloat Arg, int Exp, APFloat::roundingMode RM) {
4473 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4474 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
4475 scalbn(Arg.Floats[1], Exp, RM));
4476 }
4477
frexp(const DoubleAPFloat & Arg,int & Exp,APFloat::roundingMode RM)4478 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
4479 APFloat::roundingMode RM) {
4480 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4481 APFloat First = frexp(Arg.Floats[0], Exp, RM);
4482 APFloat Second = Arg.Floats[1];
4483 if (Arg.getCategory() == APFloat::fcNormal)
4484 Second = scalbn(Second, -Exp, RM);
4485 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
4486 }
4487
4488 } // End detail namespace
4489
Storage(IEEEFloat F,const fltSemantics & Semantics)4490 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
4491 if (usesLayout<IEEEFloat>(Semantics)) {
4492 new (&IEEE) IEEEFloat(std::move(F));
4493 return;
4494 }
4495 if (usesLayout<DoubleAPFloat>(Semantics)) {
4496 const fltSemantics& S = F.getSemantics();
4497 new (&Double)
4498 DoubleAPFloat(Semantics, APFloat(std::move(F), S),
4499 APFloat(semIEEEdouble));
4500 return;
4501 }
4502 llvm_unreachable("Unexpected semantics");
4503 }
4504
convertFromString(StringRef Str,roundingMode RM)4505 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
4506 roundingMode RM) {
4507 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
4508 }
4509
hash_value(const APFloat & Arg)4510 hash_code hash_value(const APFloat &Arg) {
4511 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
4512 return hash_value(Arg.U.IEEE);
4513 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
4514 return hash_value(Arg.U.Double);
4515 llvm_unreachable("Unexpected semantics");
4516 }
4517
APFloat(const fltSemantics & Semantics,StringRef S)4518 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
4519 : APFloat(Semantics) {
4520 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
4521 assert(StatusOrErr && "Invalid floating point representation");
4522 consumeError(StatusOrErr.takeError());
4523 }
4524
convert(const fltSemantics & ToSemantics,roundingMode RM,bool * losesInfo)4525 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
4526 roundingMode RM, bool *losesInfo) {
4527 if (&getSemantics() == &ToSemantics) {
4528 *losesInfo = false;
4529 return opOK;
4530 }
4531 if (usesLayout<IEEEFloat>(getSemantics()) &&
4532 usesLayout<IEEEFloat>(ToSemantics))
4533 return U.IEEE.convert(ToSemantics, RM, losesInfo);
4534 if (usesLayout<IEEEFloat>(getSemantics()) &&
4535 usesLayout<DoubleAPFloat>(ToSemantics)) {
4536 assert(&ToSemantics == &semPPCDoubleDouble);
4537 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
4538 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
4539 return Ret;
4540 }
4541 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
4542 usesLayout<IEEEFloat>(ToSemantics)) {
4543 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
4544 *this = APFloat(std::move(getIEEE()), ToSemantics);
4545 return Ret;
4546 }
4547 llvm_unreachable("Unexpected semantics");
4548 }
4549
getAllOnesValue(unsigned BitWidth,bool isIEEE)4550 APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) {
4551 if (isIEEE) {
4552 switch (BitWidth) {
4553 case 16:
4554 return APFloat(semIEEEhalf, APInt::getAllOnesValue(BitWidth));
4555 case 32:
4556 return APFloat(semIEEEsingle, APInt::getAllOnesValue(BitWidth));
4557 case 64:
4558 return APFloat(semIEEEdouble, APInt::getAllOnesValue(BitWidth));
4559 case 80:
4560 return APFloat(semX87DoubleExtended, APInt::getAllOnesValue(BitWidth));
4561 case 128:
4562 return APFloat(semIEEEquad, APInt::getAllOnesValue(BitWidth));
4563 default:
4564 llvm_unreachable("Unknown floating bit width");
4565 }
4566 } else {
4567 assert(BitWidth == 128);
4568 return APFloat(semPPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
4569 }
4570 }
4571
print(raw_ostream & OS) const4572 void APFloat::print(raw_ostream &OS) const {
4573 SmallVector<char, 16> Buffer;
4574 toString(Buffer);
4575 OS << Buffer << "\n";
4576 }
4577
4578 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump() const4579 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
4580 #endif
4581
Profile(FoldingSetNodeID & NID) const4582 void APFloat::Profile(FoldingSetNodeID &NID) const {
4583 NID.Add(bitcastToAPInt());
4584 }
4585
4586 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4587 an APSInt, whose initial bit-width and signed-ness are used to determine the
4588 precision of the conversion.
4589 */
convertToInteger(APSInt & result,roundingMode rounding_mode,bool * isExact) const4590 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
4591 roundingMode rounding_mode,
4592 bool *isExact) const {
4593 unsigned bitWidth = result.getBitWidth();
4594 SmallVector<uint64_t, 4> parts(result.getNumWords());
4595 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
4596 rounding_mode, isExact);
4597 // Keeps the original signed-ness.
4598 result = APInt(bitWidth, parts);
4599 return status;
4600 }
4601
4602 } // End llvm namespace
4603
4604 #undef APFLOAT_DISPATCH_ON_SEMANTICS
4605