1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision integer
10 // constant values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/bit.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <climits>
28 #include <cmath>
29 #include <cstdlib>
30 #include <cstring>
31 using namespace llvm;
32
33 #define DEBUG_TYPE "apint"
34
35 /// A utility function for allocating memory, checking for allocation failures,
36 /// and ensuring the contents are zeroed.
getClearedMemory(unsigned numWords)37 inline static uint64_t* getClearedMemory(unsigned numWords) {
38 uint64_t *result = new uint64_t[numWords];
39 memset(result, 0, numWords * sizeof(uint64_t));
40 return result;
41 }
42
43 /// A utility function for allocating memory and checking for allocation
44 /// failure. The content is not zeroed.
getMemory(unsigned numWords)45 inline static uint64_t* getMemory(unsigned numWords) {
46 return new uint64_t[numWords];
47 }
48
49 /// A utility function that converts a character to a digit.
getDigit(char cdigit,uint8_t radix)50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
51 unsigned r;
52
53 if (radix == 16 || radix == 36) {
54 r = cdigit - '0';
55 if (r <= 9)
56 return r;
57
58 r = cdigit - 'A';
59 if (r <= radix - 11U)
60 return r + 10;
61
62 r = cdigit - 'a';
63 if (r <= radix - 11U)
64 return r + 10;
65
66 radix = 10;
67 }
68
69 r = cdigit - '0';
70 if (r < radix)
71 return r;
72
73 return -1U;
74 }
75
76
initSlowCase(uint64_t val,bool isSigned)77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
78 U.pVal = getClearedMemory(getNumWords());
79 U.pVal[0] = val;
80 if (isSigned && int64_t(val) < 0)
81 for (unsigned i = 1; i < getNumWords(); ++i)
82 U.pVal[i] = WORDTYPE_MAX;
83 clearUnusedBits();
84 }
85
initSlowCase(const APInt & that)86 void APInt::initSlowCase(const APInt& that) {
87 U.pVal = getMemory(getNumWords());
88 memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89 }
90
initFromArray(ArrayRef<uint64_t> bigVal)91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92 assert(BitWidth && "Bitwidth too small");
93 assert(bigVal.data() && "Null pointer detected!");
94 if (isSingleWord())
95 U.VAL = bigVal[0];
96 else {
97 // Get memory, cleared to 0
98 U.pVal = getClearedMemory(getNumWords());
99 // Calculate the number of words to copy
100 unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101 // Copy the words from bigVal to pVal
102 memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103 }
104 // Make sure unused high bits are cleared
105 clearUnusedBits();
106 }
107
APInt(unsigned numBits,ArrayRef<uint64_t> bigVal)108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109 : BitWidth(numBits) {
110 initFromArray(bigVal);
111 }
112
APInt(unsigned numBits,unsigned numWords,const uint64_t bigVal[])113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114 : BitWidth(numBits) {
115 initFromArray(makeArrayRef(bigVal, numWords));
116 }
117
APInt(unsigned numbits,StringRef Str,uint8_t radix)118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119 : BitWidth(numbits) {
120 assert(BitWidth && "Bitwidth too small");
121 fromString(numbits, Str, radix);
122 }
123
reallocate(unsigned NewBitWidth)124 void APInt::reallocate(unsigned NewBitWidth) {
125 // If the number of words is the same we can just change the width and stop.
126 if (getNumWords() == getNumWords(NewBitWidth)) {
127 BitWidth = NewBitWidth;
128 return;
129 }
130
131 // If we have an allocation, delete it.
132 if (!isSingleWord())
133 delete [] U.pVal;
134
135 // Update BitWidth.
136 BitWidth = NewBitWidth;
137
138 // If we are supposed to have an allocation, create it.
139 if (!isSingleWord())
140 U.pVal = getMemory(getNumWords());
141 }
142
AssignSlowCase(const APInt & RHS)143 void APInt::AssignSlowCase(const APInt& RHS) {
144 // Don't do anything for X = X
145 if (this == &RHS)
146 return;
147
148 // Adjust the bit width and handle allocations as necessary.
149 reallocate(RHS.getBitWidth());
150
151 // Copy the data.
152 if (isSingleWord())
153 U.VAL = RHS.U.VAL;
154 else
155 memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156 }
157
158 /// This method 'profiles' an APInt for use with FoldingSet.
Profile(FoldingSetNodeID & ID) const159 void APInt::Profile(FoldingSetNodeID& ID) const {
160 ID.AddInteger(BitWidth);
161
162 if (isSingleWord()) {
163 ID.AddInteger(U.VAL);
164 return;
165 }
166
167 unsigned NumWords = getNumWords();
168 for (unsigned i = 0; i < NumWords; ++i)
169 ID.AddInteger(U.pVal[i]);
170 }
171
172 /// Prefix increment operator. Increments the APInt by one.
operator ++()173 APInt& APInt::operator++() {
174 if (isSingleWord())
175 ++U.VAL;
176 else
177 tcIncrement(U.pVal, getNumWords());
178 return clearUnusedBits();
179 }
180
181 /// Prefix decrement operator. Decrements the APInt by one.
operator --()182 APInt& APInt::operator--() {
183 if (isSingleWord())
184 --U.VAL;
185 else
186 tcDecrement(U.pVal, getNumWords());
187 return clearUnusedBits();
188 }
189
190 /// Adds the RHS APInt to this APInt.
191 /// @returns this, after addition of RHS.
192 /// Addition assignment operator.
operator +=(const APInt & RHS)193 APInt& APInt::operator+=(const APInt& RHS) {
194 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195 if (isSingleWord())
196 U.VAL += RHS.U.VAL;
197 else
198 tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199 return clearUnusedBits();
200 }
201
operator +=(uint64_t RHS)202 APInt& APInt::operator+=(uint64_t RHS) {
203 if (isSingleWord())
204 U.VAL += RHS;
205 else
206 tcAddPart(U.pVal, RHS, getNumWords());
207 return clearUnusedBits();
208 }
209
210 /// Subtracts the RHS APInt from this APInt
211 /// @returns this, after subtraction
212 /// Subtraction assignment operator.
operator -=(const APInt & RHS)213 APInt& APInt::operator-=(const APInt& RHS) {
214 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215 if (isSingleWord())
216 U.VAL -= RHS.U.VAL;
217 else
218 tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219 return clearUnusedBits();
220 }
221
operator -=(uint64_t RHS)222 APInt& APInt::operator-=(uint64_t RHS) {
223 if (isSingleWord())
224 U.VAL -= RHS;
225 else
226 tcSubtractPart(U.pVal, RHS, getNumWords());
227 return clearUnusedBits();
228 }
229
operator *(const APInt & RHS) const230 APInt APInt::operator*(const APInt& RHS) const {
231 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232 if (isSingleWord())
233 return APInt(BitWidth, U.VAL * RHS.U.VAL);
234
235 APInt Result(getMemory(getNumWords()), getBitWidth());
236
237 tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238
239 Result.clearUnusedBits();
240 return Result;
241 }
242
AndAssignSlowCase(const APInt & RHS)243 void APInt::AndAssignSlowCase(const APInt& RHS) {
244 tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245 }
246
OrAssignSlowCase(const APInt & RHS)247 void APInt::OrAssignSlowCase(const APInt& RHS) {
248 tcOr(U.pVal, RHS.U.pVal, getNumWords());
249 }
250
XorAssignSlowCase(const APInt & RHS)251 void APInt::XorAssignSlowCase(const APInt& RHS) {
252 tcXor(U.pVal, RHS.U.pVal, getNumWords());
253 }
254
operator *=(const APInt & RHS)255 APInt& APInt::operator*=(const APInt& RHS) {
256 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257 *this = *this * RHS;
258 return *this;
259 }
260
operator *=(uint64_t RHS)261 APInt& APInt::operator*=(uint64_t RHS) {
262 if (isSingleWord()) {
263 U.VAL *= RHS;
264 } else {
265 unsigned NumWords = getNumWords();
266 tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267 }
268 return clearUnusedBits();
269 }
270
EqualSlowCase(const APInt & RHS) const271 bool APInt::EqualSlowCase(const APInt& RHS) const {
272 return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273 }
274
compare(const APInt & RHS) const275 int APInt::compare(const APInt& RHS) const {
276 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277 if (isSingleWord())
278 return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279
280 return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281 }
282
compareSigned(const APInt & RHS) const283 int APInt::compareSigned(const APInt& RHS) const {
284 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285 if (isSingleWord()) {
286 int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287 int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288 return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289 }
290
291 bool lhsNeg = isNegative();
292 bool rhsNeg = RHS.isNegative();
293
294 // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295 if (lhsNeg != rhsNeg)
296 return lhsNeg ? -1 : 1;
297
298 // Otherwise we can just use an unsigned comparison, because even negative
299 // numbers compare correctly this way if both have the same signed-ness.
300 return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301 }
302
setBitsSlowCase(unsigned loBit,unsigned hiBit)303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304 unsigned loWord = whichWord(loBit);
305 unsigned hiWord = whichWord(hiBit);
306
307 // Create an initial mask for the low word with zeros below loBit.
308 uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309
310 // If hiBit is not aligned, we need a high mask.
311 unsigned hiShiftAmt = whichBit(hiBit);
312 if (hiShiftAmt != 0) {
313 // Create a high mask with zeros above hiBit.
314 uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315 // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316 // set the bits in hiWord.
317 if (hiWord == loWord)
318 loMask &= hiMask;
319 else
320 U.pVal[hiWord] |= hiMask;
321 }
322 // Apply the mask to the low word.
323 U.pVal[loWord] |= loMask;
324
325 // Fill any words between loWord and hiWord with all ones.
326 for (unsigned word = loWord + 1; word < hiWord; ++word)
327 U.pVal[word] = WORDTYPE_MAX;
328 }
329
330 /// Toggle every bit to its opposite value.
flipAllBitsSlowCase()331 void APInt::flipAllBitsSlowCase() {
332 tcComplement(U.pVal, getNumWords());
333 clearUnusedBits();
334 }
335
336 /// Toggle a given bit to its opposite value whose position is given
337 /// as "bitPosition".
338 /// Toggles a given bit to its opposite value.
flipBit(unsigned bitPosition)339 void APInt::flipBit(unsigned bitPosition) {
340 assert(bitPosition < BitWidth && "Out of the bit-width range!");
341 if ((*this)[bitPosition]) clearBit(bitPosition);
342 else setBit(bitPosition);
343 }
344
insertBits(const APInt & subBits,unsigned bitPosition)345 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
346 unsigned subBitWidth = subBits.getBitWidth();
347 assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
348 "Illegal bit insertion");
349
350 // Insertion is a direct copy.
351 if (subBitWidth == BitWidth) {
352 *this = subBits;
353 return;
354 }
355
356 // Single word result can be done as a direct bitmask.
357 if (isSingleWord()) {
358 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
359 U.VAL &= ~(mask << bitPosition);
360 U.VAL |= (subBits.U.VAL << bitPosition);
361 return;
362 }
363
364 unsigned loBit = whichBit(bitPosition);
365 unsigned loWord = whichWord(bitPosition);
366 unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
367
368 // Insertion within a single word can be done as a direct bitmask.
369 if (loWord == hi1Word) {
370 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
371 U.pVal[loWord] &= ~(mask << loBit);
372 U.pVal[loWord] |= (subBits.U.VAL << loBit);
373 return;
374 }
375
376 // Insert on word boundaries.
377 if (loBit == 0) {
378 // Direct copy whole words.
379 unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
380 memcpy(U.pVal + loWord, subBits.getRawData(),
381 numWholeSubWords * APINT_WORD_SIZE);
382
383 // Mask+insert remaining bits.
384 unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
385 if (remainingBits != 0) {
386 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
387 U.pVal[hi1Word] &= ~mask;
388 U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
389 }
390 return;
391 }
392
393 // General case - set/clear individual bits in dst based on src.
394 // TODO - there is scope for optimization here, but at the moment this code
395 // path is barely used so prefer readability over performance.
396 for (unsigned i = 0; i != subBitWidth; ++i) {
397 if (subBits[i])
398 setBit(bitPosition + i);
399 else
400 clearBit(bitPosition + i);
401 }
402 }
403
insertBits(uint64_t subBits,unsigned bitPosition,unsigned numBits)404 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
405 uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
406 subBits &= maskBits;
407 if (isSingleWord()) {
408 U.VAL &= ~(maskBits << bitPosition);
409 U.VAL |= subBits << bitPosition;
410 return;
411 }
412
413 unsigned loBit = whichBit(bitPosition);
414 unsigned loWord = whichWord(bitPosition);
415 unsigned hiWord = whichWord(bitPosition + numBits - 1);
416 if (loWord == hiWord) {
417 U.pVal[loWord] &= ~(maskBits << loBit);
418 U.pVal[loWord] |= subBits << loBit;
419 return;
420 }
421
422 static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
423 unsigned wordBits = 8 * sizeof(WordType);
424 U.pVal[loWord] &= ~(maskBits << loBit);
425 U.pVal[loWord] |= subBits << loBit;
426
427 U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
428 U.pVal[hiWord] |= subBits >> (wordBits - loBit);
429 }
430
extractBits(unsigned numBits,unsigned bitPosition) const431 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
432 assert(numBits > 0 && "Can't extract zero bits");
433 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
434 "Illegal bit extraction");
435
436 if (isSingleWord())
437 return APInt(numBits, U.VAL >> bitPosition);
438
439 unsigned loBit = whichBit(bitPosition);
440 unsigned loWord = whichWord(bitPosition);
441 unsigned hiWord = whichWord(bitPosition + numBits - 1);
442
443 // Single word result extracting bits from a single word source.
444 if (loWord == hiWord)
445 return APInt(numBits, U.pVal[loWord] >> loBit);
446
447 // Extracting bits that start on a source word boundary can be done
448 // as a fast memory copy.
449 if (loBit == 0)
450 return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
451
452 // General case - shift + copy source words directly into place.
453 APInt Result(numBits, 0);
454 unsigned NumSrcWords = getNumWords();
455 unsigned NumDstWords = Result.getNumWords();
456
457 uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
458 for (unsigned word = 0; word < NumDstWords; ++word) {
459 uint64_t w0 = U.pVal[loWord + word];
460 uint64_t w1 =
461 (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
462 DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
463 }
464
465 return Result.clearUnusedBits();
466 }
467
extractBitsAsZExtValue(unsigned numBits,unsigned bitPosition) const468 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
469 unsigned bitPosition) const {
470 assert(numBits > 0 && "Can't extract zero bits");
471 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
472 "Illegal bit extraction");
473 assert(numBits <= 64 && "Illegal bit extraction");
474
475 uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
476 if (isSingleWord())
477 return (U.VAL >> bitPosition) & maskBits;
478
479 unsigned loBit = whichBit(bitPosition);
480 unsigned loWord = whichWord(bitPosition);
481 unsigned hiWord = whichWord(bitPosition + numBits - 1);
482 if (loWord == hiWord)
483 return (U.pVal[loWord] >> loBit) & maskBits;
484
485 static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
486 unsigned wordBits = 8 * sizeof(WordType);
487 uint64_t retBits = U.pVal[loWord] >> loBit;
488 retBits |= U.pVal[hiWord] << (wordBits - loBit);
489 retBits &= maskBits;
490 return retBits;
491 }
492
getBitsNeeded(StringRef str,uint8_t radix)493 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
494 assert(!str.empty() && "Invalid string length");
495 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
496 radix == 36) &&
497 "Radix should be 2, 8, 10, 16, or 36!");
498
499 size_t slen = str.size();
500
501 // Each computation below needs to know if it's negative.
502 StringRef::iterator p = str.begin();
503 unsigned isNegative = *p == '-';
504 if (*p == '-' || *p == '+') {
505 p++;
506 slen--;
507 assert(slen && "String is only a sign, needs a value.");
508 }
509
510 // For radixes of power-of-two values, the bits required is accurately and
511 // easily computed
512 if (radix == 2)
513 return slen + isNegative;
514 if (radix == 8)
515 return slen * 3 + isNegative;
516 if (radix == 16)
517 return slen * 4 + isNegative;
518
519 // FIXME: base 36
520
521 // This is grossly inefficient but accurate. We could probably do something
522 // with a computation of roughly slen*64/20 and then adjust by the value of
523 // the first few digits. But, I'm not sure how accurate that could be.
524
525 // Compute a sufficient number of bits that is always large enough but might
526 // be too large. This avoids the assertion in the constructor. This
527 // calculation doesn't work appropriately for the numbers 0-9, so just use 4
528 // bits in that case.
529 unsigned sufficient
530 = radix == 10? (slen == 1 ? 4 : slen * 64/18)
531 : (slen == 1 ? 7 : slen * 16/3);
532
533 // Convert to the actual binary value.
534 APInt tmp(sufficient, StringRef(p, slen), radix);
535
536 // Compute how many bits are required. If the log is infinite, assume we need
537 // just bit. If the log is exact and value is negative, then the value is
538 // MinSignedValue with (log + 1) bits.
539 unsigned log = tmp.logBase2();
540 if (log == (unsigned)-1) {
541 return isNegative + 1;
542 } else if (isNegative && tmp.isPowerOf2()) {
543 return isNegative + log;
544 } else {
545 return isNegative + log + 1;
546 }
547 }
548
hash_value(const APInt & Arg)549 hash_code llvm::hash_value(const APInt &Arg) {
550 if (Arg.isSingleWord())
551 return hash_combine(Arg.U.VAL);
552
553 return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
554 }
555
isSplat(unsigned SplatSizeInBits) const556 bool APInt::isSplat(unsigned SplatSizeInBits) const {
557 assert(getBitWidth() % SplatSizeInBits == 0 &&
558 "SplatSizeInBits must divide width!");
559 // We can check that all parts of an integer are equal by making use of a
560 // little trick: rotate and check if it's still the same value.
561 return *this == rotl(SplatSizeInBits);
562 }
563
564 /// This function returns the high "numBits" bits of this APInt.
getHiBits(unsigned numBits) const565 APInt APInt::getHiBits(unsigned numBits) const {
566 return this->lshr(BitWidth - numBits);
567 }
568
569 /// This function returns the low "numBits" bits of this APInt.
getLoBits(unsigned numBits) const570 APInt APInt::getLoBits(unsigned numBits) const {
571 APInt Result(getLowBitsSet(BitWidth, numBits));
572 Result &= *this;
573 return Result;
574 }
575
576 /// Return a value containing V broadcasted over NewLen bits.
getSplat(unsigned NewLen,const APInt & V)577 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
578 assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
579
580 APInt Val = V.zextOrSelf(NewLen);
581 for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
582 Val |= Val << I;
583
584 return Val;
585 }
586
countLeadingZerosSlowCase() const587 unsigned APInt::countLeadingZerosSlowCase() const {
588 unsigned Count = 0;
589 for (int i = getNumWords()-1; i >= 0; --i) {
590 uint64_t V = U.pVal[i];
591 if (V == 0)
592 Count += APINT_BITS_PER_WORD;
593 else {
594 Count += llvm::countLeadingZeros(V);
595 break;
596 }
597 }
598 // Adjust for unused bits in the most significant word (they are zero).
599 unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
600 Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
601 return Count;
602 }
603
countLeadingOnesSlowCase() const604 unsigned APInt::countLeadingOnesSlowCase() const {
605 unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
606 unsigned shift;
607 if (!highWordBits) {
608 highWordBits = APINT_BITS_PER_WORD;
609 shift = 0;
610 } else {
611 shift = APINT_BITS_PER_WORD - highWordBits;
612 }
613 int i = getNumWords() - 1;
614 unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
615 if (Count == highWordBits) {
616 for (i--; i >= 0; --i) {
617 if (U.pVal[i] == WORDTYPE_MAX)
618 Count += APINT_BITS_PER_WORD;
619 else {
620 Count += llvm::countLeadingOnes(U.pVal[i]);
621 break;
622 }
623 }
624 }
625 return Count;
626 }
627
countTrailingZerosSlowCase() const628 unsigned APInt::countTrailingZerosSlowCase() const {
629 unsigned Count = 0;
630 unsigned i = 0;
631 for (; i < getNumWords() && U.pVal[i] == 0; ++i)
632 Count += APINT_BITS_PER_WORD;
633 if (i < getNumWords())
634 Count += llvm::countTrailingZeros(U.pVal[i]);
635 return std::min(Count, BitWidth);
636 }
637
countTrailingOnesSlowCase() const638 unsigned APInt::countTrailingOnesSlowCase() const {
639 unsigned Count = 0;
640 unsigned i = 0;
641 for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
642 Count += APINT_BITS_PER_WORD;
643 if (i < getNumWords())
644 Count += llvm::countTrailingOnes(U.pVal[i]);
645 assert(Count <= BitWidth);
646 return Count;
647 }
648
countPopulationSlowCase() const649 unsigned APInt::countPopulationSlowCase() const {
650 unsigned Count = 0;
651 for (unsigned i = 0; i < getNumWords(); ++i)
652 Count += llvm::countPopulation(U.pVal[i]);
653 return Count;
654 }
655
intersectsSlowCase(const APInt & RHS) const656 bool APInt::intersectsSlowCase(const APInt &RHS) const {
657 for (unsigned i = 0, e = getNumWords(); i != e; ++i)
658 if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
659 return true;
660
661 return false;
662 }
663
isSubsetOfSlowCase(const APInt & RHS) const664 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
665 for (unsigned i = 0, e = getNumWords(); i != e; ++i)
666 if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
667 return false;
668
669 return true;
670 }
671
byteSwap() const672 APInt APInt::byteSwap() const {
673 assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
674 if (BitWidth == 16)
675 return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
676 if (BitWidth == 32)
677 return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
678 if (BitWidth == 48) {
679 unsigned Tmp1 = unsigned(U.VAL >> 16);
680 Tmp1 = ByteSwap_32(Tmp1);
681 uint16_t Tmp2 = uint16_t(U.VAL);
682 Tmp2 = ByteSwap_16(Tmp2);
683 return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
684 }
685 if (BitWidth == 64)
686 return APInt(BitWidth, ByteSwap_64(U.VAL));
687
688 APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
689 for (unsigned I = 0, N = getNumWords(); I != N; ++I)
690 Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
691 if (Result.BitWidth != BitWidth) {
692 Result.lshrInPlace(Result.BitWidth - BitWidth);
693 Result.BitWidth = BitWidth;
694 }
695 return Result;
696 }
697
reverseBits() const698 APInt APInt::reverseBits() const {
699 switch (BitWidth) {
700 case 64:
701 return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
702 case 32:
703 return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
704 case 16:
705 return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
706 case 8:
707 return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
708 default:
709 break;
710 }
711
712 APInt Val(*this);
713 APInt Reversed(BitWidth, 0);
714 unsigned S = BitWidth;
715
716 for (; Val != 0; Val.lshrInPlace(1)) {
717 Reversed <<= 1;
718 Reversed |= Val[0];
719 --S;
720 }
721
722 Reversed <<= S;
723 return Reversed;
724 }
725
GreatestCommonDivisor(APInt A,APInt B)726 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
727 // Fast-path a common case.
728 if (A == B) return A;
729
730 // Corner cases: if either operand is zero, the other is the gcd.
731 if (!A) return B;
732 if (!B) return A;
733
734 // Count common powers of 2 and remove all other powers of 2.
735 unsigned Pow2;
736 {
737 unsigned Pow2_A = A.countTrailingZeros();
738 unsigned Pow2_B = B.countTrailingZeros();
739 if (Pow2_A > Pow2_B) {
740 A.lshrInPlace(Pow2_A - Pow2_B);
741 Pow2 = Pow2_B;
742 } else if (Pow2_B > Pow2_A) {
743 B.lshrInPlace(Pow2_B - Pow2_A);
744 Pow2 = Pow2_A;
745 } else {
746 Pow2 = Pow2_A;
747 }
748 }
749
750 // Both operands are odd multiples of 2^Pow_2:
751 //
752 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
753 //
754 // This is a modified version of Stein's algorithm, taking advantage of
755 // efficient countTrailingZeros().
756 while (A != B) {
757 if (A.ugt(B)) {
758 A -= B;
759 A.lshrInPlace(A.countTrailingZeros() - Pow2);
760 } else {
761 B -= A;
762 B.lshrInPlace(B.countTrailingZeros() - Pow2);
763 }
764 }
765
766 return A;
767 }
768
RoundDoubleToAPInt(double Double,unsigned width)769 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
770 uint64_t I = bit_cast<uint64_t>(Double);
771
772 // Get the sign bit from the highest order bit
773 bool isNeg = I >> 63;
774
775 // Get the 11-bit exponent and adjust for the 1023 bit bias
776 int64_t exp = ((I >> 52) & 0x7ff) - 1023;
777
778 // If the exponent is negative, the value is < 0 so just return 0.
779 if (exp < 0)
780 return APInt(width, 0u);
781
782 // Extract the mantissa by clearing the top 12 bits (sign + exponent).
783 uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
784
785 // If the exponent doesn't shift all bits out of the mantissa
786 if (exp < 52)
787 return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
788 APInt(width, mantissa >> (52 - exp));
789
790 // If the client didn't provide enough bits for us to shift the mantissa into
791 // then the result is undefined, just return 0
792 if (width <= exp - 52)
793 return APInt(width, 0);
794
795 // Otherwise, we have to shift the mantissa bits up to the right location
796 APInt Tmp(width, mantissa);
797 Tmp <<= (unsigned)exp - 52;
798 return isNeg ? -Tmp : Tmp;
799 }
800
801 /// This function converts this APInt to a double.
802 /// The layout for double is as following (IEEE Standard 754):
803 /// --------------------------------------
804 /// | Sign Exponent Fraction Bias |
805 /// |-------------------------------------- |
806 /// | 1[63] 11[62-52] 52[51-00] 1023 |
807 /// --------------------------------------
roundToDouble(bool isSigned) const808 double APInt::roundToDouble(bool isSigned) const {
809
810 // Handle the simple case where the value is contained in one uint64_t.
811 // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
812 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
813 if (isSigned) {
814 int64_t sext = SignExtend64(getWord(0), BitWidth);
815 return double(sext);
816 } else
817 return double(getWord(0));
818 }
819
820 // Determine if the value is negative.
821 bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
822
823 // Construct the absolute value if we're negative.
824 APInt Tmp(isNeg ? -(*this) : (*this));
825
826 // Figure out how many bits we're using.
827 unsigned n = Tmp.getActiveBits();
828
829 // The exponent (without bias normalization) is just the number of bits
830 // we are using. Note that the sign bit is gone since we constructed the
831 // absolute value.
832 uint64_t exp = n;
833
834 // Return infinity for exponent overflow
835 if (exp > 1023) {
836 if (!isSigned || !isNeg)
837 return std::numeric_limits<double>::infinity();
838 else
839 return -std::numeric_limits<double>::infinity();
840 }
841 exp += 1023; // Increment for 1023 bias
842
843 // Number of bits in mantissa is 52. To obtain the mantissa value, we must
844 // extract the high 52 bits from the correct words in pVal.
845 uint64_t mantissa;
846 unsigned hiWord = whichWord(n-1);
847 if (hiWord == 0) {
848 mantissa = Tmp.U.pVal[0];
849 if (n > 52)
850 mantissa >>= n - 52; // shift down, we want the top 52 bits.
851 } else {
852 assert(hiWord > 0 && "huh?");
853 uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
854 uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
855 mantissa = hibits | lobits;
856 }
857
858 // The leading bit of mantissa is implicit, so get rid of it.
859 uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
860 uint64_t I = sign | (exp << 52) | mantissa;
861 return bit_cast<double>(I);
862 }
863
864 // Truncate to new width.
trunc(unsigned width) const865 APInt APInt::trunc(unsigned width) const {
866 assert(width < BitWidth && "Invalid APInt Truncate request");
867 assert(width && "Can't truncate to 0 bits");
868
869 if (width <= APINT_BITS_PER_WORD)
870 return APInt(width, getRawData()[0]);
871
872 APInt Result(getMemory(getNumWords(width)), width);
873
874 // Copy full words.
875 unsigned i;
876 for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
877 Result.U.pVal[i] = U.pVal[i];
878
879 // Truncate and copy any partial word.
880 unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
881 if (bits != 0)
882 Result.U.pVal[i] = U.pVal[i] << bits >> bits;
883
884 return Result;
885 }
886
887 // Truncate to new width with unsigned saturation.
truncUSat(unsigned width) const888 APInt APInt::truncUSat(unsigned width) const {
889 assert(width < BitWidth && "Invalid APInt Truncate request");
890 assert(width && "Can't truncate to 0 bits");
891
892 // Can we just losslessly truncate it?
893 if (isIntN(width))
894 return trunc(width);
895 // If not, then just return the new limit.
896 return APInt::getMaxValue(width);
897 }
898
899 // Truncate to new width with signed saturation.
truncSSat(unsigned width) const900 APInt APInt::truncSSat(unsigned width) const {
901 assert(width < BitWidth && "Invalid APInt Truncate request");
902 assert(width && "Can't truncate to 0 bits");
903
904 // Can we just losslessly truncate it?
905 if (isSignedIntN(width))
906 return trunc(width);
907 // If not, then just return the new limits.
908 return isNegative() ? APInt::getSignedMinValue(width)
909 : APInt::getSignedMaxValue(width);
910 }
911
912 // Sign extend to a new width.
sext(unsigned Width) const913 APInt APInt::sext(unsigned Width) const {
914 assert(Width > BitWidth && "Invalid APInt SignExtend request");
915
916 if (Width <= APINT_BITS_PER_WORD)
917 return APInt(Width, SignExtend64(U.VAL, BitWidth));
918
919 APInt Result(getMemory(getNumWords(Width)), Width);
920
921 // Copy words.
922 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
923
924 // Sign extend the last word since there may be unused bits in the input.
925 Result.U.pVal[getNumWords() - 1] =
926 SignExtend64(Result.U.pVal[getNumWords() - 1],
927 ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
928
929 // Fill with sign bits.
930 std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
931 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
932 Result.clearUnusedBits();
933 return Result;
934 }
935
936 // Zero extend to a new width.
zext(unsigned width) const937 APInt APInt::zext(unsigned width) const {
938 assert(width > BitWidth && "Invalid APInt ZeroExtend request");
939
940 if (width <= APINT_BITS_PER_WORD)
941 return APInt(width, U.VAL);
942
943 APInt Result(getMemory(getNumWords(width)), width);
944
945 // Copy words.
946 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
947
948 // Zero remaining words.
949 std::memset(Result.U.pVal + getNumWords(), 0,
950 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
951
952 return Result;
953 }
954
zextOrTrunc(unsigned width) const955 APInt APInt::zextOrTrunc(unsigned width) const {
956 if (BitWidth < width)
957 return zext(width);
958 if (BitWidth > width)
959 return trunc(width);
960 return *this;
961 }
962
sextOrTrunc(unsigned width) const963 APInt APInt::sextOrTrunc(unsigned width) const {
964 if (BitWidth < width)
965 return sext(width);
966 if (BitWidth > width)
967 return trunc(width);
968 return *this;
969 }
970
zextOrSelf(unsigned width) const971 APInt APInt::zextOrSelf(unsigned width) const {
972 if (BitWidth < width)
973 return zext(width);
974 return *this;
975 }
976
sextOrSelf(unsigned width) const977 APInt APInt::sextOrSelf(unsigned width) const {
978 if (BitWidth < width)
979 return sext(width);
980 return *this;
981 }
982
983 /// Arithmetic right-shift this APInt by shiftAmt.
984 /// Arithmetic right-shift function.
ashrInPlace(const APInt & shiftAmt)985 void APInt::ashrInPlace(const APInt &shiftAmt) {
986 ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
987 }
988
989 /// Arithmetic right-shift this APInt by shiftAmt.
990 /// Arithmetic right-shift function.
ashrSlowCase(unsigned ShiftAmt)991 void APInt::ashrSlowCase(unsigned ShiftAmt) {
992 // Don't bother performing a no-op shift.
993 if (!ShiftAmt)
994 return;
995
996 // Save the original sign bit for later.
997 bool Negative = isNegative();
998
999 // WordShift is the inter-part shift; BitShift is intra-part shift.
1000 unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
1001 unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
1002
1003 unsigned WordsToMove = getNumWords() - WordShift;
1004 if (WordsToMove != 0) {
1005 // Sign extend the last word to fill in the unused bits.
1006 U.pVal[getNumWords() - 1] = SignExtend64(
1007 U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
1008
1009 // Fastpath for moving by whole words.
1010 if (BitShift == 0) {
1011 std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
1012 } else {
1013 // Move the words containing significant bits.
1014 for (unsigned i = 0; i != WordsToMove - 1; ++i)
1015 U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
1016 (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
1017
1018 // Handle the last word which has no high bits to copy.
1019 U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
1020 // Sign extend one more time.
1021 U.pVal[WordsToMove - 1] =
1022 SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
1023 }
1024 }
1025
1026 // Fill in the remainder based on the original sign.
1027 std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
1028 WordShift * APINT_WORD_SIZE);
1029 clearUnusedBits();
1030 }
1031
1032 /// Logical right-shift this APInt by shiftAmt.
1033 /// Logical right-shift function.
lshrInPlace(const APInt & shiftAmt)1034 void APInt::lshrInPlace(const APInt &shiftAmt) {
1035 lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1036 }
1037
1038 /// Logical right-shift this APInt by shiftAmt.
1039 /// Logical right-shift function.
lshrSlowCase(unsigned ShiftAmt)1040 void APInt::lshrSlowCase(unsigned ShiftAmt) {
1041 tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
1042 }
1043
1044 /// Left-shift this APInt by shiftAmt.
1045 /// Left-shift function.
operator <<=(const APInt & shiftAmt)1046 APInt &APInt::operator<<=(const APInt &shiftAmt) {
1047 // It's undefined behavior in C to shift by BitWidth or greater.
1048 *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
1049 return *this;
1050 }
1051
shlSlowCase(unsigned ShiftAmt)1052 void APInt::shlSlowCase(unsigned ShiftAmt) {
1053 tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
1054 clearUnusedBits();
1055 }
1056
1057 // Calculate the rotate amount modulo the bit width.
rotateModulo(unsigned BitWidth,const APInt & rotateAmt)1058 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1059 unsigned rotBitWidth = rotateAmt.getBitWidth();
1060 APInt rot = rotateAmt;
1061 if (rotBitWidth < BitWidth) {
1062 // Extend the rotate APInt, so that the urem doesn't divide by 0.
1063 // e.g. APInt(1, 32) would give APInt(1, 0).
1064 rot = rotateAmt.zext(BitWidth);
1065 }
1066 rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1067 return rot.getLimitedValue(BitWidth);
1068 }
1069
rotl(const APInt & rotateAmt) const1070 APInt APInt::rotl(const APInt &rotateAmt) const {
1071 return rotl(rotateModulo(BitWidth, rotateAmt));
1072 }
1073
rotl(unsigned rotateAmt) const1074 APInt APInt::rotl(unsigned rotateAmt) const {
1075 rotateAmt %= BitWidth;
1076 if (rotateAmt == 0)
1077 return *this;
1078 return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1079 }
1080
rotr(const APInt & rotateAmt) const1081 APInt APInt::rotr(const APInt &rotateAmt) const {
1082 return rotr(rotateModulo(BitWidth, rotateAmt));
1083 }
1084
rotr(unsigned rotateAmt) const1085 APInt APInt::rotr(unsigned rotateAmt) const {
1086 rotateAmt %= BitWidth;
1087 if (rotateAmt == 0)
1088 return *this;
1089 return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1090 }
1091
1092 // Square Root - this method computes and returns the square root of "this".
1093 // Three mechanisms are used for computation. For small values (<= 5 bits),
1094 // a table lookup is done. This gets some performance for common cases. For
1095 // values using less than 52 bits, the value is converted to double and then
1096 // the libc sqrt function is called. The result is rounded and then converted
1097 // back to a uint64_t which is then used to construct the result. Finally,
1098 // the Babylonian method for computing square roots is used.
sqrt() const1099 APInt APInt::sqrt() const {
1100
1101 // Determine the magnitude of the value.
1102 unsigned magnitude = getActiveBits();
1103
1104 // Use a fast table for some small values. This also gets rid of some
1105 // rounding errors in libc sqrt for small values.
1106 if (magnitude <= 5) {
1107 static const uint8_t results[32] = {
1108 /* 0 */ 0,
1109 /* 1- 2 */ 1, 1,
1110 /* 3- 6 */ 2, 2, 2, 2,
1111 /* 7-12 */ 3, 3, 3, 3, 3, 3,
1112 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1113 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1114 /* 31 */ 6
1115 };
1116 return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1117 }
1118
1119 // If the magnitude of the value fits in less than 52 bits (the precision of
1120 // an IEEE double precision floating point value), then we can use the
1121 // libc sqrt function which will probably use a hardware sqrt computation.
1122 // This should be faster than the algorithm below.
1123 if (magnitude < 52) {
1124 return APInt(BitWidth,
1125 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1126 : U.pVal[0])))));
1127 }
1128
1129 // Okay, all the short cuts are exhausted. We must compute it. The following
1130 // is a classical Babylonian method for computing the square root. This code
1131 // was adapted to APInt from a wikipedia article on such computations.
1132 // See http://www.wikipedia.org/ and go to the page named
1133 // Calculate_an_integer_square_root.
1134 unsigned nbits = BitWidth, i = 4;
1135 APInt testy(BitWidth, 16);
1136 APInt x_old(BitWidth, 1);
1137 APInt x_new(BitWidth, 0);
1138 APInt two(BitWidth, 2);
1139
1140 // Select a good starting value using binary logarithms.
1141 for (;; i += 2, testy = testy.shl(2))
1142 if (i >= nbits || this->ule(testy)) {
1143 x_old = x_old.shl(i / 2);
1144 break;
1145 }
1146
1147 // Use the Babylonian method to arrive at the integer square root:
1148 for (;;) {
1149 x_new = (this->udiv(x_old) + x_old).udiv(two);
1150 if (x_old.ule(x_new))
1151 break;
1152 x_old = x_new;
1153 }
1154
1155 // Make sure we return the closest approximation
1156 // NOTE: The rounding calculation below is correct. It will produce an
1157 // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1158 // determined to be a rounding issue with pari/gp as it begins to use a
1159 // floating point representation after 192 bits. There are no discrepancies
1160 // between this algorithm and pari/gp for bit widths < 192 bits.
1161 APInt square(x_old * x_old);
1162 APInt nextSquare((x_old + 1) * (x_old +1));
1163 if (this->ult(square))
1164 return x_old;
1165 assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1166 APInt midpoint((nextSquare - square).udiv(two));
1167 APInt offset(*this - square);
1168 if (offset.ult(midpoint))
1169 return x_old;
1170 return x_old + 1;
1171 }
1172
1173 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1174 /// iterative extended Euclidean algorithm is used to solve for this value,
1175 /// however we simplify it to speed up calculating only the inverse, and take
1176 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1177 /// (potentially large) APInts around.
1178 /// WARNING: a value of '0' may be returned,
1179 /// signifying that no multiplicative inverse exists!
multiplicativeInverse(const APInt & modulo) const1180 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1181 assert(ult(modulo) && "This APInt must be smaller than the modulo");
1182
1183 // Using the properties listed at the following web page (accessed 06/21/08):
1184 // http://www.numbertheory.org/php/euclid.html
1185 // (especially the properties numbered 3, 4 and 9) it can be proved that
1186 // BitWidth bits suffice for all the computations in the algorithm implemented
1187 // below. More precisely, this number of bits suffice if the multiplicative
1188 // inverse exists, but may not suffice for the general extended Euclidean
1189 // algorithm.
1190
1191 APInt r[2] = { modulo, *this };
1192 APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1193 APInt q(BitWidth, 0);
1194
1195 unsigned i;
1196 for (i = 0; r[i^1] != 0; i ^= 1) {
1197 // An overview of the math without the confusing bit-flipping:
1198 // q = r[i-2] / r[i-1]
1199 // r[i] = r[i-2] % r[i-1]
1200 // t[i] = t[i-2] - t[i-1] * q
1201 udivrem(r[i], r[i^1], q, r[i]);
1202 t[i] -= t[i^1] * q;
1203 }
1204
1205 // If this APInt and the modulo are not coprime, there is no multiplicative
1206 // inverse, so return 0. We check this by looking at the next-to-last
1207 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1208 // algorithm.
1209 if (r[i] != 1)
1210 return APInt(BitWidth, 0);
1211
1212 // The next-to-last t is the multiplicative inverse. However, we are
1213 // interested in a positive inverse. Calculate a positive one from a negative
1214 // one if necessary. A simple addition of the modulo suffices because
1215 // abs(t[i]) is known to be less than *this/2 (see the link above).
1216 if (t[i].isNegative())
1217 t[i] += modulo;
1218
1219 return std::move(t[i]);
1220 }
1221
1222 /// Calculate the magic numbers required to implement a signed integer division
1223 /// by a constant as a sequence of multiplies, adds and shifts. Requires that
1224 /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S.
1225 /// Warren, Jr., chapter 10.
magic() const1226 APInt::ms APInt::magic() const {
1227 const APInt& d = *this;
1228 unsigned p;
1229 APInt ad, anc, delta, q1, r1, q2, r2, t;
1230 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1231 struct ms mag;
1232
1233 ad = d.abs();
1234 t = signedMin + (d.lshr(d.getBitWidth() - 1));
1235 anc = t - 1 - t.urem(ad); // absolute value of nc
1236 p = d.getBitWidth() - 1; // initialize p
1237 q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc)
1238 r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc))
1239 q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d)
1240 r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d))
1241 do {
1242 p = p + 1;
1243 q1 = q1<<1; // update q1 = 2p/abs(nc)
1244 r1 = r1<<1; // update r1 = rem(2p/abs(nc))
1245 if (r1.uge(anc)) { // must be unsigned comparison
1246 q1 = q1 + 1;
1247 r1 = r1 - anc;
1248 }
1249 q2 = q2<<1; // update q2 = 2p/abs(d)
1250 r2 = r2<<1; // update r2 = rem(2p/abs(d))
1251 if (r2.uge(ad)) { // must be unsigned comparison
1252 q2 = q2 + 1;
1253 r2 = r2 - ad;
1254 }
1255 delta = ad - r2;
1256 } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1257
1258 mag.m = q2 + 1;
1259 if (d.isNegative()) mag.m = -mag.m; // resulting magic number
1260 mag.s = p - d.getBitWidth(); // resulting shift
1261 return mag;
1262 }
1263
1264 /// Calculate the magic numbers required to implement an unsigned integer
1265 /// division by a constant as a sequence of multiplies, adds and shifts.
1266 /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry
1267 /// S. Warren, Jr., chapter 10.
1268 /// LeadingZeros can be used to simplify the calculation if the upper bits
1269 /// of the divided value are known zero.
magicu(unsigned LeadingZeros) const1270 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1271 const APInt& d = *this;
1272 unsigned p;
1273 APInt nc, delta, q1, r1, q2, r2;
1274 struct mu magu;
1275 magu.a = 0; // initialize "add" indicator
1276 APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1277 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1278 APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1279
1280 nc = allOnes - (allOnes - d).urem(d);
1281 p = d.getBitWidth() - 1; // initialize p
1282 q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc
1283 r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc)
1284 q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d
1285 r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d)
1286 do {
1287 p = p + 1;
1288 if (r1.uge(nc - r1)) {
1289 q1 = q1 + q1 + 1; // update q1
1290 r1 = r1 + r1 - nc; // update r1
1291 }
1292 else {
1293 q1 = q1+q1; // update q1
1294 r1 = r1+r1; // update r1
1295 }
1296 if ((r2 + 1).uge(d - r2)) {
1297 if (q2.uge(signedMax)) magu.a = 1;
1298 q2 = q2+q2 + 1; // update q2
1299 r2 = r2+r2 + 1 - d; // update r2
1300 }
1301 else {
1302 if (q2.uge(signedMin)) magu.a = 1;
1303 q2 = q2+q2; // update q2
1304 r2 = r2+r2 + 1; // update r2
1305 }
1306 delta = d - 1 - r2;
1307 } while (p < d.getBitWidth()*2 &&
1308 (q1.ult(delta) || (q1 == delta && r1 == 0)));
1309 magu.m = q2 + 1; // resulting magic number
1310 magu.s = p - d.getBitWidth(); // resulting shift
1311 return magu;
1312 }
1313
1314 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1315 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1316 /// variables here have the same names as in the algorithm. Comments explain
1317 /// the algorithm and any deviation from it.
KnuthDiv(uint32_t * u,uint32_t * v,uint32_t * q,uint32_t * r,unsigned m,unsigned n)1318 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1319 unsigned m, unsigned n) {
1320 assert(u && "Must provide dividend");
1321 assert(v && "Must provide divisor");
1322 assert(q && "Must provide quotient");
1323 assert(u != v && u != q && v != q && "Must use different memory");
1324 assert(n>1 && "n must be > 1");
1325
1326 // b denotes the base of the number system. In our case b is 2^32.
1327 const uint64_t b = uint64_t(1) << 32;
1328
1329 // The DEBUG macros here tend to be spam in the debug output if you're not
1330 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1331 #ifdef KNUTH_DEBUG
1332 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1333 #else
1334 #define DEBUG_KNUTH(X) do {} while(false)
1335 #endif
1336
1337 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1338 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1339 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1340 DEBUG_KNUTH(dbgs() << " by");
1341 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1342 DEBUG_KNUTH(dbgs() << '\n');
1343 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1344 // u and v by d. Note that we have taken Knuth's advice here to use a power
1345 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1346 // 2 allows us to shift instead of multiply and it is easy to determine the
1347 // shift amount from the leading zeros. We are basically normalizing the u
1348 // and v so that its high bits are shifted to the top of v's range without
1349 // overflow. Note that this can require an extra word in u so that u must
1350 // be of length m+n+1.
1351 unsigned shift = countLeadingZeros(v[n-1]);
1352 uint32_t v_carry = 0;
1353 uint32_t u_carry = 0;
1354 if (shift) {
1355 for (unsigned i = 0; i < m+n; ++i) {
1356 uint32_t u_tmp = u[i] >> (32 - shift);
1357 u[i] = (u[i] << shift) | u_carry;
1358 u_carry = u_tmp;
1359 }
1360 for (unsigned i = 0; i < n; ++i) {
1361 uint32_t v_tmp = v[i] >> (32 - shift);
1362 v[i] = (v[i] << shift) | v_carry;
1363 v_carry = v_tmp;
1364 }
1365 }
1366 u[m+n] = u_carry;
1367
1368 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:");
1369 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1370 DEBUG_KNUTH(dbgs() << " by");
1371 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1372 DEBUG_KNUTH(dbgs() << '\n');
1373
1374 // D2. [Initialize j.] Set j to m. This is the loop counter over the places.
1375 int j = m;
1376 do {
1377 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1378 // D3. [Calculate q'.].
1379 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1380 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1381 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1382 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1383 // on v[n-2] determines at high speed most of the cases in which the trial
1384 // value qp is one too large, and it eliminates all cases where qp is two
1385 // too large.
1386 uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1387 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1388 uint64_t qp = dividend / v[n-1];
1389 uint64_t rp = dividend % v[n-1];
1390 if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1391 qp--;
1392 rp += v[n-1];
1393 if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1394 qp--;
1395 }
1396 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1397
1398 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1399 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1400 // consists of a simple multiplication by a one-place number, combined with
1401 // a subtraction.
1402 // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1403 // this step is actually negative, (u[j+n]...u[j]) should be left as the
1404 // true value plus b**(n+1), namely as the b's complement of
1405 // the true value, and a "borrow" to the left should be remembered.
1406 int64_t borrow = 0;
1407 for (unsigned i = 0; i < n; ++i) {
1408 uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1409 int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1410 u[j+i] = Lo_32(subres);
1411 borrow = Hi_32(p) - Hi_32(subres);
1412 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1413 << ", borrow = " << borrow << '\n');
1414 }
1415 bool isNeg = u[j+n] < borrow;
1416 u[j+n] -= Lo_32(borrow);
1417
1418 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1419 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1420 DEBUG_KNUTH(dbgs() << '\n');
1421
1422 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1423 // negative, go to step D6; otherwise go on to step D7.
1424 q[j] = Lo_32(qp);
1425 if (isNeg) {
1426 // D6. [Add back]. The probability that this step is necessary is very
1427 // small, on the order of only 2/b. Make sure that test data accounts for
1428 // this possibility. Decrease q[j] by 1
1429 q[j]--;
1430 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1431 // A carry will occur to the left of u[j+n], and it should be ignored
1432 // since it cancels with the borrow that occurred in D4.
1433 bool carry = false;
1434 for (unsigned i = 0; i < n; i++) {
1435 uint32_t limit = std::min(u[j+i],v[i]);
1436 u[j+i] += v[i] + carry;
1437 carry = u[j+i] < limit || (carry && u[j+i] == limit);
1438 }
1439 u[j+n] += carry;
1440 }
1441 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1442 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1443 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1444
1445 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
1446 } while (--j >= 0);
1447
1448 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1449 DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1450 DEBUG_KNUTH(dbgs() << '\n');
1451
1452 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1453 // remainder may be obtained by dividing u[...] by d. If r is non-null we
1454 // compute the remainder (urem uses this).
1455 if (r) {
1456 // The value d is expressed by the "shift" value above since we avoided
1457 // multiplication by d by using a shift left. So, all we have to do is
1458 // shift right here.
1459 if (shift) {
1460 uint32_t carry = 0;
1461 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1462 for (int i = n-1; i >= 0; i--) {
1463 r[i] = (u[i] >> shift) | carry;
1464 carry = u[i] << (32 - shift);
1465 DEBUG_KNUTH(dbgs() << " " << r[i]);
1466 }
1467 } else {
1468 for (int i = n-1; i >= 0; i--) {
1469 r[i] = u[i];
1470 DEBUG_KNUTH(dbgs() << " " << r[i]);
1471 }
1472 }
1473 DEBUG_KNUTH(dbgs() << '\n');
1474 }
1475 DEBUG_KNUTH(dbgs() << '\n');
1476 }
1477
divide(const WordType * LHS,unsigned lhsWords,const WordType * RHS,unsigned rhsWords,WordType * Quotient,WordType * Remainder)1478 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1479 unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1480 assert(lhsWords >= rhsWords && "Fractional result");
1481
1482 // First, compose the values into an array of 32-bit words instead of
1483 // 64-bit words. This is a necessity of both the "short division" algorithm
1484 // and the Knuth "classical algorithm" which requires there to be native
1485 // operations for +, -, and * on an m bit value with an m*2 bit result. We
1486 // can't use 64-bit operands here because we don't have native results of
1487 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1488 // work on large-endian machines.
1489 unsigned n = rhsWords * 2;
1490 unsigned m = (lhsWords * 2) - n;
1491
1492 // Allocate space for the temporary values we need either on the stack, if
1493 // it will fit, or on the heap if it won't.
1494 uint32_t SPACE[128];
1495 uint32_t *U = nullptr;
1496 uint32_t *V = nullptr;
1497 uint32_t *Q = nullptr;
1498 uint32_t *R = nullptr;
1499 if ((Remainder?4:3)*n+2*m+1 <= 128) {
1500 U = &SPACE[0];
1501 V = &SPACE[m+n+1];
1502 Q = &SPACE[(m+n+1) + n];
1503 if (Remainder)
1504 R = &SPACE[(m+n+1) + n + (m+n)];
1505 } else {
1506 U = new uint32_t[m + n + 1];
1507 V = new uint32_t[n];
1508 Q = new uint32_t[m+n];
1509 if (Remainder)
1510 R = new uint32_t[n];
1511 }
1512
1513 // Initialize the dividend
1514 memset(U, 0, (m+n+1)*sizeof(uint32_t));
1515 for (unsigned i = 0; i < lhsWords; ++i) {
1516 uint64_t tmp = LHS[i];
1517 U[i * 2] = Lo_32(tmp);
1518 U[i * 2 + 1] = Hi_32(tmp);
1519 }
1520 U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1521
1522 // Initialize the divisor
1523 memset(V, 0, (n)*sizeof(uint32_t));
1524 for (unsigned i = 0; i < rhsWords; ++i) {
1525 uint64_t tmp = RHS[i];
1526 V[i * 2] = Lo_32(tmp);
1527 V[i * 2 + 1] = Hi_32(tmp);
1528 }
1529
1530 // initialize the quotient and remainder
1531 memset(Q, 0, (m+n) * sizeof(uint32_t));
1532 if (Remainder)
1533 memset(R, 0, n * sizeof(uint32_t));
1534
1535 // Now, adjust m and n for the Knuth division. n is the number of words in
1536 // the divisor. m is the number of words by which the dividend exceeds the
1537 // divisor (i.e. m+n is the length of the dividend). These sizes must not
1538 // contain any zero words or the Knuth algorithm fails.
1539 for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1540 n--;
1541 m++;
1542 }
1543 for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1544 m--;
1545
1546 // If we're left with only a single word for the divisor, Knuth doesn't work
1547 // so we implement the short division algorithm here. This is much simpler
1548 // and faster because we are certain that we can divide a 64-bit quantity
1549 // by a 32-bit quantity at hardware speed and short division is simply a
1550 // series of such operations. This is just like doing short division but we
1551 // are using base 2^32 instead of base 10.
1552 assert(n != 0 && "Divide by zero?");
1553 if (n == 1) {
1554 uint32_t divisor = V[0];
1555 uint32_t remainder = 0;
1556 for (int i = m; i >= 0; i--) {
1557 uint64_t partial_dividend = Make_64(remainder, U[i]);
1558 if (partial_dividend == 0) {
1559 Q[i] = 0;
1560 remainder = 0;
1561 } else if (partial_dividend < divisor) {
1562 Q[i] = 0;
1563 remainder = Lo_32(partial_dividend);
1564 } else if (partial_dividend == divisor) {
1565 Q[i] = 1;
1566 remainder = 0;
1567 } else {
1568 Q[i] = Lo_32(partial_dividend / divisor);
1569 remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1570 }
1571 }
1572 if (R)
1573 R[0] = remainder;
1574 } else {
1575 // Now we're ready to invoke the Knuth classical divide algorithm. In this
1576 // case n > 1.
1577 KnuthDiv(U, V, Q, R, m, n);
1578 }
1579
1580 // If the caller wants the quotient
1581 if (Quotient) {
1582 for (unsigned i = 0; i < lhsWords; ++i)
1583 Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1584 }
1585
1586 // If the caller wants the remainder
1587 if (Remainder) {
1588 for (unsigned i = 0; i < rhsWords; ++i)
1589 Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1590 }
1591
1592 // Clean up the memory we allocated.
1593 if (U != &SPACE[0]) {
1594 delete [] U;
1595 delete [] V;
1596 delete [] Q;
1597 delete [] R;
1598 }
1599 }
1600
udiv(const APInt & RHS) const1601 APInt APInt::udiv(const APInt &RHS) const {
1602 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1603
1604 // First, deal with the easy case
1605 if (isSingleWord()) {
1606 assert(RHS.U.VAL != 0 && "Divide by zero?");
1607 return APInt(BitWidth, U.VAL / RHS.U.VAL);
1608 }
1609
1610 // Get some facts about the LHS and RHS number of bits and words
1611 unsigned lhsWords = getNumWords(getActiveBits());
1612 unsigned rhsBits = RHS.getActiveBits();
1613 unsigned rhsWords = getNumWords(rhsBits);
1614 assert(rhsWords && "Divided by zero???");
1615
1616 // Deal with some degenerate cases
1617 if (!lhsWords)
1618 // 0 / X ===> 0
1619 return APInt(BitWidth, 0);
1620 if (rhsBits == 1)
1621 // X / 1 ===> X
1622 return *this;
1623 if (lhsWords < rhsWords || this->ult(RHS))
1624 // X / Y ===> 0, iff X < Y
1625 return APInt(BitWidth, 0);
1626 if (*this == RHS)
1627 // X / X ===> 1
1628 return APInt(BitWidth, 1);
1629 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1630 // All high words are zero, just use native divide
1631 return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1632
1633 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1634 APInt Quotient(BitWidth, 0); // to hold result.
1635 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1636 return Quotient;
1637 }
1638
udiv(uint64_t RHS) const1639 APInt APInt::udiv(uint64_t RHS) const {
1640 assert(RHS != 0 && "Divide by zero?");
1641
1642 // First, deal with the easy case
1643 if (isSingleWord())
1644 return APInt(BitWidth, U.VAL / RHS);
1645
1646 // Get some facts about the LHS words.
1647 unsigned lhsWords = getNumWords(getActiveBits());
1648
1649 // Deal with some degenerate cases
1650 if (!lhsWords)
1651 // 0 / X ===> 0
1652 return APInt(BitWidth, 0);
1653 if (RHS == 1)
1654 // X / 1 ===> X
1655 return *this;
1656 if (this->ult(RHS))
1657 // X / Y ===> 0, iff X < Y
1658 return APInt(BitWidth, 0);
1659 if (*this == RHS)
1660 // X / X ===> 1
1661 return APInt(BitWidth, 1);
1662 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1663 // All high words are zero, just use native divide
1664 return APInt(BitWidth, this->U.pVal[0] / RHS);
1665
1666 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1667 APInt Quotient(BitWidth, 0); // to hold result.
1668 divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1669 return Quotient;
1670 }
1671
sdiv(const APInt & RHS) const1672 APInt APInt::sdiv(const APInt &RHS) const {
1673 if (isNegative()) {
1674 if (RHS.isNegative())
1675 return (-(*this)).udiv(-RHS);
1676 return -((-(*this)).udiv(RHS));
1677 }
1678 if (RHS.isNegative())
1679 return -(this->udiv(-RHS));
1680 return this->udiv(RHS);
1681 }
1682
sdiv(int64_t RHS) const1683 APInt APInt::sdiv(int64_t RHS) const {
1684 if (isNegative()) {
1685 if (RHS < 0)
1686 return (-(*this)).udiv(-RHS);
1687 return -((-(*this)).udiv(RHS));
1688 }
1689 if (RHS < 0)
1690 return -(this->udiv(-RHS));
1691 return this->udiv(RHS);
1692 }
1693
urem(const APInt & RHS) const1694 APInt APInt::urem(const APInt &RHS) const {
1695 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1696 if (isSingleWord()) {
1697 assert(RHS.U.VAL != 0 && "Remainder by zero?");
1698 return APInt(BitWidth, U.VAL % RHS.U.VAL);
1699 }
1700
1701 // Get some facts about the LHS
1702 unsigned lhsWords = getNumWords(getActiveBits());
1703
1704 // Get some facts about the RHS
1705 unsigned rhsBits = RHS.getActiveBits();
1706 unsigned rhsWords = getNumWords(rhsBits);
1707 assert(rhsWords && "Performing remainder operation by zero ???");
1708
1709 // Check the degenerate cases
1710 if (lhsWords == 0)
1711 // 0 % Y ===> 0
1712 return APInt(BitWidth, 0);
1713 if (rhsBits == 1)
1714 // X % 1 ===> 0
1715 return APInt(BitWidth, 0);
1716 if (lhsWords < rhsWords || this->ult(RHS))
1717 // X % Y ===> X, iff X < Y
1718 return *this;
1719 if (*this == RHS)
1720 // X % X == 0;
1721 return APInt(BitWidth, 0);
1722 if (lhsWords == 1)
1723 // All high words are zero, just use native remainder
1724 return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1725
1726 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1727 APInt Remainder(BitWidth, 0);
1728 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1729 return Remainder;
1730 }
1731
urem(uint64_t RHS) const1732 uint64_t APInt::urem(uint64_t RHS) const {
1733 assert(RHS != 0 && "Remainder by zero?");
1734
1735 if (isSingleWord())
1736 return U.VAL % RHS;
1737
1738 // Get some facts about the LHS
1739 unsigned lhsWords = getNumWords(getActiveBits());
1740
1741 // Check the degenerate cases
1742 if (lhsWords == 0)
1743 // 0 % Y ===> 0
1744 return 0;
1745 if (RHS == 1)
1746 // X % 1 ===> 0
1747 return 0;
1748 if (this->ult(RHS))
1749 // X % Y ===> X, iff X < Y
1750 return getZExtValue();
1751 if (*this == RHS)
1752 // X % X == 0;
1753 return 0;
1754 if (lhsWords == 1)
1755 // All high words are zero, just use native remainder
1756 return U.pVal[0] % RHS;
1757
1758 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1759 uint64_t Remainder;
1760 divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1761 return Remainder;
1762 }
1763
srem(const APInt & RHS) const1764 APInt APInt::srem(const APInt &RHS) const {
1765 if (isNegative()) {
1766 if (RHS.isNegative())
1767 return -((-(*this)).urem(-RHS));
1768 return -((-(*this)).urem(RHS));
1769 }
1770 if (RHS.isNegative())
1771 return this->urem(-RHS);
1772 return this->urem(RHS);
1773 }
1774
srem(int64_t RHS) const1775 int64_t APInt::srem(int64_t RHS) const {
1776 if (isNegative()) {
1777 if (RHS < 0)
1778 return -((-(*this)).urem(-RHS));
1779 return -((-(*this)).urem(RHS));
1780 }
1781 if (RHS < 0)
1782 return this->urem(-RHS);
1783 return this->urem(RHS);
1784 }
1785
udivrem(const APInt & LHS,const APInt & RHS,APInt & Quotient,APInt & Remainder)1786 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1787 APInt &Quotient, APInt &Remainder) {
1788 assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1789 unsigned BitWidth = LHS.BitWidth;
1790
1791 // First, deal with the easy case
1792 if (LHS.isSingleWord()) {
1793 assert(RHS.U.VAL != 0 && "Divide by zero?");
1794 uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1795 uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1796 Quotient = APInt(BitWidth, QuotVal);
1797 Remainder = APInt(BitWidth, RemVal);
1798 return;
1799 }
1800
1801 // Get some size facts about the dividend and divisor
1802 unsigned lhsWords = getNumWords(LHS.getActiveBits());
1803 unsigned rhsBits = RHS.getActiveBits();
1804 unsigned rhsWords = getNumWords(rhsBits);
1805 assert(rhsWords && "Performing divrem operation by zero ???");
1806
1807 // Check the degenerate cases
1808 if (lhsWords == 0) {
1809 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0
1810 Remainder = APInt(BitWidth, 0); // 0 % Y ===> 0
1811 return;
1812 }
1813
1814 if (rhsBits == 1) {
1815 Quotient = LHS; // X / 1 ===> X
1816 Remainder = APInt(BitWidth, 0); // X % 1 ===> 0
1817 }
1818
1819 if (lhsWords < rhsWords || LHS.ult(RHS)) {
1820 Remainder = LHS; // X % Y ===> X, iff X < Y
1821 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y
1822 return;
1823 }
1824
1825 if (LHS == RHS) {
1826 Quotient = APInt(BitWidth, 1); // X / X ===> 1
1827 Remainder = APInt(BitWidth, 0); // X % X ===> 0;
1828 return;
1829 }
1830
1831 // Make sure there is enough space to hold the results.
1832 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1833 // change the size. This is necessary if Quotient or Remainder is aliased
1834 // with LHS or RHS.
1835 Quotient.reallocate(BitWidth);
1836 Remainder.reallocate(BitWidth);
1837
1838 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1839 // There is only one word to consider so use the native versions.
1840 uint64_t lhsValue = LHS.U.pVal[0];
1841 uint64_t rhsValue = RHS.U.pVal[0];
1842 Quotient = lhsValue / rhsValue;
1843 Remainder = lhsValue % rhsValue;
1844 return;
1845 }
1846
1847 // Okay, lets do it the long way
1848 divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1849 Remainder.U.pVal);
1850 // Clear the rest of the Quotient and Remainder.
1851 std::memset(Quotient.U.pVal + lhsWords, 0,
1852 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1853 std::memset(Remainder.U.pVal + rhsWords, 0,
1854 (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1855 }
1856
udivrem(const APInt & LHS,uint64_t RHS,APInt & Quotient,uint64_t & Remainder)1857 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1858 uint64_t &Remainder) {
1859 assert(RHS != 0 && "Divide by zero?");
1860 unsigned BitWidth = LHS.BitWidth;
1861
1862 // First, deal with the easy case
1863 if (LHS.isSingleWord()) {
1864 uint64_t QuotVal = LHS.U.VAL / RHS;
1865 Remainder = LHS.U.VAL % RHS;
1866 Quotient = APInt(BitWidth, QuotVal);
1867 return;
1868 }
1869
1870 // Get some size facts about the dividend and divisor
1871 unsigned lhsWords = getNumWords(LHS.getActiveBits());
1872
1873 // Check the degenerate cases
1874 if (lhsWords == 0) {
1875 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0
1876 Remainder = 0; // 0 % Y ===> 0
1877 return;
1878 }
1879
1880 if (RHS == 1) {
1881 Quotient = LHS; // X / 1 ===> X
1882 Remainder = 0; // X % 1 ===> 0
1883 return;
1884 }
1885
1886 if (LHS.ult(RHS)) {
1887 Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y
1888 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y
1889 return;
1890 }
1891
1892 if (LHS == RHS) {
1893 Quotient = APInt(BitWidth, 1); // X / X ===> 1
1894 Remainder = 0; // X % X ===> 0;
1895 return;
1896 }
1897
1898 // Make sure there is enough space to hold the results.
1899 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1900 // change the size. This is necessary if Quotient is aliased with LHS.
1901 Quotient.reallocate(BitWidth);
1902
1903 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1904 // There is only one word to consider so use the native versions.
1905 uint64_t lhsValue = LHS.U.pVal[0];
1906 Quotient = lhsValue / RHS;
1907 Remainder = lhsValue % RHS;
1908 return;
1909 }
1910
1911 // Okay, lets do it the long way
1912 divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1913 // Clear the rest of the Quotient.
1914 std::memset(Quotient.U.pVal + lhsWords, 0,
1915 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1916 }
1917
sdivrem(const APInt & LHS,const APInt & RHS,APInt & Quotient,APInt & Remainder)1918 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1919 APInt &Quotient, APInt &Remainder) {
1920 if (LHS.isNegative()) {
1921 if (RHS.isNegative())
1922 APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1923 else {
1924 APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1925 Quotient.negate();
1926 }
1927 Remainder.negate();
1928 } else if (RHS.isNegative()) {
1929 APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1930 Quotient.negate();
1931 } else {
1932 APInt::udivrem(LHS, RHS, Quotient, Remainder);
1933 }
1934 }
1935
sdivrem(const APInt & LHS,int64_t RHS,APInt & Quotient,int64_t & Remainder)1936 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1937 APInt &Quotient, int64_t &Remainder) {
1938 uint64_t R = Remainder;
1939 if (LHS.isNegative()) {
1940 if (RHS < 0)
1941 APInt::udivrem(-LHS, -RHS, Quotient, R);
1942 else {
1943 APInt::udivrem(-LHS, RHS, Quotient, R);
1944 Quotient.negate();
1945 }
1946 R = -R;
1947 } else if (RHS < 0) {
1948 APInt::udivrem(LHS, -RHS, Quotient, R);
1949 Quotient.negate();
1950 } else {
1951 APInt::udivrem(LHS, RHS, Quotient, R);
1952 }
1953 Remainder = R;
1954 }
1955
sadd_ov(const APInt & RHS,bool & Overflow) const1956 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1957 APInt Res = *this+RHS;
1958 Overflow = isNonNegative() == RHS.isNonNegative() &&
1959 Res.isNonNegative() != isNonNegative();
1960 return Res;
1961 }
1962
uadd_ov(const APInt & RHS,bool & Overflow) const1963 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1964 APInt Res = *this+RHS;
1965 Overflow = Res.ult(RHS);
1966 return Res;
1967 }
1968
ssub_ov(const APInt & RHS,bool & Overflow) const1969 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1970 APInt Res = *this - RHS;
1971 Overflow = isNonNegative() != RHS.isNonNegative() &&
1972 Res.isNonNegative() != isNonNegative();
1973 return Res;
1974 }
1975
usub_ov(const APInt & RHS,bool & Overflow) const1976 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1977 APInt Res = *this-RHS;
1978 Overflow = Res.ugt(*this);
1979 return Res;
1980 }
1981
sdiv_ov(const APInt & RHS,bool & Overflow) const1982 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1983 // MININT/-1 --> overflow.
1984 Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1985 return sdiv(RHS);
1986 }
1987
smul_ov(const APInt & RHS,bool & Overflow) const1988 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1989 APInt Res = *this * RHS;
1990
1991 if (*this != 0 && RHS != 0)
1992 Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1993 else
1994 Overflow = false;
1995 return Res;
1996 }
1997
umul_ov(const APInt & RHS,bool & Overflow) const1998 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1999 if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
2000 Overflow = true;
2001 return *this * RHS;
2002 }
2003
2004 APInt Res = lshr(1) * RHS;
2005 Overflow = Res.isNegative();
2006 Res <<= 1;
2007 if ((*this)[0]) {
2008 Res += RHS;
2009 if (Res.ult(RHS))
2010 Overflow = true;
2011 }
2012 return Res;
2013 }
2014
sshl_ov(const APInt & ShAmt,bool & Overflow) const2015 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
2016 Overflow = ShAmt.uge(getBitWidth());
2017 if (Overflow)
2018 return APInt(BitWidth, 0);
2019
2020 if (isNonNegative()) // Don't allow sign change.
2021 Overflow = ShAmt.uge(countLeadingZeros());
2022 else
2023 Overflow = ShAmt.uge(countLeadingOnes());
2024
2025 return *this << ShAmt;
2026 }
2027
ushl_ov(const APInt & ShAmt,bool & Overflow) const2028 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
2029 Overflow = ShAmt.uge(getBitWidth());
2030 if (Overflow)
2031 return APInt(BitWidth, 0);
2032
2033 Overflow = ShAmt.ugt(countLeadingZeros());
2034
2035 return *this << ShAmt;
2036 }
2037
sadd_sat(const APInt & RHS) const2038 APInt APInt::sadd_sat(const APInt &RHS) const {
2039 bool Overflow;
2040 APInt Res = sadd_ov(RHS, Overflow);
2041 if (!Overflow)
2042 return Res;
2043
2044 return isNegative() ? APInt::getSignedMinValue(BitWidth)
2045 : APInt::getSignedMaxValue(BitWidth);
2046 }
2047
uadd_sat(const APInt & RHS) const2048 APInt APInt::uadd_sat(const APInt &RHS) const {
2049 bool Overflow;
2050 APInt Res = uadd_ov(RHS, Overflow);
2051 if (!Overflow)
2052 return Res;
2053
2054 return APInt::getMaxValue(BitWidth);
2055 }
2056
ssub_sat(const APInt & RHS) const2057 APInt APInt::ssub_sat(const APInt &RHS) const {
2058 bool Overflow;
2059 APInt Res = ssub_ov(RHS, Overflow);
2060 if (!Overflow)
2061 return Res;
2062
2063 return isNegative() ? APInt::getSignedMinValue(BitWidth)
2064 : APInt::getSignedMaxValue(BitWidth);
2065 }
2066
usub_sat(const APInt & RHS) const2067 APInt APInt::usub_sat(const APInt &RHS) const {
2068 bool Overflow;
2069 APInt Res = usub_ov(RHS, Overflow);
2070 if (!Overflow)
2071 return Res;
2072
2073 return APInt(BitWidth, 0);
2074 }
2075
smul_sat(const APInt & RHS) const2076 APInt APInt::smul_sat(const APInt &RHS) const {
2077 bool Overflow;
2078 APInt Res = smul_ov(RHS, Overflow);
2079 if (!Overflow)
2080 return Res;
2081
2082 // The result is negative if one and only one of inputs is negative.
2083 bool ResIsNegative = isNegative() ^ RHS.isNegative();
2084
2085 return ResIsNegative ? APInt::getSignedMinValue(BitWidth)
2086 : APInt::getSignedMaxValue(BitWidth);
2087 }
2088
umul_sat(const APInt & RHS) const2089 APInt APInt::umul_sat(const APInt &RHS) const {
2090 bool Overflow;
2091 APInt Res = umul_ov(RHS, Overflow);
2092 if (!Overflow)
2093 return Res;
2094
2095 return APInt::getMaxValue(BitWidth);
2096 }
2097
sshl_sat(const APInt & RHS) const2098 APInt APInt::sshl_sat(const APInt &RHS) const {
2099 bool Overflow;
2100 APInt Res = sshl_ov(RHS, Overflow);
2101 if (!Overflow)
2102 return Res;
2103
2104 return isNegative() ? APInt::getSignedMinValue(BitWidth)
2105 : APInt::getSignedMaxValue(BitWidth);
2106 }
2107
ushl_sat(const APInt & RHS) const2108 APInt APInt::ushl_sat(const APInt &RHS) const {
2109 bool Overflow;
2110 APInt Res = ushl_ov(RHS, Overflow);
2111 if (!Overflow)
2112 return Res;
2113
2114 return APInt::getMaxValue(BitWidth);
2115 }
2116
fromString(unsigned numbits,StringRef str,uint8_t radix)2117 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2118 // Check our assumptions here
2119 assert(!str.empty() && "Invalid string length");
2120 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2121 radix == 36) &&
2122 "Radix should be 2, 8, 10, 16, or 36!");
2123
2124 StringRef::iterator p = str.begin();
2125 size_t slen = str.size();
2126 bool isNeg = *p == '-';
2127 if (*p == '-' || *p == '+') {
2128 p++;
2129 slen--;
2130 assert(slen && "String is only a sign, needs a value.");
2131 }
2132 assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2133 assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2134 assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2135 assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2136 "Insufficient bit width");
2137
2138 // Allocate memory if needed
2139 if (isSingleWord())
2140 U.VAL = 0;
2141 else
2142 U.pVal = getClearedMemory(getNumWords());
2143
2144 // Figure out if we can shift instead of multiply
2145 unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2146
2147 // Enter digit traversal loop
2148 for (StringRef::iterator e = str.end(); p != e; ++p) {
2149 unsigned digit = getDigit(*p, radix);
2150 assert(digit < radix && "Invalid character in digit string");
2151
2152 // Shift or multiply the value by the radix
2153 if (slen > 1) {
2154 if (shift)
2155 *this <<= shift;
2156 else
2157 *this *= radix;
2158 }
2159
2160 // Add in the digit we just interpreted
2161 *this += digit;
2162 }
2163 // If its negative, put it in two's complement form
2164 if (isNeg)
2165 this->negate();
2166 }
2167
toString(SmallVectorImpl<char> & Str,unsigned Radix,bool Signed,bool formatAsCLiteral) const2168 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2169 bool Signed, bool formatAsCLiteral) const {
2170 assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2171 Radix == 36) &&
2172 "Radix should be 2, 8, 10, 16, or 36!");
2173
2174 const char *Prefix = "";
2175 if (formatAsCLiteral) {
2176 switch (Radix) {
2177 case 2:
2178 // Binary literals are a non-standard extension added in gcc 4.3:
2179 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2180 Prefix = "0b";
2181 break;
2182 case 8:
2183 Prefix = "0";
2184 break;
2185 case 10:
2186 break; // No prefix
2187 case 16:
2188 Prefix = "0x";
2189 break;
2190 default:
2191 llvm_unreachable("Invalid radix!");
2192 }
2193 }
2194
2195 // First, check for a zero value and just short circuit the logic below.
2196 if (*this == 0) {
2197 while (*Prefix) {
2198 Str.push_back(*Prefix);
2199 ++Prefix;
2200 };
2201 Str.push_back('0');
2202 return;
2203 }
2204
2205 static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2206
2207 if (isSingleWord()) {
2208 char Buffer[65];
2209 char *BufPtr = std::end(Buffer);
2210
2211 uint64_t N;
2212 if (!Signed) {
2213 N = getZExtValue();
2214 } else {
2215 int64_t I = getSExtValue();
2216 if (I >= 0) {
2217 N = I;
2218 } else {
2219 Str.push_back('-');
2220 N = -(uint64_t)I;
2221 }
2222 }
2223
2224 while (*Prefix) {
2225 Str.push_back(*Prefix);
2226 ++Prefix;
2227 };
2228
2229 while (N) {
2230 *--BufPtr = Digits[N % Radix];
2231 N /= Radix;
2232 }
2233 Str.append(BufPtr, std::end(Buffer));
2234 return;
2235 }
2236
2237 APInt Tmp(*this);
2238
2239 if (Signed && isNegative()) {
2240 // They want to print the signed version and it is a negative value
2241 // Flip the bits and add one to turn it into the equivalent positive
2242 // value and put a '-' in the result.
2243 Tmp.negate();
2244 Str.push_back('-');
2245 }
2246
2247 while (*Prefix) {
2248 Str.push_back(*Prefix);
2249 ++Prefix;
2250 };
2251
2252 // We insert the digits backward, then reverse them to get the right order.
2253 unsigned StartDig = Str.size();
2254
2255 // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2256 // because the number of bits per digit (1, 3 and 4 respectively) divides
2257 // equally. We just shift until the value is zero.
2258 if (Radix == 2 || Radix == 8 || Radix == 16) {
2259 // Just shift tmp right for each digit width until it becomes zero
2260 unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2261 unsigned MaskAmt = Radix - 1;
2262
2263 while (Tmp.getBoolValue()) {
2264 unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2265 Str.push_back(Digits[Digit]);
2266 Tmp.lshrInPlace(ShiftAmt);
2267 }
2268 } else {
2269 while (Tmp.getBoolValue()) {
2270 uint64_t Digit;
2271 udivrem(Tmp, Radix, Tmp, Digit);
2272 assert(Digit < Radix && "divide failed");
2273 Str.push_back(Digits[Digit]);
2274 }
2275 }
2276
2277 // Reverse the digits before returning.
2278 std::reverse(Str.begin()+StartDig, Str.end());
2279 }
2280
2281 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2282 /// It is better to pass in a SmallVector/SmallString to the methods above.
toString(unsigned Radix=10,bool Signed=true) const2283 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2284 SmallString<40> S;
2285 toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2286 return S.str();
2287 }
2288
2289 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump() const2290 LLVM_DUMP_METHOD void APInt::dump() const {
2291 SmallString<40> S, U;
2292 this->toStringUnsigned(U);
2293 this->toStringSigned(S);
2294 dbgs() << "APInt(" << BitWidth << "b, "
2295 << U << "u " << S << "s)\n";
2296 }
2297 #endif
2298
print(raw_ostream & OS,bool isSigned) const2299 void APInt::print(raw_ostream &OS, bool isSigned) const {
2300 SmallString<40> S;
2301 this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2302 OS << S;
2303 }
2304
2305 // This implements a variety of operations on a representation of
2306 // arbitrary precision, two's-complement, bignum integer values.
2307
2308 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
2309 // and unrestricting assumption.
2310 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2311 "Part width must be divisible by 2!");
2312
2313 /* Some handy functions local to this file. */
2314
2315 /* Returns the integer part with the least significant BITS set.
2316 BITS cannot be zero. */
lowBitMask(unsigned bits)2317 static inline APInt::WordType lowBitMask(unsigned bits) {
2318 assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2319
2320 return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2321 }
2322
2323 /* Returns the value of the lower half of PART. */
lowHalf(APInt::WordType part)2324 static inline APInt::WordType lowHalf(APInt::WordType part) {
2325 return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2326 }
2327
2328 /* Returns the value of the upper half of PART. */
highHalf(APInt::WordType part)2329 static inline APInt::WordType highHalf(APInt::WordType part) {
2330 return part >> (APInt::APINT_BITS_PER_WORD / 2);
2331 }
2332
2333 /* Returns the bit number of the most significant set bit of a part.
2334 If the input number has no bits set -1U is returned. */
partMSB(APInt::WordType value)2335 static unsigned partMSB(APInt::WordType value) {
2336 return findLastSet(value, ZB_Max);
2337 }
2338
2339 /* Returns the bit number of the least significant set bit of a
2340 part. If the input number has no bits set -1U is returned. */
partLSB(APInt::WordType value)2341 static unsigned partLSB(APInt::WordType value) {
2342 return findFirstSet(value, ZB_Max);
2343 }
2344
2345 /* Sets the least significant part of a bignum to the input value, and
2346 zeroes out higher parts. */
tcSet(WordType * dst,WordType part,unsigned parts)2347 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2348 assert(parts > 0);
2349
2350 dst[0] = part;
2351 for (unsigned i = 1; i < parts; i++)
2352 dst[i] = 0;
2353 }
2354
2355 /* Assign one bignum to another. */
tcAssign(WordType * dst,const WordType * src,unsigned parts)2356 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2357 for (unsigned i = 0; i < parts; i++)
2358 dst[i] = src[i];
2359 }
2360
2361 /* Returns true if a bignum is zero, false otherwise. */
tcIsZero(const WordType * src,unsigned parts)2362 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2363 for (unsigned i = 0; i < parts; i++)
2364 if (src[i])
2365 return false;
2366
2367 return true;
2368 }
2369
2370 /* Extract the given bit of a bignum; returns 0 or 1. */
tcExtractBit(const WordType * parts,unsigned bit)2371 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2372 return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2373 }
2374
2375 /* Set the given bit of a bignum. */
tcSetBit(WordType * parts,unsigned bit)2376 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2377 parts[whichWord(bit)] |= maskBit(bit);
2378 }
2379
2380 /* Clears the given bit of a bignum. */
tcClearBit(WordType * parts,unsigned bit)2381 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2382 parts[whichWord(bit)] &= ~maskBit(bit);
2383 }
2384
2385 /* Returns the bit number of the least significant set bit of a
2386 number. If the input number has no bits set -1U is returned. */
tcLSB(const WordType * parts,unsigned n)2387 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2388 for (unsigned i = 0; i < n; i++) {
2389 if (parts[i] != 0) {
2390 unsigned lsb = partLSB(parts[i]);
2391
2392 return lsb + i * APINT_BITS_PER_WORD;
2393 }
2394 }
2395
2396 return -1U;
2397 }
2398
2399 /* Returns the bit number of the most significant set bit of a number.
2400 If the input number has no bits set -1U is returned. */
tcMSB(const WordType * parts,unsigned n)2401 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2402 do {
2403 --n;
2404
2405 if (parts[n] != 0) {
2406 unsigned msb = partMSB(parts[n]);
2407
2408 return msb + n * APINT_BITS_PER_WORD;
2409 }
2410 } while (n);
2411
2412 return -1U;
2413 }
2414
2415 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2416 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2417 the least significant bit of DST. All high bits above srcBITS in
2418 DST are zero-filled. */
2419 void
tcExtract(WordType * dst,unsigned dstCount,const WordType * src,unsigned srcBits,unsigned srcLSB)2420 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2421 unsigned srcBits, unsigned srcLSB) {
2422 unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2423 assert(dstParts <= dstCount);
2424
2425 unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2426 tcAssign (dst, src + firstSrcPart, dstParts);
2427
2428 unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2429 tcShiftRight (dst, dstParts, shift);
2430
2431 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2432 in DST. If this is less that srcBits, append the rest, else
2433 clear the high bits. */
2434 unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2435 if (n < srcBits) {
2436 WordType mask = lowBitMask (srcBits - n);
2437 dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2438 << n % APINT_BITS_PER_WORD);
2439 } else if (n > srcBits) {
2440 if (srcBits % APINT_BITS_PER_WORD)
2441 dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2442 }
2443
2444 /* Clear high parts. */
2445 while (dstParts < dstCount)
2446 dst[dstParts++] = 0;
2447 }
2448
2449 /* DST += RHS + C where C is zero or one. Returns the carry flag. */
tcAdd(WordType * dst,const WordType * rhs,WordType c,unsigned parts)2450 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2451 WordType c, unsigned parts) {
2452 assert(c <= 1);
2453
2454 for (unsigned i = 0; i < parts; i++) {
2455 WordType l = dst[i];
2456 if (c) {
2457 dst[i] += rhs[i] + 1;
2458 c = (dst[i] <= l);
2459 } else {
2460 dst[i] += rhs[i];
2461 c = (dst[i] < l);
2462 }
2463 }
2464
2465 return c;
2466 }
2467
2468 /// This function adds a single "word" integer, src, to the multiple
2469 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2470 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2471 /// @returns the carry of the addition.
tcAddPart(WordType * dst,WordType src,unsigned parts)2472 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2473 unsigned parts) {
2474 for (unsigned i = 0; i < parts; ++i) {
2475 dst[i] += src;
2476 if (dst[i] >= src)
2477 return 0; // No need to carry so exit early.
2478 src = 1; // Carry one to next digit.
2479 }
2480
2481 return 1;
2482 }
2483
2484 /* DST -= RHS + C where C is zero or one. Returns the carry flag. */
tcSubtract(WordType * dst,const WordType * rhs,WordType c,unsigned parts)2485 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2486 WordType c, unsigned parts) {
2487 assert(c <= 1);
2488
2489 for (unsigned i = 0; i < parts; i++) {
2490 WordType l = dst[i];
2491 if (c) {
2492 dst[i] -= rhs[i] + 1;
2493 c = (dst[i] >= l);
2494 } else {
2495 dst[i] -= rhs[i];
2496 c = (dst[i] > l);
2497 }
2498 }
2499
2500 return c;
2501 }
2502
2503 /// This function subtracts a single "word" (64-bit word), src, from
2504 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2505 /// no further borrowing is needed or it runs out of "words" in dst. The result
2506 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2507 /// exhausted. In other words, if src > dst then this function returns 1,
2508 /// otherwise 0.
2509 /// @returns the borrow out of the subtraction
tcSubtractPart(WordType * dst,WordType src,unsigned parts)2510 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2511 unsigned parts) {
2512 for (unsigned i = 0; i < parts; ++i) {
2513 WordType Dst = dst[i];
2514 dst[i] -= src;
2515 if (src <= Dst)
2516 return 0; // No need to borrow so exit early.
2517 src = 1; // We have to "borrow 1" from next "word"
2518 }
2519
2520 return 1;
2521 }
2522
2523 /* Negate a bignum in-place. */
tcNegate(WordType * dst,unsigned parts)2524 void APInt::tcNegate(WordType *dst, unsigned parts) {
2525 tcComplement(dst, parts);
2526 tcIncrement(dst, parts);
2527 }
2528
2529 /* DST += SRC * MULTIPLIER + CARRY if add is true
2530 DST = SRC * MULTIPLIER + CARRY if add is false
2531
2532 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
2533 they must start at the same point, i.e. DST == SRC.
2534
2535 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2536 returned. Otherwise DST is filled with the least significant
2537 DSTPARTS parts of the result, and if all of the omitted higher
2538 parts were zero return zero, otherwise overflow occurred and
2539 return one. */
tcMultiplyPart(WordType * dst,const WordType * src,WordType multiplier,WordType carry,unsigned srcParts,unsigned dstParts,bool add)2540 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2541 WordType multiplier, WordType carry,
2542 unsigned srcParts, unsigned dstParts,
2543 bool add) {
2544 /* Otherwise our writes of DST kill our later reads of SRC. */
2545 assert(dst <= src || dst >= src + srcParts);
2546 assert(dstParts <= srcParts + 1);
2547
2548 /* N loops; minimum of dstParts and srcParts. */
2549 unsigned n = std::min(dstParts, srcParts);
2550
2551 for (unsigned i = 0; i < n; i++) {
2552 WordType low, mid, high, srcPart;
2553
2554 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2555
2556 This cannot overflow, because
2557
2558 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2559
2560 which is less than n^2. */
2561
2562 srcPart = src[i];
2563
2564 if (multiplier == 0 || srcPart == 0) {
2565 low = carry;
2566 high = 0;
2567 } else {
2568 low = lowHalf(srcPart) * lowHalf(multiplier);
2569 high = highHalf(srcPart) * highHalf(multiplier);
2570
2571 mid = lowHalf(srcPart) * highHalf(multiplier);
2572 high += highHalf(mid);
2573 mid <<= APINT_BITS_PER_WORD / 2;
2574 if (low + mid < low)
2575 high++;
2576 low += mid;
2577
2578 mid = highHalf(srcPart) * lowHalf(multiplier);
2579 high += highHalf(mid);
2580 mid <<= APINT_BITS_PER_WORD / 2;
2581 if (low + mid < low)
2582 high++;
2583 low += mid;
2584
2585 /* Now add carry. */
2586 if (low + carry < low)
2587 high++;
2588 low += carry;
2589 }
2590
2591 if (add) {
2592 /* And now DST[i], and store the new low part there. */
2593 if (low + dst[i] < low)
2594 high++;
2595 dst[i] += low;
2596 } else
2597 dst[i] = low;
2598
2599 carry = high;
2600 }
2601
2602 if (srcParts < dstParts) {
2603 /* Full multiplication, there is no overflow. */
2604 assert(srcParts + 1 == dstParts);
2605 dst[srcParts] = carry;
2606 return 0;
2607 }
2608
2609 /* We overflowed if there is carry. */
2610 if (carry)
2611 return 1;
2612
2613 /* We would overflow if any significant unwritten parts would be
2614 non-zero. This is true if any remaining src parts are non-zero
2615 and the multiplier is non-zero. */
2616 if (multiplier)
2617 for (unsigned i = dstParts; i < srcParts; i++)
2618 if (src[i])
2619 return 1;
2620
2621 /* We fitted in the narrow destination. */
2622 return 0;
2623 }
2624
2625 /* DST = LHS * RHS, where DST has the same width as the operands and
2626 is filled with the least significant parts of the result. Returns
2627 one if overflow occurred, otherwise zero. DST must be disjoint
2628 from both operands. */
tcMultiply(WordType * dst,const WordType * lhs,const WordType * rhs,unsigned parts)2629 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2630 const WordType *rhs, unsigned parts) {
2631 assert(dst != lhs && dst != rhs);
2632
2633 int overflow = 0;
2634 tcSet(dst, 0, parts);
2635
2636 for (unsigned i = 0; i < parts; i++)
2637 overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2638 parts - i, true);
2639
2640 return overflow;
2641 }
2642
2643 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2644 /// operands. No overflow occurs. DST must be disjoint from both operands.
tcFullMultiply(WordType * dst,const WordType * lhs,const WordType * rhs,unsigned lhsParts,unsigned rhsParts)2645 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2646 const WordType *rhs, unsigned lhsParts,
2647 unsigned rhsParts) {
2648 /* Put the narrower number on the LHS for less loops below. */
2649 if (lhsParts > rhsParts)
2650 return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2651
2652 assert(dst != lhs && dst != rhs);
2653
2654 tcSet(dst, 0, rhsParts);
2655
2656 for (unsigned i = 0; i < lhsParts; i++)
2657 tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2658 }
2659
2660 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2661 Otherwise set LHS to LHS / RHS with the fractional part discarded,
2662 set REMAINDER to the remainder, return zero. i.e.
2663
2664 OLD_LHS = RHS * LHS + REMAINDER
2665
2666 SCRATCH is a bignum of the same size as the operands and result for
2667 use by the routine; its contents need not be initialized and are
2668 destroyed. LHS, REMAINDER and SCRATCH must be distinct.
2669 */
tcDivide(WordType * lhs,const WordType * rhs,WordType * remainder,WordType * srhs,unsigned parts)2670 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2671 WordType *remainder, WordType *srhs,
2672 unsigned parts) {
2673 assert(lhs != remainder && lhs != srhs && remainder != srhs);
2674
2675 unsigned shiftCount = tcMSB(rhs, parts) + 1;
2676 if (shiftCount == 0)
2677 return true;
2678
2679 shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2680 unsigned n = shiftCount / APINT_BITS_PER_WORD;
2681 WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2682
2683 tcAssign(srhs, rhs, parts);
2684 tcShiftLeft(srhs, parts, shiftCount);
2685 tcAssign(remainder, lhs, parts);
2686 tcSet(lhs, 0, parts);
2687
2688 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2689 the total. */
2690 for (;;) {
2691 int compare = tcCompare(remainder, srhs, parts);
2692 if (compare >= 0) {
2693 tcSubtract(remainder, srhs, 0, parts);
2694 lhs[n] |= mask;
2695 }
2696
2697 if (shiftCount == 0)
2698 break;
2699 shiftCount--;
2700 tcShiftRight(srhs, parts, 1);
2701 if ((mask >>= 1) == 0) {
2702 mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2703 n--;
2704 }
2705 }
2706
2707 return false;
2708 }
2709
2710 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2711 /// no restrictions on Count.
tcShiftLeft(WordType * Dst,unsigned Words,unsigned Count)2712 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2713 // Don't bother performing a no-op shift.
2714 if (!Count)
2715 return;
2716
2717 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2718 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2719 unsigned BitShift = Count % APINT_BITS_PER_WORD;
2720
2721 // Fastpath for moving by whole words.
2722 if (BitShift == 0) {
2723 std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2724 } else {
2725 while (Words-- > WordShift) {
2726 Dst[Words] = Dst[Words - WordShift] << BitShift;
2727 if (Words > WordShift)
2728 Dst[Words] |=
2729 Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2730 }
2731 }
2732
2733 // Fill in the remainder with 0s.
2734 std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2735 }
2736
2737 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2738 /// are no restrictions on Count.
tcShiftRight(WordType * Dst,unsigned Words,unsigned Count)2739 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2740 // Don't bother performing a no-op shift.
2741 if (!Count)
2742 return;
2743
2744 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2745 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2746 unsigned BitShift = Count % APINT_BITS_PER_WORD;
2747
2748 unsigned WordsToMove = Words - WordShift;
2749 // Fastpath for moving by whole words.
2750 if (BitShift == 0) {
2751 std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2752 } else {
2753 for (unsigned i = 0; i != WordsToMove; ++i) {
2754 Dst[i] = Dst[i + WordShift] >> BitShift;
2755 if (i + 1 != WordsToMove)
2756 Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2757 }
2758 }
2759
2760 // Fill in the remainder with 0s.
2761 std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2762 }
2763
2764 /* Bitwise and of two bignums. */
tcAnd(WordType * dst,const WordType * rhs,unsigned parts)2765 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2766 for (unsigned i = 0; i < parts; i++)
2767 dst[i] &= rhs[i];
2768 }
2769
2770 /* Bitwise inclusive or of two bignums. */
tcOr(WordType * dst,const WordType * rhs,unsigned parts)2771 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2772 for (unsigned i = 0; i < parts; i++)
2773 dst[i] |= rhs[i];
2774 }
2775
2776 /* Bitwise exclusive or of two bignums. */
tcXor(WordType * dst,const WordType * rhs,unsigned parts)2777 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2778 for (unsigned i = 0; i < parts; i++)
2779 dst[i] ^= rhs[i];
2780 }
2781
2782 /* Complement a bignum in-place. */
tcComplement(WordType * dst,unsigned parts)2783 void APInt::tcComplement(WordType *dst, unsigned parts) {
2784 for (unsigned i = 0; i < parts; i++)
2785 dst[i] = ~dst[i];
2786 }
2787
2788 /* Comparison (unsigned) of two bignums. */
tcCompare(const WordType * lhs,const WordType * rhs,unsigned parts)2789 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2790 unsigned parts) {
2791 while (parts) {
2792 parts--;
2793 if (lhs[parts] != rhs[parts])
2794 return (lhs[parts] > rhs[parts]) ? 1 : -1;
2795 }
2796
2797 return 0;
2798 }
2799
2800 /* Set the least significant BITS bits of a bignum, clear the
2801 rest. */
tcSetLeastSignificantBits(WordType * dst,unsigned parts,unsigned bits)2802 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2803 unsigned bits) {
2804 unsigned i = 0;
2805 while (bits > APINT_BITS_PER_WORD) {
2806 dst[i++] = ~(WordType) 0;
2807 bits -= APINT_BITS_PER_WORD;
2808 }
2809
2810 if (bits)
2811 dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2812
2813 while (i < parts)
2814 dst[i++] = 0;
2815 }
2816
RoundingUDiv(const APInt & A,const APInt & B,APInt::Rounding RM)2817 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2818 APInt::Rounding RM) {
2819 // Currently udivrem always rounds down.
2820 switch (RM) {
2821 case APInt::Rounding::DOWN:
2822 case APInt::Rounding::TOWARD_ZERO:
2823 return A.udiv(B);
2824 case APInt::Rounding::UP: {
2825 APInt Quo, Rem;
2826 APInt::udivrem(A, B, Quo, Rem);
2827 if (Rem == 0)
2828 return Quo;
2829 return Quo + 1;
2830 }
2831 }
2832 llvm_unreachable("Unknown APInt::Rounding enum");
2833 }
2834
RoundingSDiv(const APInt & A,const APInt & B,APInt::Rounding RM)2835 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2836 APInt::Rounding RM) {
2837 switch (RM) {
2838 case APInt::Rounding::DOWN:
2839 case APInt::Rounding::UP: {
2840 APInt Quo, Rem;
2841 APInt::sdivrem(A, B, Quo, Rem);
2842 if (Rem == 0)
2843 return Quo;
2844 // This algorithm deals with arbitrary rounding mode used by sdivrem.
2845 // We want to check whether the non-integer part of the mathematical value
2846 // is negative or not. If the non-integer part is negative, we need to round
2847 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2848 // already rounded down.
2849 if (RM == APInt::Rounding::DOWN) {
2850 if (Rem.isNegative() != B.isNegative())
2851 return Quo - 1;
2852 return Quo;
2853 }
2854 if (Rem.isNegative() != B.isNegative())
2855 return Quo;
2856 return Quo + 1;
2857 }
2858 // Currently sdiv rounds towards zero.
2859 case APInt::Rounding::TOWARD_ZERO:
2860 return A.sdiv(B);
2861 }
2862 llvm_unreachable("Unknown APInt::Rounding enum");
2863 }
2864
2865 Optional<APInt>
SolveQuadraticEquationWrap(APInt A,APInt B,APInt C,unsigned RangeWidth)2866 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2867 unsigned RangeWidth) {
2868 unsigned CoeffWidth = A.getBitWidth();
2869 assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2870 assert(RangeWidth <= CoeffWidth &&
2871 "Value range width should be less than coefficient width");
2872 assert(RangeWidth > 1 && "Value range bit width should be > 1");
2873
2874 LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2875 << "x + " << C << ", rw:" << RangeWidth << '\n');
2876
2877 // Identify 0 as a (non)solution immediately.
2878 if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2879 LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2880 return APInt(CoeffWidth, 0);
2881 }
2882
2883 // The result of APInt arithmetic has the same bit width as the operands,
2884 // so it can actually lose high bits. A product of two n-bit integers needs
2885 // 2n-1 bits to represent the full value.
2886 // The operation done below (on quadratic coefficients) that can produce
2887 // the largest value is the evaluation of the equation during bisection,
2888 // which needs 3 times the bitwidth of the coefficient, so the total number
2889 // of required bits is 3n.
2890 //
2891 // The purpose of this extension is to simulate the set Z of all integers,
2892 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2893 // and negative numbers (not so much in a modulo arithmetic). The method
2894 // used to solve the equation is based on the standard formula for real
2895 // numbers, and uses the concepts of "positive" and "negative" with their
2896 // usual meanings.
2897 CoeffWidth *= 3;
2898 A = A.sext(CoeffWidth);
2899 B = B.sext(CoeffWidth);
2900 C = C.sext(CoeffWidth);
2901
2902 // Make A > 0 for simplicity. Negate cannot overflow at this point because
2903 // the bit width has increased.
2904 if (A.isNegative()) {
2905 A.negate();
2906 B.negate();
2907 C.negate();
2908 }
2909
2910 // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2911 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2912 // and R = 2^BitWidth.
2913 // Since we're trying not only to find exact solutions, but also values
2914 // that "wrap around", such a set will always have a solution, i.e. an x
2915 // that satisfies at least one of the equations, or such that |q(x)|
2916 // exceeds kR, while |q(x-1)| for the same k does not.
2917 //
2918 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2919 // positive solution n (in the above sense), and also such that the n
2920 // will be the least among all solutions corresponding to k = 0, 1, ...
2921 // (more precisely, the least element in the set
2922 // { n(k) | k is such that a solution n(k) exists }).
2923 //
2924 // Consider the parabola (over real numbers) that corresponds to the
2925 // quadratic equation. Since A > 0, the arms of the parabola will point
2926 // up. Picking different values of k will shift it up and down by R.
2927 //
2928 // We want to shift the parabola in such a way as to reduce the problem
2929 // of solving q(x) = kR to solving shifted_q(x) = 0.
2930 // (The interesting solutions are the ceilings of the real number
2931 // solutions.)
2932 APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2933 APInt TwoA = 2 * A;
2934 APInt SqrB = B * B;
2935 bool PickLow;
2936
2937 auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2938 assert(A.isStrictlyPositive());
2939 APInt T = V.abs().urem(A);
2940 if (T.isNullValue())
2941 return V;
2942 return V.isNegative() ? V+T : V+(A-T);
2943 };
2944
2945 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2946 // iff B is positive.
2947 if (B.isNonNegative()) {
2948 // If B >= 0, the vertex it at a negative location (or at 0), so in
2949 // order to have a non-negative solution we need to pick k that makes
2950 // C-kR negative. To satisfy all the requirements for the solution
2951 // that we are looking for, it needs to be closest to 0 of all k.
2952 C = C.srem(R);
2953 if (C.isStrictlyPositive())
2954 C -= R;
2955 // Pick the greater solution.
2956 PickLow = false;
2957 } else {
2958 // If B < 0, the vertex is at a positive location. For any solution
2959 // to exist, the discriminant must be non-negative. This means that
2960 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2961 // lower bound on values of k: kR >= C - B^2/4A.
2962 APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2963 // Round LowkR up (towards +inf) to the nearest kR.
2964 LowkR = RoundUp(LowkR, R);
2965
2966 // If there exists k meeting the condition above, and such that
2967 // C-kR > 0, there will be two positive real number solutions of
2968 // q(x) = kR. Out of all such values of k, pick the one that makes
2969 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2970 // In other words, find maximum k such that LowkR <= kR < C.
2971 if (C.sgt(LowkR)) {
2972 // If LowkR < C, then such a k is guaranteed to exist because
2973 // LowkR itself is a multiple of R.
2974 C -= -RoundUp(-C, R); // C = C - RoundDown(C, R)
2975 // Pick the smaller solution.
2976 PickLow = true;
2977 } else {
2978 // If C-kR < 0 for all potential k's, it means that one solution
2979 // will be negative, while the other will be positive. The positive
2980 // solution will shift towards 0 if the parabola is moved up.
2981 // Pick the kR closest to the lower bound (i.e. make C-kR closest
2982 // to 0, or in other words, out of all parabolas that have solutions,
2983 // pick the one that is the farthest "up").
2984 // Since LowkR is itself a multiple of R, simply take C-LowkR.
2985 C -= LowkR;
2986 // Pick the greater solution.
2987 PickLow = false;
2988 }
2989 }
2990
2991 LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2992 << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2993
2994 APInt D = SqrB - 4*A*C;
2995 assert(D.isNonNegative() && "Negative discriminant");
2996 APInt SQ = D.sqrt();
2997
2998 APInt Q = SQ * SQ;
2999 bool InexactSQ = Q != D;
3000 // The calculated SQ may actually be greater than the exact (non-integer)
3001 // value. If that's the case, decrement SQ to get a value that is lower.
3002 if (Q.sgt(D))
3003 SQ -= 1;
3004
3005 APInt X;
3006 APInt Rem;
3007
3008 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
3009 // When using the quadratic formula directly, the calculated low root
3010 // may be greater than the exact one, since we would be subtracting SQ.
3011 // To make sure that the calculated root is not greater than the exact
3012 // one, subtract SQ+1 when calculating the low root (for inexact value
3013 // of SQ).
3014 if (PickLow)
3015 APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
3016 else
3017 APInt::sdivrem(-B + SQ, TwoA, X, Rem);
3018
3019 // The updated coefficients should be such that the (exact) solution is
3020 // positive. Since APInt division rounds towards 0, the calculated one
3021 // can be 0, but cannot be negative.
3022 assert(X.isNonNegative() && "Solution should be non-negative");
3023
3024 if (!InexactSQ && Rem.isNullValue()) {
3025 LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
3026 return X;
3027 }
3028
3029 assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
3030 // The exact value of the square root of D should be between SQ and SQ+1.
3031 // This implies that the solution should be between that corresponding to
3032 // SQ (i.e. X) and that corresponding to SQ+1.
3033 //
3034 // The calculated X cannot be greater than the exact (real) solution.
3035 // Actually it must be strictly less than the exact solution, while
3036 // X+1 will be greater than or equal to it.
3037
3038 APInt VX = (A*X + B)*X + C;
3039 APInt VY = VX + TwoA*X + A + B;
3040 bool SignChange = VX.isNegative() != VY.isNegative() ||
3041 VX.isNullValue() != VY.isNullValue();
3042 // If the sign did not change between X and X+1, X is not a valid solution.
3043 // This could happen when the actual (exact) roots don't have an integer
3044 // between them, so they would both be contained between X and X+1.
3045 if (!SignChange) {
3046 LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
3047 return None;
3048 }
3049
3050 X += 1;
3051 LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
3052 return X;
3053 }
3054
3055 Optional<unsigned>
GetMostSignificantDifferentBit(const APInt & A,const APInt & B)3056 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
3057 assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
3058 if (A == B)
3059 return llvm::None;
3060 return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
3061 }
3062
3063 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
3064 /// with the integer held in IntVal.
StoreIntToMemory(const APInt & IntVal,uint8_t * Dst,unsigned StoreBytes)3065 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
3066 unsigned StoreBytes) {
3067 assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
3068 const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
3069
3070 if (sys::IsLittleEndianHost) {
3071 // Little-endian host - the source is ordered from LSB to MSB. Order the
3072 // destination from LSB to MSB: Do a straight copy.
3073 memcpy(Dst, Src, StoreBytes);
3074 } else {
3075 // Big-endian host - the source is an array of 64 bit words ordered from
3076 // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination
3077 // from MSB to LSB: Reverse the word order, but not the bytes in a word.
3078 while (StoreBytes > sizeof(uint64_t)) {
3079 StoreBytes -= sizeof(uint64_t);
3080 // May not be aligned so use memcpy.
3081 memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
3082 Src += sizeof(uint64_t);
3083 }
3084
3085 memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
3086 }
3087 }
3088
3089 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3090 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
LoadIntFromMemory(APInt & IntVal,uint8_t * Src,unsigned LoadBytes)3091 void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
3092 assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
3093 uint8_t *Dst = reinterpret_cast<uint8_t *>(
3094 const_cast<uint64_t *>(IntVal.getRawData()));
3095
3096 if (sys::IsLittleEndianHost)
3097 // Little-endian host - the destination must be ordered from LSB to MSB.
3098 // The source is ordered from LSB to MSB: Do a straight copy.
3099 memcpy(Dst, Src, LoadBytes);
3100 else {
3101 // Big-endian - the destination is an array of 64 bit words ordered from
3102 // LSW to MSW. Each word must be ordered from MSB to LSB. The source is
3103 // ordered from MSB to LSB: Reverse the word order, but not the bytes in
3104 // a word.
3105 while (LoadBytes > sizeof(uint64_t)) {
3106 LoadBytes -= sizeof(uint64_t);
3107 // May not be aligned so use memcpy.
3108 memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
3109 Dst += sizeof(uint64_t);
3110 }
3111
3112 memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
3113 }
3114 }
3115