1 //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This header defines the BitstreamReader class. This class can be used to 11 // read an arbitrary bitstream, regardless of its contents. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_BITCODE_BITSTREAMREADER_H 16 #define LLVM_BITCODE_BITSTREAMREADER_H 17 18 #include "llvm/Bitcode/BitCodes.h" 19 #include "llvm/Support/Endian.h" 20 #include "llvm/Support/MathExtras.h" 21 #include "llvm/Support/StreamingMemoryObject.h" 22 #include <climits> 23 #include <string> 24 #include <vector> 25 26 namespace llvm { 27 28 /// This class is used to read from an LLVM bitcode stream, maintaining 29 /// information that is global to decoding the entire file. While a file is 30 /// being read, multiple cursors can be independently advanced or skipped around 31 /// within the file. These are represented by the BitstreamCursor class. 32 class BitstreamReader { 33 public: 34 /// This contains information emitted to BLOCKINFO_BLOCK blocks. These 35 /// describe abbreviations that all blocks of the specified ID inherit. 36 struct BlockInfo { 37 unsigned BlockID; 38 std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs; 39 std::string Name; 40 41 std::vector<std::pair<unsigned, std::string> > RecordNames; 42 }; 43 private: 44 std::unique_ptr<MemoryObject> BitcodeBytes; 45 46 std::vector<BlockInfo> BlockInfoRecords; 47 48 /// This is set to true if we don't care about the block/record name 49 /// information in the BlockInfo block. Only llvm-bcanalyzer uses this. 50 bool IgnoreBlockInfoNames; 51 52 BitstreamReader(const BitstreamReader&) = delete; 53 void operator=(const BitstreamReader&) = delete; 54 public: BitstreamReader()55 BitstreamReader() : IgnoreBlockInfoNames(true) { 56 } 57 BitstreamReader(const unsigned char * Start,const unsigned char * End)58 BitstreamReader(const unsigned char *Start, const unsigned char *End) 59 : IgnoreBlockInfoNames(true) { 60 init(Start, End); 61 } 62 BitstreamReader(std::unique_ptr<MemoryObject> BitcodeBytes)63 BitstreamReader(std::unique_ptr<MemoryObject> BitcodeBytes) 64 : BitcodeBytes(std::move(BitcodeBytes)), IgnoreBlockInfoNames(true) {} 65 BitstreamReader(BitstreamReader && Other)66 BitstreamReader(BitstreamReader &&Other) { 67 *this = std::move(Other); 68 } 69 70 BitstreamReader &operator=(BitstreamReader &&Other) { 71 BitcodeBytes = std::move(Other.BitcodeBytes); 72 // Explicitly swap block info, so that nothing gets destroyed twice. 73 std::swap(BlockInfoRecords, Other.BlockInfoRecords); 74 IgnoreBlockInfoNames = Other.IgnoreBlockInfoNames; 75 return *this; 76 } 77 init(const unsigned char * Start,const unsigned char * End)78 void init(const unsigned char *Start, const unsigned char *End) { 79 assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes"); 80 BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End)); 81 } 82 getBitcodeBytes()83 MemoryObject &getBitcodeBytes() { return *BitcodeBytes; } 84 85 /// This is called by clients that want block/record name information. CollectBlockInfoNames()86 void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; } isIgnoringBlockInfoNames()87 bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; } 88 89 //===--------------------------------------------------------------------===// 90 // Block Manipulation 91 //===--------------------------------------------------------------------===// 92 93 /// Return true if we've already read and processed the block info block for 94 /// this Bitstream. We only process it for the first cursor that walks over 95 /// it. hasBlockInfoRecords()96 bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); } 97 98 /// If there is block info for the specified ID, return it, otherwise return 99 /// null. getBlockInfo(unsigned BlockID)100 const BlockInfo *getBlockInfo(unsigned BlockID) const { 101 // Common case, the most recent entry matches BlockID. 102 if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) 103 return &BlockInfoRecords.back(); 104 105 for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size()); 106 i != e; ++i) 107 if (BlockInfoRecords[i].BlockID == BlockID) 108 return &BlockInfoRecords[i]; 109 return nullptr; 110 } 111 getOrCreateBlockInfo(unsigned BlockID)112 BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { 113 if (const BlockInfo *BI = getBlockInfo(BlockID)) 114 return *const_cast<BlockInfo*>(BI); 115 116 // Otherwise, add a new record. 117 BlockInfoRecords.emplace_back(); 118 BlockInfoRecords.back().BlockID = BlockID; 119 return BlockInfoRecords.back(); 120 } 121 122 /// Takes block info from the other bitstream reader. 123 /// 124 /// This is a "take" operation because BlockInfo records are non-trivial, and 125 /// indeed rather expensive. takeBlockInfo(BitstreamReader && Other)126 void takeBlockInfo(BitstreamReader &&Other) { 127 assert(!hasBlockInfoRecords()); 128 BlockInfoRecords = std::move(Other.BlockInfoRecords); 129 } 130 }; 131 132 /// This represents a position within a bitstream. There may be multiple 133 /// independent cursors reading within one bitstream, each maintaining their 134 /// own local state. 135 class SimpleBitstreamCursor { 136 BitstreamReader *R = nullptr; 137 size_t NextChar = 0; 138 139 // The size of the bicode. 0 if we don't know it yet. 140 size_t Size = 0; 141 142 /// This is the current data we have pulled from the stream but have not 143 /// returned to the client. This is specifically and intentionally defined to 144 /// follow the word size of the host machine for efficiency. We use word_t in 145 /// places that are aware of this to make it perfectly explicit what is going 146 /// on. 147 public: 148 typedef size_t word_t; 149 150 private: 151 word_t CurWord = 0; 152 153 /// This is the number of bits in CurWord that are valid. This is always from 154 /// [0...bits_of(size_t)-1] inclusive. 155 unsigned BitsInCurWord = 0; 156 157 public: 158 static const size_t MaxChunkSize = sizeof(word_t) * 8; 159 160 SimpleBitstreamCursor() = default; 161 SimpleBitstreamCursor(BitstreamReader & R)162 explicit SimpleBitstreamCursor(BitstreamReader &R) : R(&R) {} SimpleBitstreamCursor(BitstreamReader * R)163 explicit SimpleBitstreamCursor(BitstreamReader *R) : R(R) {} 164 canSkipToPos(size_t pos)165 bool canSkipToPos(size_t pos) const { 166 // pos can be skipped to if it is a valid address or one byte past the end. 167 return pos == 0 || 168 R->getBitcodeBytes().isValidAddress(static_cast<uint64_t>(pos - 1)); 169 } 170 AtEndOfStream()171 bool AtEndOfStream() { 172 if (BitsInCurWord != 0) 173 return false; 174 if (Size != 0) 175 return Size <= NextChar; 176 fillCurWord(); 177 return BitsInCurWord == 0; 178 } 179 180 /// Return the bit # of the bit we are reading. GetCurrentBitNo()181 uint64_t GetCurrentBitNo() const { 182 return NextChar*CHAR_BIT - BitsInCurWord; 183 } 184 185 // Return the byte # of the current bit. getCurrentByteNo()186 uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; } 187 getBitStreamReader()188 BitstreamReader *getBitStreamReader() { return R; } getBitStreamReader()189 const BitstreamReader *getBitStreamReader() const { return R; } 190 191 /// Reset the stream to the specified bit number. JumpToBit(uint64_t BitNo)192 void JumpToBit(uint64_t BitNo) { 193 size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1); 194 unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1)); 195 assert(canSkipToPos(ByteNo) && "Invalid location"); 196 197 // Move the cursor to the right word. 198 NextChar = ByteNo; 199 BitsInCurWord = 0; 200 201 // Skip over any bits that are already consumed. 202 if (WordBitNo) 203 Read(WordBitNo); 204 } 205 206 /// Reset the stream to the bit pointed at by the specified pointer. 207 /// 208 /// The pointer must be a dereferenceable pointer into the bytes in the 209 /// underlying memory object. jumpToPointer(const uint8_t * Pointer)210 void jumpToPointer(const uint8_t *Pointer) { 211 auto *Pointer0 = getPointerToByte(0, 1); 212 assert((intptr_t)Pointer0 <= (intptr_t)Pointer && 213 "Expected pointer into bitstream"); 214 215 JumpToBit(8 * (Pointer - Pointer0)); 216 assert((intptr_t)getPointerToByte(getCurrentByteNo(), 1) == 217 (intptr_t)Pointer && 218 "Expected to reach pointer"); 219 } jumpToPointer(const char * Pointer)220 void jumpToPointer(const char *Pointer) { 221 jumpToPointer((const uint8_t *)Pointer); 222 } 223 224 /// Get a pointer into the bitstream at the specified byte offset. getPointerToByte(uint64_t ByteNo,uint64_t NumBytes)225 const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) { 226 return R->getBitcodeBytes().getPointer(ByteNo, NumBytes); 227 } 228 229 /// Get a pointer into the bitstream at the specified bit offset. 230 /// 231 /// The bit offset must be on a byte boundary. getPointerToBit(uint64_t BitNo,uint64_t NumBytes)232 const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) { 233 assert(!(BitNo % 8) && "Expected bit on byte boundary"); 234 return getPointerToByte(BitNo / 8, NumBytes); 235 } 236 fillCurWord()237 void fillCurWord() { 238 if (Size != 0 && NextChar >= Size) 239 report_fatal_error("Unexpected end of file"); 240 241 // Read the next word from the stream. 242 uint8_t Array[sizeof(word_t)] = {0}; 243 244 uint64_t BytesRead = 245 R->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar); 246 247 // If we run out of data, stop at the end of the stream. 248 if (BytesRead == 0) { 249 CurWord = 0; 250 BitsInCurWord = 0; 251 Size = NextChar; 252 return; 253 } 254 255 CurWord = 256 support::endian::read<word_t, support::little, support::unaligned>( 257 Array); 258 NextChar += BytesRead; 259 BitsInCurWord = BytesRead * 8; 260 } 261 Read(unsigned NumBits)262 word_t Read(unsigned NumBits) { 263 static const unsigned BitsInWord = MaxChunkSize; 264 265 assert(NumBits && NumBits <= BitsInWord && 266 "Cannot return zero or more than BitsInWord bits!"); 267 268 static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f; 269 270 // If the field is fully contained by CurWord, return it quickly. 271 if (BitsInCurWord >= NumBits) { 272 word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits)); 273 274 // Use a mask to avoid undefined behavior. 275 CurWord >>= (NumBits & Mask); 276 277 BitsInCurWord -= NumBits; 278 return R; 279 } 280 281 word_t R = BitsInCurWord ? CurWord : 0; 282 unsigned BitsLeft = NumBits - BitsInCurWord; 283 284 fillCurWord(); 285 286 // If we run out of data, stop at the end of the stream. 287 if (BitsLeft > BitsInCurWord) 288 return 0; 289 290 word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft)); 291 292 // Use a mask to avoid undefined behavior. 293 CurWord >>= (BitsLeft & Mask); 294 295 BitsInCurWord -= BitsLeft; 296 297 R |= R2 << (NumBits - BitsLeft); 298 299 return R; 300 } 301 ReadVBR(unsigned NumBits)302 uint32_t ReadVBR(unsigned NumBits) { 303 uint32_t Piece = Read(NumBits); 304 if ((Piece & (1U << (NumBits-1))) == 0) 305 return Piece; 306 307 uint32_t Result = 0; 308 unsigned NextBit = 0; 309 while (1) { 310 Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; 311 312 if ((Piece & (1U << (NumBits-1))) == 0) 313 return Result; 314 315 NextBit += NumBits-1; 316 Piece = Read(NumBits); 317 } 318 } 319 320 // Read a VBR that may have a value up to 64-bits in size. The chunk size of 321 // the VBR must still be <= 32 bits though. ReadVBR64(unsigned NumBits)322 uint64_t ReadVBR64(unsigned NumBits) { 323 uint32_t Piece = Read(NumBits); 324 if ((Piece & (1U << (NumBits-1))) == 0) 325 return uint64_t(Piece); 326 327 uint64_t Result = 0; 328 unsigned NextBit = 0; 329 while (1) { 330 Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit; 331 332 if ((Piece & (1U << (NumBits-1))) == 0) 333 return Result; 334 335 NextBit += NumBits-1; 336 Piece = Read(NumBits); 337 } 338 } 339 SkipToFourByteBoundary()340 void SkipToFourByteBoundary() { 341 // If word_t is 64-bits and if we've read less than 32 bits, just dump 342 // the bits we have up to the next 32-bit boundary. 343 if (sizeof(word_t) > 4 && 344 BitsInCurWord >= 32) { 345 CurWord >>= BitsInCurWord-32; 346 BitsInCurWord = 32; 347 return; 348 } 349 350 BitsInCurWord = 0; 351 } 352 353 /// Skip to the end of the file. skipToEnd()354 void skipToEnd() { NextChar = R->getBitcodeBytes().getExtent(); } 355 356 /// Prevent the cursor from reading past a byte boundary. 357 /// 358 /// Prevent the cursor from requesting byte reads past \c Limit. This is 359 /// useful when working with a cursor on a StreamingMemoryObject, when it's 360 /// desirable to avoid invalidating the result of getPointerToByte(). 361 /// 362 /// If \c Limit is on a word boundary, AtEndOfStream() will return true if 363 /// the cursor position reaches or exceeds \c Limit, regardless of the true 364 /// number of available bytes. Otherwise, AtEndOfStream() returns true when 365 /// it reaches or exceeds the next word boundary. setArtificialByteLimit(uint64_t Limit)366 void setArtificialByteLimit(uint64_t Limit) { 367 assert(getCurrentByteNo() < Limit && "Move cursor before lowering limit"); 368 369 // Round to word boundary. 370 Limit = alignTo(Limit, sizeof(word_t)); 371 372 // Only change size if the new one is lower. 373 if (!Size || Size > Limit) 374 Size = Limit; 375 } 376 377 /// Return the Size, if known. getSizeIfKnown()378 uint64_t getSizeIfKnown() const { return Size; } 379 }; 380 381 /// When advancing through a bitstream cursor, each advance can discover a few 382 /// different kinds of entries: 383 struct BitstreamEntry { 384 enum { 385 Error, // Malformed bitcode was found. 386 EndBlock, // We've reached the end of the current block, (or the end of the 387 // file, which is treated like a series of EndBlock records. 388 SubBlock, // This is the start of a new subblock of a specific ID. 389 Record // This is a record with a specific AbbrevID. 390 } Kind; 391 392 unsigned ID; 393 getErrorBitstreamEntry394 static BitstreamEntry getError() { 395 BitstreamEntry E; E.Kind = Error; return E; 396 } getEndBlockBitstreamEntry397 static BitstreamEntry getEndBlock() { 398 BitstreamEntry E; E.Kind = EndBlock; return E; 399 } getSubBlockBitstreamEntry400 static BitstreamEntry getSubBlock(unsigned ID) { 401 BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E; 402 } getRecordBitstreamEntry403 static BitstreamEntry getRecord(unsigned AbbrevID) { 404 BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E; 405 } 406 }; 407 408 /// This represents a position within a bitcode file, implemented on top of a 409 /// SimpleBitstreamCursor. 410 /// 411 /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not 412 /// be passed by value. 413 class BitstreamCursor : SimpleBitstreamCursor { 414 // This is the declared size of code values used for the current block, in 415 // bits. 416 unsigned CurCodeSize = 2; 417 418 /// Abbrevs installed at in this block. 419 std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs; 420 421 struct Block { 422 unsigned PrevCodeSize; 423 std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs; BlockBlock424 explicit Block(unsigned PCS) : PrevCodeSize(PCS) {} 425 }; 426 427 /// This tracks the codesize of parent blocks. 428 SmallVector<Block, 8> BlockScope; 429 430 431 public: 432 static const size_t MaxChunkSize = sizeof(word_t) * 8; 433 434 BitstreamCursor() = default; 435 BitstreamCursor(BitstreamReader & R)436 explicit BitstreamCursor(BitstreamReader &R) { init(&R); } 437 init(BitstreamReader * R)438 void init(BitstreamReader *R) { 439 freeState(); 440 SimpleBitstreamCursor::operator=(SimpleBitstreamCursor(R)); 441 CurCodeSize = 2; 442 } 443 444 void freeState(); 445 446 using SimpleBitstreamCursor::canSkipToPos; 447 using SimpleBitstreamCursor::AtEndOfStream; 448 using SimpleBitstreamCursor::GetCurrentBitNo; 449 using SimpleBitstreamCursor::getCurrentByteNo; 450 using SimpleBitstreamCursor::getPointerToByte; 451 using SimpleBitstreamCursor::getBitStreamReader; 452 using SimpleBitstreamCursor::JumpToBit; 453 using SimpleBitstreamCursor::fillCurWord; 454 using SimpleBitstreamCursor::Read; 455 using SimpleBitstreamCursor::ReadVBR; 456 using SimpleBitstreamCursor::ReadVBR64; 457 458 /// Return the number of bits used to encode an abbrev #. getAbbrevIDWidth()459 unsigned getAbbrevIDWidth() const { return CurCodeSize; } 460 461 /// Flags that modify the behavior of advance(). 462 enum { 463 /// If this flag is used, the advance() method does not automatically pop 464 /// the block scope when the end of a block is reached. 465 AF_DontPopBlockAtEnd = 1, 466 467 /// If this flag is used, abbrev entries are returned just like normal 468 /// records. 469 AF_DontAutoprocessAbbrevs = 2 470 }; 471 472 /// Advance the current bitstream, returning the next entry in the stream. 473 BitstreamEntry advance(unsigned Flags = 0) { 474 while (1) { 475 unsigned Code = ReadCode(); 476 if (Code == bitc::END_BLOCK) { 477 // Pop the end of the block unless Flags tells us not to. 478 if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd()) 479 return BitstreamEntry::getError(); 480 return BitstreamEntry::getEndBlock(); 481 } 482 483 if (Code == bitc::ENTER_SUBBLOCK) 484 return BitstreamEntry::getSubBlock(ReadSubBlockID()); 485 486 if (Code == bitc::DEFINE_ABBREV && 487 !(Flags & AF_DontAutoprocessAbbrevs)) { 488 // We read and accumulate abbrev's, the client can't do anything with 489 // them anyway. 490 ReadAbbrevRecord(); 491 continue; 492 } 493 494 return BitstreamEntry::getRecord(Code); 495 } 496 } 497 498 /// This is a convenience function for clients that don't expect any 499 /// subblocks. This just skips over them automatically. 500 BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) { 501 while (1) { 502 // If we found a normal entry, return it. 503 BitstreamEntry Entry = advance(Flags); 504 if (Entry.Kind != BitstreamEntry::SubBlock) 505 return Entry; 506 507 // If we found a sub-block, just skip over it and check the next entry. 508 if (SkipBlock()) 509 return BitstreamEntry::getError(); 510 } 511 } 512 ReadCode()513 unsigned ReadCode() { 514 return Read(CurCodeSize); 515 } 516 517 518 // Block header: 519 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 520 521 /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block. ReadSubBlockID()522 unsigned ReadSubBlockID() { 523 return ReadVBR(bitc::BlockIDWidth); 524 } 525 526 /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body 527 /// of this block. If the block record is malformed, return true. SkipBlock()528 bool SkipBlock() { 529 // Read and ignore the codelen value. Since we are skipping this block, we 530 // don't care what code widths are used inside of it. 531 ReadVBR(bitc::CodeLenWidth); 532 SkipToFourByteBoundary(); 533 unsigned NumFourBytes = Read(bitc::BlockSizeWidth); 534 535 // Check that the block wasn't partially defined, and that the offset isn't 536 // bogus. 537 size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8; 538 if (AtEndOfStream() || !canSkipToPos(SkipTo/8)) 539 return true; 540 541 JumpToBit(SkipTo); 542 return false; 543 } 544 545 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true 546 /// if the block has an error. 547 bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr); 548 ReadBlockEnd()549 bool ReadBlockEnd() { 550 if (BlockScope.empty()) return true; 551 552 // Block tail: 553 // [END_BLOCK, <align4bytes>] 554 SkipToFourByteBoundary(); 555 556 popBlockScope(); 557 return false; 558 } 559 560 private: 561 popBlockScope()562 void popBlockScope() { 563 CurCodeSize = BlockScope.back().PrevCodeSize; 564 565 CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs); 566 BlockScope.pop_back(); 567 } 568 569 //===--------------------------------------------------------------------===// 570 // Record Processing 571 //===--------------------------------------------------------------------===// 572 573 public: 574 /// Return the abbreviation for the specified AbbrevId. getAbbrev(unsigned AbbrevID)575 const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) { 576 unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV; 577 if (AbbrevNo >= CurAbbrevs.size()) 578 report_fatal_error("Invalid abbrev number"); 579 return CurAbbrevs[AbbrevNo].get(); 580 } 581 582 /// Read the current record and discard it. 583 void skipRecord(unsigned AbbrevID); 584 585 unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals, 586 StringRef *Blob = nullptr); 587 588 //===--------------------------------------------------------------------===// 589 // Abbrev Processing 590 //===--------------------------------------------------------------------===// 591 void ReadAbbrevRecord(); 592 593 bool ReadBlockInfoBlock(); 594 }; 595 596 } // End llvm namespace 597 598 #endif 599