1 //===- NaClBitstreamReader.h -----------------------------------*- C++ -*-===// 2 // Low-level bitstream reader interface 3 // 4 // The LLVM Compiler Infrastructure 5 // 6 // This file is distributed under the University of Illinois Open Source 7 // License. See LICENSE.TXT for details. 8 // 9 //===----------------------------------------------------------------------===// 10 // 11 // This header defines the BitstreamReader class. This class can be used to 12 // read an arbitrary bitstream, regardless of its contents. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_BITCODE_NACL_NACLBITSTREAMREADER_H 17 #define LLVM_BITCODE_NACL_NACLBITSTREAMREADER_H 18 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h" 21 #include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h" 22 #include "llvm/Support/Endian.h" 23 #include "llvm/Support/StreamingMemoryObject.h" 24 #include <atomic> 25 #include <climits> 26 #include <mutex> 27 #include <unordered_map> 28 #include <vector> 29 30 namespace llvm { 31 32 class Deserializer; 33 class NaClBitstreamCursor; 34 35 namespace naclbitc { 36 37 /// Returns the Bit as a Byte:BitInByte string. 38 std::string getBitAddress(uint64_t Bit); 39 40 /// Severity levels for reporting errors. 41 enum ErrorLevel { Warning, Error, Fatal }; 42 43 // Basic printing routine to generate the beginning of an error 44 // message. BitPosition is the bit position the error was found. 45 // Level is the severity of the error. 46 raw_ostream &ErrorAt(raw_ostream &Out, ErrorLevel Level, uint64_t BitPosition); 47 48 } // End namespace naclbitc. 49 50 /// This class is used to read from a NaCl bitcode wire format stream, 51 /// maintaining information that is global to decoding the entire file. 52 /// While a file is being read, multiple cursors can be independently 53 /// advanced or skipped around within the file. These are represented by 54 /// the NaClBitstreamCursor class. 55 class NaClBitstreamReader { 56 public: 57 // Models a raw list of abbreviations. 58 static const size_t DefaultAbbrevListSize = 12; 59 using AbbrevListVector = 60 SmallVector<NaClBitCodeAbbrev *, DefaultAbbrevListSize>; 61 62 // Models and maintains a list of abbreviations. In particular, it maintains 63 // updating reference counts of abbreviation operators within the abbreviation 64 // list. 65 class AbbrevList { 66 public: 67 AbbrevList() = default; AbbrevList(const AbbrevList & NewAbbrevs)68 explicit AbbrevList(const AbbrevList &NewAbbrevs) { 69 appendList(NewAbbrevs); 70 } 71 AbbrevList &operator=(const AbbrevList &Rhs) { 72 clear(); 73 appendList(Rhs); 74 return *this; 75 } 76 // Creates a new (empty) abbreviation, appends it to this, and then returns 77 // the new abbreviation. appendCreate()78 NaClBitCodeAbbrev *appendCreate() { 79 NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); 80 Abbrevs.push_back(Abbv); 81 return Abbv; 82 } 83 // Appends the given abbreviation to this. append(NaClBitCodeAbbrev * Abbrv)84 void append(NaClBitCodeAbbrev *Abbrv) { 85 Abbrv->addRef(); 86 Abbrevs.push_back(Abbrv); 87 } 88 // Appends the contents of NewAbbrevs to this. appendList(const AbbrevList & NewAbbrevs)89 void appendList(const AbbrevList &NewAbbrevs) { 90 for (NaClBitCodeAbbrev *Abbrv : NewAbbrevs.Abbrevs) 91 append(Abbrv); 92 } 93 // Returns last abbreviation on list. last()94 NaClBitCodeAbbrev *last() { return Abbrevs.back(); } 95 // Removes the last element of the list. popLast()96 void popLast() { 97 Abbrevs.back()->dropRef(); 98 Abbrevs.pop_back(); 99 } 100 // Empties abbreviation list. clear()101 void clear() { 102 while (!Abbrevs.empty()) 103 popLast(); 104 } 105 // Allow read access to vector defining list. getVector()106 const AbbrevListVector &getVector() const { return Abbrevs; } ~AbbrevList()107 ~AbbrevList() { clear(); } 108 109 private: 110 AbbrevListVector Abbrevs; 111 }; 112 113 /// This contains information about abbreviations in blocks defined in the 114 /// BLOCKINFO_BLOCK block. These describe global abbreviations that apply to 115 /// all succeeding blocks of the specified ID. 116 class BlockInfo { 117 BlockInfo &operator=(const BlockInfo &) = delete; 118 119 public: 120 BlockInfo() = default; BlockInfo(unsigned BlockID)121 explicit BlockInfo(unsigned BlockID) : BlockID(BlockID), Abbrevs() {} 122 BlockInfo(const BlockInfo &) = default; getBlockID()123 unsigned getBlockID() const { return BlockID; } setBlockID(unsigned ID)124 void setBlockID(unsigned ID) { BlockID = ID; } getAbbrevs()125 AbbrevList &getAbbrevs() { return Abbrevs; } ~BlockInfo()126 ~BlockInfo() {} 127 128 private: 129 unsigned BlockID; 130 AbbrevList Abbrevs; 131 }; 132 133 class BlockInfoRecordsMap; 134 using SharedBlockInfoMap = std::shared_ptr<BlockInfoRecordsMap>; 135 136 // Holds the global abbreviations in the BlockInfo block of the bitcode file. 137 // Sharing is used to allow parallel parses. Share by using std::share_ptr's 138 // and std::shared_from_this(). 139 // 140 // Note: The BlockInfo block must be parsed before sharing of the 141 // BlockInfoRecordsMap. Therefore, before changing to a parallel parse, the 142 // BlockInfoRecordsMap must be frozen. Failure to do so, can lead to 143 // unexpected behaviour. 144 // 145 // In practice, this means that only function blocks can be parsed in 146 // parallel. 147 class BlockInfoRecordsMap 148 : public std::enable_shared_from_this<BlockInfoRecordsMap> { 149 friend class NaClBitstreamReader; 150 BlockInfoRecordsMap(const BlockInfoRecordsMap &) = delete; 151 BlockInfoRecordsMap &operator=(const BlockInfoRecordsMap &) = delete; 152 153 public: 154 using InfosMap = std::unordered_map<unsigned, std::unique_ptr<BlockInfo>>; 155 create()156 static SharedBlockInfoMap create() { 157 return SharedBlockInfoMap(new BlockInfoRecordsMap()); 158 } 159 ~BlockInfoRecordsMap() = default; 160 isFrozen()161 bool isFrozen() const { return IsFrozen.load(); } 162 163 // Returns true if already frozen. freeze()164 bool freeze() { return IsFrozen.exchange(true); } 165 getBlockInfo(unsigned BlockID)166 BlockInfo *getBlockInfo(unsigned BlockID) { 167 auto Pos = KnownInfos.find(BlockID); 168 if (Pos != KnownInfos.end()) 169 return Pos->second.get(); 170 return getOrCreateUnknownBlockInfo(BlockID); 171 } 172 173 // Locks the BlockInfoRecordsMap for the lifetime of the UpdateLock. Used 174 // to allow the parsing of a BlockInfo block, and install global 175 // abbreviations. 176 // 177 // Verifies that the BlockInfoRecordsMap didn't get frozen during the 178 // instance's lifetime as a safety precaution. That is, it checks that no 179 // bitstream reader was created to share the global abbreviations before the 180 // global abbreviations are defined. 181 class UpdateLock { 182 UpdateLock() = delete; 183 UpdateLock(const UpdateLock &) = delete; 184 UpdateLock &operator=(const UpdateLock &) = delete; 185 186 public: 187 explicit UpdateLock(BlockInfoRecordsMap &BlockInfoRecords); 188 ~UpdateLock(); 189 190 private: 191 // The BlockInfoRecordsMap to update. 192 BlockInfoRecordsMap &BlockInfoRecords; 193 // The locked mutex from BlockInfoRecordsMap; 194 std::unique_lock<std::mutex> Lock; 195 }; 196 197 private: 198 // The set of known BlockInfo's. This map is prepopulated so that fast 199 // lookup can be performed thread safe (i.e. without using a lock). 200 InfosMap KnownInfos; 201 // The set of unknown BlockInfo's. This map is to handle unknown (and hence, 202 // invalid) PNaCl bitcode files. This map is updated incrementally, and uses 203 // UnknownBlockInfoLock to make it thread safe. 204 InfosMap UnknownInfos; 205 // True if the known BlockInfo blocks are frozen (i.e. the bitstream reader 206 // will ignore the BlockInfo block). 207 std::atomic_bool IsFrozen; 208 // Lock to use to update this data structure. 209 std::mutex UpdateRecordsLock; 210 // Lock to get/create an unknonw block info. 211 std::mutex UnknownBlockInfoLock; 212 213 BlockInfoRecordsMap(); 214 215 BlockInfo *getOrCreateUnknownBlockInfo(unsigned BlockID); 216 }; 217 218 private: 219 friend class NaClBitstreamCursor; 220 221 std::unique_ptr<MemoryObject> BitcodeBytes; 222 223 SharedBlockInfoMap BlockInfoRecords; 224 225 /// \brief Holds the offset of the first byte after the header. 226 size_t InitialAddress; 227 228 // Holds the number of bytes to add to the bitcode position, when reporting 229 // errors. Useful when using parallel parses of function blocks. 230 size_t ErrorOffset = 0; 231 232 // True if filler should be added to byte align records. 233 bool AlignBitcodeRecords = false; 234 NaClBitstreamReader(const NaClBitstreamReader &) = delete; 235 void operator=(const NaClBitstreamReader &) = delete; 236 initFromHeader(NaClBitcodeHeader & Header)237 void initFromHeader(NaClBitcodeHeader &Header) { 238 InitialAddress = Header.getHeaderSize(); 239 AlignBitcodeRecords = Header.getAlignBitcodeRecords(); 240 } 241 242 public: 243 /// Read stream from sequence of bytes [Start .. End) after parsing 244 /// the given bitcode header. NaClBitstreamReader(const unsigned char * Start,const unsigned char * End,NaClBitcodeHeader & Header)245 NaClBitstreamReader(const unsigned char *Start, const unsigned char *End, 246 NaClBitcodeHeader &Header) 247 : BitcodeBytes(getNonStreamedMemoryObject(Start, End)), 248 BlockInfoRecords(BlockInfoRecordsMap::create()) { 249 initFromHeader(Header); 250 } 251 252 /// Read stream from Bytes, after parsing the given bitcode header. NaClBitstreamReader(MemoryObject * Bytes,NaClBitcodeHeader & Header)253 NaClBitstreamReader(MemoryObject *Bytes, NaClBitcodeHeader &Header) 254 : BitcodeBytes(Bytes), BlockInfoRecords(BlockInfoRecordsMap::create()) { 255 initFromHeader(Header); 256 } 257 258 /// Read stream from bytes, starting at the given initial address. 259 /// Provides simple API for unit testing. NaClBitstreamReader(MemoryObject * Bytes,size_t InitialAddress)260 NaClBitstreamReader(MemoryObject *Bytes, size_t InitialAddress) 261 : BitcodeBytes(Bytes), BlockInfoRecords(BlockInfoRecordsMap::create()), 262 InitialAddress(InitialAddress) {} 263 264 /// Read stream from sequence of bytes [Start .. End), using the global 265 /// abbreviations of the given bitstream reader. Assumes that [Start .. End) 266 /// is copied from Reader's memory object. NaClBitstreamReader(size_t StartAddress,const unsigned char * Start,const unsigned char * End,NaClBitstreamReader * Reader)267 NaClBitstreamReader(size_t StartAddress, const unsigned char *Start, 268 const unsigned char *End, NaClBitstreamReader *Reader) 269 : BitcodeBytes(getNonStreamedMemoryObject(Start, End)), 270 BlockInfoRecords(Reader->BlockInfoRecords), InitialAddress(0), 271 ErrorOffset(StartAddress) { 272 BlockInfoRecords->freeze(); 273 } 274 275 // Returns the memory object that is being read. getBitcodeBytes()276 MemoryObject &getBitcodeBytes() { return *BitcodeBytes; } 277 ~NaClBitstreamReader()278 ~NaClBitstreamReader() {} 279 280 /// \brief Returns the initial address (after the header) of the input stream. getInitialAddress()281 size_t getInitialAddress() const { return InitialAddress; } 282 283 /// Returns the byte address of the first byte in the bitstream. Used 284 /// for error reporting. getErrorOffset()285 size_t getErrorOffset() const { return ErrorOffset; } 286 287 //===--------------------------------------------------------------------===// 288 // Block Manipulation 289 //===--------------------------------------------------------------------===// 290 getBlockInfo(unsigned BlockID)291 BlockInfo *getBlockInfo(unsigned BlockID) { 292 return BlockInfoRecords->getBlockInfo(BlockID); 293 } 294 }; 295 296 /// When advancing through a bitstream cursor, each advance can discover a few 297 /// different kinds of entries: 298 struct NaClBitstreamEntry { 299 enum { 300 Error, // Malformed bitcode was found. 301 EndBlock, // We've reached the end of the current block, (or the end of the 302 // file, which is treated like a series of EndBlock records. 303 SubBlock, // This is the start of a new subblock of a specific ID. 304 Record // This is a record with a specific AbbrevID. 305 } Kind; 306 307 unsigned ID; 308 getErrorNaClBitstreamEntry309 static NaClBitstreamEntry getError() { 310 NaClBitstreamEntry E; 311 E.Kind = Error; 312 return E; 313 } getEndBlockNaClBitstreamEntry314 static NaClBitstreamEntry getEndBlock() { 315 NaClBitstreamEntry E; 316 E.Kind = EndBlock; 317 return E; 318 } getSubBlockNaClBitstreamEntry319 static NaClBitstreamEntry getSubBlock(unsigned ID) { 320 NaClBitstreamEntry E; 321 E.Kind = SubBlock; 322 E.ID = ID; 323 return E; 324 } getRecordNaClBitstreamEntry325 static NaClBitstreamEntry getRecord(unsigned AbbrevID) { 326 NaClBitstreamEntry E; 327 E.Kind = Record; 328 E.ID = AbbrevID; 329 return E; 330 } 331 }; 332 333 /// Models default view of a bitcode record. 334 typedef SmallVector<uint64_t, 8> NaClBitcodeRecordVector; 335 336 /// Class NaClAbbrevListener is used to allow instances of class 337 /// NaClBitcodeParser to listen to record details when processing 338 /// abbreviations. The major reason for using a listener is that the 339 /// NaCl bitcode reader would require a major rewrite (including the 340 /// introduction of more overhead) if we were to lift abbreviations up 341 /// to the bitcode reader. That is, not only would we have to lift the 342 /// block processing up into the readers (i.e. many blocks in 343 /// NaClBitcodeReader and NaClBitcodeParser), but add many new API's 344 /// to allow the readers to update internals of the bit stream reader 345 /// appropriately. 346 class NaClAbbrevListener { 347 NaClAbbrevListener(const NaClAbbrevListener &) = delete; 348 void operator=(const NaClAbbrevListener &) = delete; 349 350 public: NaClAbbrevListener()351 NaClAbbrevListener() {} ~NaClAbbrevListener()352 virtual ~NaClAbbrevListener() {} 353 354 /// Called to process the read abbreviation. 355 virtual void ProcessAbbreviation(NaClBitCodeAbbrev *Abbrv, bool IsLocal) = 0; 356 357 /// Called after entering block. NumWords is the number of words 358 /// in the block. 359 virtual void BeginBlockInfoBlock(unsigned NumWords) = 0; 360 361 /// Called if a naclbitc::BLOCKINFO_CODE_SETBID record is found in 362 /// NaClBitstreamCursor::ReadBlockInfoBlock. 363 virtual void SetBID() = 0; 364 365 /// Called just before an EndBlock record is processed by 366 /// NaClBitstreamCursor::ReadBlockInfoBlock 367 virtual void EndBlockInfoBlock() = 0; 368 369 /// The values of the bitcode record associated with the called 370 /// virtual function. 371 NaClBitcodeRecordVector Values; 372 373 /// Start bit for current record being processed in 374 /// NaClBitstreamCursor::ReadBlockInfoBlock. 375 uint64_t StartBit; 376 }; 377 378 /// This represents a position within a bitcode file. There may be multiple 379 /// independent cursors reading within one bitstream, each maintaining their 380 /// own local state. 381 /// 382 /// Unlike iterators, NaClBitstreamCursors are heavy-weight objects 383 /// that should not be passed by value. 384 class NaClBitstreamCursor { 385 public: 386 /// This class handles errors in the bitstream reader. Redirects 387 /// fatal error messages to virtual method Fatal. 388 class ErrorHandler { 389 ErrorHandler(const ErrorHandler &) = delete; 390 ErrorHandler &operator=(const ErrorHandler &) = delete; 391 392 public: ErrorHandler(NaClBitstreamCursor & Cursor)393 explicit ErrorHandler(NaClBitstreamCursor &Cursor) : Cursor(Cursor) {} 394 LLVM_ATTRIBUTE_NORETURN 395 virtual void Fatal(const std::string &ErrorMessage) const; ~ErrorHandler()396 virtual ~ErrorHandler() {} getCurrentBitNo()397 uint64_t getCurrentBitNo() const { return Cursor.GetCurrentBitNo(); } 398 399 private: 400 NaClBitstreamCursor &Cursor; 401 }; 402 403 private: 404 friend class Deserializer; 405 NaClBitstreamReader *BitStream; 406 size_t NextChar; 407 // The current error handler for the bitstream reader. 408 std::unique_ptr<ErrorHandler> ErrHandler; 409 410 // The size of the bitcode. 0 if we don't know it yet. 411 size_t Size; 412 413 /// This is the current data we have pulled from the stream but have not 414 /// returned to the client. This is specifically and intentionally defined to 415 /// follow the word size of the host machine for efficiency. We use word_t in 416 /// places that are aware of this to make it perfectly explicit what is going 417 /// on. 418 typedef size_t word_t; 419 word_t CurWord; 420 421 /// This is the number of bits in CurWord that are valid. This 422 /// is always from [0...bits_of(word_t)-1] inclusive. 423 unsigned BitsInCurWord; 424 425 // Data specific to a block being scanned. 426 class Block { 427 public: 428 Block() = delete; 429 Block &operator=(const Block &Rhs) = default; Block(NaClBitstreamReader::BlockInfo * GlobalAbbrevs,NaClBitcodeSelectorAbbrev & CodeAbbrev)430 Block(NaClBitstreamReader::BlockInfo *GlobalAbbrevs, 431 NaClBitcodeSelectorAbbrev &CodeAbbrev) 432 : GlobalAbbrevs(GlobalAbbrevs), 433 NumGlobalAbbrevs(GlobalAbbrevs->getAbbrevs().getVector().size()), 434 LocalAbbrevs(), CodeAbbrev(CodeAbbrev) {} Block(NaClBitstreamReader::BlockInfo * GlobalAbbrevs)435 Block(NaClBitstreamReader::BlockInfo *GlobalAbbrevs) 436 : GlobalAbbrevs(GlobalAbbrevs), 437 NumGlobalAbbrevs(GlobalAbbrevs->getAbbrevs().getVector().size()), 438 LocalAbbrevs(), CodeAbbrev() {} 439 ~Block() = default; getGlobalAbbrevs()440 const NaClBitstreamReader::AbbrevList &getGlobalAbbrevs() const { 441 return GlobalAbbrevs->getAbbrevs(); 442 } getNumGlobalAbbrevs()443 unsigned getNumGlobalAbbrevs() const { return NumGlobalAbbrevs; } getLocalAbbrevs()444 const NaClBitstreamReader::AbbrevList &getLocalAbbrevs() const { 445 return LocalAbbrevs; 446 } getCodeAbbrev()447 const NaClBitcodeSelectorAbbrev &getCodeAbbrev() const { 448 return CodeAbbrev; 449 } setCodeAbbrev(NaClBitcodeSelectorAbbrev & Abbrev)450 void setCodeAbbrev(NaClBitcodeSelectorAbbrev &Abbrev) { 451 CodeAbbrev = Abbrev; 452 } appendLocalCreate()453 NaClBitCodeAbbrev *appendLocalCreate() { 454 return LocalAbbrevs.appendCreate(); 455 } moveLocalAbbrevToAbbrevList(NaClBitstreamReader::AbbrevList * List)456 void moveLocalAbbrevToAbbrevList(NaClBitstreamReader::AbbrevList *List) { 457 if (List != &LocalAbbrevs) { 458 NaClBitCodeAbbrev *Abbv = LocalAbbrevs.last(); 459 List->append(Abbv); 460 LocalAbbrevs.popLast(); 461 } 462 } 463 464 private: 465 friend class NaClBitstreamCursor; 466 // The global abbreviations associated with this scope. 467 NaClBitstreamReader::BlockInfo *GlobalAbbrevs; 468 // Number of abbreviations when block was entered. Used to limit scope of 469 // CurBlockInfo, since any abbreviation added inside a BlockInfo block 470 // (within this block) must not effect global abbreviations. 471 unsigned NumGlobalAbbrevs; 472 NaClBitstreamReader::AbbrevList LocalAbbrevs; 473 // This is the declared size of code values used for the current block, in 474 // bits. 475 NaClBitcodeSelectorAbbrev CodeAbbrev; 476 }; 477 478 /// This tracks the Block-specific information for each nested block. 479 SmallVector<Block, 8> BlockScope; 480 481 NaClBitstreamCursor(const NaClBitstreamCursor &) = delete; 482 NaClBitstreamCursor &operator=(const NaClBitstreamCursor &) = delete; 483 484 public: NaClBitstreamCursor()485 NaClBitstreamCursor() : ErrHandler(new ErrorHandler(*this)) { init(nullptr); } 486 NaClBitstreamCursor(NaClBitstreamReader & R)487 explicit NaClBitstreamCursor(NaClBitstreamReader &R) 488 : ErrHandler(new ErrorHandler(*this)) { 489 init(&R); 490 } 491 init(NaClBitstreamReader * R)492 void init(NaClBitstreamReader *R) { 493 freeState(); 494 BitStream = R; 495 NextChar = (BitStream == nullptr) ? 0 : BitStream->getInitialAddress(); 496 Size = 0; 497 BitsInCurWord = 0; 498 if (BitStream) { 499 BlockScope.push_back( 500 Block(BitStream->getBlockInfo(naclbitc::TOP_LEVEL_BLOCKID))); 501 } 502 } 503 ~NaClBitstreamCursor()504 ~NaClBitstreamCursor() { freeState(); } 505 freeState()506 void freeState() { 507 while (!BlockScope.empty()) 508 BlockScope.pop_back(); 509 } 510 511 // Replaces the current bitstream error handler with the new 512 // handler. Takes ownership of the new handler and deletes it when 513 // it is no longer needed. setErrorHandler(std::unique_ptr<ErrorHandler> & NewHandler)514 void setErrorHandler(std::unique_ptr<ErrorHandler> &NewHandler) { 515 ErrHandler = std::move(NewHandler); 516 } 517 canSkipToPos(size_t pos)518 bool canSkipToPos(size_t pos) const { 519 // pos can be skipped to if it is a valid address or one byte past the end. 520 return pos == 0 || BitStream->getBitcodeBytes().isValidAddress( 521 static_cast<uint64_t>(pos - 1)); 522 } 523 AtEndOfStream()524 bool AtEndOfStream() { 525 if (BitsInCurWord != 0) 526 return false; 527 if (Size != 0) 528 return Size == NextChar; 529 fillCurWord(); 530 return BitsInCurWord == 0; 531 } 532 533 /// Return the number of bits used to encode an abbrev #. getAbbrevIDWidth()534 unsigned getAbbrevIDWidth() const { 535 return BlockScope.back().getCodeAbbrev().NumBits; 536 } 537 538 /// Return the bit # of the bit we are reading. GetCurrentBitNo()539 uint64_t GetCurrentBitNo() const { 540 return NextChar * CHAR_BIT - BitsInCurWord; 541 } 542 543 /// Converts the given position into the corresponding Error position. getErrorBitNo(uint64_t Position)544 uint64_t getErrorBitNo(uint64_t Position) const { 545 return BitStream->getErrorOffset() * CHAR_BIT + Position; 546 } 547 548 /// Returns the current bit address for reporting errors. getErrorBitNo()549 uint64_t getErrorBitNo() const { return getErrorBitNo(GetCurrentBitNo()); } 550 getBitStreamReader()551 NaClBitstreamReader *getBitStreamReader() { return BitStream; } getBitStreamReader()552 const NaClBitstreamReader *getBitStreamReader() const { return BitStream; } 553 554 /// Returns the current bit address (string) of the bit cursor. getCurrentBitAddress()555 std::string getCurrentBitAddress() const { 556 return naclbitc::getBitAddress(GetCurrentBitNo()); 557 } 558 559 /// Flags that modify the behavior of advance(). 560 enum { 561 /// If this flag is used, the advance() method does not automatically pop 562 /// the block scope when the end of a block is reached. 563 AF_DontPopBlockAtEnd = 1, 564 565 /// If this flag is used, abbrev entries are returned just like normal 566 /// records. 567 AF_DontAutoprocessAbbrevs = 2 568 }; 569 570 /// Advance the current bitstream, returning the next entry in the stream. 571 /// Use the given abbreviation listener (if provided). advance(unsigned Flags,NaClAbbrevListener * Listener)572 NaClBitstreamEntry advance(unsigned Flags, NaClAbbrevListener *Listener) { 573 while (1) { 574 unsigned Code = ReadCode(); 575 if (Code == naclbitc::END_BLOCK) { 576 // Pop the end of the block unless Flags tells us not to. 577 if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd()) 578 return NaClBitstreamEntry::getError(); 579 return NaClBitstreamEntry::getEndBlock(); 580 } 581 582 if (Code == naclbitc::ENTER_SUBBLOCK) 583 return NaClBitstreamEntry::getSubBlock(ReadSubBlockID()); 584 585 if (Code == naclbitc::DEFINE_ABBREV && 586 !(Flags & AF_DontAutoprocessAbbrevs)) { 587 // We read and accumulate abbrev's, the client can't do anything with 588 // them anyway. 589 ReadAbbrevRecord(true, Listener); 590 continue; 591 } 592 593 return NaClBitstreamEntry::getRecord(Code); 594 } 595 } 596 597 /// This is a convenience function for clients that don't expect any 598 /// subblocks. This just skips over them automatically. 599 NaClBitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) { 600 while (1) { 601 // If we found a normal entry, return it. 602 NaClBitstreamEntry Entry = advance(Flags, 0); 603 if (Entry.Kind != NaClBitstreamEntry::SubBlock) 604 return Entry; 605 606 // If we found a sub-block, just skip over it and check the next entry. 607 if (SkipBlock()) 608 return NaClBitstreamEntry::getError(); 609 } 610 } 611 612 /// Returns the starting byte of the word containing BitNo. getStartWordByteForBit(uint64_t BitNo)613 uintptr_t getStartWordByteForBit(uint64_t BitNo) const { 614 return uintptr_t(BitNo / CHAR_BIT) & ~(sizeof(word_t) - 1); 615 } 616 617 /// Returns the index of BitNo within the word it appears in. getWordBitNo(uint64_t BitNo)618 unsigned getWordBitNo(uint64_t BitNo) const { 619 return unsigned(BitNo & (sizeof(word_t) * CHAR_BIT - 1)); 620 } 621 622 /// Returns the ending byte of the word containing BitNo. getEndWordByteForBit(uint64_t BitNo)623 uintptr_t getEndWordByteForBit(uint64_t BitNo) const { 624 return getStartWordByteForBit(BitNo) + 625 (getWordBitNo(BitNo) ? sizeof(word_t) : 0); 626 } 627 628 /// Fills Buffer[Size] using bytes at Address (in the memory object being 629 /// read). Returns number of bytes filled (less than Size if at end of memory 630 /// object). fillBuffer(uint8_t * Buffer,size_t Size,size_t Address)631 uint64_t fillBuffer(uint8_t *Buffer, size_t Size, size_t Address) const { 632 return BitStream->getBitcodeBytes().readBytes(Buffer, Size, Address); 633 } 634 635 /// Reset the stream to the specified bit number. JumpToBit(uint64_t BitNo)636 void JumpToBit(uint64_t BitNo) { 637 const uintptr_t ByteNo = getStartWordByteForBit(BitNo); 638 const unsigned WordBitNo = getWordBitNo(BitNo); 639 if (!canSkipToPos(ByteNo)) 640 reportInvalidJumpToBit(BitNo); 641 642 // Move the cursor to the right word. 643 NextChar = ByteNo; 644 BitsInCurWord = 0; 645 646 // Skip over any bits that are already consumed. 647 if (WordBitNo) 648 Read(WordBitNo); 649 } 650 fillCurWord()651 void fillCurWord() { 652 assert(Size == 0 || NextChar < (unsigned)Size); 653 654 // Read the next word from the stream. 655 uint8_t Array[sizeof(word_t)] = {0}; 656 657 uint64_t BytesRead = fillBuffer(Array, sizeof(Array), NextChar); 658 659 // If we run out of data, stop at the end of the stream. 660 if (BytesRead == 0) { 661 Size = NextChar; 662 return; 663 } 664 665 CurWord = 666 support::endian::read<word_t, support::little, support::unaligned>( 667 Array); 668 NextChar += BytesRead; 669 BitsInCurWord = BytesRead * CHAR_BIT; 670 } 671 Read(unsigned NumBits)672 word_t Read(unsigned NumBits) { 673 static const unsigned BitsInWord = sizeof(word_t) * CHAR_BIT; 674 675 assert(NumBits && NumBits <= BitsInWord && 676 "Cannot return zero or more than BitsInWord bits!"); 677 678 static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f; 679 680 // If the field is fully contained by CurWord, return it quickly. 681 if (BitsInCurWord >= NumBits) { 682 word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits)); 683 684 // Use a mask to avoid undefined behavior. 685 CurWord >>= (NumBits & Mask); 686 687 BitsInCurWord -= NumBits; 688 return R; 689 } 690 691 word_t R = BitsInCurWord ? CurWord : 0; 692 unsigned BitsLeft = NumBits - BitsInCurWord; 693 694 fillCurWord(); 695 696 // If we run out of data, stop at the end of the stream. 697 if (BitsLeft > BitsInCurWord) 698 return 0; 699 700 word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft)); 701 702 // Use a mask to avoid undefined behavior. 703 CurWord >>= (BitsLeft & Mask); 704 705 BitsInCurWord -= BitsLeft; 706 707 R |= R2 << (NumBits - BitsLeft); 708 709 return R; 710 } 711 ReadVBR(unsigned NumBits)712 uint32_t ReadVBR(unsigned NumBits) { 713 uint32_t Piece = Read(NumBits); 714 if ((Piece & (1U << (NumBits - 1))) == 0) 715 return Piece; 716 717 uint32_t Result = 0; 718 unsigned NextBit = 0; 719 while (1) { 720 Result |= (Piece & ((1U << (NumBits - 1)) - 1)) << NextBit; 721 722 if ((Piece & (1U << (NumBits - 1))) == 0) 723 return Result; 724 725 NextBit += NumBits - 1; 726 Piece = Read(NumBits); 727 } 728 } 729 730 // Read a VBR that may have a value up to 64-bits in size. The chunk size of 731 // the VBR must still be <= 32 bits though. ReadVBR64(unsigned NumBits)732 uint64_t ReadVBR64(unsigned NumBits) { 733 uint32_t Piece = Read(NumBits); 734 if ((Piece & (1U << (NumBits - 1))) == 0) 735 return uint64_t(Piece); 736 737 uint64_t Result = 0; 738 unsigned NextBit = 0; 739 while (1) { 740 Result |= uint64_t(Piece & ((1U << (NumBits - 1)) - 1)) << NextBit; 741 742 if ((Piece & (1U << (NumBits - 1))) == 0) 743 return Result; 744 745 NextBit += NumBits - 1; 746 Piece = Read(NumBits); 747 } 748 } 749 750 private: SkipToByteBoundary()751 void SkipToByteBoundary() { 752 unsigned BitsToSkip = BitsInCurWord % CHAR_BIT; 753 if (BitsToSkip) { 754 CurWord >>= BitsToSkip; 755 BitsInCurWord -= BitsToSkip; 756 } 757 } 758 SkipToByteBoundaryIfAligned()759 void SkipToByteBoundaryIfAligned() { 760 if (BitStream->AlignBitcodeRecords) 761 SkipToByteBoundary(); 762 } 763 SkipToFourByteBoundary()764 void SkipToFourByteBoundary() { 765 // If word_t is 64-bits and if we've read less than 32 bits, just dump 766 // the bits we have up to the next 32-bit boundary. 767 if (sizeof(word_t) > 4 && BitsInCurWord >= 32) { 768 CurWord >>= BitsInCurWord - 32; 769 BitsInCurWord = 32; 770 return; 771 } 772 773 BitsInCurWord = 0; 774 } 775 776 public: ReadCode()777 unsigned ReadCode() { 778 const NaClBitcodeSelectorAbbrev &CodeAbbrev = 779 BlockScope.back().getCodeAbbrev(); 780 return CodeAbbrev.IsFixed ? Read(CodeAbbrev.NumBits) 781 : ReadVBR(CodeAbbrev.NumBits); 782 } 783 784 // Block header: 785 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 786 787 /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block. ReadSubBlockID()788 unsigned ReadSubBlockID() { return ReadVBR(naclbitc::BlockIDWidth); } 789 790 /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body 791 /// of this block. If the block record is malformed, return true. SkipBlock()792 bool SkipBlock() { 793 // Read and ignore the codelen value. Since we are skipping this block, we 794 // don't care what code widths are used inside of it. 795 ReadVBR(naclbitc::CodeLenWidth); 796 SkipToFourByteBoundary(); 797 unsigned NumFourBytes = Read(naclbitc::BlockSizeWidth); 798 799 // Check that the block wasn't partially defined, and that the offset isn't 800 // bogus. 801 size_t SkipTo = GetCurrentBitNo() + NumFourBytes * 4 * CHAR_BIT; 802 if (AtEndOfStream() || !canSkipToPos(SkipTo / CHAR_BIT)) 803 return true; 804 805 JumpToBit(SkipTo); 806 return false; 807 } 808 809 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true 810 /// if the block has an error. 811 bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr); 812 ReadBlockEnd()813 bool ReadBlockEnd() { 814 if (BlockScope.empty()) 815 return true; 816 817 // Block tail: 818 // [END_BLOCK, <align4bytes>] 819 SkipToFourByteBoundary(); 820 821 BlockScope.pop_back(); 822 return false; 823 } 824 825 private: 826 //===--------------------------------------------------------------------===// 827 // Record Processing 828 //===--------------------------------------------------------------------===// 829 830 private: 831 // Returns abbreviation encoding associated with Value. 832 NaClBitCodeAbbrevOp::Encoding getEncoding(uint64_t Value); 833 834 void skipAbbreviatedField(const NaClBitCodeAbbrevOp &Op); 835 836 // Reads the next Value using the abbreviation Op. Returns true only 837 // if Op is an array (and sets Value to the number of elements in the 838 // array). 839 inline bool readRecordAbbrevField(const NaClBitCodeAbbrevOp &Op, 840 uint64_t &Value); 841 842 // Reads and returns the next value using the abbreviation Op, 843 // assuming Op appears after an array abbreviation. 844 inline uint64_t readArrayAbbreviatedField(const NaClBitCodeAbbrevOp &Op); 845 846 // Reads the array abbreviation Op, NumArrayElements times, putting 847 // the read values in Vals. 848 inline void readArrayAbbrev(const NaClBitCodeAbbrevOp &Op, 849 unsigned NumArrayElements, 850 SmallVectorImpl<uint64_t> &Vals); 851 852 // Reports that that abbreviation Index is not valid. 853 void reportInvalidAbbrevNumber(unsigned Index) const; 854 855 // Reports that jumping to Bit is not valid. 856 void reportInvalidJumpToBit(uint64_t Bit) const; 857 858 public: 859 /// Return the abbreviation for the specified AbbrevId. getAbbrev(unsigned AbbrevID)860 const NaClBitCodeAbbrev *getAbbrev(unsigned AbbrevID) const { 861 unsigned AbbrevNo = AbbrevID - naclbitc::FIRST_APPLICATION_ABBREV; 862 const Block &CurBlock = BlockScope.back(); 863 const unsigned NumGlobalAbbrevs = CurBlock.getNumGlobalAbbrevs(); 864 if (AbbrevNo < NumGlobalAbbrevs) 865 return CurBlock.getGlobalAbbrevs().getVector()[AbbrevNo]; 866 unsigned LocalAbbrevNo = AbbrevNo - NumGlobalAbbrevs; 867 NaClBitstreamReader::AbbrevListVector LocalAbbrevs = 868 CurBlock.getLocalAbbrevs().getVector(); 869 if (LocalAbbrevNo >= LocalAbbrevs.size()) 870 reportInvalidAbbrevNumber(AbbrevID); 871 return LocalAbbrevs[LocalAbbrevNo]; 872 } 873 874 /// Read the current record and discard it. 875 void skipRecord(unsigned AbbrevID); 876 877 unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals); 878 879 //===--------------------------------------------------------------------===// 880 // Abbrev Processing 881 //===--------------------------------------------------------------------===// 882 // IsLocal indicates where the abbreviation occurs. If it is in the 883 // BlockInfo block, IsLocal is false. In all other cases, IsLocal is 884 // true. 885 void ReadAbbrevRecord(bool IsLocal, NaClAbbrevListener *Listener); 886 887 // Skips over an abbreviation record. Duplicates code of ReadAbbrevRecord, 888 // except that no abbreviation is built. 889 void SkipAbbrevRecord(); 890 891 bool ReadBlockInfoBlock(NaClAbbrevListener *Listener); 892 }; 893 894 } // namespace llvm 895 896 #endif 897