1 //===- NaClBitstreamReader.h -----------------------------------*- C++ -*-===// 2 // Low-level bitstream reader interface 3 // 4 // The LLVM Compiler Infrastructure 5 // 6 // This file is distributed under the University of Illinois Open Source 7 // License. See LICENSE.TXT for details. 8 // 9 //===----------------------------------------------------------------------===// 10 // 11 // This header defines the BitstreamReader class. This class can be used to 12 // read an arbitrary bitstream, regardless of its contents. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_BITCODE_NACL_NACLBITSTREAMREADER_H 17 #define LLVM_BITCODE_NACL_NACLBITSTREAMREADER_H 18 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h" 21 #include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h" 22 #include "llvm/Support/Endian.h" 23 #include "llvm/Support/StreamingMemoryObject.h" 24 #include <atomic> 25 #include <climits> 26 #include <mutex> 27 #include <unordered_map> 28 #include <vector> 29 30 namespace llvm { 31 32 class Deserializer; 33 class NaClBitstreamCursor; 34 35 namespace naclbitc { 36 37 /// Returns the Bit as a Byte:BitInByte string. 38 std::string getBitAddress(uint64_t Bit); 39 40 /// Severity levels for reporting errors. 41 enum ErrorLevel { Warning, Error, Fatal }; 42 43 // Basic printing routine to generate the beginning of an error 44 // message. BitPosition is the bit position the error was found. 45 // Level is the severity of the error. 46 raw_ostream &ErrorAt(raw_ostream &Out, ErrorLevel Level, uint64_t BitPosition); 47 48 } // End namespace naclbitc. 49 50 /// This class is used to read from a NaCl bitcode wire format stream, 51 /// maintaining information that is global to decoding the entire file. 52 /// While a file is being read, multiple cursors can be independently 53 /// advanced or skipped around within the file. These are represented by 54 /// the NaClBitstreamCursor class. 55 class NaClBitstreamReader { 56 public: 57 // Models a raw list of abbreviations. 58 static const size_t DefaultAbbrevListSize = 12; 59 using AbbrevListVector = 60 SmallVector<NaClBitCodeAbbrev *, DefaultAbbrevListSize>; 61 62 // Models and maintains a list of abbreviations. In particular, it maintains 63 // updating reference counts of abbreviation operators within the abbreviation 64 // list. 65 class AbbrevList { 66 public: 67 AbbrevList() = default; AbbrevList(const AbbrevList & NewAbbrevs)68 explicit AbbrevList(const AbbrevList &NewAbbrevs) { 69 appendList(NewAbbrevs); 70 } 71 AbbrevList &operator=(const AbbrevList &Rhs) { 72 clear(); 73 appendList(Rhs); 74 return *this; 75 } 76 // Creates a new (empty) abbreviation, appends it to this, and then returns 77 // the new abbreviation. appendCreate()78 NaClBitCodeAbbrev *appendCreate() { 79 NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); 80 Abbrevs.push_back(Abbv); 81 return Abbv; 82 } 83 // Appends the given abbreviation to this. append(NaClBitCodeAbbrev * Abbrv)84 void append(NaClBitCodeAbbrev *Abbrv) { 85 Abbrv->addRef(); 86 Abbrevs.push_back(Abbrv); 87 } 88 // Appends the contents of NewAbbrevs to this. appendList(const AbbrevList & NewAbbrevs)89 void appendList(const AbbrevList &NewAbbrevs) { 90 for (NaClBitCodeAbbrev *Abbrv : NewAbbrevs.Abbrevs) 91 append(Abbrv); 92 } 93 // Returns last abbreviation on list. last()94 NaClBitCodeAbbrev *last() { return Abbrevs.back(); } 95 // Removes the last element of the list. popLast()96 void popLast() { 97 Abbrevs.back()->dropRef(); 98 Abbrevs.pop_back(); 99 } 100 // Empties abbreviation list. clear()101 void clear() { 102 while (!Abbrevs.empty()) 103 popLast(); 104 } 105 // Allow read access to vector defining list. getVector()106 const AbbrevListVector &getVector() const { return Abbrevs; } ~AbbrevList()107 ~AbbrevList() { clear(); } 108 109 private: 110 AbbrevListVector Abbrevs; 111 }; 112 113 /// This contains information about abbreviations in blocks defined in the 114 /// BLOCKINFO_BLOCK block. These describe global abbreviations that apply to 115 /// all succeeding blocks of the specified ID. 116 class BlockInfo { 117 BlockInfo &operator=(const BlockInfo &) = delete; 118 119 public: 120 BlockInfo() = default; BlockInfo(unsigned BlockID)121 explicit BlockInfo(unsigned BlockID) : BlockID(BlockID), Abbrevs() {} 122 BlockInfo(const BlockInfo &) = default; getBlockID()123 unsigned getBlockID() const { return BlockID; } setBlockID(unsigned ID)124 void setBlockID(unsigned ID) { BlockID = ID; } getAbbrevs()125 AbbrevList &getAbbrevs() { return Abbrevs; } ~BlockInfo()126 ~BlockInfo() {} 127 128 private: 129 unsigned BlockID; 130 AbbrevList Abbrevs; 131 }; 132 133 class BlockInfoRecordsMap; 134 using SharedBlockInfoMap = std::shared_ptr<BlockInfoRecordsMap>; 135 136 // Holds the global abbreviations in the BlockInfo block of the bitcode file. 137 // Sharing is used to allow parallel parses. Share by using std::share_ptr's 138 // and std::shared_from_this(). 139 // 140 // Note: The BlockInfo block must be parsed before sharing of the 141 // BlockInfoRecordsMap. Therefore, before changing to a parallel parse, the 142 // BlockInfoRecordsMap must be frozen. Failure to do so, can lead to 143 // unexpected behaviour. 144 // 145 // In practice, this means that only function blocks can be parsed in 146 // parallel. 147 class BlockInfoRecordsMap 148 : public std::enable_shared_from_this<BlockInfoRecordsMap> { 149 friend class NaClBitstreamReader; 150 BlockInfoRecordsMap(const BlockInfoRecordsMap &) = delete; 151 BlockInfoRecordsMap &operator=(const BlockInfoRecordsMap &) = delete; 152 153 public: 154 using InfosMap = std::unordered_map<unsigned, std::unique_ptr<BlockInfo>>; 155 create()156 static SharedBlockInfoMap create() { 157 return SharedBlockInfoMap(new BlockInfoRecordsMap()); 158 } 159 ~BlockInfoRecordsMap() = default; 160 isFrozen()161 bool isFrozen() const { return IsFrozen.load(); } 162 163 // Returns true if already frozen. freeze()164 bool freeze() { return IsFrozen.exchange(true); } 165 getBlockInfo(unsigned BlockID)166 BlockInfo *getBlockInfo(unsigned BlockID) { 167 auto Pos = KnownInfos.find(BlockID); 168 if (Pos != KnownInfos.end()) 169 return Pos->second.get(); 170 return getOrCreateUnknownBlockInfo(BlockID); 171 } 172 173 // Locks the BlockInfoRecordsMap for the lifetime of the UpdateLock. Used 174 // to allow the parsing of a BlockInfo block, and install global 175 // abbreviations. 176 // 177 // Verifies that the BlockInfoRecordsMap didn't get frozen during the 178 // instance's lifetime as a safety precaution. That is, it checks that no 179 // bitstream reader was created to share the global abbreviations before the 180 // global abbreviations are defined. 181 class UpdateLock { 182 UpdateLock() = delete; 183 UpdateLock(const UpdateLock &) = delete; 184 UpdateLock &operator=(const UpdateLock &) = delete; 185 186 public: 187 explicit UpdateLock(BlockInfoRecordsMap &BlockInfoRecords); 188 ~UpdateLock(); 189 190 private: 191 // The BlockInfoRecordsMap to update. 192 BlockInfoRecordsMap &BlockInfoRecords; 193 // The locked mutex from BlockInfoRecordsMap; 194 std::unique_lock<std::mutex> Lock; 195 }; 196 197 private: 198 // The set of known BlockInfo's. This map is prepopulated so that fast 199 // lookup can be performed thread safe (i.e. without using a lock). 200 InfosMap KnownInfos; 201 // The set of unknown BlockInfo's. This map is to handle unknown (and hence, 202 // invalid) PNaCl bitcode files. This map is updated incrementally, and uses 203 // UnknownBlockInfoLock to make it thread safe. 204 InfosMap UnknownInfos; 205 // True if the known BlockInfo blocks are frozen (i.e. the bitstream reader 206 // will ignore the BlockInfo block). 207 std::atomic_bool IsFrozen; 208 // Lock to use to update this data structure. 209 std::mutex UpdateRecordsLock; 210 // Lock to get/create an unknonw block info. 211 std::mutex UnknownBlockInfoLock; 212 213 BlockInfoRecordsMap(); 214 215 BlockInfo *getOrCreateUnknownBlockInfo(unsigned BlockID); 216 }; 217 218 private: 219 friend class NaClBitstreamCursor; 220 221 std::unique_ptr<MemoryObject> BitcodeBytes; 222 223 SharedBlockInfoMap BlockInfoRecords; 224 225 /// \brief Holds the offset of the first byte after the header. 226 size_t InitialAddress; 227 228 // Holds the number of bytes to add to the bitcode position, when reporting 229 // errors. Useful when using parallel parses of function blocks. 230 size_t ErrorOffset = 0; 231 232 // True if filler should be added to byte align records. 233 bool AlignBitcodeRecords = false; 234 NaClBitstreamReader(const NaClBitstreamReader &) = delete; 235 void operator=(const NaClBitstreamReader &) = delete; 236 initFromHeader(NaClBitcodeHeader & Header)237 void initFromHeader(NaClBitcodeHeader &Header) { 238 InitialAddress = Header.getHeaderSize(); 239 AlignBitcodeRecords = Header.getAlignBitcodeRecords(); 240 } 241 242 public: 243 /// Read stream from sequence of bytes [Start .. End) after parsing 244 /// the given bitcode header. NaClBitstreamReader(const unsigned char * Start,const unsigned char * End,NaClBitcodeHeader & Header)245 NaClBitstreamReader(const unsigned char *Start, const unsigned char *End, 246 NaClBitcodeHeader &Header) 247 : BitcodeBytes(getNonStreamedMemoryObject(Start, End)), 248 BlockInfoRecords(BlockInfoRecordsMap::create()) { 249 initFromHeader(Header); 250 } 251 252 /// Read stream from Bytes, after parsing the given bitcode header. NaClBitstreamReader(MemoryObject * Bytes,NaClBitcodeHeader & Header)253 NaClBitstreamReader(MemoryObject *Bytes, NaClBitcodeHeader &Header) 254 : BitcodeBytes(Bytes), BlockInfoRecords(BlockInfoRecordsMap::create()) { 255 initFromHeader(Header); 256 } 257 258 /// Read stream from bytes, starting at the given initial address. 259 /// Provides simple API for unit testing. NaClBitstreamReader(MemoryObject * Bytes,size_t InitialAddress)260 NaClBitstreamReader(MemoryObject *Bytes, size_t InitialAddress) 261 : BitcodeBytes(Bytes), BlockInfoRecords(BlockInfoRecordsMap::create()), 262 InitialAddress(InitialAddress) {} 263 264 /// Read stream from sequence of bytes [Start .. End), using the global 265 /// abbreviations of the given bitstream reader. Assumes that [Start .. End) 266 /// is copied from Reader's memory object. NaClBitstreamReader(size_t StartAddress,const unsigned char * Start,const unsigned char * End,NaClBitstreamReader * Reader)267 NaClBitstreamReader(size_t StartAddress, const unsigned char *Start, 268 const unsigned char *End, NaClBitstreamReader *Reader) 269 : BitcodeBytes(getNonStreamedMemoryObject(Start, End)), 270 BlockInfoRecords(Reader->BlockInfoRecords), InitialAddress(0), 271 ErrorOffset(StartAddress) { 272 BlockInfoRecords->freeze(); 273 } 274 275 // Returns the memory object that is being read. getBitcodeBytes()276 MemoryObject &getBitcodeBytes() { return *BitcodeBytes; } 277 ~NaClBitstreamReader()278 ~NaClBitstreamReader() {} 279 280 /// \brief Returns the initial address (after the header) of the input stream. getInitialAddress()281 size_t getInitialAddress() const { return InitialAddress; } 282 283 /// Returns the byte address of the first byte in the bitstream. Used 284 /// for error reporting. getErrorOffset()285 size_t getErrorOffset() const { return ErrorOffset; } 286 287 //===--------------------------------------------------------------------===// 288 // Block Manipulation 289 //===--------------------------------------------------------------------===// 290 getBlockInfo(unsigned BlockID)291 BlockInfo *getBlockInfo(unsigned BlockID) { 292 return BlockInfoRecords->getBlockInfo(BlockID); 293 } 294 }; 295 296 /// When advancing through a bitstream cursor, each advance can discover a few 297 /// different kinds of entries: 298 struct NaClBitstreamEntry { 299 enum { 300 Error, // Malformed bitcode was found. 301 EndBlock, // We've reached the end of the current block, (or the end of the 302 // file, which is treated like a series of EndBlock records. 303 SubBlock, // This is the start of a new subblock of a specific ID. 304 Record // This is a record with a specific AbbrevID. 305 } Kind; 306 307 unsigned ID; 308 getErrorNaClBitstreamEntry309 static NaClBitstreamEntry getError() { 310 NaClBitstreamEntry E; 311 E.Kind = Error; 312 return E; 313 } getEndBlockNaClBitstreamEntry314 static NaClBitstreamEntry getEndBlock() { 315 NaClBitstreamEntry E; 316 E.Kind = EndBlock; 317 return E; 318 } getSubBlockNaClBitstreamEntry319 static NaClBitstreamEntry getSubBlock(unsigned ID) { 320 NaClBitstreamEntry E; 321 E.Kind = SubBlock; 322 E.ID = ID; 323 return E; 324 } getRecordNaClBitstreamEntry325 static NaClBitstreamEntry getRecord(unsigned AbbrevID) { 326 NaClBitstreamEntry E; 327 E.Kind = Record; 328 E.ID = AbbrevID; 329 return E; 330 } 331 }; 332 333 /// Models default view of a bitcode record. 334 typedef SmallVector<uint64_t, 8> NaClBitcodeRecordVector; 335 336 /// Class NaClAbbrevListener is used to allow instances of class 337 /// NaClBitcodeParser to listen to record details when processing 338 /// abbreviations. The major reason for using a listener is that the 339 /// NaCl bitcode reader would require a major rewrite (including the 340 /// introduction of more overhead) if we were to lift abbreviations up 341 /// to the bitcode reader. That is, not only would we have to lift the 342 /// block processing up into the readers (i.e. many blocks in 343 /// NaClBitcodeReader and NaClBitcodeParser), but add many new API's 344 /// to allow the readers to update internals of the bit stream reader 345 /// appropriately. 346 class NaClAbbrevListener { 347 NaClAbbrevListener(const NaClAbbrevListener &) = delete; 348 void operator=(const NaClAbbrevListener &) = delete; 349 350 public: NaClAbbrevListener()351 NaClAbbrevListener() {} ~NaClAbbrevListener()352 virtual ~NaClAbbrevListener() {} 353 354 /// Called to process the read abbreviation. 355 virtual void ProcessAbbreviation(NaClBitCodeAbbrev *Abbrv, bool IsLocal) = 0; 356 357 /// Called after entering block. NumWords is the number of words 358 /// in the block. 359 virtual void BeginBlockInfoBlock(unsigned NumWords) = 0; 360 361 /// Called if a naclbitc::BLOCKINFO_CODE_SETBID record is found in 362 /// NaClBitstreamCursor::ReadBlockInfoBlock. 363 virtual void SetBID() = 0; 364 365 /// Called just before an EndBlock record is processed by 366 /// NaClBitstreamCursor::ReadBlockInfoBlock 367 virtual void EndBlockInfoBlock() = 0; 368 369 /// The values of the bitcode record associated with the called 370 /// virtual function. 371 NaClBitcodeRecordVector Values; 372 373 /// Start bit for current record being processed in 374 /// NaClBitstreamCursor::ReadBlockInfoBlock. 375 uint64_t StartBit; 376 }; 377 378 /// This represents a position within a bitcode file. There may be multiple 379 /// independent cursors reading within one bitstream, each maintaining their 380 /// own local state. 381 /// 382 /// Unlike iterators, NaClBitstreamCursors are heavy-weight objects 383 /// that should not be passed by value. 384 class NaClBitstreamCursor { 385 public: 386 /// This class handles errors in the bitstream reader. Redirects 387 /// fatal error messages to virtual method Fatal. 388 class ErrorHandler { 389 ErrorHandler(const ErrorHandler &) = delete; 390 ErrorHandler &operator=(const ErrorHandler &) = delete; 391 392 public: ErrorHandler(NaClBitstreamCursor & Cursor)393 explicit ErrorHandler(NaClBitstreamCursor &Cursor) : Cursor(Cursor) {} 394 LLVM_ATTRIBUTE_NORETURN 395 virtual void Fatal(const std::string &ErrorMessage) const; ~ErrorHandler()396 virtual ~ErrorHandler() {} getCurrentBitNo()397 uint64_t getCurrentBitNo() const { return Cursor.GetCurrentBitNo(); } 398 399 private: 400 NaClBitstreamCursor &Cursor; 401 }; 402 403 private: 404 friend class Deserializer; 405 NaClBitstreamReader *BitStream; 406 size_t NextChar; 407 // The current error handler for the bitstream reader. 408 std::unique_ptr<ErrorHandler> ErrHandler; 409 410 // The size of the bitcode. 0 if we don't know it yet. 411 size_t Size; 412 413 /// This is the current data we have pulled from the stream but have not 414 /// returned to the client. This is specifically and intentionally defined to 415 /// follow the word size of the host machine for efficiency. We use word_t in 416 /// places that are aware of this to make it perfectly explicit what is going 417 /// on. 418 typedef size_t word_t; 419 word_t CurWord; 420 421 /// This is the number of bits in CurWord that are valid. This 422 /// is always from [0...bits_of(word_t)-1] inclusive. 423 unsigned BitsInCurWord; 424 425 // Data specific to a block being scanned. 426 class Block { 427 public: 428 Block() = delete; 429 Block &operator=(const Block &Rhs) { 430 GlobalAbbrevs = Rhs.GlobalAbbrevs; 431 NumGlobalAbbrevs = Rhs.NumGlobalAbbrevs; 432 LocalAbbrevs = Rhs.LocalAbbrevs; 433 CodeAbbrev = Rhs.CodeAbbrev; 434 return *this; 435 } Block(NaClBitstreamReader::BlockInfo * GlobalAbbrevs,NaClBitcodeSelectorAbbrev & CodeAbbrev)436 Block(NaClBitstreamReader::BlockInfo *GlobalAbbrevs, 437 NaClBitcodeSelectorAbbrev &CodeAbbrev) 438 : GlobalAbbrevs(GlobalAbbrevs), 439 NumGlobalAbbrevs(GlobalAbbrevs->getAbbrevs().getVector().size()), 440 LocalAbbrevs(), CodeAbbrev(CodeAbbrev) {} Block(NaClBitstreamReader::BlockInfo * GlobalAbbrevs)441 Block(NaClBitstreamReader::BlockInfo *GlobalAbbrevs) 442 : GlobalAbbrevs(GlobalAbbrevs), 443 NumGlobalAbbrevs(GlobalAbbrevs->getAbbrevs().getVector().size()), 444 LocalAbbrevs(), CodeAbbrev() {} 445 ~Block() = default; getGlobalAbbrevs()446 const NaClBitstreamReader::AbbrevList &getGlobalAbbrevs() const { 447 return GlobalAbbrevs->getAbbrevs(); 448 } getNumGlobalAbbrevs()449 unsigned getNumGlobalAbbrevs() const { return NumGlobalAbbrevs; } getLocalAbbrevs()450 const NaClBitstreamReader::AbbrevList &getLocalAbbrevs() const { 451 return LocalAbbrevs; 452 } getCodeAbbrev()453 const NaClBitcodeSelectorAbbrev &getCodeAbbrev() const { 454 return CodeAbbrev; 455 } setCodeAbbrev(NaClBitcodeSelectorAbbrev & Abbrev)456 void setCodeAbbrev(NaClBitcodeSelectorAbbrev &Abbrev) { 457 CodeAbbrev = Abbrev; 458 } appendLocalCreate()459 NaClBitCodeAbbrev *appendLocalCreate() { 460 return LocalAbbrevs.appendCreate(); 461 } moveLocalAbbrevToAbbrevList(NaClBitstreamReader::AbbrevList * List)462 void moveLocalAbbrevToAbbrevList(NaClBitstreamReader::AbbrevList *List) { 463 if (List != &LocalAbbrevs) { 464 NaClBitCodeAbbrev *Abbv = LocalAbbrevs.last(); 465 List->append(Abbv); 466 LocalAbbrevs.popLast(); 467 } 468 } 469 470 private: 471 friend class NaClBitstreamCursor; 472 // The global abbreviations associated with this scope. 473 NaClBitstreamReader::BlockInfo *GlobalAbbrevs; 474 // Number of abbreviations when block was entered. Used to limit scope of 475 // CurBlockInfo, since any abbreviation added inside a BlockInfo block 476 // (within this block) must not effect global abbreviations. 477 unsigned NumGlobalAbbrevs; 478 NaClBitstreamReader::AbbrevList LocalAbbrevs; 479 // This is the declared size of code values used for the current block, in 480 // bits. 481 NaClBitcodeSelectorAbbrev CodeAbbrev; 482 }; 483 484 /// This tracks the Block-specific information for each nested block. 485 SmallVector<Block, 8> BlockScope; 486 487 NaClBitstreamCursor(const NaClBitstreamCursor &) = delete; 488 NaClBitstreamCursor &operator=(const NaClBitstreamCursor &) = delete; 489 490 public: NaClBitstreamCursor()491 NaClBitstreamCursor() : ErrHandler(new ErrorHandler(*this)) { init(nullptr); } 492 NaClBitstreamCursor(NaClBitstreamReader & R)493 explicit NaClBitstreamCursor(NaClBitstreamReader &R) 494 : ErrHandler(new ErrorHandler(*this)) { 495 init(&R); 496 } 497 init(NaClBitstreamReader * R)498 void init(NaClBitstreamReader *R) { 499 freeState(); 500 BitStream = R; 501 NextChar = (BitStream == nullptr) ? 0 : BitStream->getInitialAddress(); 502 Size = 0; 503 BitsInCurWord = 0; 504 if (BitStream) { 505 BlockScope.push_back( 506 Block(BitStream->getBlockInfo(naclbitc::TOP_LEVEL_BLOCKID))); 507 } 508 } 509 ~NaClBitstreamCursor()510 ~NaClBitstreamCursor() { freeState(); } 511 freeState()512 void freeState() { 513 while (!BlockScope.empty()) 514 BlockScope.pop_back(); 515 } 516 517 // Replaces the current bitstream error handler with the new 518 // handler. Takes ownership of the new handler and deletes it when 519 // it is no longer needed. setErrorHandler(std::unique_ptr<ErrorHandler> & NewHandler)520 void setErrorHandler(std::unique_ptr<ErrorHandler> &NewHandler) { 521 ErrHandler = std::move(NewHandler); 522 } 523 canSkipToPos(size_t pos)524 bool canSkipToPos(size_t pos) const { 525 // pos can be skipped to if it is a valid address or one byte past the end. 526 return pos == 0 || BitStream->getBitcodeBytes().isValidAddress( 527 static_cast<uint64_t>(pos - 1)); 528 } 529 AtEndOfStream()530 bool AtEndOfStream() { 531 if (BitsInCurWord != 0) 532 return false; 533 if (Size != 0) 534 return Size == NextChar; 535 fillCurWord(); 536 return BitsInCurWord == 0; 537 } 538 539 /// Return the number of bits used to encode an abbrev #. getAbbrevIDWidth()540 unsigned getAbbrevIDWidth() const { 541 return BlockScope.back().getCodeAbbrev().NumBits; 542 } 543 544 /// Return the bit # of the bit we are reading. GetCurrentBitNo()545 uint64_t GetCurrentBitNo() const { 546 return NextChar * CHAR_BIT - BitsInCurWord; 547 } 548 549 /// Converts the given position into the corresponding Error position. getErrorBitNo(uint64_t Position)550 uint64_t getErrorBitNo(uint64_t Position) const { 551 return BitStream->getErrorOffset() * CHAR_BIT + Position; 552 } 553 554 /// Returns the current bit address for reporting errors. getErrorBitNo()555 uint64_t getErrorBitNo() const { return getErrorBitNo(GetCurrentBitNo()); } 556 getBitStreamReader()557 NaClBitstreamReader *getBitStreamReader() { return BitStream; } getBitStreamReader()558 const NaClBitstreamReader *getBitStreamReader() const { return BitStream; } 559 560 /// Returns the current bit address (string) of the bit cursor. getCurrentBitAddress()561 std::string getCurrentBitAddress() const { 562 return naclbitc::getBitAddress(GetCurrentBitNo()); 563 } 564 565 /// Flags that modify the behavior of advance(). 566 enum { 567 /// If this flag is used, the advance() method does not automatically pop 568 /// the block scope when the end of a block is reached. 569 AF_DontPopBlockAtEnd = 1, 570 571 /// If this flag is used, abbrev entries are returned just like normal 572 /// records. 573 AF_DontAutoprocessAbbrevs = 2 574 }; 575 576 /// Advance the current bitstream, returning the next entry in the stream. 577 /// Use the given abbreviation listener (if provided). advance(unsigned Flags,NaClAbbrevListener * Listener)578 NaClBitstreamEntry advance(unsigned Flags, NaClAbbrevListener *Listener) { 579 while (1) { 580 unsigned Code = ReadCode(); 581 if (Code == naclbitc::END_BLOCK) { 582 // Pop the end of the block unless Flags tells us not to. 583 if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd()) 584 return NaClBitstreamEntry::getError(); 585 return NaClBitstreamEntry::getEndBlock(); 586 } 587 588 if (Code == naclbitc::ENTER_SUBBLOCK) 589 return NaClBitstreamEntry::getSubBlock(ReadSubBlockID()); 590 591 if (Code == naclbitc::DEFINE_ABBREV && 592 !(Flags & AF_DontAutoprocessAbbrevs)) { 593 // We read and accumulate abbrev's, the client can't do anything with 594 // them anyway. 595 ReadAbbrevRecord(true, Listener); 596 continue; 597 } 598 599 return NaClBitstreamEntry::getRecord(Code); 600 } 601 } 602 603 /// This is a convenience function for clients that don't expect any 604 /// subblocks. This just skips over them automatically. 605 NaClBitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) { 606 while (1) { 607 // If we found a normal entry, return it. 608 NaClBitstreamEntry Entry = advance(Flags, 0); 609 if (Entry.Kind != NaClBitstreamEntry::SubBlock) 610 return Entry; 611 612 // If we found a sub-block, just skip over it and check the next entry. 613 if (SkipBlock()) 614 return NaClBitstreamEntry::getError(); 615 } 616 } 617 618 /// Returns the starting byte of the word containing BitNo. getStartWordByteForBit(uint64_t BitNo)619 uintptr_t getStartWordByteForBit(uint64_t BitNo) const { 620 return uintptr_t(BitNo / CHAR_BIT) & ~(sizeof(word_t) - 1); 621 } 622 623 /// Returns the index of BitNo within the word it appears in. getWordBitNo(uint64_t BitNo)624 unsigned getWordBitNo(uint64_t BitNo) const { 625 return unsigned(BitNo & (sizeof(word_t) * CHAR_BIT - 1)); 626 } 627 628 /// Returns the ending byte of the word containing BitNo. getEndWordByteForBit(uint64_t BitNo)629 uintptr_t getEndWordByteForBit(uint64_t BitNo) const { 630 return getStartWordByteForBit(BitNo) + 631 (getWordBitNo(BitNo) ? sizeof(word_t) : 0); 632 } 633 634 /// Fills Buffer[Size] using bytes at Address (in the memory object being 635 /// read). Returns number of bytes filled (less than Size if at end of memory 636 /// object). fillBuffer(uint8_t * Buffer,size_t Size,size_t Address)637 uint64_t fillBuffer(uint8_t *Buffer, size_t Size, size_t Address) const { 638 return BitStream->getBitcodeBytes().readBytes(Buffer, Size, Address); 639 } 640 641 /// Reset the stream to the specified bit number. JumpToBit(uint64_t BitNo)642 void JumpToBit(uint64_t BitNo) { 643 const uintptr_t ByteNo = getStartWordByteForBit(BitNo); 644 const unsigned WordBitNo = getWordBitNo(BitNo); 645 if (!canSkipToPos(ByteNo)) 646 reportInvalidJumpToBit(BitNo); 647 648 // Move the cursor to the right word. 649 NextChar = ByteNo; 650 BitsInCurWord = 0; 651 652 // Skip over any bits that are already consumed. 653 if (WordBitNo) 654 Read(WordBitNo); 655 } 656 fillCurWord()657 void fillCurWord() { 658 assert(Size == 0 || NextChar < (unsigned)Size); 659 660 // Read the next word from the stream. 661 uint8_t Array[sizeof(word_t)] = {0}; 662 663 uint64_t BytesRead = fillBuffer(Array, sizeof(Array), NextChar); 664 665 // If we run out of data, stop at the end of the stream. 666 if (BytesRead == 0) { 667 Size = NextChar; 668 return; 669 } 670 671 CurWord = 672 support::endian::read<word_t, support::little, support::unaligned>( 673 Array); 674 NextChar += BytesRead; 675 BitsInCurWord = BytesRead * CHAR_BIT; 676 } 677 Read(unsigned NumBits)678 word_t Read(unsigned NumBits) { 679 static const unsigned BitsInWord = sizeof(word_t) * CHAR_BIT; 680 681 assert(NumBits && NumBits <= BitsInWord && 682 "Cannot return zero or more than BitsInWord bits!"); 683 684 static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f; 685 686 // If the field is fully contained by CurWord, return it quickly. 687 if (BitsInCurWord >= NumBits) { 688 word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits)); 689 690 // Use a mask to avoid undefined behavior. 691 CurWord >>= (NumBits & Mask); 692 693 BitsInCurWord -= NumBits; 694 return R; 695 } 696 697 word_t R = BitsInCurWord ? CurWord : 0; 698 unsigned BitsLeft = NumBits - BitsInCurWord; 699 700 fillCurWord(); 701 702 // If we run out of data, stop at the end of the stream. 703 if (BitsLeft > BitsInCurWord) 704 return 0; 705 706 word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft)); 707 708 // Use a mask to avoid undefined behavior. 709 CurWord >>= (BitsLeft & Mask); 710 711 BitsInCurWord -= BitsLeft; 712 713 R |= R2 << (NumBits - BitsLeft); 714 715 return R; 716 } 717 ReadVBR(unsigned NumBits)718 uint32_t ReadVBR(unsigned NumBits) { 719 uint32_t Piece = Read(NumBits); 720 if ((Piece & (1U << (NumBits - 1))) == 0) 721 return Piece; 722 723 uint32_t Result = 0; 724 unsigned NextBit = 0; 725 while (1) { 726 Result |= (Piece & ((1U << (NumBits - 1)) - 1)) << NextBit; 727 728 if ((Piece & (1U << (NumBits - 1))) == 0) 729 return Result; 730 731 NextBit += NumBits - 1; 732 Piece = Read(NumBits); 733 } 734 } 735 736 // Read a VBR that may have a value up to 64-bits in size. The chunk size of 737 // the VBR must still be <= 32 bits though. ReadVBR64(unsigned NumBits)738 uint64_t ReadVBR64(unsigned NumBits) { 739 uint32_t Piece = Read(NumBits); 740 if ((Piece & (1U << (NumBits - 1))) == 0) 741 return uint64_t(Piece); 742 743 uint64_t Result = 0; 744 unsigned NextBit = 0; 745 while (1) { 746 Result |= uint64_t(Piece & ((1U << (NumBits - 1)) - 1)) << NextBit; 747 748 if ((Piece & (1U << (NumBits - 1))) == 0) 749 return Result; 750 751 NextBit += NumBits - 1; 752 Piece = Read(NumBits); 753 } 754 } 755 756 private: SkipToByteBoundary()757 void SkipToByteBoundary() { 758 unsigned BitsToSkip = BitsInCurWord % CHAR_BIT; 759 if (BitsToSkip) { 760 CurWord >>= BitsToSkip; 761 BitsInCurWord -= BitsToSkip; 762 } 763 } 764 SkipToByteBoundaryIfAligned()765 void SkipToByteBoundaryIfAligned() { 766 if (BitStream->AlignBitcodeRecords) 767 SkipToByteBoundary(); 768 } 769 SkipToFourByteBoundary()770 void SkipToFourByteBoundary() { 771 // If word_t is 64-bits and if we've read less than 32 bits, just dump 772 // the bits we have up to the next 32-bit boundary. 773 if (sizeof(word_t) > 4 && BitsInCurWord >= 32) { 774 CurWord >>= BitsInCurWord - 32; 775 BitsInCurWord = 32; 776 return; 777 } 778 779 BitsInCurWord = 0; 780 } 781 782 public: ReadCode()783 unsigned ReadCode() { 784 const NaClBitcodeSelectorAbbrev &CodeAbbrev = 785 BlockScope.back().getCodeAbbrev(); 786 return CodeAbbrev.IsFixed ? Read(CodeAbbrev.NumBits) 787 : ReadVBR(CodeAbbrev.NumBits); 788 } 789 790 // Block header: 791 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 792 793 /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block. ReadSubBlockID()794 unsigned ReadSubBlockID() { return ReadVBR(naclbitc::BlockIDWidth); } 795 796 /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body 797 /// of this block. If the block record is malformed, return true. SkipBlock()798 bool SkipBlock() { 799 // Read and ignore the codelen value. Since we are skipping this block, we 800 // don't care what code widths are used inside of it. 801 ReadVBR(naclbitc::CodeLenWidth); 802 SkipToFourByteBoundary(); 803 unsigned NumFourBytes = Read(naclbitc::BlockSizeWidth); 804 805 // Check that the block wasn't partially defined, and that the offset isn't 806 // bogus. 807 size_t SkipTo = GetCurrentBitNo() + NumFourBytes * 4 * CHAR_BIT; 808 if (AtEndOfStream() || !canSkipToPos(SkipTo / CHAR_BIT)) 809 return true; 810 811 JumpToBit(SkipTo); 812 return false; 813 } 814 815 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true 816 /// if the block has an error. 817 bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr); 818 ReadBlockEnd()819 bool ReadBlockEnd() { 820 if (BlockScope.empty()) 821 return true; 822 823 // Block tail: 824 // [END_BLOCK, <align4bytes>] 825 SkipToFourByteBoundary(); 826 827 BlockScope.pop_back(); 828 return false; 829 } 830 831 private: 832 //===--------------------------------------------------------------------===// 833 // Record Processing 834 //===--------------------------------------------------------------------===// 835 836 private: 837 // Returns abbreviation encoding associated with Value. 838 NaClBitCodeAbbrevOp::Encoding getEncoding(uint64_t Value); 839 840 void skipAbbreviatedField(const NaClBitCodeAbbrevOp &Op); 841 842 // Reads the next Value using the abbreviation Op. Returns true only 843 // if Op is an array (and sets Value to the number of elements in the 844 // array). 845 inline bool readRecordAbbrevField(const NaClBitCodeAbbrevOp &Op, 846 uint64_t &Value); 847 848 // Reads and returns the next value using the abbreviation Op, 849 // assuming Op appears after an array abbreviation. 850 inline uint64_t readArrayAbbreviatedField(const NaClBitCodeAbbrevOp &Op); 851 852 // Reads the array abbreviation Op, NumArrayElements times, putting 853 // the read values in Vals. 854 inline void readArrayAbbrev(const NaClBitCodeAbbrevOp &Op, 855 unsigned NumArrayElements, 856 SmallVectorImpl<uint64_t> &Vals); 857 858 // Reports that that abbreviation Index is not valid. 859 void reportInvalidAbbrevNumber(unsigned Index) const; 860 861 // Reports that jumping to Bit is not valid. 862 void reportInvalidJumpToBit(uint64_t Bit) const; 863 864 public: 865 /// Return the abbreviation for the specified AbbrevId. getAbbrev(unsigned AbbrevID)866 const NaClBitCodeAbbrev *getAbbrev(unsigned AbbrevID) const { 867 unsigned AbbrevNo = AbbrevID - naclbitc::FIRST_APPLICATION_ABBREV; 868 const Block &CurBlock = BlockScope.back(); 869 const unsigned NumGlobalAbbrevs = CurBlock.getNumGlobalAbbrevs(); 870 if (AbbrevNo < NumGlobalAbbrevs) 871 return CurBlock.getGlobalAbbrevs().getVector()[AbbrevNo]; 872 unsigned LocalAbbrevNo = AbbrevNo - NumGlobalAbbrevs; 873 NaClBitstreamReader::AbbrevListVector LocalAbbrevs = 874 CurBlock.getLocalAbbrevs().getVector(); 875 if (LocalAbbrevNo >= LocalAbbrevs.size()) 876 reportInvalidAbbrevNumber(AbbrevID); 877 return LocalAbbrevs[LocalAbbrevNo]; 878 } 879 880 /// Read the current record and discard it. 881 void skipRecord(unsigned AbbrevID); 882 883 unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals); 884 885 //===--------------------------------------------------------------------===// 886 // Abbrev Processing 887 //===--------------------------------------------------------------------===// 888 // IsLocal indicates where the abbreviation occurs. If it is in the 889 // BlockInfo block, IsLocal is false. In all other cases, IsLocal is 890 // true. 891 void ReadAbbrevRecord(bool IsLocal, NaClAbbrevListener *Listener); 892 893 // Skips over an abbreviation record. Duplicates code of ReadAbbrevRecord, 894 // except that no abbreviation is built. 895 void SkipAbbrevRecord(); 896 897 bool ReadBlockInfoBlock(NaClAbbrevListener *Listener); 898 }; 899 900 } // namespace llvm 901 902 #endif 903