1 //===- NaClBitstreamReader.h -----------------------------------*- C++ -*-===// 2 // Low-level bitstream reader interface 3 // 4 // The LLVM Compiler Infrastructure 5 // 6 // This file is distributed under the University of Illinois Open Source 7 // License. See LICENSE.TXT for details. 8 // 9 //===----------------------------------------------------------------------===// 10 // 11 // This header defines the BitstreamReader class. This class can be used to 12 // read an arbitrary bitstream, regardless of its contents. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_BITCODE_NACL_NACLBITSTREAMREADER_H 17 #define LLVM_BITCODE_NACL_NACLBITSTREAMREADER_H 18 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h" 21 #include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h" 22 #include "llvm/Support/Endian.h" 23 #include "llvm/Support/StreamingMemoryObject.h" 24 #include <atomic> 25 #include <climits> 26 #include <mutex> 27 #include <unordered_map> 28 #include <vector> 29 30 namespace llvm { 31 32 class Deserializer; 33 class NaClBitstreamCursor; 34 35 namespace naclbitc { 36 37 /// Returns the Bit as a Byte:BitInByte string. 38 std::string getBitAddress(uint64_t Bit); 39 40 /// Severity levels for reporting errors. 41 enum ErrorLevel { 42 Warning, 43 Error, 44 Fatal 45 }; 46 47 // Basic printing routine to generate the beginning of an error 48 // message. BitPosition is the bit position the error was found. 49 // Level is the severity of the error. 50 raw_ostream &ErrorAt(raw_ostream &Out, ErrorLevel Level, 51 uint64_t BitPosition); 52 53 } // End namespace naclbitc. 54 55 /// This class is used to read from a NaCl bitcode wire format stream, 56 /// maintaining information that is global to decoding the entire file. 57 /// While a file is being read, multiple cursors can be independently 58 /// advanced or skipped around within the file. These are represented by 59 /// the NaClBitstreamCursor class. 60 class NaClBitstreamReader { 61 public: 62 // Models a raw list of abbreviations. 63 static const size_t DefaultAbbrevListSize = 12; 64 using AbbrevListVector = SmallVector<NaClBitCodeAbbrev *, 65 DefaultAbbrevListSize>; 66 67 // Models and maintains a list of abbreviations. In particular, it maintains 68 // updating reference counts of abbreviation operators within the abbreviation 69 // list. 70 class AbbrevList { 71 public: 72 AbbrevList() = default; AbbrevList(const AbbrevList & NewAbbrevs)73 explicit AbbrevList(const AbbrevList &NewAbbrevs) { 74 appendList(NewAbbrevs); 75 } 76 AbbrevList &operator=(const AbbrevList &Rhs) { 77 clear(); 78 appendList(Rhs); 79 return *this; 80 } 81 // Creates a new (empty) abbreviation, appends it to this, and then returns 82 // the new abbreviation. appendCreate()83 NaClBitCodeAbbrev *appendCreate() { 84 NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); 85 Abbrevs.push_back(Abbv); 86 return Abbv; 87 } 88 // Appends the given abbreviation to this. append(NaClBitCodeAbbrev * Abbrv)89 void append(NaClBitCodeAbbrev *Abbrv) { 90 Abbrv->addRef(); 91 Abbrevs.push_back(Abbrv); 92 } 93 // Appends the contents of NewAbbrevs to this. appendList(const AbbrevList & NewAbbrevs)94 void appendList(const AbbrevList &NewAbbrevs) { 95 for (NaClBitCodeAbbrev *Abbrv : NewAbbrevs.Abbrevs) 96 append(Abbrv); 97 } 98 // Returns last abbreviation on list. last()99 NaClBitCodeAbbrev *last() { return Abbrevs.back(); } 100 // Removes the last element of the list. popLast()101 void popLast() { 102 Abbrevs.back()->dropRef(); 103 Abbrevs.pop_back(); 104 } 105 // Empties abbreviation list. clear()106 void clear() { 107 while(!Abbrevs.empty()) 108 popLast(); 109 } 110 // Allow read access to vector defining list. getVector()111 const AbbrevListVector &getVector() const { return Abbrevs; } ~AbbrevList()112 ~AbbrevList() { clear(); } 113 private: 114 AbbrevListVector Abbrevs; 115 }; 116 117 /// This contains information about abbreviations in blocks defined in the 118 /// BLOCKINFO_BLOCK block. These describe global abbreviations that apply to 119 /// all succeeding blocks of the specified ID. 120 class BlockInfo { 121 BlockInfo &operator=(const BlockInfo&) = delete; 122 public: 123 BlockInfo() = default; BlockInfo(unsigned BlockID)124 explicit BlockInfo(unsigned BlockID) 125 : BlockID(BlockID), Abbrevs() {} 126 BlockInfo(const BlockInfo&) = default; getBlockID()127 unsigned getBlockID() const { return BlockID; } setBlockID(unsigned ID)128 void setBlockID(unsigned ID) { BlockID = ID; } getAbbrevs()129 AbbrevList &getAbbrevs() { return Abbrevs; } ~BlockInfo()130 ~BlockInfo() {} 131 private: 132 unsigned BlockID; 133 AbbrevList Abbrevs; 134 }; 135 136 class BlockInfoRecordsMap; 137 using SharedBlockInfoMap = std::shared_ptr<BlockInfoRecordsMap>; 138 139 // Holds the global abbreviations in the BlockInfo block of the bitcode file. 140 // Sharing is used to allow parallel parses. Share by using std::share_ptr's 141 // and std::shared_from_this(). 142 // 143 // Note: The BlockInfo block must be parsed before sharing of the 144 // BlockInfoRecordsMap. Therefore, before changing to a parallel parse, the 145 // BlockInfoRecordsMap must be frozen. Failure to do so, can lead to 146 // unexpected behaviour. 147 // 148 // In practice, this means that only function blocks can be parsed in 149 // parallel. 150 class BlockInfoRecordsMap : 151 public std::enable_shared_from_this<BlockInfoRecordsMap> { 152 friend class NaClBitstreamReader; 153 BlockInfoRecordsMap(const BlockInfoRecordsMap&) = delete; 154 BlockInfoRecordsMap &operator=(const BlockInfoRecordsMap&) = delete; 155 public: 156 using InfosMap = std::unordered_map<unsigned, std::unique_ptr<BlockInfo>>; 157 create()158 static SharedBlockInfoMap create() { 159 return SharedBlockInfoMap(new BlockInfoRecordsMap()); 160 } 161 ~BlockInfoRecordsMap() = default; 162 isFrozen()163 bool isFrozen() const { 164 return IsFrozen.load(); 165 } 166 167 // Returns true if already frozen. freeze()168 bool freeze() { 169 return IsFrozen.exchange(true); 170 } 171 getBlockInfo(unsigned BlockID)172 BlockInfo *getBlockInfo(unsigned BlockID) { 173 auto Pos = KnownInfos.find(BlockID); 174 if (Pos != KnownInfos.end()) 175 return Pos->second.get(); 176 return getOrCreateUnknownBlockInfo(BlockID); 177 } 178 179 // Locks the BlockInfoRecordsMap for the lifetime of the UpdateLock. Used 180 // to allow the parsing of a BlockInfo block, and install global 181 // abbreviations. 182 // 183 // Verifies that the BlockInfoRecordsMap didn't get frozen during the 184 // instance's lifetime as a safety precaution. That is, it checks that no 185 // bitstream reader was created to share the global abbreviations before the 186 // global abbreviations are defined. 187 class UpdateLock { 188 UpdateLock() = delete; 189 UpdateLock(const UpdateLock&) = delete; 190 UpdateLock &operator=(const UpdateLock&) = delete; 191 public: 192 explicit UpdateLock(BlockInfoRecordsMap &BlockInfoRecords); 193 ~UpdateLock(); 194 private: 195 // The BlockInfoRecordsMap to update. 196 BlockInfoRecordsMap &BlockInfoRecords; 197 // The locked mutex from BlockInfoRecordsMap; 198 std::unique_lock<std::mutex> Lock; 199 }; 200 201 private: 202 // The set of known BlockInfo's. This map is prepopulated so that fast 203 // lookup can be performed thread safe (i.e. without using a lock). 204 InfosMap KnownInfos; 205 // The set of unknown BlockInfo's. This map is to handle unknown (and hence, 206 // invalid) PNaCl bitcode files. This map is updated incrementally, and uses 207 // UnknownBlockInfoLock to make it thread safe. 208 InfosMap UnknownInfos; 209 // True if the known BlockInfo blocks are frozen (i.e. the bitstream reader 210 // will ignore the BlockInfo block). 211 std::atomic_bool IsFrozen; 212 // Lock to use to update this data structure. 213 std::mutex UpdateRecordsLock; 214 // Lock to get/create an unknonw block info. 215 std::mutex UnknownBlockInfoLock; 216 217 BlockInfoRecordsMap(); 218 219 BlockInfo *getOrCreateUnknownBlockInfo(unsigned BlockID); 220 }; 221 222 private: 223 friend class NaClBitstreamCursor; 224 225 std::unique_ptr<MemoryObject> BitcodeBytes; 226 227 SharedBlockInfoMap BlockInfoRecords; 228 229 /// \brief Holds the offset of the first byte after the header. 230 size_t InitialAddress; 231 232 // Holds the number of bytes to add to the bitcode position, when reporting 233 // errors. Useful when using parallel parses of function blocks. 234 size_t ErrorOffset = 0; 235 236 // True if filler should be added to byte align records. 237 bool AlignBitcodeRecords = false; 238 NaClBitstreamReader(const NaClBitstreamReader&) = delete; 239 void operator=(const NaClBitstreamReader&) = delete; 240 241 initFromHeader(NaClBitcodeHeader & Header)242 void initFromHeader(NaClBitcodeHeader &Header) { 243 InitialAddress = Header.getHeaderSize(); 244 AlignBitcodeRecords = Header.getAlignBitcodeRecords(); 245 } 246 247 public: 248 /// Read stream from sequence of bytes [Start .. End) after parsing 249 /// the given bitcode header. NaClBitstreamReader(const unsigned char * Start,const unsigned char * End,NaClBitcodeHeader & Header)250 NaClBitstreamReader(const unsigned char *Start, const unsigned char *End, 251 NaClBitcodeHeader &Header) 252 : BitcodeBytes(getNonStreamedMemoryObject(Start, End)), 253 BlockInfoRecords(BlockInfoRecordsMap::create()) { 254 initFromHeader(Header); 255 } 256 257 /// Read stream from Bytes, after parsing the given bitcode header. NaClBitstreamReader(MemoryObject * Bytes,NaClBitcodeHeader & Header)258 NaClBitstreamReader(MemoryObject *Bytes, NaClBitcodeHeader &Header) 259 : BitcodeBytes(Bytes), BlockInfoRecords(BlockInfoRecordsMap::create()) 260 { initFromHeader(Header); } 261 262 /// Read stream from bytes, starting at the given initial address. 263 /// Provides simple API for unit testing. NaClBitstreamReader(MemoryObject * Bytes,size_t InitialAddress)264 NaClBitstreamReader(MemoryObject *Bytes, size_t InitialAddress) 265 : BitcodeBytes(Bytes), BlockInfoRecords(BlockInfoRecordsMap::create()), 266 InitialAddress(InitialAddress) {} 267 268 /// Read stream from sequence of bytes [Start .. End), using the global 269 /// abbreviations of the given bitstream reader. Assumes that [Start .. End) 270 /// is copied from Reader's memory object. NaClBitstreamReader(size_t StartAddress,const unsigned char * Start,const unsigned char * End,NaClBitstreamReader * Reader)271 NaClBitstreamReader(size_t StartAddress, const unsigned char *Start, 272 const unsigned char *End, NaClBitstreamReader *Reader) 273 : BitcodeBytes(getNonStreamedMemoryObject(Start, End)), 274 BlockInfoRecords(Reader->BlockInfoRecords), InitialAddress(0), 275 ErrorOffset(StartAddress) { BlockInfoRecords->freeze(); } 276 277 // Returns the memory object that is being read. getBitcodeBytes()278 MemoryObject &getBitcodeBytes() { return *BitcodeBytes; } 279 ~NaClBitstreamReader()280 ~NaClBitstreamReader() {} 281 282 /// \brief Returns the initial address (after the header) of the input stream. getInitialAddress()283 size_t getInitialAddress() const { 284 return InitialAddress; 285 } 286 287 /// Returns the byte address of the first byte in the bitstream. Used 288 /// for error reporting. getErrorOffset()289 size_t getErrorOffset() const { return ErrorOffset; } 290 291 //===--------------------------------------------------------------------===// 292 // Block Manipulation 293 //===--------------------------------------------------------------------===// 294 getBlockInfo(unsigned BlockID)295 BlockInfo *getBlockInfo(unsigned BlockID) { 296 return BlockInfoRecords->getBlockInfo(BlockID); 297 } 298 }; 299 300 /// When advancing through a bitstream cursor, each advance can discover a few 301 /// different kinds of entries: 302 struct NaClBitstreamEntry { 303 enum { 304 Error, // Malformed bitcode was found. 305 EndBlock, // We've reached the end of the current block, (or the end of the 306 // file, which is treated like a series of EndBlock records. 307 SubBlock, // This is the start of a new subblock of a specific ID. 308 Record // This is a record with a specific AbbrevID. 309 } Kind; 310 311 unsigned ID; 312 getErrorNaClBitstreamEntry313 static NaClBitstreamEntry getError() { 314 NaClBitstreamEntry E; E.Kind = Error; return E; 315 } getEndBlockNaClBitstreamEntry316 static NaClBitstreamEntry getEndBlock() { 317 NaClBitstreamEntry E; E.Kind = EndBlock; return E; 318 } getSubBlockNaClBitstreamEntry319 static NaClBitstreamEntry getSubBlock(unsigned ID) { 320 NaClBitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E; 321 } getRecordNaClBitstreamEntry322 static NaClBitstreamEntry getRecord(unsigned AbbrevID) { 323 NaClBitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E; 324 } 325 }; 326 327 /// Models default view of a bitcode record. 328 typedef SmallVector<uint64_t, 8> NaClBitcodeRecordVector; 329 330 /// Class NaClAbbrevListener is used to allow instances of class 331 /// NaClBitcodeParser to listen to record details when processing 332 /// abbreviations. The major reason for using a listener is that the 333 /// NaCl bitcode reader would require a major rewrite (including the 334 /// introduction of more overhead) if we were to lift abbreviations up 335 /// to the bitcode reader. That is, not only would we have to lift the 336 /// block processing up into the readers (i.e. many blocks in 337 /// NaClBitcodeReader and NaClBitcodeParser), but add many new API's 338 /// to allow the readers to update internals of the bit stream reader 339 /// appropriately. 340 class NaClAbbrevListener { 341 NaClAbbrevListener(const NaClAbbrevListener&) = delete; 342 void operator=(const NaClAbbrevListener&) = delete; 343 public: NaClAbbrevListener()344 NaClAbbrevListener() {} ~NaClAbbrevListener()345 virtual ~NaClAbbrevListener() {} 346 347 /// Called to process the read abbreviation. 348 virtual void ProcessAbbreviation(NaClBitCodeAbbrev *Abbrv, 349 bool IsLocal) = 0; 350 351 /// Called after entering block. NumWords is the number of words 352 /// in the block. 353 virtual void BeginBlockInfoBlock(unsigned NumWords) = 0; 354 355 /// Called if a naclbitc::BLOCKINFO_CODE_SETBID record is found in 356 /// NaClBitstreamCursor::ReadBlockInfoBlock. 357 virtual void SetBID() = 0; 358 359 /// Called just before an EndBlock record is processed by 360 /// NaClBitstreamCursor::ReadBlockInfoBlock 361 virtual void EndBlockInfoBlock() = 0; 362 363 /// The values of the bitcode record associated with the called 364 /// virtual function. 365 NaClBitcodeRecordVector Values; 366 367 /// Start bit for current record being processed in 368 /// NaClBitstreamCursor::ReadBlockInfoBlock. 369 uint64_t StartBit; 370 }; 371 372 /// This represents a position within a bitcode file. There may be multiple 373 /// independent cursors reading within one bitstream, each maintaining their 374 /// own local state. 375 /// 376 /// Unlike iterators, NaClBitstreamCursors are heavy-weight objects 377 /// that should not be passed by value. 378 class NaClBitstreamCursor { 379 public: 380 /// This class handles errors in the bitstream reader. Redirects 381 /// fatal error messages to virtual method Fatal. 382 class ErrorHandler { 383 ErrorHandler(const ErrorHandler &) = delete; 384 ErrorHandler &operator=(const ErrorHandler &) = delete; 385 public: ErrorHandler(NaClBitstreamCursor & Cursor)386 explicit ErrorHandler(NaClBitstreamCursor &Cursor) : Cursor(Cursor) {} 387 LLVM_ATTRIBUTE_NORETURN 388 virtual void Fatal(const std::string &ErrorMessage) const; ~ErrorHandler()389 virtual ~ErrorHandler() {} getCurrentBitNo()390 uint64_t getCurrentBitNo() const { 391 return Cursor.GetCurrentBitNo(); 392 } 393 private: 394 NaClBitstreamCursor &Cursor; 395 }; 396 397 private: 398 friend class Deserializer; 399 NaClBitstreamReader *BitStream; 400 size_t NextChar; 401 // The current error handler for the bitstream reader. 402 std::unique_ptr<ErrorHandler> ErrHandler; 403 404 // The size of the bitcode. 0 if we don't know it yet. 405 size_t Size; 406 407 /// This is the current data we have pulled from the stream but have not 408 /// returned to the client. This is specifically and intentionally defined to 409 /// follow the word size of the host machine for efficiency. We use word_t in 410 /// places that are aware of this to make it perfectly explicit what is going 411 /// on. 412 typedef size_t word_t; 413 word_t CurWord; 414 415 /// This is the number of bits in CurWord that are valid. This 416 /// is always from [0...bits_of(word_t)-1] inclusive. 417 unsigned BitsInCurWord; 418 419 // Data specific to a block being scanned. 420 class Block { 421 public: 422 Block() = delete; 423 Block &operator=(const Block &Rhs) { 424 GlobalAbbrevs = Rhs.GlobalAbbrevs; 425 NumGlobalAbbrevs = Rhs.NumGlobalAbbrevs; 426 LocalAbbrevs = Rhs.LocalAbbrevs; 427 CodeAbbrev = Rhs.CodeAbbrev; 428 return *this; 429 } Block(NaClBitstreamReader::BlockInfo * GlobalAbbrevs,NaClBitcodeSelectorAbbrev & CodeAbbrev)430 Block(NaClBitstreamReader::BlockInfo *GlobalAbbrevs, 431 NaClBitcodeSelectorAbbrev& CodeAbbrev) 432 : GlobalAbbrevs(GlobalAbbrevs), 433 NumGlobalAbbrevs(GlobalAbbrevs->getAbbrevs().getVector().size()), 434 LocalAbbrevs(), CodeAbbrev(CodeAbbrev) {} Block(NaClBitstreamReader::BlockInfo * GlobalAbbrevs)435 Block(NaClBitstreamReader::BlockInfo *GlobalAbbrevs) 436 : GlobalAbbrevs(GlobalAbbrevs), 437 NumGlobalAbbrevs(GlobalAbbrevs->getAbbrevs().getVector().size()), 438 LocalAbbrevs(), CodeAbbrev() {} 439 ~Block() = default; getGlobalAbbrevs()440 const NaClBitstreamReader::AbbrevList &getGlobalAbbrevs() const { 441 return GlobalAbbrevs->getAbbrevs(); 442 } getNumGlobalAbbrevs()443 unsigned getNumGlobalAbbrevs() const { return NumGlobalAbbrevs; } getLocalAbbrevs()444 const NaClBitstreamReader::AbbrevList &getLocalAbbrevs() const { 445 return LocalAbbrevs; 446 } getCodeAbbrev()447 const NaClBitcodeSelectorAbbrev &getCodeAbbrev() const { 448 return CodeAbbrev; 449 } setCodeAbbrev(NaClBitcodeSelectorAbbrev & Abbrev)450 void setCodeAbbrev(NaClBitcodeSelectorAbbrev &Abbrev) { 451 CodeAbbrev = Abbrev; 452 } appendLocalCreate()453 NaClBitCodeAbbrev *appendLocalCreate() { 454 return LocalAbbrevs.appendCreate(); 455 } moveLocalAbbrevToAbbrevList(NaClBitstreamReader::AbbrevList * List)456 void moveLocalAbbrevToAbbrevList(NaClBitstreamReader::AbbrevList *List) { 457 if (List != &LocalAbbrevs) { 458 NaClBitCodeAbbrev *Abbv = LocalAbbrevs.last(); 459 List->append(Abbv); 460 LocalAbbrevs.popLast(); 461 } 462 } 463 private: 464 friend class NaClBitstreamCursor; 465 // The global abbreviations associated with this scope. 466 NaClBitstreamReader::BlockInfo *GlobalAbbrevs; 467 // Number of abbreviations when block was entered. Used to limit scope of 468 // CurBlockInfo, since any abbreviation added inside a BlockInfo block 469 // (within this block) must not effect global abbreviations. 470 unsigned NumGlobalAbbrevs; 471 NaClBitstreamReader::AbbrevList LocalAbbrevs; 472 // This is the declared size of code values used for the current block, in 473 // bits. 474 NaClBitcodeSelectorAbbrev CodeAbbrev; 475 }; 476 477 /// This tracks the Block-specific information for each nested block. 478 SmallVector<Block, 8> BlockScope; 479 480 NaClBitstreamCursor(const NaClBitstreamCursor &) = delete; 481 NaClBitstreamCursor &operator=(const NaClBitstreamCursor &) = delete; 482 483 public: NaClBitstreamCursor()484 NaClBitstreamCursor() : ErrHandler(new ErrorHandler(*this)) { 485 init(nullptr); 486 } 487 NaClBitstreamCursor(NaClBitstreamReader & R)488 explicit NaClBitstreamCursor(NaClBitstreamReader &R) 489 : ErrHandler(new ErrorHandler(*this)) { init(&R); } 490 init(NaClBitstreamReader * R)491 void init(NaClBitstreamReader *R) { 492 freeState(); 493 BitStream = R; 494 NextChar = (BitStream == nullptr) ? 0 : BitStream->getInitialAddress(); 495 Size = 0; 496 BitsInCurWord = 0; 497 if (BitStream) { 498 BlockScope.push_back( 499 Block(BitStream->getBlockInfo(naclbitc::TOP_LEVEL_BLOCKID))); 500 } 501 } 502 ~NaClBitstreamCursor()503 ~NaClBitstreamCursor() { 504 freeState(); 505 } 506 freeState()507 void freeState() { 508 while (!BlockScope.empty()) 509 BlockScope.pop_back(); 510 } 511 512 // Replaces the current bitstream error handler with the new 513 // handler. Takes ownership of the new handler and deletes it when 514 // it is no longer needed. setErrorHandler(std::unique_ptr<ErrorHandler> & NewHandler)515 void setErrorHandler(std::unique_ptr<ErrorHandler> &NewHandler) { 516 ErrHandler = std::move(NewHandler); 517 } 518 canSkipToPos(size_t pos)519 bool canSkipToPos(size_t pos) const { 520 // pos can be skipped to if it is a valid address or one byte past the end. 521 return pos == 0 || BitStream->getBitcodeBytes().isValidAddress( 522 static_cast<uint64_t>(pos - 1)); 523 } 524 AtEndOfStream()525 bool AtEndOfStream() { 526 if (BitsInCurWord != 0) 527 return false; 528 if (Size != 0) 529 return Size == NextChar; 530 fillCurWord(); 531 return BitsInCurWord == 0; 532 } 533 534 /// Return the number of bits used to encode an abbrev #. getAbbrevIDWidth()535 unsigned getAbbrevIDWidth() const { 536 return BlockScope.back().getCodeAbbrev().NumBits; 537 } 538 539 /// Return the bit # of the bit we are reading. GetCurrentBitNo()540 uint64_t GetCurrentBitNo() const { 541 return NextChar*CHAR_BIT - BitsInCurWord; 542 } 543 544 /// Converts the given position into the corresponding Error position. getErrorBitNo(uint64_t Position)545 uint64_t getErrorBitNo(uint64_t Position) const { 546 return BitStream->getErrorOffset() * CHAR_BIT + Position; 547 } 548 549 /// Returns the current bit address for reporting errors. getErrorBitNo()550 uint64_t getErrorBitNo() const { 551 return getErrorBitNo(GetCurrentBitNo()); 552 } 553 getBitStreamReader()554 NaClBitstreamReader *getBitStreamReader() { 555 return BitStream; 556 } getBitStreamReader()557 const NaClBitstreamReader *getBitStreamReader() const { 558 return BitStream; 559 } 560 561 /// Returns the current bit address (string) of the bit cursor. getCurrentBitAddress()562 std::string getCurrentBitAddress() const { 563 return naclbitc::getBitAddress(GetCurrentBitNo()); 564 } 565 566 /// Flags that modify the behavior of advance(). 567 enum { 568 /// If this flag is used, the advance() method does not automatically pop 569 /// the block scope when the end of a block is reached. 570 AF_DontPopBlockAtEnd = 1, 571 572 /// If this flag is used, abbrev entries are returned just like normal 573 /// records. 574 AF_DontAutoprocessAbbrevs = 2 575 }; 576 577 /// Advance the current bitstream, returning the next entry in the stream. 578 /// Use the given abbreviation listener (if provided). advance(unsigned Flags,NaClAbbrevListener * Listener)579 NaClBitstreamEntry advance(unsigned Flags, NaClAbbrevListener *Listener) { 580 while (1) { 581 unsigned Code = ReadCode(); 582 if (Code == naclbitc::END_BLOCK) { 583 // Pop the end of the block unless Flags tells us not to. 584 if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd()) 585 return NaClBitstreamEntry::getError(); 586 return NaClBitstreamEntry::getEndBlock(); 587 } 588 589 if (Code == naclbitc::ENTER_SUBBLOCK) 590 return NaClBitstreamEntry::getSubBlock(ReadSubBlockID()); 591 592 if (Code == naclbitc::DEFINE_ABBREV && 593 !(Flags & AF_DontAutoprocessAbbrevs)) { 594 // We read and accumulate abbrev's, the client can't do anything with 595 // them anyway. 596 ReadAbbrevRecord(true, Listener); 597 continue; 598 } 599 600 return NaClBitstreamEntry::getRecord(Code); 601 } 602 } 603 604 /// This is a convenience function for clients that don't expect any 605 /// subblocks. This just skips over them automatically. 606 NaClBitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) { 607 while (1) { 608 // If we found a normal entry, return it. 609 NaClBitstreamEntry Entry = advance(Flags, 0); 610 if (Entry.Kind != NaClBitstreamEntry::SubBlock) 611 return Entry; 612 613 // If we found a sub-block, just skip over it and check the next entry. 614 if (SkipBlock()) 615 return NaClBitstreamEntry::getError(); 616 } 617 } 618 619 /// Returns the starting byte of the word containing BitNo. getStartWordByteForBit(uint64_t BitNo)620 uintptr_t getStartWordByteForBit(uint64_t BitNo) const { 621 return uintptr_t(BitNo/CHAR_BIT) & ~(sizeof(word_t)-1); 622 } 623 624 /// Returns the index of BitNo within the word it appears in. getWordBitNo(uint64_t BitNo)625 unsigned getWordBitNo(uint64_t BitNo) const { 626 return unsigned(BitNo & (sizeof(word_t)*CHAR_BIT-1)); 627 } 628 629 /// Returns the ending byte of the word containing BitNo. getEndWordByteForBit(uint64_t BitNo)630 uintptr_t getEndWordByteForBit(uint64_t BitNo) const { 631 return getStartWordByteForBit(BitNo) + 632 (getWordBitNo(BitNo) 633 ? sizeof(word_t) 634 : 0); 635 } 636 637 /// Fills Buffer[Size] using bytes at Address (in the memory object being 638 /// read). Returns number of bytes filled (less than Size if at end of memory 639 /// object). fillBuffer(uint8_t * Buffer,size_t Size,size_t Address)640 uint64_t fillBuffer(uint8_t *Buffer, size_t Size, size_t Address) const { 641 return BitStream->getBitcodeBytes().readBytes(Buffer, Size, Address); 642 } 643 644 /// Reset the stream to the specified bit number. JumpToBit(uint64_t BitNo)645 void JumpToBit(uint64_t BitNo) { 646 const uintptr_t ByteNo = getStartWordByteForBit(BitNo); 647 const unsigned WordBitNo = getWordBitNo(BitNo); 648 if (!canSkipToPos(ByteNo)) 649 reportInvalidJumpToBit(BitNo); 650 651 // Move the cursor to the right word. 652 NextChar = ByteNo; 653 BitsInCurWord = 0; 654 655 // Skip over any bits that are already consumed. 656 if (WordBitNo) 657 Read(WordBitNo); 658 } 659 fillCurWord()660 void fillCurWord() { 661 assert(Size == 0 || NextChar < (unsigned)Size); 662 663 // Read the next word from the stream. 664 uint8_t Array[sizeof(word_t)] = {0}; 665 666 uint64_t BytesRead = fillBuffer(Array, sizeof(Array), NextChar); 667 668 // If we run out of data, stop at the end of the stream. 669 if (BytesRead == 0) { 670 Size = NextChar; 671 return; 672 } 673 674 CurWord = 675 support::endian::read<word_t, support::little, support::unaligned>( 676 Array); 677 NextChar += BytesRead; 678 BitsInCurWord = BytesRead * CHAR_BIT; 679 } 680 Read(unsigned NumBits)681 word_t Read(unsigned NumBits) { 682 static const unsigned BitsInWord = sizeof(word_t) * CHAR_BIT; 683 684 assert(NumBits && NumBits <= BitsInWord && 685 "Cannot return zero or more than BitsInWord bits!"); 686 687 static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f; 688 689 // If the field is fully contained by CurWord, return it quickly. 690 if (BitsInCurWord >= NumBits) { 691 word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits)); 692 693 // Use a mask to avoid undefined behavior. 694 CurWord >>= (NumBits & Mask); 695 696 BitsInCurWord -= NumBits; 697 return R; 698 } 699 700 word_t R = BitsInCurWord ? CurWord : 0; 701 unsigned BitsLeft = NumBits - BitsInCurWord; 702 703 fillCurWord(); 704 705 // If we run out of data, stop at the end of the stream. 706 if (BitsLeft > BitsInCurWord) 707 return 0; 708 709 word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft)); 710 711 // Use a mask to avoid undefined behavior. 712 CurWord >>= (BitsLeft & Mask); 713 714 BitsInCurWord -= BitsLeft; 715 716 R |= R2 << (NumBits - BitsLeft); 717 718 return R; 719 } 720 ReadVBR(unsigned NumBits)721 uint32_t ReadVBR(unsigned NumBits) { 722 uint32_t Piece = Read(NumBits); 723 if ((Piece & (1U << (NumBits-1))) == 0) 724 return Piece; 725 726 uint32_t Result = 0; 727 unsigned NextBit = 0; 728 while (1) { 729 Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; 730 731 if ((Piece & (1U << (NumBits-1))) == 0) 732 return Result; 733 734 NextBit += NumBits-1; 735 Piece = Read(NumBits); 736 } 737 } 738 739 // Read a VBR that may have a value up to 64-bits in size. The chunk size of 740 // the VBR must still be <= 32 bits though. ReadVBR64(unsigned NumBits)741 uint64_t ReadVBR64(unsigned NumBits) { 742 uint32_t Piece = Read(NumBits); 743 if ((Piece & (1U << (NumBits-1))) == 0) 744 return uint64_t(Piece); 745 746 uint64_t Result = 0; 747 unsigned NextBit = 0; 748 while (1) { 749 Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit; 750 751 if ((Piece & (1U << (NumBits-1))) == 0) 752 return Result; 753 754 NextBit += NumBits-1; 755 Piece = Read(NumBits); 756 } 757 } 758 759 private: SkipToByteBoundary()760 void SkipToByteBoundary() { 761 unsigned BitsToSkip = BitsInCurWord % CHAR_BIT; 762 if (BitsToSkip) { 763 CurWord >>= BitsToSkip; 764 BitsInCurWord -= BitsToSkip; 765 } 766 } 767 SkipToByteBoundaryIfAligned()768 void SkipToByteBoundaryIfAligned() { 769 if (BitStream->AlignBitcodeRecords) 770 SkipToByteBoundary(); 771 } 772 SkipToFourByteBoundary()773 void SkipToFourByteBoundary() { 774 // If word_t is 64-bits and if we've read less than 32 bits, just dump 775 // the bits we have up to the next 32-bit boundary. 776 if (sizeof(word_t) > 4 && 777 BitsInCurWord >= 32) { 778 CurWord >>= BitsInCurWord-32; 779 BitsInCurWord = 32; 780 return; 781 } 782 783 BitsInCurWord = 0; 784 } 785 public: 786 ReadCode()787 unsigned ReadCode() { 788 const NaClBitcodeSelectorAbbrev &CodeAbbrev = 789 BlockScope.back().getCodeAbbrev(); 790 return CodeAbbrev.IsFixed 791 ? Read(CodeAbbrev.NumBits) 792 : ReadVBR(CodeAbbrev.NumBits); 793 } 794 795 // Block header: 796 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 797 798 /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block. ReadSubBlockID()799 unsigned ReadSubBlockID() { 800 return ReadVBR(naclbitc::BlockIDWidth); 801 } 802 803 /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body 804 /// of this block. If the block record is malformed, return true. SkipBlock()805 bool SkipBlock() { 806 // Read and ignore the codelen value. Since we are skipping this block, we 807 // don't care what code widths are used inside of it. 808 ReadVBR(naclbitc::CodeLenWidth); 809 SkipToFourByteBoundary(); 810 unsigned NumFourBytes = Read(naclbitc::BlockSizeWidth); 811 812 // Check that the block wasn't partially defined, and that the offset isn't 813 // bogus. 814 size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*CHAR_BIT; 815 if (AtEndOfStream() || !canSkipToPos(SkipTo/CHAR_BIT)) 816 return true; 817 818 JumpToBit(SkipTo); 819 return false; 820 } 821 822 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true 823 /// if the block has an error. 824 bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr); 825 ReadBlockEnd()826 bool ReadBlockEnd() { 827 if (BlockScope.empty()) return true; 828 829 // Block tail: 830 // [END_BLOCK, <align4bytes>] 831 SkipToFourByteBoundary(); 832 833 BlockScope.pop_back(); 834 return false; 835 } 836 837 private: 838 839 //===--------------------------------------------------------------------===// 840 // Record Processing 841 //===--------------------------------------------------------------------===// 842 843 private: 844 // Returns abbreviation encoding associated with Value. 845 NaClBitCodeAbbrevOp::Encoding getEncoding(uint64_t Value); 846 847 void skipAbbreviatedField(const NaClBitCodeAbbrevOp &Op); 848 849 // Reads the next Value using the abbreviation Op. Returns true only 850 // if Op is an array (and sets Value to the number of elements in the 851 // array). 852 inline bool readRecordAbbrevField(const NaClBitCodeAbbrevOp &Op, 853 uint64_t &Value); 854 855 // Reads and returns the next value using the abbreviation Op, 856 // assuming Op appears after an array abbreviation. 857 inline uint64_t readArrayAbbreviatedField(const NaClBitCodeAbbrevOp &Op); 858 859 // Reads the array abbreviation Op, NumArrayElements times, putting 860 // the read values in Vals. 861 inline void readArrayAbbrev(const NaClBitCodeAbbrevOp &Op, 862 unsigned NumArrayElements, 863 SmallVectorImpl<uint64_t> &Vals); 864 865 // Reports that that abbreviation Index is not valid. 866 void reportInvalidAbbrevNumber(unsigned Index) const; 867 868 // Reports that jumping to Bit is not valid. 869 void reportInvalidJumpToBit(uint64_t Bit) const; 870 871 public: 872 873 /// Return the abbreviation for the specified AbbrevId. getAbbrev(unsigned AbbrevID)874 const NaClBitCodeAbbrev *getAbbrev(unsigned AbbrevID) const { 875 unsigned AbbrevNo = AbbrevID-naclbitc::FIRST_APPLICATION_ABBREV; 876 const Block &CurBlock = BlockScope.back(); 877 const unsigned NumGlobalAbbrevs = CurBlock.getNumGlobalAbbrevs(); 878 if (AbbrevNo < NumGlobalAbbrevs) 879 return CurBlock.getGlobalAbbrevs().getVector()[AbbrevNo]; 880 unsigned LocalAbbrevNo = AbbrevNo - NumGlobalAbbrevs; 881 NaClBitstreamReader::AbbrevListVector 882 LocalAbbrevs = CurBlock.getLocalAbbrevs().getVector(); 883 if (LocalAbbrevNo >= LocalAbbrevs.size()) 884 reportInvalidAbbrevNumber(AbbrevID); 885 return LocalAbbrevs[LocalAbbrevNo]; 886 } 887 888 /// Read the current record and discard it. 889 void skipRecord(unsigned AbbrevID); 890 891 unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals); 892 893 //===--------------------------------------------------------------------===// 894 // Abbrev Processing 895 //===--------------------------------------------------------------------===// 896 // IsLocal indicates where the abbreviation occurs. If it is in the 897 // BlockInfo block, IsLocal is false. In all other cases, IsLocal is 898 // true. 899 void ReadAbbrevRecord(bool IsLocal, 900 NaClAbbrevListener *Listener); 901 902 // Skips over an abbreviation record. Duplicates code of ReadAbbrevRecord, 903 // except that no abbreviation is built. 904 void SkipAbbrevRecord(); 905 906 bool ReadBlockInfoBlock(NaClAbbrevListener *Listener); 907 }; 908 909 } // End llvm namespace 910 911 #endif 912