1 //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This header defines interfaces to read LLVM bitcode files/streams. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_BITCODE_BITCODEREADER_H 14 #define LLVM_BITCODE_BITCODEREADER_H 15 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/Bitstream/BitCodes.h" 19 #include "llvm/IR/ModuleSummaryIndex.h" 20 #include "llvm/Support/Endian.h" 21 #include "llvm/Support/Error.h" 22 #include "llvm/Support/ErrorOr.h" 23 #include "llvm/Support/MemoryBuffer.h" 24 #include <cstdint> 25 #include <memory> 26 #include <string> 27 #include <system_error> 28 #include <vector> 29 namespace llvm { 30 31 class LLVMContext; 32 class Module; 33 34 // These functions are for converting Expected/Error values to 35 // ErrorOr/std::error_code for compatibility with legacy clients. FIXME: 36 // Remove these functions once no longer needed by the C and libLTO APIs. 37 38 std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err); 39 40 template <typename T> expectedToErrorOrAndEmitErrors(LLVMContext & Ctx,Expected<T> Val)41 ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) { 42 if (!Val) 43 return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError()); 44 return std::move(*Val); 45 } 46 47 struct BitcodeFileContents; 48 49 /// Basic information extracted from a bitcode module to be used for LTO. 50 struct BitcodeLTOInfo { 51 bool IsThinLTO; 52 bool HasSummary; 53 bool EnableSplitLTOUnit; 54 }; 55 56 /// Represents a module in a bitcode file. 57 class BitcodeModule { 58 // This covers the identification (if present) and module blocks. 59 ArrayRef<uint8_t> Buffer; 60 StringRef ModuleIdentifier; 61 62 // The string table used to interpret this module. 63 StringRef Strtab; 64 65 // The bitstream location of the IDENTIFICATION_BLOCK. 66 uint64_t IdentificationBit; 67 68 // The bitstream location of this module's MODULE_BLOCK. 69 uint64_t ModuleBit; 70 BitcodeModule(ArrayRef<uint8_t> Buffer,StringRef ModuleIdentifier,uint64_t IdentificationBit,uint64_t ModuleBit)71 BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier, 72 uint64_t IdentificationBit, uint64_t ModuleBit) 73 : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier), 74 IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {} 75 76 // Calls the ctor. 77 friend Expected<BitcodeFileContents> 78 getBitcodeFileContents(MemoryBufferRef Buffer); 79 80 Expected<std::unique_ptr<Module>> getModuleImpl(LLVMContext &Context, 81 bool MaterializeAll, 82 bool ShouldLazyLoadMetadata, 83 bool IsImporting); 84 85 public: getBuffer()86 StringRef getBuffer() const { 87 return StringRef((const char *)Buffer.begin(), Buffer.size()); 88 } 89 getStrtab()90 StringRef getStrtab() const { return Strtab; } 91 getModuleIdentifier()92 StringRef getModuleIdentifier() const { return ModuleIdentifier; } 93 94 /// Read the bitcode module and prepare for lazy deserialization of function 95 /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well. 96 /// If IsImporting is true, this module is being parsed for ThinLTO 97 /// importing into another module. 98 Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context, 99 bool ShouldLazyLoadMetadata, 100 bool IsImporting); 101 102 /// Read the entire bitcode module and return it. 103 Expected<std::unique_ptr<Module>> parseModule(LLVMContext &Context); 104 105 /// Returns information about the module to be used for LTO: whether to 106 /// compile with ThinLTO, and whether it has a summary. 107 Expected<BitcodeLTOInfo> getLTOInfo(); 108 109 /// Parse the specified bitcode buffer, returning the module summary index. 110 Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary(); 111 112 /// Parse the specified bitcode buffer and merge its module summary index 113 /// into CombinedIndex. 114 Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, 115 uint64_t ModuleId); 116 }; 117 118 struct BitcodeFileContents { 119 std::vector<BitcodeModule> Mods; 120 StringRef Symtab, StrtabForSymtab; 121 }; 122 123 /// Returns the contents of a bitcode file. This includes the raw contents of 124 /// the symbol table embedded in the bitcode file. Clients which require a 125 /// symbol table should prefer to use irsymtab::read instead of this function 126 /// because it creates a reader for the irsymtab and handles upgrading bitcode 127 /// files without a symbol table or with an old symbol table. 128 Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer); 129 130 /// Returns a list of modules in the specified bitcode buffer. 131 Expected<std::vector<BitcodeModule>> 132 getBitcodeModuleList(MemoryBufferRef Buffer); 133 134 /// Read the header of the specified bitcode buffer and prepare for lazy 135 /// deserialization of function bodies. If ShouldLazyLoadMetadata is true, 136 /// lazily load metadata as well. If IsImporting is true, this module is 137 /// being parsed for ThinLTO importing into another module. 138 Expected<std::unique_ptr<Module>> 139 getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context, 140 bool ShouldLazyLoadMetadata = false, 141 bool IsImporting = false); 142 143 /// Like getLazyBitcodeModule, except that the module takes ownership of 144 /// the memory buffer if successful. If successful, this moves Buffer. On 145 /// error, this *does not* move Buffer. If IsImporting is true, this module is 146 /// being parsed for ThinLTO importing into another module. 147 Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule( 148 std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context, 149 bool ShouldLazyLoadMetadata = false, bool IsImporting = false); 150 151 /// Read the header of the specified bitcode buffer and extract just the 152 /// triple information. If successful, this returns a string. On error, this 153 /// returns "". 154 Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer); 155 156 /// Return true if \p Buffer contains a bitcode file with ObjC code (category 157 /// or class) in it. 158 Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer); 159 160 /// Read the header of the specified bitcode buffer and extract just the 161 /// producer string information. If successful, this returns a string. On 162 /// error, this returns "". 163 Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer); 164 165 /// Read the specified bitcode file, returning the module. 166 Expected<std::unique_ptr<Module>> parseBitcodeFile(MemoryBufferRef Buffer, 167 LLVMContext &Context); 168 169 /// Returns LTO information for the specified bitcode file. 170 Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer); 171 172 /// Parse the specified bitcode buffer, returning the module summary index. 173 Expected<std::unique_ptr<ModuleSummaryIndex>> 174 getModuleSummaryIndex(MemoryBufferRef Buffer); 175 176 /// Parse the specified bitcode buffer and merge the index into CombinedIndex. 177 Error readModuleSummaryIndex(MemoryBufferRef Buffer, 178 ModuleSummaryIndex &CombinedIndex, 179 uint64_t ModuleId); 180 181 /// Parse the module summary index out of an IR file and return the module 182 /// summary index object if found, or an empty summary if not. If Path refers 183 /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then 184 /// this function will return nullptr. 185 Expected<std::unique_ptr<ModuleSummaryIndex>> 186 getModuleSummaryIndexForFile(StringRef Path, 187 bool IgnoreEmptyThinLTOIndexFile = false); 188 189 /// isBitcodeWrapper - Return true if the given bytes are the magic bytes 190 /// for an LLVM IR bitcode wrapper. isBitcodeWrapper(const unsigned char * BufPtr,const unsigned char * BufEnd)191 inline bool isBitcodeWrapper(const unsigned char *BufPtr, 192 const unsigned char *BufEnd) { 193 // See if you can find the hidden message in the magic bytes :-). 194 // (Hint: it's a little-endian encoding.) 195 return BufPtr != BufEnd && 196 BufPtr[0] == 0xDE && 197 BufPtr[1] == 0xC0 && 198 BufPtr[2] == 0x17 && 199 BufPtr[3] == 0x0B; 200 } 201 202 /// isRawBitcode - Return true if the given bytes are the magic bytes for 203 /// raw LLVM IR bitcode (without a wrapper). isRawBitcode(const unsigned char * BufPtr,const unsigned char * BufEnd)204 inline bool isRawBitcode(const unsigned char *BufPtr, 205 const unsigned char *BufEnd) { 206 // These bytes sort of have a hidden message, but it's not in 207 // little-endian this time, and it's a little redundant. 208 return BufPtr != BufEnd && 209 BufPtr[0] == 'B' && 210 BufPtr[1] == 'C' && 211 BufPtr[2] == 0xc0 && 212 BufPtr[3] == 0xde; 213 } 214 215 /// isBitcode - Return true if the given bytes are the magic bytes for 216 /// LLVM IR bitcode, either with or without a wrapper. isBitcode(const unsigned char * BufPtr,const unsigned char * BufEnd)217 inline bool isBitcode(const unsigned char *BufPtr, 218 const unsigned char *BufEnd) { 219 return isBitcodeWrapper(BufPtr, BufEnd) || 220 isRawBitcode(BufPtr, BufEnd); 221 } 222 223 /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special 224 /// header for padding or other reasons. The format of this header is: 225 /// 226 /// struct bc_header { 227 /// uint32_t Magic; // 0x0B17C0DE 228 /// uint32_t Version; // Version, currently always 0. 229 /// uint32_t BitcodeOffset; // Offset to traditional bitcode file. 230 /// uint32_t BitcodeSize; // Size of traditional bitcode file. 231 /// ... potentially other gunk ... 232 /// }; 233 /// 234 /// This function is called when we find a file with a matching magic number. 235 /// In this case, skip down to the subsection of the file that is actually a 236 /// BC file. 237 /// If 'VerifyBufferSize' is true, check that the buffer is large enough to 238 /// contain the whole bitcode file. SkipBitcodeWrapperHeader(const unsigned char * & BufPtr,const unsigned char * & BufEnd,bool VerifyBufferSize)239 inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr, 240 const unsigned char *&BufEnd, 241 bool VerifyBufferSize) { 242 // Must contain the offset and size field! 243 if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4) 244 return true; 245 246 unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]); 247 unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]); 248 uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size; 249 250 // Verify that Offset+Size fits in the file. 251 if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr)) 252 return true; 253 BufPtr += Offset; 254 BufEnd = BufPtr+Size; 255 return false; 256 } 257 258 const std::error_category &BitcodeErrorCategory(); 259 enum class BitcodeError { CorruptedBitcode = 1 }; make_error_code(BitcodeError E)260 inline std::error_code make_error_code(BitcodeError E) { 261 return std::error_code(static_cast<int>(E), BitcodeErrorCategory()); 262 } 263 264 } // end namespace llvm 265 266 namespace std { 267 268 template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {}; 269 270 } // end namespace std 271 272 #endif // LLVM_BITCODE_BITCODEREADER_H 273