1 //===- ModuleFile.h - Module file description -------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the Module class, which describes a module that has 10 // been loaded from an AST file. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_SERIALIZATION_MODULEFILE_H 15 #define LLVM_CLANG_SERIALIZATION_MODULEFILE_H 16 17 #include "clang/Basic/FileManager.h" 18 #include "clang/Basic/Module.h" 19 #include "clang/Basic/SourceLocation.h" 20 #include "clang/Serialization/ASTBitCodes.h" 21 #include "clang/Serialization/ContinuousRangeMap.h" 22 #include "clang/Serialization/ModuleFileExtension.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/PointerIntPair.h" 25 #include "llvm/ADT/SetVector.h" 26 #include "llvm/ADT/SmallVector.h" 27 #include "llvm/ADT/StringRef.h" 28 #include "llvm/Bitstream/BitstreamReader.h" 29 #include "llvm/Support/Endian.h" 30 #include <cassert> 31 #include <cstdint> 32 #include <memory> 33 #include <string> 34 #include <vector> 35 36 namespace clang { 37 38 namespace serialization { 39 40 /// Specifies the kind of module that has been loaded. 41 enum ModuleKind { 42 /// File is an implicitly-loaded module. 43 MK_ImplicitModule, 44 45 /// File is an explicitly-loaded module. 46 MK_ExplicitModule, 47 48 /// File is a PCH file treated as such. 49 MK_PCH, 50 51 /// File is a PCH file treated as the preamble. 52 MK_Preamble, 53 54 /// File is a PCH file treated as the actual main file. 55 MK_MainFile, 56 57 /// File is from a prebuilt module path. 58 MK_PrebuiltModule 59 }; 60 61 /// The input file that has been loaded from this AST file, along with 62 /// bools indicating whether this was an overridden buffer or if it was 63 /// out-of-date or not-found. 64 class InputFile { 65 enum { 66 Overridden = 1, 67 OutOfDate = 2, 68 NotFound = 3 69 }; 70 llvm::PointerIntPair<const FileEntryRef::MapEntry *, 2, unsigned> Val; 71 72 public: 73 InputFile() = default; 74 75 InputFile(FileEntryRef File, bool isOverridden = false, 76 bool isOutOfDate = false) { 77 assert(!(isOverridden && isOutOfDate) && 78 "an overridden cannot be out-of-date"); 79 unsigned intVal = 0; 80 if (isOverridden) 81 intVal = Overridden; 82 else if (isOutOfDate) 83 intVal = OutOfDate; 84 Val.setPointerAndInt(&File.getMapEntry(), intVal); 85 } 86 getNotFound()87 static InputFile getNotFound() { 88 InputFile File; 89 File.Val.setInt(NotFound); 90 return File; 91 } 92 getFile()93 OptionalFileEntryRefDegradesToFileEntryPtr getFile() const { 94 if (auto *P = Val.getPointer()) 95 return FileEntryRef(*P); 96 return None; 97 } isOverridden()98 bool isOverridden() const { return Val.getInt() == Overridden; } isOutOfDate()99 bool isOutOfDate() const { return Val.getInt() == OutOfDate; } isNotFound()100 bool isNotFound() const { return Val.getInt() == NotFound; } 101 }; 102 103 /// Information about a module that has been loaded by the ASTReader. 104 /// 105 /// Each instance of the Module class corresponds to a single AST file, which 106 /// may be a precompiled header, precompiled preamble, a module, or an AST file 107 /// of some sort loaded as the main file, all of which are specific formulations 108 /// of the general notion of a "module". A module may depend on any number of 109 /// other modules. 110 class ModuleFile { 111 public: ModuleFile(ModuleKind Kind,unsigned Generation)112 ModuleFile(ModuleKind Kind, unsigned Generation) 113 : Kind(Kind), Generation(Generation) {} 114 ~ModuleFile(); 115 116 // === General information === 117 118 /// The index of this module in the list of modules. 119 unsigned Index = 0; 120 121 /// The type of this module. 122 ModuleKind Kind; 123 124 /// The file name of the module file. 125 std::string FileName; 126 127 /// The name of the module. 128 std::string ModuleName; 129 130 /// The base directory of the module. 131 std::string BaseDirectory; 132 getTimestampFilename()133 std::string getTimestampFilename() const { 134 return FileName + ".timestamp"; 135 } 136 137 /// The original source file name that was used to build the 138 /// primary AST file, which may have been modified for 139 /// relocatable-pch support. 140 std::string OriginalSourceFileName; 141 142 /// The actual original source file name that was used to 143 /// build this AST file. 144 std::string ActualOriginalSourceFileName; 145 146 /// The file ID for the original source file that was used to 147 /// build this AST file. 148 FileID OriginalSourceFileID; 149 150 /// The directory that the PCH was originally created in. Used to 151 /// allow resolving headers even after headers+PCH was moved to a new path. 152 std::string OriginalDir; 153 154 std::string ModuleMapPath; 155 156 /// Whether this precompiled header is a relocatable PCH file. 157 bool RelocatablePCH = false; 158 159 /// Whether timestamps are included in this module file. 160 bool HasTimestamps = false; 161 162 /// Whether the top-level module has been read from the AST file. 163 bool DidReadTopLevelSubmodule = false; 164 165 /// The file entry for the module file. 166 OptionalFileEntryRefDegradesToFileEntryPtr File; 167 168 /// The signature of the module file, which may be used instead of the size 169 /// and modification time to identify this particular file. 170 ASTFileSignature Signature; 171 172 /// The signature of the AST block of the module file, this can be used to 173 /// unique module files based on AST contents. 174 ASTFileSignature ASTBlockHash; 175 176 /// Whether this module has been directly imported by the 177 /// user. 178 bool DirectlyImported = false; 179 180 /// The generation of which this module file is a part. 181 unsigned Generation; 182 183 /// The memory buffer that stores the data associated with 184 /// this AST file, owned by the InMemoryModuleCache. 185 llvm::MemoryBuffer *Buffer; 186 187 /// The size of this file, in bits. 188 uint64_t SizeInBits = 0; 189 190 /// The global bit offset (or base) of this module 191 uint64_t GlobalBitOffset = 0; 192 193 /// The bit offset of the AST block of this module. 194 uint64_t ASTBlockStartOffset = 0; 195 196 /// The serialized bitstream data for this file. 197 StringRef Data; 198 199 /// The main bitstream cursor for the main block. 200 llvm::BitstreamCursor Stream; 201 202 /// The source location where the module was explicitly or implicitly 203 /// imported in the local translation unit. 204 /// 205 /// If module A depends on and imports module B, both modules will have the 206 /// same DirectImportLoc, but different ImportLoc (B's ImportLoc will be a 207 /// source location inside module A). 208 /// 209 /// WARNING: This is largely useless. It doesn't tell you when a module was 210 /// made visible, just when the first submodule of that module was imported. 211 SourceLocation DirectImportLoc; 212 213 /// The source location where this module was first imported. 214 SourceLocation ImportLoc; 215 216 /// The first source location in this module. 217 SourceLocation FirstLoc; 218 219 /// The list of extension readers that are attached to this module 220 /// file. 221 std::vector<std::unique_ptr<ModuleFileExtensionReader>> ExtensionReaders; 222 223 /// The module offset map data for this file. If non-empty, the various 224 /// ContinuousRangeMaps described below have not yet been populated. 225 StringRef ModuleOffsetMap; 226 227 // === Input Files === 228 229 /// The cursor to the start of the input-files block. 230 llvm::BitstreamCursor InputFilesCursor; 231 232 /// Offsets for all of the input file entries in the AST file. 233 const llvm::support::unaligned_uint64_t *InputFileOffsets = nullptr; 234 235 /// The input files that have been loaded from this AST file. 236 std::vector<InputFile> InputFilesLoaded; 237 238 // All user input files reside at the index range [0, NumUserInputFiles), and 239 // system input files reside at [NumUserInputFiles, InputFilesLoaded.size()). 240 unsigned NumUserInputFiles = 0; 241 242 /// If non-zero, specifies the time when we last validated input 243 /// files. Zero means we never validated them. 244 /// 245 /// The time is specified in seconds since the start of the Epoch. 246 uint64_t InputFilesValidationTimestamp = 0; 247 248 // === Source Locations === 249 250 /// Cursor used to read source location entries. 251 llvm::BitstreamCursor SLocEntryCursor; 252 253 /// The bit offset to the start of the SOURCE_MANAGER_BLOCK. 254 uint64_t SourceManagerBlockStartOffset = 0; 255 256 /// The number of source location entries in this AST file. 257 unsigned LocalNumSLocEntries = 0; 258 259 /// The base ID in the source manager's view of this module. 260 int SLocEntryBaseID = 0; 261 262 /// The base offset in the source manager's view of this module. 263 unsigned SLocEntryBaseOffset = 0; 264 265 /// Base file offset for the offsets in SLocEntryOffsets. Real file offset 266 /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i]. 267 uint64_t SLocEntryOffsetsBase = 0; 268 269 /// Offsets for all of the source location entries in the 270 /// AST file. 271 const uint32_t *SLocEntryOffsets = nullptr; 272 273 /// SLocEntries that we're going to preload. 274 SmallVector<uint64_t, 4> PreloadSLocEntries; 275 276 /// Remapping table for source locations in this module. 277 ContinuousRangeMap<uint32_t, int, 2> SLocRemap; 278 279 // === Identifiers === 280 281 /// The number of identifiers in this AST file. 282 unsigned LocalNumIdentifiers = 0; 283 284 /// Offsets into the identifier table data. 285 /// 286 /// This array is indexed by the identifier ID (-1), and provides 287 /// the offset into IdentifierTableData where the string data is 288 /// stored. 289 const uint32_t *IdentifierOffsets = nullptr; 290 291 /// Base identifier ID for identifiers local to this module. 292 serialization::IdentID BaseIdentifierID = 0; 293 294 /// Remapping table for identifier IDs in this module. 295 ContinuousRangeMap<uint32_t, int, 2> IdentifierRemap; 296 297 /// Actual data for the on-disk hash table of identifiers. 298 /// 299 /// This pointer points into a memory buffer, where the on-disk hash 300 /// table for identifiers actually lives. 301 const char *IdentifierTableData = nullptr; 302 303 /// A pointer to an on-disk hash table of opaque type 304 /// IdentifierHashTable. 305 void *IdentifierLookupTable = nullptr; 306 307 /// Offsets of identifiers that we're going to preload within 308 /// IdentifierTableData. 309 std::vector<unsigned> PreloadIdentifierOffsets; 310 311 // === Macros === 312 313 /// The cursor to the start of the preprocessor block, which stores 314 /// all of the macro definitions. 315 llvm::BitstreamCursor MacroCursor; 316 317 /// The number of macros in this AST file. 318 unsigned LocalNumMacros = 0; 319 320 /// Base file offset for the offsets in MacroOffsets. Real file offset for 321 /// the entry is MacroOffsetsBase + MacroOffsets[i]. 322 uint64_t MacroOffsetsBase = 0; 323 324 /// Offsets of macros in the preprocessor block. 325 /// 326 /// This array is indexed by the macro ID (-1), and provides 327 /// the offset into the preprocessor block where macro definitions are 328 /// stored. 329 const uint32_t *MacroOffsets = nullptr; 330 331 /// Base macro ID for macros local to this module. 332 serialization::MacroID BaseMacroID = 0; 333 334 /// Remapping table for macro IDs in this module. 335 ContinuousRangeMap<uint32_t, int, 2> MacroRemap; 336 337 /// The offset of the start of the set of defined macros. 338 uint64_t MacroStartOffset = 0; 339 340 // === Detailed PreprocessingRecord === 341 342 /// The cursor to the start of the (optional) detailed preprocessing 343 /// record block. 344 llvm::BitstreamCursor PreprocessorDetailCursor; 345 346 /// The offset of the start of the preprocessor detail cursor. 347 uint64_t PreprocessorDetailStartOffset = 0; 348 349 /// Base preprocessed entity ID for preprocessed entities local to 350 /// this module. 351 serialization::PreprocessedEntityID BasePreprocessedEntityID = 0; 352 353 /// Remapping table for preprocessed entity IDs in this module. 354 ContinuousRangeMap<uint32_t, int, 2> PreprocessedEntityRemap; 355 356 const PPEntityOffset *PreprocessedEntityOffsets = nullptr; 357 unsigned NumPreprocessedEntities = 0; 358 359 /// Base ID for preprocessed skipped ranges local to this module. 360 unsigned BasePreprocessedSkippedRangeID = 0; 361 362 const PPSkippedRange *PreprocessedSkippedRangeOffsets = nullptr; 363 unsigned NumPreprocessedSkippedRanges = 0; 364 365 // === Header search information === 366 367 /// The number of local HeaderFileInfo structures. 368 unsigned LocalNumHeaderFileInfos = 0; 369 370 /// Actual data for the on-disk hash table of header file 371 /// information. 372 /// 373 /// This pointer points into a memory buffer, where the on-disk hash 374 /// table for header file information actually lives. 375 const char *HeaderFileInfoTableData = nullptr; 376 377 /// The on-disk hash table that contains information about each of 378 /// the header files. 379 void *HeaderFileInfoTable = nullptr; 380 381 // === Submodule information === 382 383 /// The number of submodules in this module. 384 unsigned LocalNumSubmodules = 0; 385 386 /// Base submodule ID for submodules local to this module. 387 serialization::SubmoduleID BaseSubmoduleID = 0; 388 389 /// Remapping table for submodule IDs in this module. 390 ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap; 391 392 // === Selectors === 393 394 /// The number of selectors new to this file. 395 /// 396 /// This is the number of entries in SelectorOffsets. 397 unsigned LocalNumSelectors = 0; 398 399 /// Offsets into the selector lookup table's data array 400 /// where each selector resides. 401 const uint32_t *SelectorOffsets = nullptr; 402 403 /// Base selector ID for selectors local to this module. 404 serialization::SelectorID BaseSelectorID = 0; 405 406 /// Remapping table for selector IDs in this module. 407 ContinuousRangeMap<uint32_t, int, 2> SelectorRemap; 408 409 /// A pointer to the character data that comprises the selector table 410 /// 411 /// The SelectorOffsets table refers into this memory. 412 const unsigned char *SelectorLookupTableData = nullptr; 413 414 /// A pointer to an on-disk hash table of opaque type 415 /// ASTSelectorLookupTable. 416 /// 417 /// This hash table provides the IDs of all selectors, and the associated 418 /// instance and factory methods. 419 void *SelectorLookupTable = nullptr; 420 421 // === Declarations === 422 423 /// DeclsCursor - This is a cursor to the start of the DECLTYPES_BLOCK block. 424 /// It has read all the abbreviations at the start of the block and is ready 425 /// to jump around with these in context. 426 llvm::BitstreamCursor DeclsCursor; 427 428 /// The offset to the start of the DECLTYPES_BLOCK block. 429 uint64_t DeclsBlockStartOffset = 0; 430 431 /// The number of declarations in this AST file. 432 unsigned LocalNumDecls = 0; 433 434 /// Offset of each declaration within the bitstream, indexed 435 /// by the declaration ID (-1). 436 const DeclOffset *DeclOffsets = nullptr; 437 438 /// Base declaration ID for declarations local to this module. 439 serialization::DeclID BaseDeclID = 0; 440 441 /// Remapping table for declaration IDs in this module. 442 ContinuousRangeMap<uint32_t, int, 2> DeclRemap; 443 444 /// Mapping from the module files that this module file depends on 445 /// to the base declaration ID for that module as it is understood within this 446 /// module. 447 /// 448 /// This is effectively a reverse global-to-local mapping for declaration 449 /// IDs, so that we can interpret a true global ID (for this translation unit) 450 /// as a local ID (for this module file). 451 llvm::DenseMap<ModuleFile *, serialization::DeclID> GlobalToLocalDeclIDs; 452 453 /// Array of file-level DeclIDs sorted by file. 454 const serialization::DeclID *FileSortedDecls = nullptr; 455 unsigned NumFileSortedDecls = 0; 456 457 /// Array of category list location information within this 458 /// module file, sorted by the definition ID. 459 const serialization::ObjCCategoriesInfo *ObjCCategoriesMap = nullptr; 460 461 /// The number of redeclaration info entries in ObjCCategoriesMap. 462 unsigned LocalNumObjCCategoriesInMap = 0; 463 464 /// The Objective-C category lists for categories known to this 465 /// module. 466 SmallVector<uint64_t, 1> ObjCCategories; 467 468 // === Types === 469 470 /// The number of types in this AST file. 471 unsigned LocalNumTypes = 0; 472 473 /// Offset of each type within the bitstream, indexed by the 474 /// type ID, or the representation of a Type*. 475 const UnderalignedInt64 *TypeOffsets = nullptr; 476 477 /// Base type ID for types local to this module as represented in 478 /// the global type ID space. 479 serialization::TypeID BaseTypeIndex = 0; 480 481 /// Remapping table for type IDs in this module. 482 ContinuousRangeMap<uint32_t, int, 2> TypeRemap; 483 484 // === Miscellaneous === 485 486 /// Diagnostic IDs and their mappings that the user changed. 487 SmallVector<uint64_t, 8> PragmaDiagMappings; 488 489 /// List of modules which depend on this module 490 llvm::SetVector<ModuleFile *> ImportedBy; 491 492 /// List of modules which this module depends on 493 llvm::SetVector<ModuleFile *> Imports; 494 495 /// Determine whether this module was directly imported at 496 /// any point during translation. isDirectlyImported()497 bool isDirectlyImported() const { return DirectlyImported; } 498 499 /// Is this a module file for a module (rather than a PCH or similar). isModule()500 bool isModule() const { 501 return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule || 502 Kind == MK_PrebuiltModule; 503 } 504 505 /// Dump debugging output for this module. 506 void dump(); 507 }; 508 509 } // namespace serialization 510 511 } // namespace clang 512 513 #endif // LLVM_CLANG_SERIALIZATION_MODULEFILE_H 514