1 //===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief Defines the clang::Preprocessor interface. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H 16 #define LLVM_CLANG_LEX_PREPROCESSOR_H 17 18 #include "clang/Basic/Builtins.h" 19 #include "clang/Basic/Diagnostic.h" 20 #include "clang/Basic/IdentifierTable.h" 21 #include "clang/Basic/SourceLocation.h" 22 #include "clang/Lex/Lexer.h" 23 #include "clang/Lex/MacroInfo.h" 24 #include "clang/Lex/ModuleMap.h" 25 #include "clang/Lex/PPCallbacks.h" 26 #include "clang/Lex/PTHLexer.h" 27 #include "clang/Lex/TokenLexer.h" 28 #include "llvm/ADT/ArrayRef.h" 29 #include "llvm/ADT/DenseMap.h" 30 #include "llvm/ADT/IntrusiveRefCntPtr.h" 31 #include "llvm/ADT/SmallPtrSet.h" 32 #include "llvm/ADT/SmallVector.h" 33 #include "llvm/ADT/TinyPtrVector.h" 34 #include "llvm/Support/Allocator.h" 35 #include "llvm/Support/Registry.h" 36 #include <memory> 37 #include <vector> 38 39 namespace llvm { 40 template<unsigned InternalLen> class SmallString; 41 } 42 43 namespace clang { 44 45 class SourceManager; 46 class ExternalPreprocessorSource; 47 class FileManager; 48 class FileEntry; 49 class HeaderSearch; 50 class PragmaNamespace; 51 class PragmaHandler; 52 class CommentHandler; 53 class ScratchBuffer; 54 class TargetInfo; 55 class PPCallbacks; 56 class CodeCompletionHandler; 57 class DirectoryLookup; 58 class PreprocessingRecord; 59 class ModuleLoader; 60 class PTHManager; 61 class PreprocessorOptions; 62 63 /// \brief Stores token information for comparing actual tokens with 64 /// predefined values. Only handles simple tokens and identifiers. 65 class TokenValue { 66 tok::TokenKind Kind; 67 IdentifierInfo *II; 68 69 public: TokenValue(tok::TokenKind Kind)70 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) { 71 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported."); 72 assert(Kind != tok::identifier && 73 "Identifiers should be created by TokenValue(IdentifierInfo *)"); 74 assert(!tok::isLiteral(Kind) && "Literals are not supported."); 75 assert(!tok::isAnnotation(Kind) && "Annotations are not supported."); 76 } TokenValue(IdentifierInfo * II)77 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {} 78 bool operator==(const Token &Tok) const { 79 return Tok.getKind() == Kind && 80 (!II || II == Tok.getIdentifierInfo()); 81 } 82 }; 83 84 /// \brief Context in which macro name is used. 85 enum MacroUse { 86 MU_Other = 0, // other than #define or #undef 87 MU_Define = 1, // macro name specified in #define 88 MU_Undef = 2 // macro name specified in #undef 89 }; 90 91 /// \brief Engages in a tight little dance with the lexer to efficiently 92 /// preprocess tokens. 93 /// 94 /// Lexers know only about tokens within a single source file, and don't 95 /// know anything about preprocessor-level issues like the \#include stack, 96 /// token expansion, etc. 97 class Preprocessor : public RefCountedBase<Preprocessor> { 98 IntrusiveRefCntPtr<PreprocessorOptions> PPOpts; 99 DiagnosticsEngine *Diags; 100 LangOptions &LangOpts; 101 const TargetInfo *Target; 102 const TargetInfo *AuxTarget; 103 FileManager &FileMgr; 104 SourceManager &SourceMgr; 105 std::unique_ptr<ScratchBuffer> ScratchBuf; 106 HeaderSearch &HeaderInfo; 107 ModuleLoader &TheModuleLoader; 108 109 /// \brief External source of macros. 110 ExternalPreprocessorSource *ExternalSource; 111 112 113 /// An optional PTHManager object used for getting tokens from 114 /// a token cache rather than lexing the original source file. 115 std::unique_ptr<PTHManager> PTH; 116 117 /// A BumpPtrAllocator object used to quickly allocate and release 118 /// objects internal to the Preprocessor. 119 llvm::BumpPtrAllocator BP; 120 121 /// Identifiers for builtin macros and other builtins. 122 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__ 123 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__ 124 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__ 125 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__ 126 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__ 127 IdentifierInfo *Ident__COUNTER__; // __COUNTER__ 128 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma 129 IdentifierInfo *Ident__identifier; // __identifier 130 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__ 131 IdentifierInfo *Ident__has_feature; // __has_feature 132 IdentifierInfo *Ident__has_extension; // __has_extension 133 IdentifierInfo *Ident__has_builtin; // __has_builtin 134 IdentifierInfo *Ident__has_attribute; // __has_attribute 135 IdentifierInfo *Ident__has_include; // __has_include 136 IdentifierInfo *Ident__has_include_next; // __has_include_next 137 IdentifierInfo *Ident__has_warning; // __has_warning 138 IdentifierInfo *Ident__is_identifier; // __is_identifier 139 IdentifierInfo *Ident__building_module; // __building_module 140 IdentifierInfo *Ident__MODULE__; // __MODULE__ 141 IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute 142 IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute 143 144 SourceLocation DATELoc, TIMELoc; 145 unsigned CounterValue; // Next __COUNTER__ value. 146 147 enum { 148 /// \brief Maximum depth of \#includes. 149 MaxAllowedIncludeStackDepth = 200 150 }; 151 152 // State that is set before the preprocessor begins. 153 bool KeepComments : 1; 154 bool KeepMacroComments : 1; 155 bool SuppressIncludeNotFoundError : 1; 156 157 // State that changes while the preprocessor runs: 158 bool InMacroArgs : 1; // True if parsing fn macro invocation args. 159 160 /// Whether the preprocessor owns the header search object. 161 bool OwnsHeaderSearch : 1; 162 163 /// True if macro expansion is disabled. 164 bool DisableMacroExpansion : 1; 165 166 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion) 167 /// when parsing preprocessor directives. 168 bool MacroExpansionInDirectivesOverride : 1; 169 170 class ResetMacroExpansionHelper; 171 172 /// \brief Whether we have already loaded macros from the external source. 173 mutable bool ReadMacrosFromExternalSource : 1; 174 175 /// \brief True if pragmas are enabled. 176 bool PragmasEnabled : 1; 177 178 /// \brief True if the current build action is a preprocessing action. 179 bool PreprocessedOutput : 1; 180 181 /// \brief True if we are currently preprocessing a #if or #elif directive 182 bool ParsingIfOrElifDirective; 183 184 /// \brief True if we are pre-expanding macro arguments. 185 bool InMacroArgPreExpansion; 186 187 /// \brief Mapping/lookup information for all identifiers in 188 /// the program, including program keywords. 189 mutable IdentifierTable Identifiers; 190 191 /// \brief This table contains all the selectors in the program. 192 /// 193 /// Unlike IdentifierTable above, this table *isn't* populated by the 194 /// preprocessor. It is declared/expanded here because its role/lifetime is 195 /// conceptually similar to the IdentifierTable. In addition, the current 196 /// control flow (in clang::ParseAST()), make it convenient to put here. 197 /// 198 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to 199 /// the lifetime of the preprocessor. 200 SelectorTable Selectors; 201 202 /// \brief Information about builtins. 203 Builtin::Context BuiltinInfo; 204 205 /// \brief Tracks all of the pragmas that the client registered 206 /// with this preprocessor. 207 std::unique_ptr<PragmaNamespace> PragmaHandlers; 208 209 /// \brief Pragma handlers of the original source is stored here during the 210 /// parsing of a model file. 211 std::unique_ptr<PragmaNamespace> PragmaHandlersBackup; 212 213 /// \brief Tracks all of the comment handlers that the client registered 214 /// with this preprocessor. 215 std::vector<CommentHandler *> CommentHandlers; 216 217 /// \brief True if we want to ignore EOF token and continue later on (thus 218 /// avoid tearing the Lexer and etc. down). 219 bool IncrementalProcessing; 220 221 /// The kind of translation unit we are processing. 222 TranslationUnitKind TUKind; 223 224 /// \brief The code-completion handler. 225 CodeCompletionHandler *CodeComplete; 226 227 /// \brief The file that we're performing code-completion for, if any. 228 const FileEntry *CodeCompletionFile; 229 230 /// \brief The offset in file for the code-completion point. 231 unsigned CodeCompletionOffset; 232 233 /// \brief The location for the code-completion point. This gets instantiated 234 /// when the CodeCompletionFile gets \#include'ed for preprocessing. 235 SourceLocation CodeCompletionLoc; 236 237 /// \brief The start location for the file of the code-completion point. 238 /// 239 /// This gets instantiated when the CodeCompletionFile gets \#include'ed 240 /// for preprocessing. 241 SourceLocation CodeCompletionFileLoc; 242 243 /// \brief The source location of the \c import contextual keyword we just 244 /// lexed, if any. 245 SourceLocation ModuleImportLoc; 246 247 /// \brief The module import path that we're currently processing. 248 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath; 249 250 /// \brief Whether the last token we lexed was an '@'. 251 bool LastTokenWasAt; 252 253 /// \brief Whether the module import expects an identifier next. Otherwise, 254 /// it expects a '.' or ';'. 255 bool ModuleImportExpectsIdentifier; 256 257 /// \brief The source location of the currently-active 258 /// \#pragma clang arc_cf_code_audited begin. 259 SourceLocation PragmaARCCFCodeAuditedLoc; 260 261 /// \brief The source location of the currently-active 262 /// \#pragma clang assume_nonnull begin. 263 SourceLocation PragmaAssumeNonNullLoc; 264 265 /// \brief True if we hit the code-completion point. 266 bool CodeCompletionReached; 267 268 /// \brief The directory that the main file should be considered to occupy, 269 /// if it does not correspond to a real file (as happens when building a 270 /// module). 271 const DirectoryEntry *MainFileDir; 272 273 /// \brief The number of bytes that we will initially skip when entering the 274 /// main file, along with a flag that indicates whether skipping this number 275 /// of bytes will place the lexer at the start of a line. 276 /// 277 /// This is used when loading a precompiled preamble. 278 std::pair<int, bool> SkipMainFilePreamble; 279 280 /// \brief The current top of the stack that we're lexing from if 281 /// not expanding a macro and we are lexing directly from source code. 282 /// 283 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null. 284 std::unique_ptr<Lexer> CurLexer; 285 286 /// \brief The current top of stack that we're lexing from if 287 /// not expanding from a macro and we are lexing from a PTH cache. 288 /// 289 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null. 290 std::unique_ptr<PTHLexer> CurPTHLexer; 291 292 /// \brief The current top of the stack what we're lexing from 293 /// if not expanding a macro. 294 /// 295 /// This is an alias for either CurLexer or CurPTHLexer. 296 PreprocessorLexer *CurPPLexer; 297 298 /// \brief Used to find the current FileEntry, if CurLexer is non-null 299 /// and if applicable. 300 /// 301 /// This allows us to implement \#include_next and find directory-specific 302 /// properties. 303 const DirectoryLookup *CurDirLookup; 304 305 /// \brief The current macro we are expanding, if we are expanding a macro. 306 /// 307 /// One of CurLexer and CurTokenLexer must be null. 308 std::unique_ptr<TokenLexer> CurTokenLexer; 309 310 /// \brief The kind of lexer we're currently working with. 311 enum CurLexerKind { 312 CLK_Lexer, 313 CLK_PTHLexer, 314 CLK_TokenLexer, 315 CLK_CachingLexer, 316 CLK_LexAfterModuleImport 317 } CurLexerKind; 318 319 /// \brief If the current lexer is for a submodule that is being built, this 320 /// is that submodule. 321 Module *CurSubmodule; 322 323 /// \brief Keeps track of the stack of files currently 324 /// \#included, and macros currently being expanded from, not counting 325 /// CurLexer/CurTokenLexer. 326 struct IncludeStackInfo { 327 enum CurLexerKind CurLexerKind; 328 Module *TheSubmodule; 329 std::unique_ptr<Lexer> TheLexer; 330 std::unique_ptr<PTHLexer> ThePTHLexer; 331 PreprocessorLexer *ThePPLexer; 332 std::unique_ptr<TokenLexer> TheTokenLexer; 333 const DirectoryLookup *TheDirLookup; 334 335 // The following constructors are completely useless copies of the default 336 // versions, only needed to pacify MSVC. IncludeStackInfoIncludeStackInfo337 IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule, 338 std::unique_ptr<Lexer> &&TheLexer, 339 std::unique_ptr<PTHLexer> &&ThePTHLexer, 340 PreprocessorLexer *ThePPLexer, 341 std::unique_ptr<TokenLexer> &&TheTokenLexer, 342 const DirectoryLookup *TheDirLookup) 343 : CurLexerKind(std::move(CurLexerKind)), 344 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)), 345 ThePTHLexer(std::move(ThePTHLexer)), 346 ThePPLexer(std::move(ThePPLexer)), 347 TheTokenLexer(std::move(TheTokenLexer)), 348 TheDirLookup(std::move(TheDirLookup)) {} IncludeStackInfoIncludeStackInfo349 IncludeStackInfo(IncludeStackInfo &&RHS) 350 : CurLexerKind(std::move(RHS.CurLexerKind)), 351 TheSubmodule(std::move(RHS.TheSubmodule)), 352 TheLexer(std::move(RHS.TheLexer)), 353 ThePTHLexer(std::move(RHS.ThePTHLexer)), 354 ThePPLexer(std::move(RHS.ThePPLexer)), 355 TheTokenLexer(std::move(RHS.TheTokenLexer)), 356 TheDirLookup(std::move(RHS.TheDirLookup)) {} 357 }; 358 std::vector<IncludeStackInfo> IncludeMacroStack; 359 360 /// \brief Actions invoked when some preprocessor activity is 361 /// encountered (e.g. a file is \#included, etc). 362 std::unique_ptr<PPCallbacks> Callbacks; 363 364 struct MacroExpandsInfo { 365 Token Tok; 366 MacroDefinition MD; 367 SourceRange Range; MacroExpandsInfoMacroExpandsInfo368 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range) 369 : Tok(Tok), MD(MD), Range(Range) { } 370 }; 371 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks; 372 373 /// Information about a name that has been used to define a module macro. 374 struct ModuleMacroInfo { ModuleMacroInfoModuleMacroInfo375 ModuleMacroInfo(MacroDirective *MD) 376 : MD(MD), ActiveModuleMacrosGeneration(0), IsAmbiguous(false) {} 377 378 /// The most recent macro directive for this identifier. 379 MacroDirective *MD; 380 /// The active module macros for this identifier. 381 llvm::TinyPtrVector<ModuleMacro*> ActiveModuleMacros; 382 /// The generation number at which we last updated ActiveModuleMacros. 383 /// \see Preprocessor::VisibleModules. 384 unsigned ActiveModuleMacrosGeneration; 385 /// Whether this macro name is ambiguous. 386 bool IsAmbiguous; 387 /// The module macros that are overridden by this macro. 388 llvm::TinyPtrVector<ModuleMacro*> OverriddenMacros; 389 }; 390 391 /// The state of a macro for an identifier. 392 class MacroState { 393 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State; 394 getModuleInfo(Preprocessor & PP,const IdentifierInfo * II)395 ModuleMacroInfo *getModuleInfo(Preprocessor &PP, 396 const IdentifierInfo *II) const { 397 // FIXME: Find a spare bit on IdentifierInfo and store a 398 // HasModuleMacros flag. 399 if (!II->hasMacroDefinition() || 400 (!PP.getLangOpts().Modules && 401 !PP.getLangOpts().ModulesLocalVisibility) || 402 !PP.CurSubmoduleState->VisibleModules.getGeneration()) 403 return nullptr; 404 405 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 406 if (!Info) { 407 Info = new (PP.getPreprocessorAllocator()) 408 ModuleMacroInfo(State.get<MacroDirective *>()); 409 State = Info; 410 } 411 412 if (PP.CurSubmoduleState->VisibleModules.getGeneration() != 413 Info->ActiveModuleMacrosGeneration) 414 PP.updateModuleMacroInfo(II, *Info); 415 return Info; 416 } 417 418 public: MacroState()419 MacroState() : MacroState(nullptr) {} MacroState(MacroDirective * MD)420 MacroState(MacroDirective *MD) : State(MD) {} MacroState(MacroState && O)421 MacroState(MacroState &&O) LLVM_NOEXCEPT : State(O.State) { 422 O.State = (MacroDirective *)nullptr; 423 } 424 MacroState &operator=(MacroState &&O) LLVM_NOEXCEPT { 425 auto S = O.State; 426 O.State = (MacroDirective *)nullptr; 427 State = S; 428 return *this; 429 } ~MacroState()430 ~MacroState() { 431 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 432 Info->~ModuleMacroInfo(); 433 } 434 getLatest()435 MacroDirective *getLatest() const { 436 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 437 return Info->MD; 438 return State.get<MacroDirective*>(); 439 } setLatest(MacroDirective * MD)440 void setLatest(MacroDirective *MD) { 441 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 442 Info->MD = MD; 443 else 444 State = MD; 445 } 446 isAmbiguous(Preprocessor & PP,const IdentifierInfo * II)447 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const { 448 auto *Info = getModuleInfo(PP, II); 449 return Info ? Info->IsAmbiguous : false; 450 } 451 ArrayRef<ModuleMacro *> getActiveModuleMacros(Preprocessor & PP,const IdentifierInfo * II)452 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const { 453 if (auto *Info = getModuleInfo(PP, II)) 454 return Info->ActiveModuleMacros; 455 return None; 456 } 457 findDirectiveAtLoc(SourceLocation Loc,SourceManager & SourceMgr)458 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc, 459 SourceManager &SourceMgr) const { 460 // FIXME: Incorporate module macros into the result of this. 461 if (auto *Latest = getLatest()) 462 return Latest->findDirectiveAtLoc(Loc, SourceMgr); 463 return MacroDirective::DefInfo(); 464 } 465 overrideActiveModuleMacros(Preprocessor & PP,IdentifierInfo * II)466 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) { 467 if (auto *Info = getModuleInfo(PP, II)) { 468 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 469 Info->ActiveModuleMacros.begin(), 470 Info->ActiveModuleMacros.end()); 471 Info->ActiveModuleMacros.clear(); 472 Info->IsAmbiguous = false; 473 } 474 } getOverriddenMacros()475 ArrayRef<ModuleMacro*> getOverriddenMacros() const { 476 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 477 return Info->OverriddenMacros; 478 return None; 479 } setOverriddenMacros(Preprocessor & PP,ArrayRef<ModuleMacro * > Overrides)480 void setOverriddenMacros(Preprocessor &PP, 481 ArrayRef<ModuleMacro *> Overrides) { 482 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 483 if (!Info) { 484 if (Overrides.empty()) 485 return; 486 Info = new (PP.getPreprocessorAllocator()) 487 ModuleMacroInfo(State.get<MacroDirective *>()); 488 State = Info; 489 } 490 Info->OverriddenMacros.clear(); 491 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 492 Overrides.begin(), Overrides.end()); 493 Info->ActiveModuleMacrosGeneration = 0; 494 } 495 }; 496 497 /// For each IdentifierInfo that was associated with a macro, we 498 /// keep a mapping to the history of all macro definitions and #undefs in 499 /// the reverse order (the latest one is in the head of the list). 500 /// 501 /// This mapping lives within the \p CurSubmoduleState. 502 typedef llvm::DenseMap<const IdentifierInfo *, MacroState> MacroMap; 503 504 friend class ASTReader; 505 506 struct SubmoduleState; 507 508 /// \brief Information about a submodule that we're currently building. 509 struct BuildingSubmoduleInfo { BuildingSubmoduleInfoBuildingSubmoduleInfo510 BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, 511 SubmoduleState *OuterSubmoduleState, 512 unsigned OuterPendingModuleMacroNames) 513 : M(M), ImportLoc(ImportLoc), OuterSubmoduleState(OuterSubmoduleState), 514 OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {} 515 516 /// The module that we are building. 517 Module *M; 518 /// The location at which the module was included. 519 SourceLocation ImportLoc; 520 /// The previous SubmoduleState. 521 SubmoduleState *OuterSubmoduleState; 522 /// The number of pending module macro names when we started building this. 523 unsigned OuterPendingModuleMacroNames; 524 }; 525 SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack; 526 527 /// \brief Information about a submodule's preprocessor state. 528 struct SubmoduleState { 529 /// The macros for the submodule. 530 MacroMap Macros; 531 /// The set of modules that are visible within the submodule. 532 VisibleModuleSet VisibleModules; 533 // FIXME: CounterValue? 534 // FIXME: PragmaPushMacroInfo? 535 }; 536 std::map<Module*, SubmoduleState> Submodules; 537 538 /// The preprocessor state for preprocessing outside of any submodule. 539 SubmoduleState NullSubmoduleState; 540 541 /// The current submodule state. Will be \p NullSubmoduleState if we're not 542 /// in a submodule. 543 SubmoduleState *CurSubmoduleState; 544 545 /// The set of known macros exported from modules. 546 llvm::FoldingSet<ModuleMacro> ModuleMacros; 547 548 /// The names of potential module macros that we've not yet processed. 549 llvm::SmallVector<const IdentifierInfo*, 32> PendingModuleMacroNames; 550 551 /// The list of module macros, for each identifier, that are not overridden by 552 /// any other module macro. 553 llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro*>> 554 LeafModuleMacros; 555 556 /// \brief Macros that we want to warn because they are not used at the end 557 /// of the translation unit. 558 /// 559 /// We store just their SourceLocations instead of 560 /// something like MacroInfo*. The benefit of this is that when we are 561 /// deserializing from PCH, we don't need to deserialize identifier & macros 562 /// just so that we can report that they are unused, we just warn using 563 /// the SourceLocations of this set (that will be filled by the ASTReader). 564 /// We are using SmallPtrSet instead of a vector for faster removal. 565 typedef llvm::SmallPtrSet<SourceLocation, 32> WarnUnusedMacroLocsTy; 566 WarnUnusedMacroLocsTy WarnUnusedMacroLocs; 567 568 /// \brief A "freelist" of MacroArg objects that can be 569 /// reused for quick allocation. 570 MacroArgs *MacroArgCache; 571 friend class MacroArgs; 572 573 /// For each IdentifierInfo used in a \#pragma push_macro directive, 574 /// we keep a MacroInfo stack used to restore the previous macro value. 575 llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo; 576 577 // Various statistics we track for performance analysis. 578 unsigned NumDirectives, NumDefined, NumUndefined, NumPragma; 579 unsigned NumIf, NumElse, NumEndif; 580 unsigned NumEnteredSourceFiles, MaxIncludeStackDepth; 581 unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded; 582 unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste; 583 unsigned NumSkipped; 584 585 /// \brief The predefined macros that preprocessor should use from the 586 /// command line etc. 587 std::string Predefines; 588 589 /// \brief The file ID for the preprocessor predefines. 590 FileID PredefinesFileID; 591 592 /// \{ 593 /// \brief Cache of macro expanders to reduce malloc traffic. 594 enum { TokenLexerCacheSize = 8 }; 595 unsigned NumCachedTokenLexers; 596 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize]; 597 /// \} 598 599 /// \brief Keeps macro expanded tokens for TokenLexers. 600 // 601 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 602 /// going to lex in the cache and when it finishes the tokens are removed 603 /// from the end of the cache. 604 SmallVector<Token, 16> MacroExpandedTokens; 605 std::vector<std::pair<TokenLexer *, size_t> > MacroExpandingLexersStack; 606 607 /// \brief A record of the macro definitions and expansions that 608 /// occurred during preprocessing. 609 /// 610 /// This is an optional side structure that can be enabled with 611 /// \c createPreprocessingRecord() prior to preprocessing. 612 PreprocessingRecord *Record; 613 614 /// Cached tokens state. 615 typedef SmallVector<Token, 1> CachedTokensTy; 616 617 /// \brief Cached tokens are stored here when we do backtracking or 618 /// lookahead. They are "lexed" by the CachingLex() method. 619 CachedTokensTy CachedTokens; 620 621 /// \brief The position of the cached token that CachingLex() should 622 /// "lex" next. 623 /// 624 /// If it points beyond the CachedTokens vector, it means that a normal 625 /// Lex() should be invoked. 626 CachedTokensTy::size_type CachedLexPos; 627 628 /// \brief Stack of backtrack positions, allowing nested backtracks. 629 /// 630 /// The EnableBacktrackAtThisPos() method pushes a position to 631 /// indicate where CachedLexPos should be set when the BackTrack() method is 632 /// invoked (at which point the last position is popped). 633 std::vector<CachedTokensTy::size_type> BacktrackPositions; 634 635 struct MacroInfoChain { 636 MacroInfo MI; 637 MacroInfoChain *Next; 638 }; 639 640 /// MacroInfos are managed as a chain for easy disposal. This is the head 641 /// of that list. 642 MacroInfoChain *MIChainHead; 643 644 struct DeserializedMacroInfoChain { 645 MacroInfo MI; 646 unsigned OwningModuleID; // MUST be immediately after the MacroInfo object 647 // so it can be accessed by MacroInfo::getOwningModuleID(). 648 DeserializedMacroInfoChain *Next; 649 }; 650 DeserializedMacroInfoChain *DeserialMIChainHead; 651 652 public: 653 Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts, 654 DiagnosticsEngine &diags, LangOptions &opts, 655 SourceManager &SM, HeaderSearch &Headers, 656 ModuleLoader &TheModuleLoader, 657 IdentifierInfoLookup *IILookup = nullptr, 658 bool OwnsHeaderSearch = false, 659 TranslationUnitKind TUKind = TU_Complete); 660 661 ~Preprocessor(); 662 663 /// \brief Initialize the preprocessor using information about the target. 664 /// 665 /// \param Target is owned by the caller and must remain valid for the 666 /// lifetime of the preprocessor. 667 /// \param AuxTarget is owned by the caller and must remain valid for 668 /// the lifetime of the preprocessor. 669 void Initialize(const TargetInfo &Target, 670 const TargetInfo *AuxTarget = nullptr); 671 672 /// \brief Initialize the preprocessor to parse a model file 673 /// 674 /// To parse model files the preprocessor of the original source is reused to 675 /// preserver the identifier table. However to avoid some duplicate 676 /// information in the preprocessor some cleanup is needed before it is used 677 /// to parse model files. This method does that cleanup. 678 void InitializeForModelFile(); 679 680 /// \brief Cleanup after model file parsing 681 void FinalizeForModelFile(); 682 683 /// \brief Retrieve the preprocessor options used to initialize this 684 /// preprocessor. getPreprocessorOpts()685 PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; } 686 getDiagnostics()687 DiagnosticsEngine &getDiagnostics() const { return *Diags; } setDiagnostics(DiagnosticsEngine & D)688 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; } 689 getLangOpts()690 const LangOptions &getLangOpts() const { return LangOpts; } getTargetInfo()691 const TargetInfo &getTargetInfo() const { return *Target; } getAuxTargetInfo()692 const TargetInfo *getAuxTargetInfo() const { return AuxTarget; } getFileManager()693 FileManager &getFileManager() const { return FileMgr; } getSourceManager()694 SourceManager &getSourceManager() const { return SourceMgr; } getHeaderSearchInfo()695 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; } 696 getIdentifierTable()697 IdentifierTable &getIdentifierTable() { return Identifiers; } getIdentifierTable()698 const IdentifierTable &getIdentifierTable() const { return Identifiers; } getSelectorTable()699 SelectorTable &getSelectorTable() { return Selectors; } getBuiltinInfo()700 Builtin::Context &getBuiltinInfo() { return BuiltinInfo; } getPreprocessorAllocator()701 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; } 702 703 void setPTHManager(PTHManager* pm); 704 getPTHManager()705 PTHManager *getPTHManager() { return PTH.get(); } 706 setExternalSource(ExternalPreprocessorSource * Source)707 void setExternalSource(ExternalPreprocessorSource *Source) { 708 ExternalSource = Source; 709 } 710 getExternalSource()711 ExternalPreprocessorSource *getExternalSource() const { 712 return ExternalSource; 713 } 714 715 /// \brief Retrieve the module loader associated with this preprocessor. getModuleLoader()716 ModuleLoader &getModuleLoader() const { return TheModuleLoader; } 717 hadModuleLoaderFatalFailure()718 bool hadModuleLoaderFatalFailure() const { 719 return TheModuleLoader.HadFatalFailure; 720 } 721 722 /// \brief True if we are currently preprocessing a #if or #elif directive isParsingIfOrElifDirective()723 bool isParsingIfOrElifDirective() const { 724 return ParsingIfOrElifDirective; 725 } 726 727 /// \brief Control whether the preprocessor retains comments in output. SetCommentRetentionState(bool KeepComments,bool KeepMacroComments)728 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) { 729 this->KeepComments = KeepComments | KeepMacroComments; 730 this->KeepMacroComments = KeepMacroComments; 731 } 732 getCommentRetentionState()733 bool getCommentRetentionState() const { return KeepComments; } 734 setPragmasEnabled(bool Enabled)735 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; } getPragmasEnabled()736 bool getPragmasEnabled() const { return PragmasEnabled; } 737 SetSuppressIncludeNotFoundError(bool Suppress)738 void SetSuppressIncludeNotFoundError(bool Suppress) { 739 SuppressIncludeNotFoundError = Suppress; 740 } 741 GetSuppressIncludeNotFoundError()742 bool GetSuppressIncludeNotFoundError() { 743 return SuppressIncludeNotFoundError; 744 } 745 746 /// Sets whether the preprocessor is responsible for producing output or if 747 /// it is producing tokens to be consumed by Parse and Sema. setPreprocessedOutput(bool IsPreprocessedOutput)748 void setPreprocessedOutput(bool IsPreprocessedOutput) { 749 PreprocessedOutput = IsPreprocessedOutput; 750 } 751 752 /// Returns true if the preprocessor is responsible for generating output, 753 /// false if it is producing tokens to be consumed by Parse and Sema. isPreprocessedOutput()754 bool isPreprocessedOutput() const { return PreprocessedOutput; } 755 756 /// \brief Return true if we are lexing directly from the specified lexer. isCurrentLexer(const PreprocessorLexer * L)757 bool isCurrentLexer(const PreprocessorLexer *L) const { 758 return CurPPLexer == L; 759 } 760 761 /// \brief Return the current lexer being lexed from. 762 /// 763 /// Note that this ignores any potentially active macro expansions and _Pragma 764 /// expansions going on at the time. getCurrentLexer()765 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; } 766 767 /// \brief Return the current file lexer being lexed from. 768 /// 769 /// Note that this ignores any potentially active macro expansions and _Pragma 770 /// expansions going on at the time. 771 PreprocessorLexer *getCurrentFileLexer() const; 772 773 /// \brief Return the submodule owning the file being lexed. getCurrentSubmodule()774 Module *getCurrentSubmodule() const { return CurSubmodule; } 775 776 /// \brief Returns the FileID for the preprocessor predefines. getPredefinesFileID()777 FileID getPredefinesFileID() const { return PredefinesFileID; } 778 779 /// \{ 780 /// \brief Accessors for preprocessor callbacks. 781 /// 782 /// Note that this class takes ownership of any PPCallbacks object given to 783 /// it. getPPCallbacks()784 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); } addPPCallbacks(std::unique_ptr<PPCallbacks> C)785 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) { 786 if (Callbacks) 787 C = llvm::make_unique<PPChainedCallbacks>(std::move(C), 788 std::move(Callbacks)); 789 Callbacks = std::move(C); 790 } 791 /// \} 792 isMacroDefined(StringRef Id)793 bool isMacroDefined(StringRef Id) { 794 return isMacroDefined(&Identifiers.get(Id)); 795 } isMacroDefined(const IdentifierInfo * II)796 bool isMacroDefined(const IdentifierInfo *II) { 797 return II->hasMacroDefinition() && 798 (!getLangOpts().Modules || (bool)getMacroDefinition(II)); 799 } 800 801 /// \brief Determine whether II is defined as a macro within the module M, 802 /// if that is a module that we've already preprocessed. Does not check for 803 /// macros imported into M. isMacroDefinedInLocalModule(const IdentifierInfo * II,Module * M)804 bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) { 805 if (!II->hasMacroDefinition()) 806 return false; 807 auto I = Submodules.find(M); 808 if (I == Submodules.end()) 809 return false; 810 auto J = I->second.Macros.find(II); 811 if (J == I->second.Macros.end()) 812 return false; 813 auto *MD = J->second.getLatest(); 814 return MD && MD->isDefined(); 815 } 816 getMacroDefinition(const IdentifierInfo * II)817 MacroDefinition getMacroDefinition(const IdentifierInfo *II) { 818 if (!II->hasMacroDefinition()) 819 return MacroDefinition(); 820 821 MacroState &S = CurSubmoduleState->Macros[II]; 822 auto *MD = S.getLatest(); 823 while (MD && isa<VisibilityMacroDirective>(MD)) 824 MD = MD->getPrevious(); 825 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD), 826 S.getActiveModuleMacros(*this, II), 827 S.isAmbiguous(*this, II)); 828 } 829 getMacroDefinitionAtLoc(const IdentifierInfo * II,SourceLocation Loc)830 MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II, 831 SourceLocation Loc) { 832 if (!II->hadMacroDefinition()) 833 return MacroDefinition(); 834 835 MacroState &S = CurSubmoduleState->Macros[II]; 836 MacroDirective::DefInfo DI; 837 if (auto *MD = S.getLatest()) 838 DI = MD->findDirectiveAtLoc(Loc, getSourceManager()); 839 // FIXME: Compute the set of active module macros at the specified location. 840 return MacroDefinition(DI.getDirective(), 841 S.getActiveModuleMacros(*this, II), 842 S.isAmbiguous(*this, II)); 843 } 844 845 /// \brief Given an identifier, return its latest non-imported MacroDirective 846 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd. getLocalMacroDirective(const IdentifierInfo * II)847 MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const { 848 if (!II->hasMacroDefinition()) 849 return nullptr; 850 851 auto *MD = getLocalMacroDirectiveHistory(II); 852 if (!MD || MD->getDefinition().isUndefined()) 853 return nullptr; 854 855 return MD; 856 } 857 getMacroInfo(const IdentifierInfo * II)858 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const { 859 return const_cast<Preprocessor*>(this)->getMacroInfo(II); 860 } 861 getMacroInfo(const IdentifierInfo * II)862 MacroInfo *getMacroInfo(const IdentifierInfo *II) { 863 if (!II->hasMacroDefinition()) 864 return nullptr; 865 if (auto MD = getMacroDefinition(II)) 866 return MD.getMacroInfo(); 867 return nullptr; 868 } 869 870 /// \brief Given an identifier, return the latest non-imported macro 871 /// directive for that identifier. 872 /// 873 /// One can iterate over all previous macro directives from the most recent 874 /// one. 875 MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const; 876 877 /// \brief Add a directive to the macro directive history for this identifier. 878 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD); appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI,SourceLocation Loc)879 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI, 880 SourceLocation Loc) { 881 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc); 882 appendMacroDirective(II, MD); 883 return MD; 884 } appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI)885 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, 886 MacroInfo *MI) { 887 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc()); 888 } 889 /// \brief Set a MacroDirective that was loaded from a PCH file. 890 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *MD); 891 892 /// \brief Register an exported macro for a module and identifier. 893 ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro, 894 ArrayRef<ModuleMacro *> Overrides, bool &IsNew); 895 ModuleMacro *getModuleMacro(Module *Mod, IdentifierInfo *II); 896 897 /// \brief Get the list of leaf (non-overridden) module macros for a name. getLeafModuleMacros(const IdentifierInfo * II)898 ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const { 899 auto I = LeafModuleMacros.find(II); 900 if (I != LeafModuleMacros.end()) 901 return I->second; 902 return None; 903 } 904 905 /// \{ 906 /// Iterators for the macro history table. Currently defined macros have 907 /// IdentifierInfo::hasMacroDefinition() set and an empty 908 /// MacroInfo::getUndefLoc() at the head of the list. 909 typedef MacroMap::const_iterator macro_iterator; 910 macro_iterator macro_begin(bool IncludeExternalMacros = true) const; 911 macro_iterator macro_end(bool IncludeExternalMacros = true) const; 912 llvm::iterator_range<macro_iterator> 913 macros(bool IncludeExternalMacros = true) const { 914 return llvm::make_range(macro_begin(IncludeExternalMacros), 915 macro_end(IncludeExternalMacros)); 916 } 917 /// \} 918 919 /// \brief Return the name of the macro defined before \p Loc that has 920 /// spelling \p Tokens. If there are multiple macros with same spelling, 921 /// return the last one defined. 922 StringRef getLastMacroWithSpelling(SourceLocation Loc, 923 ArrayRef<TokenValue> Tokens) const; 924 getPredefines()925 const std::string &getPredefines() const { return Predefines; } 926 /// \brief Set the predefines for this Preprocessor. 927 /// 928 /// These predefines are automatically injected when parsing the main file. setPredefines(const char * P)929 void setPredefines(const char *P) { Predefines = P; } setPredefines(StringRef P)930 void setPredefines(StringRef P) { Predefines = P; } 931 932 /// Return information about the specified preprocessor 933 /// identifier token. getIdentifierInfo(StringRef Name)934 IdentifierInfo *getIdentifierInfo(StringRef Name) const { 935 return &Identifiers.get(Name); 936 } 937 938 /// \brief Add the specified pragma handler to this preprocessor. 939 /// 940 /// If \p Namespace is non-null, then it is a token required to exist on the 941 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC". 942 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler); AddPragmaHandler(PragmaHandler * Handler)943 void AddPragmaHandler(PragmaHandler *Handler) { 944 AddPragmaHandler(StringRef(), Handler); 945 } 946 947 /// \brief Remove the specific pragma handler from this preprocessor. 948 /// 949 /// If \p Namespace is non-null, then it should be the namespace that 950 /// \p Handler was added to. It is an error to remove a handler that 951 /// has not been registered. 952 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler); RemovePragmaHandler(PragmaHandler * Handler)953 void RemovePragmaHandler(PragmaHandler *Handler) { 954 RemovePragmaHandler(StringRef(), Handler); 955 } 956 957 /// Install empty handlers for all pragmas (making them ignored). 958 void IgnorePragmas(); 959 960 /// \brief Add the specified comment handler to the preprocessor. 961 void addCommentHandler(CommentHandler *Handler); 962 963 /// \brief Remove the specified comment handler. 964 /// 965 /// It is an error to remove a handler that has not been registered. 966 void removeCommentHandler(CommentHandler *Handler); 967 968 /// \brief Set the code completion handler to the given object. setCodeCompletionHandler(CodeCompletionHandler & Handler)969 void setCodeCompletionHandler(CodeCompletionHandler &Handler) { 970 CodeComplete = &Handler; 971 } 972 973 /// \brief Retrieve the current code-completion handler. getCodeCompletionHandler()974 CodeCompletionHandler *getCodeCompletionHandler() const { 975 return CodeComplete; 976 } 977 978 /// \brief Clear out the code completion handler. clearCodeCompletionHandler()979 void clearCodeCompletionHandler() { 980 CodeComplete = nullptr; 981 } 982 983 /// \brief Hook used by the lexer to invoke the "natural language" code 984 /// completion point. 985 void CodeCompleteNaturalLanguage(); 986 987 /// \brief Retrieve the preprocessing record, or NULL if there is no 988 /// preprocessing record. getPreprocessingRecord()989 PreprocessingRecord *getPreprocessingRecord() const { return Record; } 990 991 /// \brief Create a new preprocessing record, which will keep track of 992 /// all macro expansions, macro definitions, etc. 993 void createPreprocessingRecord(); 994 995 /// \brief Enter the specified FileID as the main source file, 996 /// which implicitly adds the builtin defines etc. 997 void EnterMainSourceFile(); 998 999 /// \brief Inform the preprocessor callbacks that processing is complete. 1000 void EndSourceFile(); 1001 1002 /// \brief Add a source file to the top of the include stack and 1003 /// start lexing tokens from it instead of the current buffer. 1004 /// 1005 /// Emits a diagnostic, doesn't enter the file, and returns true on error. 1006 bool EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir, 1007 SourceLocation Loc); 1008 1009 /// \brief Add a Macro to the top of the include stack and start lexing 1010 /// tokens from it instead of the current buffer. 1011 /// 1012 /// \param Args specifies the tokens input to a function-like macro. 1013 /// \param ILEnd specifies the location of the ')' for a function-like macro 1014 /// or the identifier for an object-like macro. 1015 void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroInfo *Macro, 1016 MacroArgs *Args); 1017 1018 /// \brief Add a "macro" context to the top of the include stack, 1019 /// which will cause the lexer to start returning the specified tokens. 1020 /// 1021 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream 1022 /// will not be subject to further macro expansion. Otherwise, these tokens 1023 /// will be re-macro-expanded when/if expansion is enabled. 1024 /// 1025 /// If \p OwnsTokens is false, this method assumes that the specified stream 1026 /// of tokens has a permanent owner somewhere, so they do not need to be 1027 /// copied. If it is true, it assumes the array of tokens is allocated with 1028 /// \c new[] and the Preprocessor will delete[] it. 1029 private: 1030 void EnterTokenStream(const Token *Toks, unsigned NumToks, 1031 bool DisableMacroExpansion, bool OwnsTokens); 1032 1033 public: EnterTokenStream(std::unique_ptr<Token[]> Toks,unsigned NumToks,bool DisableMacroExpansion)1034 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks, 1035 bool DisableMacroExpansion) { 1036 EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true); 1037 } EnterTokenStream(ArrayRef<Token> Toks,bool DisableMacroExpansion)1038 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion) { 1039 EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false); 1040 } 1041 1042 /// \brief Pop the current lexer/macro exp off the top of the lexer stack. 1043 /// 1044 /// This should only be used in situations where the current state of the 1045 /// top-of-stack lexer is known. 1046 void RemoveTopOfLexerStack(); 1047 1048 /// From the point that this method is called, and until 1049 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor 1050 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will 1051 /// make the Preprocessor re-lex the same tokens. 1052 /// 1053 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can 1054 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will 1055 /// be combined with the EnableBacktrackAtThisPos calls in reverse order. 1056 /// 1057 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack 1058 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of 1059 /// tokens will continue indefinitely. 1060 /// 1061 void EnableBacktrackAtThisPos(); 1062 1063 /// \brief Disable the last EnableBacktrackAtThisPos call. 1064 void CommitBacktrackedTokens(); 1065 1066 /// \brief Make Preprocessor re-lex the tokens that were lexed since 1067 /// EnableBacktrackAtThisPos() was previously called. 1068 void Backtrack(); 1069 1070 /// \brief True if EnableBacktrackAtThisPos() was called and 1071 /// caching of tokens is on. isBacktrackEnabled()1072 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); } 1073 1074 /// \brief Lex the next token for this preprocessor. 1075 void Lex(Token &Result); 1076 1077 void LexAfterModuleImport(Token &Result); 1078 1079 void makeModuleVisible(Module *M, SourceLocation Loc); 1080 getModuleImportLoc(Module * M)1081 SourceLocation getModuleImportLoc(Module *M) const { 1082 return CurSubmoduleState->VisibleModules.getImportLoc(M); 1083 } 1084 1085 /// \brief Lex a string literal, which may be the concatenation of multiple 1086 /// string literals and may even come from macro expansion. 1087 /// \returns true on success, false if a error diagnostic has been generated. LexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)1088 bool LexStringLiteral(Token &Result, std::string &String, 1089 const char *DiagnosticTag, bool AllowMacroExpansion) { 1090 if (AllowMacroExpansion) 1091 Lex(Result); 1092 else 1093 LexUnexpandedToken(Result); 1094 return FinishLexStringLiteral(Result, String, DiagnosticTag, 1095 AllowMacroExpansion); 1096 } 1097 1098 /// \brief Complete the lexing of a string literal where the first token has 1099 /// already been lexed (see LexStringLiteral). 1100 bool FinishLexStringLiteral(Token &Result, std::string &String, 1101 const char *DiagnosticTag, 1102 bool AllowMacroExpansion); 1103 1104 /// \brief Lex a token. If it's a comment, keep lexing until we get 1105 /// something not a comment. 1106 /// 1107 /// This is useful in -E -C mode where comments would foul up preprocessor 1108 /// directive handling. LexNonComment(Token & Result)1109 void LexNonComment(Token &Result) { 1110 do 1111 Lex(Result); 1112 while (Result.getKind() == tok::comment); 1113 } 1114 1115 /// \brief Just like Lex, but disables macro expansion of identifier tokens. LexUnexpandedToken(Token & Result)1116 void LexUnexpandedToken(Token &Result) { 1117 // Disable macro expansion. 1118 bool OldVal = DisableMacroExpansion; 1119 DisableMacroExpansion = true; 1120 // Lex the token. 1121 Lex(Result); 1122 1123 // Reenable it. 1124 DisableMacroExpansion = OldVal; 1125 } 1126 1127 /// \brief Like LexNonComment, but this disables macro expansion of 1128 /// identifier tokens. LexUnexpandedNonComment(Token & Result)1129 void LexUnexpandedNonComment(Token &Result) { 1130 do 1131 LexUnexpandedToken(Result); 1132 while (Result.getKind() == tok::comment); 1133 } 1134 1135 /// \brief Parses a simple integer literal to get its numeric value. Floating 1136 /// point literals and user defined literals are rejected. Used primarily to 1137 /// handle pragmas that accept integer arguments. 1138 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value); 1139 1140 /// Disables macro expansion everywhere except for preprocessor directives. SetMacroExpansionOnlyInDirectives()1141 void SetMacroExpansionOnlyInDirectives() { 1142 DisableMacroExpansion = true; 1143 MacroExpansionInDirectivesOverride = true; 1144 } 1145 1146 /// \brief Peeks ahead N tokens and returns that token without consuming any 1147 /// tokens. 1148 /// 1149 /// LookAhead(0) returns the next token that would be returned by Lex(), 1150 /// LookAhead(1) returns the token after it, etc. This returns normal 1151 /// tokens after phase 5. As such, it is equivalent to using 1152 /// 'Lex', not 'LexUnexpandedToken'. LookAhead(unsigned N)1153 const Token &LookAhead(unsigned N) { 1154 if (CachedLexPos + N < CachedTokens.size()) 1155 return CachedTokens[CachedLexPos+N]; 1156 else 1157 return PeekAhead(N+1); 1158 } 1159 1160 /// \brief When backtracking is enabled and tokens are cached, 1161 /// this allows to revert a specific number of tokens. 1162 /// 1163 /// Note that the number of tokens being reverted should be up to the last 1164 /// backtrack position, not more. RevertCachedTokens(unsigned N)1165 void RevertCachedTokens(unsigned N) { 1166 assert(isBacktrackEnabled() && 1167 "Should only be called when tokens are cached for backtracking"); 1168 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back()) 1169 && "Should revert tokens up to the last backtrack position, not more"); 1170 assert(signed(CachedLexPos) - signed(N) >= 0 && 1171 "Corrupted backtrack positions ?"); 1172 CachedLexPos -= N; 1173 } 1174 1175 /// \brief Enters a token in the token stream to be lexed next. 1176 /// 1177 /// If BackTrack() is called afterwards, the token will remain at the 1178 /// insertion point. EnterToken(const Token & Tok)1179 void EnterToken(const Token &Tok) { 1180 EnterCachingLexMode(); 1181 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok); 1182 } 1183 1184 /// We notify the Preprocessor that if it is caching tokens (because 1185 /// backtrack is enabled) it should replace the most recent cached tokens 1186 /// with the given annotation token. This function has no effect if 1187 /// backtracking is not enabled. 1188 /// 1189 /// Note that the use of this function is just for optimization, so that the 1190 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is 1191 /// invoked. AnnotateCachedTokens(const Token & Tok)1192 void AnnotateCachedTokens(const Token &Tok) { 1193 assert(Tok.isAnnotation() && "Expected annotation token"); 1194 if (CachedLexPos != 0 && isBacktrackEnabled()) 1195 AnnotatePreviousCachedTokens(Tok); 1196 } 1197 1198 /// Get the location of the last cached token, suitable for setting the end 1199 /// location of an annotation token. getLastCachedTokenLocation()1200 SourceLocation getLastCachedTokenLocation() const { 1201 assert(CachedLexPos != 0); 1202 return CachedTokens[CachedLexPos-1].getLastLoc(); 1203 } 1204 1205 /// \brief Whether \p Tok is the most recent token (`CachedLexPos - 1`) in 1206 /// CachedTokens. 1207 bool IsPreviousCachedToken(const Token &Tok) const; 1208 1209 /// \brief Replace token in `CachedLexPos - 1` in CachedTokens by the tokens 1210 /// in \p NewToks. 1211 /// 1212 /// Useful when a token needs to be split in smaller ones and CachedTokens 1213 /// most recent token must to be updated to reflect that. 1214 void ReplacePreviousCachedToken(ArrayRef<Token> NewToks); 1215 1216 /// \brief Replace the last token with an annotation token. 1217 /// 1218 /// Like AnnotateCachedTokens(), this routine replaces an 1219 /// already-parsed (and resolved) token with an annotation 1220 /// token. However, this routine only replaces the last token with 1221 /// the annotation token; it does not affect any other cached 1222 /// tokens. This function has no effect if backtracking is not 1223 /// enabled. ReplaceLastTokenWithAnnotation(const Token & Tok)1224 void ReplaceLastTokenWithAnnotation(const Token &Tok) { 1225 assert(Tok.isAnnotation() && "Expected annotation token"); 1226 if (CachedLexPos != 0 && isBacktrackEnabled()) 1227 CachedTokens[CachedLexPos-1] = Tok; 1228 } 1229 1230 /// Update the current token to represent the provided 1231 /// identifier, in order to cache an action performed by typo correction. TypoCorrectToken(const Token & Tok)1232 void TypoCorrectToken(const Token &Tok) { 1233 assert(Tok.getIdentifierInfo() && "Expected identifier token"); 1234 if (CachedLexPos != 0 && isBacktrackEnabled()) 1235 CachedTokens[CachedLexPos-1] = Tok; 1236 } 1237 1238 /// \brief Recompute the current lexer kind based on the CurLexer/CurPTHLexer/ 1239 /// CurTokenLexer pointers. 1240 void recomputeCurLexerKind(); 1241 1242 /// \brief Returns true if incremental processing is enabled isIncrementalProcessingEnabled()1243 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; } 1244 1245 /// \brief Enables the incremental processing 1246 void enableIncrementalProcessing(bool value = true) { 1247 IncrementalProcessing = value; 1248 } 1249 1250 /// \brief Specify the point at which code-completion will be performed. 1251 /// 1252 /// \param File the file in which code completion should occur. If 1253 /// this file is included multiple times, code-completion will 1254 /// perform completion the first time it is included. If NULL, this 1255 /// function clears out the code-completion point. 1256 /// 1257 /// \param Line the line at which code completion should occur 1258 /// (1-based). 1259 /// 1260 /// \param Column the column at which code completion should occur 1261 /// (1-based). 1262 /// 1263 /// \returns true if an error occurred, false otherwise. 1264 bool SetCodeCompletionPoint(const FileEntry *File, 1265 unsigned Line, unsigned Column); 1266 1267 /// \brief Determine if we are performing code completion. isCodeCompletionEnabled()1268 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; } 1269 1270 /// \brief Returns the location of the code-completion point. 1271 /// 1272 /// Returns an invalid location if code-completion is not enabled or the file 1273 /// containing the code-completion point has not been lexed yet. getCodeCompletionLoc()1274 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; } 1275 1276 /// \brief Returns the start location of the file of code-completion point. 1277 /// 1278 /// Returns an invalid location if code-completion is not enabled or the file 1279 /// containing the code-completion point has not been lexed yet. getCodeCompletionFileLoc()1280 SourceLocation getCodeCompletionFileLoc() const { 1281 return CodeCompletionFileLoc; 1282 } 1283 1284 /// \brief Returns true if code-completion is enabled and we have hit the 1285 /// code-completion point. isCodeCompletionReached()1286 bool isCodeCompletionReached() const { return CodeCompletionReached; } 1287 1288 /// \brief Note that we hit the code-completion point. setCodeCompletionReached()1289 void setCodeCompletionReached() { 1290 assert(isCodeCompletionEnabled() && "Code-completion not enabled!"); 1291 CodeCompletionReached = true; 1292 // Silence any diagnostics that occur after we hit the code-completion. 1293 getDiagnostics().setSuppressAllDiagnostics(true); 1294 } 1295 1296 /// \brief The location of the currently-active \#pragma clang 1297 /// arc_cf_code_audited begin. 1298 /// 1299 /// Returns an invalid location if there is no such pragma active. getPragmaARCCFCodeAuditedLoc()1300 SourceLocation getPragmaARCCFCodeAuditedLoc() const { 1301 return PragmaARCCFCodeAuditedLoc; 1302 } 1303 1304 /// \brief Set the location of the currently-active \#pragma clang 1305 /// arc_cf_code_audited begin. An invalid location ends the pragma. setPragmaARCCFCodeAuditedLoc(SourceLocation Loc)1306 void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) { 1307 PragmaARCCFCodeAuditedLoc = Loc; 1308 } 1309 1310 /// \brief The location of the currently-active \#pragma clang 1311 /// assume_nonnull begin. 1312 /// 1313 /// Returns an invalid location if there is no such pragma active. getPragmaAssumeNonNullLoc()1314 SourceLocation getPragmaAssumeNonNullLoc() const { 1315 return PragmaAssumeNonNullLoc; 1316 } 1317 1318 /// \brief Set the location of the currently-active \#pragma clang 1319 /// assume_nonnull begin. An invalid location ends the pragma. setPragmaAssumeNonNullLoc(SourceLocation Loc)1320 void setPragmaAssumeNonNullLoc(SourceLocation Loc) { 1321 PragmaAssumeNonNullLoc = Loc; 1322 } 1323 1324 /// \brief Set the directory in which the main file should be considered 1325 /// to have been found, if it is not a real file. setMainFileDir(const DirectoryEntry * Dir)1326 void setMainFileDir(const DirectoryEntry *Dir) { 1327 MainFileDir = Dir; 1328 } 1329 1330 /// \brief Instruct the preprocessor to skip part of the main source file. 1331 /// 1332 /// \param Bytes The number of bytes in the preamble to skip. 1333 /// 1334 /// \param StartOfLine Whether skipping these bytes puts the lexer at the 1335 /// start of a line. setSkipMainFilePreamble(unsigned Bytes,bool StartOfLine)1336 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) { 1337 SkipMainFilePreamble.first = Bytes; 1338 SkipMainFilePreamble.second = StartOfLine; 1339 } 1340 1341 /// Forwarding function for diagnostics. This emits a diagnostic at 1342 /// the specified Token's location, translating the token's start 1343 /// position in the current buffer into a SourcePosition object for rendering. Diag(SourceLocation Loc,unsigned DiagID)1344 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const { 1345 return Diags->Report(Loc, DiagID); 1346 } 1347 Diag(const Token & Tok,unsigned DiagID)1348 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const { 1349 return Diags->Report(Tok.getLocation(), DiagID); 1350 } 1351 1352 /// Return the 'spelling' of the token at the given 1353 /// location; does not go up to the spelling location or down to the 1354 /// expansion location. 1355 /// 1356 /// \param buffer A buffer which will be used only if the token requires 1357 /// "cleaning", e.g. if it contains trigraphs or escaped newlines 1358 /// \param invalid If non-null, will be set \c true if an error occurs. 1359 StringRef getSpelling(SourceLocation loc, 1360 SmallVectorImpl<char> &buffer, 1361 bool *invalid = nullptr) const { 1362 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid); 1363 } 1364 1365 /// \brief Return the 'spelling' of the Tok token. 1366 /// 1367 /// The spelling of a token is the characters used to represent the token in 1368 /// the source file after trigraph expansion and escaped-newline folding. In 1369 /// particular, this wants to get the true, uncanonicalized, spelling of 1370 /// things like digraphs, UCNs, etc. 1371 /// 1372 /// \param Invalid If non-null, will be set \c true if an error occurs. 1373 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const { 1374 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid); 1375 } 1376 1377 /// \brief Get the spelling of a token into a preallocated buffer, instead 1378 /// of as an std::string. 1379 /// 1380 /// The caller is required to allocate enough space for the token, which is 1381 /// guaranteed to be at least Tok.getLength() bytes long. The length of the 1382 /// actual result is returned. 1383 /// 1384 /// Note that this method may do two possible things: it may either fill in 1385 /// the buffer specified with characters, or it may *change the input pointer* 1386 /// to point to a constant buffer with the data already in it (avoiding a 1387 /// copy). The caller is not allowed to modify the returned buffer pointer 1388 /// if an internal buffer is returned. 1389 unsigned getSpelling(const Token &Tok, const char *&Buffer, 1390 bool *Invalid = nullptr) const { 1391 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid); 1392 } 1393 1394 /// \brief Get the spelling of a token into a SmallVector. 1395 /// 1396 /// Note that the returned StringRef may not point to the 1397 /// supplied buffer if a copy can be avoided. 1398 StringRef getSpelling(const Token &Tok, 1399 SmallVectorImpl<char> &Buffer, 1400 bool *Invalid = nullptr) const; 1401 1402 /// \brief Relex the token at the specified location. 1403 /// \returns true if there was a failure, false on success. 1404 bool getRawToken(SourceLocation Loc, Token &Result, 1405 bool IgnoreWhiteSpace = false) { 1406 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace); 1407 } 1408 1409 /// \brief Given a Token \p Tok that is a numeric constant with length 1, 1410 /// return the character. 1411 char 1412 getSpellingOfSingleCharacterNumericConstant(const Token &Tok, 1413 bool *Invalid = nullptr) const { 1414 assert(Tok.is(tok::numeric_constant) && 1415 Tok.getLength() == 1 && "Called on unsupported token"); 1416 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1"); 1417 1418 // If the token is carrying a literal data pointer, just use it. 1419 if (const char *D = Tok.getLiteralData()) 1420 return *D; 1421 1422 // Otherwise, fall back on getCharacterData, which is slower, but always 1423 // works. 1424 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid); 1425 } 1426 1427 /// \brief Retrieve the name of the immediate macro expansion. 1428 /// 1429 /// This routine starts from a source location, and finds the name of the 1430 /// macro responsible for its immediate expansion. It looks through any 1431 /// intervening macro argument expansions to compute this. It returns a 1432 /// StringRef that refers to the SourceManager-owned buffer of the source 1433 /// where that macro name is spelled. Thus, the result shouldn't out-live 1434 /// the SourceManager. getImmediateMacroName(SourceLocation Loc)1435 StringRef getImmediateMacroName(SourceLocation Loc) { 1436 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts()); 1437 } 1438 1439 /// \brief Plop the specified string into a scratch buffer and set the 1440 /// specified token's location and length to it. 1441 /// 1442 /// If specified, the source location provides a location of the expansion 1443 /// point of the token. 1444 void CreateString(StringRef Str, Token &Tok, 1445 SourceLocation ExpansionLocStart = SourceLocation(), 1446 SourceLocation ExpansionLocEnd = SourceLocation()); 1447 1448 /// \brief Computes the source location just past the end of the 1449 /// token at this source location. 1450 /// 1451 /// This routine can be used to produce a source location that 1452 /// points just past the end of the token referenced by \p Loc, and 1453 /// is generally used when a diagnostic needs to point just after a 1454 /// token where it expected something different that it received. If 1455 /// the returned source location would not be meaningful (e.g., if 1456 /// it points into a macro), this routine returns an invalid 1457 /// source location. 1458 /// 1459 /// \param Offset an offset from the end of the token, where the source 1460 /// location should refer to. The default offset (0) produces a source 1461 /// location pointing just past the end of the token; an offset of 1 produces 1462 /// a source location pointing to the last character in the token, etc. 1463 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) { 1464 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts); 1465 } 1466 1467 /// \brief Returns true if the given MacroID location points at the first 1468 /// token of the macro expansion. 1469 /// 1470 /// \param MacroBegin If non-null and function returns true, it is set to 1471 /// begin location of the macro. 1472 bool isAtStartOfMacroExpansion(SourceLocation loc, 1473 SourceLocation *MacroBegin = nullptr) const { 1474 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts, 1475 MacroBegin); 1476 } 1477 1478 /// \brief Returns true if the given MacroID location points at the last 1479 /// token of the macro expansion. 1480 /// 1481 /// \param MacroEnd If non-null and function returns true, it is set to 1482 /// end location of the macro. 1483 bool isAtEndOfMacroExpansion(SourceLocation loc, 1484 SourceLocation *MacroEnd = nullptr) const { 1485 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd); 1486 } 1487 1488 /// \brief Print the token to stderr, used for debugging. 1489 void DumpToken(const Token &Tok, bool DumpFlags = false) const; 1490 void DumpLocation(SourceLocation Loc) const; 1491 void DumpMacro(const MacroInfo &MI) const; 1492 void dumpMacroInfo(const IdentifierInfo *II); 1493 1494 /// \brief Given a location that specifies the start of a 1495 /// token, return a new location that specifies a character within the token. AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char)1496 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, 1497 unsigned Char) const { 1498 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts); 1499 } 1500 1501 /// \brief Increment the counters for the number of token paste operations 1502 /// performed. 1503 /// 1504 /// If fast was specified, this is a 'fast paste' case we handled. IncrementPasteCounter(bool isFast)1505 void IncrementPasteCounter(bool isFast) { 1506 if (isFast) 1507 ++NumFastTokenPaste; 1508 else 1509 ++NumTokenPaste; 1510 } 1511 1512 void PrintStats(); 1513 1514 size_t getTotalMemory() const; 1515 1516 /// When the macro expander pastes together a comment (/##/) in Microsoft 1517 /// mode, this method handles updating the current state, returning the 1518 /// token on the next source line. 1519 void HandleMicrosoftCommentPaste(Token &Tok); 1520 1521 //===--------------------------------------------------------------------===// 1522 // Preprocessor callback methods. These are invoked by a lexer as various 1523 // directives and events are found. 1524 1525 /// Given a tok::raw_identifier token, look up the 1526 /// identifier information for the token and install it into the token, 1527 /// updating the token kind accordingly. 1528 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const; 1529 1530 private: 1531 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons; 1532 1533 public: 1534 1535 /// \brief Specifies the reason for poisoning an identifier. 1536 /// 1537 /// If that identifier is accessed while poisoned, then this reason will be 1538 /// used instead of the default "poisoned" diagnostic. 1539 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID); 1540 1541 /// \brief Display reason for poisoned identifier. 1542 void HandlePoisonedIdentifier(Token & Tok); 1543 MaybeHandlePoisonedIdentifier(Token & Identifier)1544 void MaybeHandlePoisonedIdentifier(Token & Identifier) { 1545 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) { 1546 if(II->isPoisoned()) { 1547 HandlePoisonedIdentifier(Identifier); 1548 } 1549 } 1550 } 1551 1552 private: 1553 /// Identifiers used for SEH handling in Borland. These are only 1554 /// allowed in particular circumstances 1555 // __except block 1556 IdentifierInfo *Ident__exception_code, 1557 *Ident___exception_code, 1558 *Ident_GetExceptionCode; 1559 // __except filter expression 1560 IdentifierInfo *Ident__exception_info, 1561 *Ident___exception_info, 1562 *Ident_GetExceptionInfo; 1563 // __finally 1564 IdentifierInfo *Ident__abnormal_termination, 1565 *Ident___abnormal_termination, 1566 *Ident_AbnormalTermination; 1567 1568 const char *getCurLexerEndPos(); 1569 1570 public: 1571 void PoisonSEHIdentifiers(bool Poison = true); // Borland 1572 1573 /// \brief Callback invoked when the lexer reads an identifier and has 1574 /// filled in the tokens IdentifierInfo member. 1575 /// 1576 /// This callback potentially macro expands it or turns it into a named 1577 /// token (like 'for'). 1578 /// 1579 /// \returns true if we actually computed a token, false if we need to 1580 /// lex again. 1581 bool HandleIdentifier(Token &Identifier); 1582 1583 1584 /// \brief Callback invoked when the lexer hits the end of the current file. 1585 /// 1586 /// This either returns the EOF token and returns true, or 1587 /// pops a level off the include stack and returns false, at which point the 1588 /// client should call lex again. 1589 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false); 1590 1591 /// \brief Callback invoked when the current TokenLexer hits the end of its 1592 /// token stream. 1593 bool HandleEndOfTokenLexer(Token &Result); 1594 1595 /// \brief Callback invoked when the lexer sees a # token at the start of a 1596 /// line. 1597 /// 1598 /// This consumes the directive, modifies the lexer/preprocessor state, and 1599 /// advances the lexer(s) so that the next token read is the correct one. 1600 void HandleDirective(Token &Result); 1601 1602 /// \brief Ensure that the next token is a tok::eod token. 1603 /// 1604 /// If not, emit a diagnostic and consume up until the eod. 1605 /// If \p EnableMacros is true, then we consider macros that expand to zero 1606 /// tokens as being ok. 1607 void CheckEndOfDirective(const char *Directive, bool EnableMacros = false); 1608 1609 /// \brief Read and discard all tokens remaining on the current line until 1610 /// the tok::eod token is found. 1611 void DiscardUntilEndOfDirective(); 1612 1613 /// \brief Returns true if the preprocessor has seen a use of 1614 /// __DATE__ or __TIME__ in the file so far. SawDateOrTime()1615 bool SawDateOrTime() const { 1616 return DATELoc != SourceLocation() || TIMELoc != SourceLocation(); 1617 } getCounterValue()1618 unsigned getCounterValue() const { return CounterValue; } setCounterValue(unsigned V)1619 void setCounterValue(unsigned V) { CounterValue = V; } 1620 1621 /// \brief Retrieves the module that we're currently building, if any. 1622 Module *getCurrentModule(); 1623 1624 /// \brief Allocate a new MacroInfo object with the provided SourceLocation. 1625 MacroInfo *AllocateMacroInfo(SourceLocation L); 1626 1627 /// \brief Allocate a new MacroInfo object loaded from an AST file. 1628 MacroInfo *AllocateDeserializedMacroInfo(SourceLocation L, 1629 unsigned SubModuleID); 1630 1631 /// \brief Turn the specified lexer token into a fully checked and spelled 1632 /// filename, e.g. as an operand of \#include. 1633 /// 1634 /// The caller is expected to provide a buffer that is large enough to hold 1635 /// the spelling of the filename, but is also expected to handle the case 1636 /// when this method decides to use a different buffer. 1637 /// 1638 /// \returns true if the input filename was in <>'s or false if it was 1639 /// in ""'s. 1640 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Filename); 1641 1642 /// \brief Given a "foo" or \<foo> reference, look up the indicated file. 1643 /// 1644 /// Returns null on failure. \p isAngled indicates whether the file 1645 /// reference is for system \#include's or not (i.e. using <> instead of ""). 1646 const FileEntry *LookupFile(SourceLocation FilenameLoc, StringRef Filename, 1647 bool isAngled, const DirectoryLookup *FromDir, 1648 const FileEntry *FromFile, 1649 const DirectoryLookup *&CurDir, 1650 SmallVectorImpl<char> *SearchPath, 1651 SmallVectorImpl<char> *RelativePath, 1652 ModuleMap::KnownHeader *SuggestedModule, 1653 bool SkipCache = false); 1654 1655 /// \brief Get the DirectoryLookup structure used to find the current 1656 /// FileEntry, if CurLexer is non-null and if applicable. 1657 /// 1658 /// This allows us to implement \#include_next and find directory-specific 1659 /// properties. GetCurDirLookup()1660 const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; } 1661 1662 /// \brief Return true if we're in the top-level file, not in a \#include. 1663 bool isInPrimaryFile() const; 1664 1665 /// \brief Handle cases where the \#include name is expanded 1666 /// from a macro as multiple tokens, which need to be glued together. 1667 /// 1668 /// This occurs for code like: 1669 /// \code 1670 /// \#define FOO <x/y.h> 1671 /// \#include FOO 1672 /// \endcode 1673 /// because in this case, "<x/y.h>" is returned as 7 tokens, not one. 1674 /// 1675 /// This code concatenates and consumes tokens up to the '>' token. It 1676 /// returns false if the > was found, otherwise it returns true if it finds 1677 /// and consumes the EOD marker. 1678 bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer, 1679 SourceLocation &End); 1680 1681 /// \brief Lex an on-off-switch (C99 6.10.6p2) and verify that it is 1682 /// followed by EOD. Return true if the token is not a valid on-off-switch. 1683 bool LexOnOffSwitch(tok::OnOffSwitch &OOS); 1684 1685 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, 1686 bool *ShadowFlag = nullptr); 1687 1688 private: 1689 PushIncludeMacroStack()1690 void PushIncludeMacroStack() { 1691 assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer"); 1692 IncludeMacroStack.emplace_back( 1693 CurLexerKind, CurSubmodule, std::move(CurLexer), std::move(CurPTHLexer), 1694 CurPPLexer, std::move(CurTokenLexer), CurDirLookup); 1695 CurPPLexer = nullptr; 1696 } 1697 PopIncludeMacroStack()1698 void PopIncludeMacroStack() { 1699 CurLexer = std::move(IncludeMacroStack.back().TheLexer); 1700 CurPTHLexer = std::move(IncludeMacroStack.back().ThePTHLexer); 1701 CurPPLexer = IncludeMacroStack.back().ThePPLexer; 1702 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer); 1703 CurDirLookup = IncludeMacroStack.back().TheDirLookup; 1704 CurSubmodule = IncludeMacroStack.back().TheSubmodule; 1705 CurLexerKind = IncludeMacroStack.back().CurLexerKind; 1706 IncludeMacroStack.pop_back(); 1707 } 1708 1709 void PropagateLineStartLeadingSpaceInfo(Token &Result); 1710 1711 void EnterSubmodule(Module *M, SourceLocation ImportLoc); 1712 void LeaveSubmodule(); 1713 1714 /// Determine whether we need to create module macros for #defines in the 1715 /// current context. 1716 bool needModuleMacros() const; 1717 1718 /// Update the set of active module macros and ambiguity flag for a module 1719 /// macro name. 1720 void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info); 1721 1722 /// \brief Allocate a new MacroInfo object. 1723 MacroInfo *AllocateMacroInfo(); 1724 1725 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI, 1726 SourceLocation Loc); 1727 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc); 1728 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc, 1729 bool isPublic); 1730 1731 /// \brief Lex and validate a macro name, which occurs after a 1732 /// \#define or \#undef. 1733 /// 1734 /// \param MacroNameTok Token that represents the name defined or undefined. 1735 /// \param IsDefineUndef Kind if preprocessor directive. 1736 /// \param ShadowFlag Points to flag that is set if macro name shadows 1737 /// a keyword. 1738 /// 1739 /// This emits a diagnostic, sets the token kind to eod, 1740 /// and discards the rest of the macro line if the macro name is invalid. 1741 void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other, 1742 bool *ShadowFlag = nullptr); 1743 1744 /// The ( starting an argument list of a macro definition has just been read. 1745 /// Lex the rest of the arguments and the closing ), updating \p MI with 1746 /// what we learn and saving in \p LastTok the last token read. 1747 /// Return true if an error occurs parsing the arg list. 1748 bool ReadMacroDefinitionArgList(MacroInfo *MI, Token& LastTok); 1749 1750 /// We just read a \#if or related directive and decided that the 1751 /// subsequent tokens are in the \#if'd out portion of the 1752 /// file. Lex the rest of the file, until we see an \#endif. If \p 1753 /// FoundNonSkipPortion is true, then we have already emitted code for part of 1754 /// this \#if directive, so \#else/\#elif blocks should never be entered. If 1755 /// \p FoundElse is false, then \#else directives are ok, if not, then we have 1756 /// already seen one so a \#else directive is a duplicate. When this returns, 1757 /// the caller can lex the first valid token. 1758 void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, 1759 bool FoundNonSkipPortion, bool FoundElse, 1760 SourceLocation ElseLoc = SourceLocation()); 1761 1762 /// \brief A fast PTH version of SkipExcludedConditionalBlock. 1763 void PTHSkipExcludedConditionalBlock(); 1764 1765 /// \brief Evaluate an integer constant expression that may occur after a 1766 /// \#if or \#elif directive and return it as a bool. 1767 /// 1768 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro. 1769 bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro); 1770 1771 /// \brief Install the standard preprocessor pragmas: 1772 /// \#pragma GCC poison/system_header/dependency and \#pragma once. 1773 void RegisterBuiltinPragmas(); 1774 1775 /// \brief Register builtin macros such as __LINE__ with the identifier table. 1776 void RegisterBuiltinMacros(); 1777 1778 /// If an identifier token is read that is to be expanded as a macro, handle 1779 /// it and return the next token as 'Tok'. If we lexed a token, return true; 1780 /// otherwise the caller should lex again. 1781 bool HandleMacroExpandedIdentifier(Token &Tok, const MacroDefinition &MD); 1782 1783 /// \brief Cache macro expanded tokens for TokenLexers. 1784 // 1785 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 1786 /// going to lex in the cache and when it finishes the tokens are removed 1787 /// from the end of the cache. 1788 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer, 1789 ArrayRef<Token> tokens); 1790 void removeCachedMacroExpandedTokensOfLastLexer(); 1791 friend void TokenLexer::ExpandFunctionArguments(); 1792 1793 /// Determine whether the next preprocessor token to be 1794 /// lexed is a '('. If so, consume the token and return true, if not, this 1795 /// method should have no observable side-effect on the lexed tokens. 1796 bool isNextPPTokenLParen(); 1797 1798 /// After reading "MACRO(", this method is invoked to read all of the formal 1799 /// arguments specified for the macro invocation. Returns null on error. 1800 MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI, 1801 SourceLocation &ExpansionEnd); 1802 1803 /// \brief If an identifier token is read that is to be expanded 1804 /// as a builtin macro, handle it and return the next token as 'Tok'. 1805 void ExpandBuiltinMacro(Token &Tok); 1806 1807 /// \brief Read a \c _Pragma directive, slice it up, process it, then 1808 /// return the first token after the directive. 1809 /// This assumes that the \c _Pragma token has just been read into \p Tok. 1810 void Handle_Pragma(Token &Tok); 1811 1812 /// \brief Like Handle_Pragma except the pragma text is not enclosed within 1813 /// a string literal. 1814 void HandleMicrosoft__pragma(Token &Tok); 1815 1816 /// \brief Add a lexer to the top of the include stack and 1817 /// start lexing tokens from it instead of the current buffer. 1818 void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir); 1819 1820 /// \brief Add a lexer to the top of the include stack and 1821 /// start getting tokens from it using the PTH cache. 1822 void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir); 1823 1824 /// \brief Set the FileID for the preprocessor predefines. setPredefinesFileID(FileID FID)1825 void setPredefinesFileID(FileID FID) { 1826 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!"); 1827 PredefinesFileID = FID; 1828 } 1829 1830 /// \brief Returns true if we are lexing from a file and not a 1831 /// pragma or a macro. IsFileLexer(const Lexer * L,const PreprocessorLexer * P)1832 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) { 1833 return L ? !L->isPragmaLexer() : P != nullptr; 1834 } 1835 IsFileLexer(const IncludeStackInfo & I)1836 static bool IsFileLexer(const IncludeStackInfo& I) { 1837 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer); 1838 } 1839 IsFileLexer()1840 bool IsFileLexer() const { 1841 return IsFileLexer(CurLexer.get(), CurPPLexer); 1842 } 1843 1844 //===--------------------------------------------------------------------===// 1845 // Caching stuff. 1846 void CachingLex(Token &Result); InCachingLexMode()1847 bool InCachingLexMode() const { 1848 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means 1849 // that we are past EOF, not that we are in CachingLex mode. 1850 return !CurPPLexer && !CurTokenLexer && !CurPTHLexer && 1851 !IncludeMacroStack.empty(); 1852 } 1853 void EnterCachingLexMode(); ExitCachingLexMode()1854 void ExitCachingLexMode() { 1855 if (InCachingLexMode()) 1856 RemoveTopOfLexerStack(); 1857 } 1858 const Token &PeekAhead(unsigned N); 1859 void AnnotatePreviousCachedTokens(const Token &Tok); 1860 1861 //===--------------------------------------------------------------------===// 1862 /// Handle*Directive - implement the various preprocessor directives. These 1863 /// should side-effect the current preprocessor object so that the next call 1864 /// to Lex() will return the appropriate token next. 1865 void HandleLineDirective(Token &Tok); 1866 void HandleDigitDirective(Token &Tok); 1867 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning); 1868 void HandleIdentSCCSDirective(Token &Tok); 1869 void HandleMacroPublicDirective(Token &Tok); 1870 void HandleMacroPrivateDirective(Token &Tok); 1871 1872 // File inclusion. 1873 void HandleIncludeDirective(SourceLocation HashLoc, 1874 Token &Tok, 1875 const DirectoryLookup *LookupFrom = nullptr, 1876 const FileEntry *LookupFromFile = nullptr, 1877 bool isImport = false); 1878 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok); 1879 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok); 1880 void HandleImportDirective(SourceLocation HashLoc, Token &Tok); 1881 void HandleMicrosoftImportDirective(Token &Tok); 1882 1883 public: 1884 // Module inclusion testing. 1885 /// \brief Find the module that owns the source or header file that 1886 /// \p Loc points to. If the location is in a file that was included 1887 /// into a module, or is outside any module, returns nullptr. 1888 Module *getModuleForLocation(SourceLocation Loc); 1889 1890 /// \brief Find the module that contains the specified location, either 1891 /// directly or indirectly. 1892 Module *getModuleContainingLocation(SourceLocation Loc); 1893 1894 /// \brief We want to produce a diagnostic at location IncLoc concerning a 1895 /// missing module import. 1896 /// 1897 /// \param IncLoc The location at which the missing import was detected. 1898 /// \param MLoc A location within the desired module at which some desired 1899 /// effect occurred (eg, where a desired entity was declared). 1900 /// 1901 /// \return A file that can be #included to import a module containing MLoc. 1902 /// Null if no such file could be determined or if a #include is not 1903 /// appropriate. 1904 const FileEntry *getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc, 1905 SourceLocation MLoc); 1906 1907 private: 1908 // Macro handling. 1909 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterTopLevelIfndef); 1910 void HandleUndefDirective(Token &Tok); 1911 1912 // Conditional Inclusion. 1913 void HandleIfdefDirective(Token &Tok, bool isIfndef, 1914 bool ReadAnyTokensBeforeDirective); 1915 void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective); 1916 void HandleEndifDirective(Token &Tok); 1917 void HandleElseDirective(Token &Tok); 1918 void HandleElifDirective(Token &Tok); 1919 1920 // Pragmas. 1921 void HandlePragmaDirective(SourceLocation IntroducerLoc, 1922 PragmaIntroducerKind Introducer); 1923 public: 1924 void HandlePragmaOnce(Token &OnceTok); 1925 void HandlePragmaMark(); 1926 void HandlePragmaPoison(Token &PoisonTok); 1927 void HandlePragmaSystemHeader(Token &SysHeaderTok); 1928 void HandlePragmaDependency(Token &DependencyTok); 1929 void HandlePragmaPushMacro(Token &Tok); 1930 void HandlePragmaPopMacro(Token &Tok); 1931 void HandlePragmaIncludeAlias(Token &Tok); 1932 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok); 1933 1934 // Return true and store the first token only if any CommentHandler 1935 // has inserted some tokens and getCommentRetentionState() is false. 1936 bool HandleComment(Token &Token, SourceRange Comment); 1937 1938 /// \brief A macro is used, update information about macros that need unused 1939 /// warnings. 1940 void markMacroAsUsed(MacroInfo *MI); 1941 }; 1942 1943 /// \brief Abstract base class that describes a handler that will receive 1944 /// source ranges for each of the comments encountered in the source file. 1945 class CommentHandler { 1946 public: 1947 virtual ~CommentHandler(); 1948 1949 // The handler shall return true if it has pushed any tokens 1950 // to be read using e.g. EnterToken or EnterTokenStream. 1951 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0; 1952 }; 1953 1954 /// \brief Registry of pragma handlers added by plugins 1955 typedef llvm::Registry<PragmaHandler> PragmaHandlerRegistry; 1956 1957 } // end namespace clang 1958 1959 extern template class llvm::Registry<clang::PragmaHandler>; 1960 1961 #endif 1962