1 //===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief Defines the clang::Preprocessor interface. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H 16 #define LLVM_CLANG_LEX_PREPROCESSOR_H 17 18 #include "clang/Basic/Builtins.h" 19 #include "clang/Basic/Diagnostic.h" 20 #include "clang/Basic/IdentifierTable.h" 21 #include "clang/Basic/SourceLocation.h" 22 #include "clang/Lex/Lexer.h" 23 #include "clang/Lex/MacroInfo.h" 24 #include "clang/Lex/ModuleMap.h" 25 #include "clang/Lex/PPCallbacks.h" 26 #include "clang/Lex/PTHLexer.h" 27 #include "clang/Lex/TokenLexer.h" 28 #include "llvm/ADT/ArrayRef.h" 29 #include "llvm/ADT/DenseMap.h" 30 #include "llvm/ADT/IntrusiveRefCntPtr.h" 31 #include "llvm/ADT/SmallPtrSet.h" 32 #include "llvm/ADT/SmallVector.h" 33 #include "llvm/ADT/TinyPtrVector.h" 34 #include "llvm/Support/Allocator.h" 35 #include <memory> 36 #include <vector> 37 38 namespace llvm { 39 template<unsigned InternalLen> class SmallString; 40 } 41 42 namespace clang { 43 44 class SourceManager; 45 class ExternalPreprocessorSource; 46 class FileManager; 47 class FileEntry; 48 class HeaderSearch; 49 class PragmaNamespace; 50 class PragmaHandler; 51 class CommentHandler; 52 class ScratchBuffer; 53 class TargetInfo; 54 class PPCallbacks; 55 class CodeCompletionHandler; 56 class DirectoryLookup; 57 class PreprocessingRecord; 58 class ModuleLoader; 59 class PTHManager; 60 class PreprocessorOptions; 61 62 /// \brief Stores token information for comparing actual tokens with 63 /// predefined values. Only handles simple tokens and identifiers. 
64 class TokenValue { 65 tok::TokenKind Kind; 66 IdentifierInfo *II; 67 68 public: TokenValue(tok::TokenKind Kind)69 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) { 70 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported."); 71 assert(Kind != tok::identifier && 72 "Identifiers should be created by TokenValue(IdentifierInfo *)"); 73 assert(!tok::isLiteral(Kind) && "Literals are not supported."); 74 assert(!tok::isAnnotation(Kind) && "Annotations are not supported."); 75 } TokenValue(IdentifierInfo * II)76 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {} 77 bool operator==(const Token &Tok) const { 78 return Tok.getKind() == Kind && 79 (!II || II == Tok.getIdentifierInfo()); 80 } 81 }; 82 83 /// \brief Context in which macro name is used. 84 enum MacroUse { 85 MU_Other = 0, // other than #define or #undef 86 MU_Define = 1, // macro name specified in #define 87 MU_Undef = 2 // macro name specified in #undef 88 }; 89 90 /// \brief Engages in a tight little dance with the lexer to efficiently 91 /// preprocess tokens. 92 /// 93 /// Lexers know only about tokens within a single source file, and don't 94 /// know anything about preprocessor-level issues like the \#include stack, 95 /// token expansion, etc. 96 class Preprocessor : public RefCountedBase<Preprocessor> { 97 IntrusiveRefCntPtr<PreprocessorOptions> PPOpts; 98 DiagnosticsEngine *Diags; 99 LangOptions &LangOpts; 100 const TargetInfo *Target; 101 const TargetInfo *AuxTarget; 102 FileManager &FileMgr; 103 SourceManager &SourceMgr; 104 std::unique_ptr<ScratchBuffer> ScratchBuf; 105 HeaderSearch &HeaderInfo; 106 ModuleLoader &TheModuleLoader; 107 108 /// \brief External source of macros. 109 ExternalPreprocessorSource *ExternalSource; 110 111 112 /// An optional PTHManager object used for getting tokens from 113 /// a token cache rather than lexing the original source file. 
114 std::unique_ptr<PTHManager> PTH; 115 116 /// A BumpPtrAllocator object used to quickly allocate and release 117 /// objects internal to the Preprocessor. 118 llvm::BumpPtrAllocator BP; 119 120 /// Identifiers for builtin macros and other builtins. 121 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__ 122 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__ 123 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__ 124 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__ 125 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__ 126 IdentifierInfo *Ident__COUNTER__; // __COUNTER__ 127 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma 128 IdentifierInfo *Ident__identifier; // __identifier 129 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__ 130 IdentifierInfo *Ident__has_feature; // __has_feature 131 IdentifierInfo *Ident__has_extension; // __has_extension 132 IdentifierInfo *Ident__has_builtin; // __has_builtin 133 IdentifierInfo *Ident__has_attribute; // __has_attribute 134 IdentifierInfo *Ident__has_include; // __has_include 135 IdentifierInfo *Ident__has_include_next; // __has_include_next 136 IdentifierInfo *Ident__has_warning; // __has_warning 137 IdentifierInfo *Ident__is_identifier; // __is_identifier 138 IdentifierInfo *Ident__building_module; // __building_module 139 IdentifierInfo *Ident__MODULE__; // __MODULE__ 140 IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute 141 IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute 142 143 SourceLocation DATELoc, TIMELoc; 144 unsigned CounterValue; // Next __COUNTER__ value. 145 146 enum { 147 /// \brief Maximum depth of \#includes. 148 MaxAllowedIncludeStackDepth = 200 149 }; 150 151 // State that is set before the preprocessor begins. 
152 bool KeepComments : 1; 153 bool KeepMacroComments : 1; 154 bool SuppressIncludeNotFoundError : 1; 155 156 // State that changes while the preprocessor runs: 157 bool InMacroArgs : 1; // True if parsing fn macro invocation args. 158 159 /// Whether the preprocessor owns the header search object. 160 bool OwnsHeaderSearch : 1; 161 162 /// True if macro expansion is disabled. 163 bool DisableMacroExpansion : 1; 164 165 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion) 166 /// when parsing preprocessor directives. 167 bool MacroExpansionInDirectivesOverride : 1; 168 169 class ResetMacroExpansionHelper; 170 171 /// \brief Whether we have already loaded macros from the external source. 172 mutable bool ReadMacrosFromExternalSource : 1; 173 174 /// \brief True if pragmas are enabled. 175 bool PragmasEnabled : 1; 176 177 /// \brief True if the current build action is a preprocessing action. 178 bool PreprocessedOutput : 1; 179 180 /// \brief True if we are currently preprocessing a #if or #elif directive. 181 bool ParsingIfOrElifDirective; 182 183 /// \brief True if we are pre-expanding macro arguments. 184 bool InMacroArgPreExpansion; 185 186 /// \brief Mapping/lookup information for all identifiers in 187 /// the program, including program keywords. 188 mutable IdentifierTable Identifiers; 189 190 /// \brief This table contains all the selectors in the program. 191 /// 192 /// Unlike IdentifierTable above, this table *isn't* populated by the 193 /// preprocessor. It is declared/expanded here because its role/lifetime is 194 /// conceptually similar to the IdentifierTable. In addition, the current 195 /// control flow (in clang::ParseAST()) makes it convenient to put it here. 196 /// 197 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to 198 /// the lifetime of the preprocessor. 199 SelectorTable Selectors; 200 201 /// \brief Information about builtins. 
202 Builtin::Context BuiltinInfo; 203 204 /// \brief Tracks all of the pragmas that the client registered 205 /// with this preprocessor. 206 std::unique_ptr<PragmaNamespace> PragmaHandlers; 207 208 /// \brief Pragma handlers of the original source are stored here during the 209 /// parsing of a model file. 210 std::unique_ptr<PragmaNamespace> PragmaHandlersBackup; 211 212 /// \brief Tracks all of the comment handlers that the client registered 213 /// with this preprocessor. 214 std::vector<CommentHandler *> CommentHandlers; 215 216 /// \brief True if we want to ignore EOF token and continue later on (thus 217 /// avoid tearing down the Lexer etc.). 218 bool IncrementalProcessing; 219 220 /// The kind of translation unit we are processing. 221 TranslationUnitKind TUKind; 222 223 /// \brief The code-completion handler. 224 CodeCompletionHandler *CodeComplete; 225 226 /// \brief The file that we're performing code-completion for, if any. 227 const FileEntry *CodeCompletionFile; 228 229 /// \brief The offset in file for the code-completion point. 230 unsigned CodeCompletionOffset; 231 232 /// \brief The location for the code-completion point. This gets instantiated 233 /// when the CodeCompletionFile gets \#include'ed for preprocessing. 234 SourceLocation CodeCompletionLoc; 235 236 /// \brief The start location for the file of the code-completion point. 237 /// 238 /// This gets instantiated when the CodeCompletionFile gets \#include'ed 239 /// for preprocessing. 240 SourceLocation CodeCompletionFileLoc; 241 242 /// \brief The source location of the \c import contextual keyword we just 243 /// lexed, if any. 244 SourceLocation ModuleImportLoc; 245 246 /// \brief The module import path that we're currently processing. 247 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath; 248 249 /// \brief Whether the last token we lexed was an '@'. 250 bool LastTokenWasAt; 251 252 /// \brief Whether the module import expects an identifier next.
 Otherwise, 253 /// it expects a '.' or ';'. 254 bool ModuleImportExpectsIdentifier; 255 256 /// \brief The source location of the currently-active 257 /// \#pragma clang arc_cf_code_audited begin. 258 SourceLocation PragmaARCCFCodeAuditedLoc; 259 260 /// \brief The source location of the currently-active 261 /// \#pragma clang assume_nonnull begin. 262 SourceLocation PragmaAssumeNonNullLoc; 263 264 /// \brief True if we hit the code-completion point. 265 bool CodeCompletionReached; 266 267 /// \brief The directory that the main file should be considered to occupy, 268 /// if it does not correspond to a real file (as happens when building a 269 /// module). 270 const DirectoryEntry *MainFileDir; 271 272 /// \brief The number of bytes that we will initially skip when entering the 273 /// main file, along with a flag that indicates whether skipping this number 274 /// of bytes will place the lexer at the start of a line. 275 /// 276 /// This is used when loading a precompiled preamble. 277 std::pair<int, bool> SkipMainFilePreamble; 278 279 /// \brief The current top of the stack that we're lexing from if 280 /// not expanding a macro and we are lexing directly from source code. 281 /// 282 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null. 283 std::unique_ptr<Lexer> CurLexer; 284 285 /// \brief The current top of the stack that we're lexing from if 286 /// not expanding from a macro and we are lexing from a PTH cache. 287 /// 288 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null. 289 std::unique_ptr<PTHLexer> CurPTHLexer; 290 291 /// \brief The current top of the stack that we're lexing from 292 /// if not expanding a macro. 293 /// 294 /// This is an alias for either CurLexer or CurPTHLexer. 295 PreprocessorLexer *CurPPLexer; 296 297 /// \brief Used to find the current FileEntry, if CurLexer is non-null 298 /// and if applicable. 
299 /// 300 /// This allows us to implement \#include_next and find directory-specific 301 /// properties. 302 const DirectoryLookup *CurDirLookup; 303 304 /// \brief The current macro we are expanding, if we are expanding a macro. 305 /// 306 /// One of CurLexer and CurTokenLexer must be null. 307 std::unique_ptr<TokenLexer> CurTokenLexer; 308 309 /// \brief The kind of lexer we're currently working with. 310 enum CurLexerKind { 311 CLK_Lexer, 312 CLK_PTHLexer, 313 CLK_TokenLexer, 314 CLK_CachingLexer, 315 CLK_LexAfterModuleImport 316 } CurLexerKind; 317 318 /// \brief If the current lexer is for a submodule that is being built, this 319 /// is that submodule. 320 Module *CurSubmodule; 321 322 /// \brief Keeps track of the stack of files currently 323 /// \#included, and macros currently being expanded from, not counting 324 /// CurLexer/CurTokenLexer. 325 struct IncludeStackInfo { 326 enum CurLexerKind CurLexerKind; 327 Module *TheSubmodule; 328 std::unique_ptr<Lexer> TheLexer; 329 std::unique_ptr<PTHLexer> ThePTHLexer; 330 PreprocessorLexer *ThePPLexer; 331 std::unique_ptr<TokenLexer> TheTokenLexer; 332 const DirectoryLookup *TheDirLookup; 333 334 // The following constructors are completely useless copies of the default 335 // versions, only needed to pacify MSVC. 
IncludeStackInfoIncludeStackInfo336 IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule, 337 std::unique_ptr<Lexer> &&TheLexer, 338 std::unique_ptr<PTHLexer> &&ThePTHLexer, 339 PreprocessorLexer *ThePPLexer, 340 std::unique_ptr<TokenLexer> &&TheTokenLexer, 341 const DirectoryLookup *TheDirLookup) 342 : CurLexerKind(std::move(CurLexerKind)), 343 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)), 344 ThePTHLexer(std::move(ThePTHLexer)), 345 ThePPLexer(std::move(ThePPLexer)), 346 TheTokenLexer(std::move(TheTokenLexer)), 347 TheDirLookup(std::move(TheDirLookup)) {} IncludeStackInfoIncludeStackInfo348 IncludeStackInfo(IncludeStackInfo &&RHS) 349 : CurLexerKind(std::move(RHS.CurLexerKind)), 350 TheSubmodule(std::move(RHS.TheSubmodule)), 351 TheLexer(std::move(RHS.TheLexer)), 352 ThePTHLexer(std::move(RHS.ThePTHLexer)), 353 ThePPLexer(std::move(RHS.ThePPLexer)), 354 TheTokenLexer(std::move(RHS.TheTokenLexer)), 355 TheDirLookup(std::move(RHS.TheDirLookup)) {} 356 }; 357 std::vector<IncludeStackInfo> IncludeMacroStack; 358 359 /// \brief Actions invoked when some preprocessor activity is 360 /// encountered (e.g. a file is \#included, etc). 361 std::unique_ptr<PPCallbacks> Callbacks; 362 363 struct MacroExpandsInfo { 364 Token Tok; 365 MacroDefinition MD; 366 SourceRange Range; MacroExpandsInfoMacroExpandsInfo367 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range) 368 : Tok(Tok), MD(MD), Range(Range) { } 369 }; 370 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks; 371 372 /// Information about a name that has been used to define a module macro. 373 struct ModuleMacroInfo { ModuleMacroInfoModuleMacroInfo374 ModuleMacroInfo(MacroDirective *MD) 375 : MD(MD), ActiveModuleMacrosGeneration(0), IsAmbiguous(false) {} 376 377 /// The most recent macro directive for this identifier. 378 MacroDirective *MD; 379 /// The active module macros for this identifier. 
380 llvm::TinyPtrVector<ModuleMacro*> ActiveModuleMacros; 381 /// The generation number at which we last updated ActiveModuleMacros. 382 /// \see Preprocessor::VisibleModules. 383 unsigned ActiveModuleMacrosGeneration; 384 /// Whether this macro name is ambiguous. 385 bool IsAmbiguous; 386 /// The module macros that are overridden by this macro. 387 llvm::TinyPtrVector<ModuleMacro*> OverriddenMacros; 388 }; 389 390 /// The state of a macro for an identifier. 391 class MacroState { 392 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State; 393 getModuleInfo(Preprocessor & PP,const IdentifierInfo * II)394 ModuleMacroInfo *getModuleInfo(Preprocessor &PP, 395 const IdentifierInfo *II) const { 396 // FIXME: Find a spare bit on IdentifierInfo and store a 397 // HasModuleMacros flag. 398 if (!II->hasMacroDefinition() || 399 (!PP.getLangOpts().Modules && 400 !PP.getLangOpts().ModulesLocalVisibility) || 401 !PP.CurSubmoduleState->VisibleModules.getGeneration()) 402 return nullptr; 403 404 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 405 if (!Info) { 406 Info = new (PP.getPreprocessorAllocator()) 407 ModuleMacroInfo(State.get<MacroDirective *>()); 408 State = Info; 409 } 410 411 if (PP.CurSubmoduleState->VisibleModules.getGeneration() != 412 Info->ActiveModuleMacrosGeneration) 413 PP.updateModuleMacroInfo(II, *Info); 414 return Info; 415 } 416 417 public: MacroState()418 MacroState() : MacroState(nullptr) {} MacroState(MacroDirective * MD)419 MacroState(MacroDirective *MD) : State(MD) {} MacroState(MacroState && O)420 MacroState(MacroState &&O) LLVM_NOEXCEPT : State(O.State) { 421 O.State = (MacroDirective *)nullptr; 422 } 423 MacroState &operator=(MacroState &&O) LLVM_NOEXCEPT { 424 auto S = O.State; 425 O.State = (MacroDirective *)nullptr; 426 State = S; 427 return *this; 428 } ~MacroState()429 ~MacroState() { 430 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 431 Info->~ModuleMacroInfo(); 432 } 433 getLatest()434 MacroDirective *getLatest() const 
{ 435 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 436 return Info->MD; 437 return State.get<MacroDirective*>(); 438 } setLatest(MacroDirective * MD)439 void setLatest(MacroDirective *MD) { 440 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 441 Info->MD = MD; 442 else 443 State = MD; 444 } 445 isAmbiguous(Preprocessor & PP,const IdentifierInfo * II)446 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const { 447 auto *Info = getModuleInfo(PP, II); 448 return Info ? Info->IsAmbiguous : false; 449 } 450 ArrayRef<ModuleMacro *> getActiveModuleMacros(Preprocessor & PP,const IdentifierInfo * II)451 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const { 452 if (auto *Info = getModuleInfo(PP, II)) 453 return Info->ActiveModuleMacros; 454 return None; 455 } 456 findDirectiveAtLoc(SourceLocation Loc,SourceManager & SourceMgr)457 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc, 458 SourceManager &SourceMgr) const { 459 // FIXME: Incorporate module macros into the result of this. 
460 if (auto *Latest = getLatest()) 461 return Latest->findDirectiveAtLoc(Loc, SourceMgr); 462 return MacroDirective::DefInfo(); 463 } 464 overrideActiveModuleMacros(Preprocessor & PP,IdentifierInfo * II)465 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) { 466 if (auto *Info = getModuleInfo(PP, II)) { 467 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 468 Info->ActiveModuleMacros.begin(), 469 Info->ActiveModuleMacros.end()); 470 Info->ActiveModuleMacros.clear(); 471 Info->IsAmbiguous = false; 472 } 473 } getOverriddenMacros()474 ArrayRef<ModuleMacro*> getOverriddenMacros() const { 475 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 476 return Info->OverriddenMacros; 477 return None; 478 } setOverriddenMacros(Preprocessor & PP,ArrayRef<ModuleMacro * > Overrides)479 void setOverriddenMacros(Preprocessor &PP, 480 ArrayRef<ModuleMacro *> Overrides) { 481 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 482 if (!Info) { 483 if (Overrides.empty()) 484 return; 485 Info = new (PP.getPreprocessorAllocator()) 486 ModuleMacroInfo(State.get<MacroDirective *>()); 487 State = Info; 488 } 489 Info->OverriddenMacros.clear(); 490 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 491 Overrides.begin(), Overrides.end()); 492 Info->ActiveModuleMacrosGeneration = 0; 493 } 494 }; 495 496 /// For each IdentifierInfo that was associated with a macro, we 497 /// keep a mapping to the history of all macro definitions and #undefs in 498 /// the reverse order (the latest one is in the head of the list). 499 /// 500 /// This mapping lives within the \p CurSubmoduleState. 501 typedef llvm::DenseMap<const IdentifierInfo *, MacroState> MacroMap; 502 503 friend class ASTReader; 504 505 struct SubmoduleState; 506 507 /// \brief Information about a submodule that we're currently building. 
508 struct BuildingSubmoduleInfo { BuildingSubmoduleInfoBuildingSubmoduleInfo509 BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, 510 SubmoduleState *OuterSubmoduleState) 511 : M(M), ImportLoc(ImportLoc), OuterSubmoduleState(OuterSubmoduleState) { 512 } 513 514 /// The module that we are building. 515 Module *M; 516 /// The location at which the module was included. 517 SourceLocation ImportLoc; 518 /// The previous SubmoduleState. 519 SubmoduleState *OuterSubmoduleState; 520 }; 521 SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack; 522 523 /// \brief Information about a submodule's preprocessor state. 524 struct SubmoduleState { 525 /// The macros for the submodule. 526 MacroMap Macros; 527 /// The set of modules that are visible within the submodule. 528 VisibleModuleSet VisibleModules; 529 // FIXME: CounterValue? 530 // FIXME: PragmaPushMacroInfo? 531 }; 532 std::map<Module*, SubmoduleState> Submodules; 533 534 /// The preprocessor state for preprocessing outside of any submodule. 535 SubmoduleState NullSubmoduleState; 536 537 /// The current submodule state. Will be \p NullSubmoduleState if we're not 538 /// in a submodule. 539 SubmoduleState *CurSubmoduleState; 540 541 /// The set of known macros exported from modules. 542 llvm::FoldingSet<ModuleMacro> ModuleMacros; 543 544 /// The list of module macros, for each identifier, that are not overridden by 545 /// any other module macro. 546 llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro*>> 547 LeafModuleMacros; 548 549 /// \brief Macros that we want to warn because they are not used at the end 550 /// of the translation unit. 551 /// 552 /// We store just their SourceLocations instead of 553 /// something like MacroInfo*. 
 The benefit of this is that when we are 554 /// deserializing from PCH, we don't need to deserialize identifier & macros 555 /// just so that we can report that they are unused, we just warn using 556 /// the SourceLocations of this set (that will be filled by the ASTReader). 557 /// We are using SmallPtrSet instead of a vector for faster removal. 558 typedef llvm::SmallPtrSet<SourceLocation, 32> WarnUnusedMacroLocsTy; 559 WarnUnusedMacroLocsTy WarnUnusedMacroLocs; 560 561 /// \brief A "freelist" of MacroArgs objects that can be 562 /// reused for quick allocation. 563 MacroArgs *MacroArgCache; 564 friend class MacroArgs; 565 566 /// For each IdentifierInfo used in a \#pragma push_macro directive, 567 /// we keep a MacroInfo stack used to restore the previous macro value. 568 llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo; 569 570 // Various statistics we track for performance analysis. 571 unsigned NumDirectives, NumDefined, NumUndefined, NumPragma; 572 unsigned NumIf, NumElse, NumEndif; 573 unsigned NumEnteredSourceFiles, MaxIncludeStackDepth; 574 unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded; 575 unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste; 576 unsigned NumSkipped; 577 578 /// \brief The predefined macros that preprocessor should use from the 579 /// command line etc. 580 std::string Predefines; 581 582 /// \brief The file ID for the preprocessor predefines. 583 FileID PredefinesFileID; 584 585 /// \{ 586 /// \brief Cache of macro expanders to reduce malloc traffic. 587 enum { TokenLexerCacheSize = 8 }; 588 unsigned NumCachedTokenLexers; 589 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize]; 590 /// \} 591 592 /// \brief Keeps macro expanded tokens for TokenLexers. 593 /// 594 /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is 595 /// going to lex to the cache and when it finishes the tokens are removed 596 /// from the end of the cache. 
597 SmallVector<Token, 16> MacroExpandedTokens; 598 std::vector<std::pair<TokenLexer *, size_t> > MacroExpandingLexersStack; 599 600 /// \brief A record of the macro definitions and expansions that 601 /// occurred during preprocessing. 602 /// 603 /// This is an optional side structure that can be enabled with 604 /// \c createPreprocessingRecord() prior to preprocessing. 605 PreprocessingRecord *Record; 606 607 /// Cached tokens state. 608 typedef SmallVector<Token, 1> CachedTokensTy; 609 610 /// \brief Cached tokens are stored here when we do backtracking or 611 /// lookahead. They are "lexed" by the CachingLex() method. 612 CachedTokensTy CachedTokens; 613 614 /// \brief The position of the cached token that CachingLex() should 615 /// "lex" next. 616 /// 617 /// If it points beyond the CachedTokens vector, it means that a normal 618 /// Lex() should be invoked. 619 CachedTokensTy::size_type CachedLexPos; 620 621 /// \brief Stack of backtrack positions, allowing nested backtracks. 622 /// 623 /// The EnableBacktrackAtThisPos() method pushes a position to 624 /// indicate where CachedLexPos should be set when the BackTrack() method is 625 /// invoked (at which point the last position is popped). 626 std::vector<CachedTokensTy::size_type> BacktrackPositions; 627 628 struct MacroInfoChain { 629 MacroInfo MI; 630 MacroInfoChain *Next; 631 }; 632 633 /// MacroInfos are managed as a chain for easy disposal. This is the head 634 /// of that list. 635 MacroInfoChain *MIChainHead; 636 637 struct DeserializedMacroInfoChain { 638 MacroInfo MI; 639 unsigned OwningModuleID; // MUST be immediately after the MacroInfo object 640 // so it can be accessed by MacroInfo::getOwningModuleID(). 
641 DeserializedMacroInfoChain *Next; 642 }; 643 DeserializedMacroInfoChain *DeserialMIChainHead; 644 645 public: 646 Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts, 647 DiagnosticsEngine &diags, LangOptions &opts, 648 SourceManager &SM, HeaderSearch &Headers, 649 ModuleLoader &TheModuleLoader, 650 IdentifierInfoLookup *IILookup = nullptr, 651 bool OwnsHeaderSearch = false, 652 TranslationUnitKind TUKind = TU_Complete); 653 654 ~Preprocessor(); 655 656 /// \brief Initialize the preprocessor using information about the target. 657 /// 658 /// \param Target is owned by the caller and must remain valid for the 659 /// lifetime of the preprocessor. 660 /// \param AuxTarget is owned by the caller and must remain valid for 661 /// the lifetime of the preprocessor. 662 void Initialize(const TargetInfo &Target, 663 const TargetInfo *AuxTarget = nullptr); 664 665 /// \brief Initialize the preprocessor to parse a model file 666 /// 667 /// To parse model files the preprocessor of the original source is reused to 668 /// preserve the identifier table. However to avoid some duplicate 669 /// information in the preprocessor some cleanup is needed before it is used 670 /// to parse model files. This method does that cleanup. 671 void InitializeForModelFile(); 672 673 /// \brief Cleanup after model file parsing 674 void FinalizeForModelFile(); 675 676 /// \brief Retrieve the preprocessor options used to initialize this 677 /// preprocessor. 
getPreprocessorOpts()678 PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; } 679 getDiagnostics()680 DiagnosticsEngine &getDiagnostics() const { return *Diags; } setDiagnostics(DiagnosticsEngine & D)681 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; } 682 getLangOpts()683 const LangOptions &getLangOpts() const { return LangOpts; } getTargetInfo()684 const TargetInfo &getTargetInfo() const { return *Target; } getAuxTargetInfo()685 const TargetInfo *getAuxTargetInfo() const { return AuxTarget; } getFileManager()686 FileManager &getFileManager() const { return FileMgr; } getSourceManager()687 SourceManager &getSourceManager() const { return SourceMgr; } getHeaderSearchInfo()688 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; } 689 getIdentifierTable()690 IdentifierTable &getIdentifierTable() { return Identifiers; } getIdentifierTable()691 const IdentifierTable &getIdentifierTable() const { return Identifiers; } getSelectorTable()692 SelectorTable &getSelectorTable() { return Selectors; } getBuiltinInfo()693 Builtin::Context &getBuiltinInfo() { return BuiltinInfo; } getPreprocessorAllocator()694 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; } 695 696 void setPTHManager(PTHManager* pm); 697 getPTHManager()698 PTHManager *getPTHManager() { return PTH.get(); } 699 setExternalSource(ExternalPreprocessorSource * Source)700 void setExternalSource(ExternalPreprocessorSource *Source) { 701 ExternalSource = Source; 702 } 703 getExternalSource()704 ExternalPreprocessorSource *getExternalSource() const { 705 return ExternalSource; 706 } 707 708 /// \brief Retrieve the module loader associated with this preprocessor. 
getModuleLoader()709 ModuleLoader &getModuleLoader() const { return TheModuleLoader; } 710 hadModuleLoaderFatalFailure()711 bool hadModuleLoaderFatalFailure() const { 712 return TheModuleLoader.HadFatalFailure; 713 } 714 715 /// \brief True if we are currently preprocessing a #if or #elif directive isParsingIfOrElifDirective()716 bool isParsingIfOrElifDirective() const { 717 return ParsingIfOrElifDirective; 718 } 719 720 /// \brief Control whether the preprocessor retains comments in output. SetCommentRetentionState(bool KeepComments,bool KeepMacroComments)721 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) { 722 this->KeepComments = KeepComments | KeepMacroComments; 723 this->KeepMacroComments = KeepMacroComments; 724 } 725 getCommentRetentionState()726 bool getCommentRetentionState() const { return KeepComments; } 727 setPragmasEnabled(bool Enabled)728 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; } getPragmasEnabled()729 bool getPragmasEnabled() const { return PragmasEnabled; } 730 SetSuppressIncludeNotFoundError(bool Suppress)731 void SetSuppressIncludeNotFoundError(bool Suppress) { 732 SuppressIncludeNotFoundError = Suppress; 733 } 734 GetSuppressIncludeNotFoundError()735 bool GetSuppressIncludeNotFoundError() { 736 return SuppressIncludeNotFoundError; 737 } 738 739 /// Sets whether the preprocessor is responsible for producing output or if 740 /// it is producing tokens to be consumed by Parse and Sema. setPreprocessedOutput(bool IsPreprocessedOutput)741 void setPreprocessedOutput(bool IsPreprocessedOutput) { 742 PreprocessedOutput = IsPreprocessedOutput; 743 } 744 745 /// Returns true if the preprocessor is responsible for generating output, 746 /// false if it is producing tokens to be consumed by Parse and Sema. isPreprocessedOutput()747 bool isPreprocessedOutput() const { return PreprocessedOutput; } 748 749 /// \brief Return true if we are lexing directly from the specified lexer. 
isCurrentLexer(const PreprocessorLexer * L)750 bool isCurrentLexer(const PreprocessorLexer *L) const { 751 return CurPPLexer == L; 752 } 753 754 /// \brief Return the current lexer being lexed from. 755 /// 756 /// Note that this ignores any potentially active macro expansions and _Pragma 757 /// expansions going on at the time. getCurrentLexer()758 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; } 759 760 /// \brief Return the current file lexer being lexed from. 761 /// 762 /// Note that this ignores any potentially active macro expansions and _Pragma 763 /// expansions going on at the time. 764 PreprocessorLexer *getCurrentFileLexer() const; 765 766 /// \brief Return the submodule owning the file being lexed. getCurrentSubmodule()767 Module *getCurrentSubmodule() const { return CurSubmodule; } 768 769 /// \brief Returns the FileID for the preprocessor predefines. getPredefinesFileID()770 FileID getPredefinesFileID() const { return PredefinesFileID; } 771 772 /// \{ 773 /// \brief Accessors for preprocessor callbacks. 774 /// 775 /// Note that this class takes ownership of any PPCallbacks object given to 776 /// it. getPPCallbacks()777 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); } addPPCallbacks(std::unique_ptr<PPCallbacks> C)778 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) { 779 if (Callbacks) 780 C = llvm::make_unique<PPChainedCallbacks>(std::move(C), 781 std::move(Callbacks)); 782 Callbacks = std::move(C); 783 } 784 /// \} 785 isMacroDefined(StringRef Id)786 bool isMacroDefined(StringRef Id) { 787 return isMacroDefined(&Identifiers.get(Id)); 788 } isMacroDefined(const IdentifierInfo * II)789 bool isMacroDefined(const IdentifierInfo *II) { 790 return II->hasMacroDefinition() && 791 (!getLangOpts().Modules || (bool)getMacroDefinition(II)); 792 } 793 794 /// \brief Determine whether II is defined as a macro within the module M, 795 /// if that is a module that we've already preprocessed. 
Does not check for 796 /// macros imported into M. isMacroDefinedInLocalModule(const IdentifierInfo * II,Module * M)797 bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) { 798 if (!II->hasMacroDefinition()) 799 return false; 800 auto I = Submodules.find(M); 801 if (I == Submodules.end()) 802 return false; 803 auto J = I->second.Macros.find(II); 804 if (J == I->second.Macros.end()) 805 return false; 806 auto *MD = J->second.getLatest(); 807 return MD && MD->isDefined(); 808 } 809 getMacroDefinition(const IdentifierInfo * II)810 MacroDefinition getMacroDefinition(const IdentifierInfo *II) { 811 if (!II->hasMacroDefinition()) 812 return MacroDefinition(); 813 814 MacroState &S = CurSubmoduleState->Macros[II]; 815 auto *MD = S.getLatest(); 816 while (MD && isa<VisibilityMacroDirective>(MD)) 817 MD = MD->getPrevious(); 818 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD), 819 S.getActiveModuleMacros(*this, II), 820 S.isAmbiguous(*this, II)); 821 } 822 getMacroDefinitionAtLoc(const IdentifierInfo * II,SourceLocation Loc)823 MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II, 824 SourceLocation Loc) { 825 if (!II->hadMacroDefinition()) 826 return MacroDefinition(); 827 828 MacroState &S = CurSubmoduleState->Macros[II]; 829 MacroDirective::DefInfo DI; 830 if (auto *MD = S.getLatest()) 831 DI = MD->findDirectiveAtLoc(Loc, getSourceManager()); 832 // FIXME: Compute the set of active module macros at the specified location. 833 return MacroDefinition(DI.getDirective(), 834 S.getActiveModuleMacros(*this, II), 835 S.isAmbiguous(*this, II)); 836 } 837 838 /// \brief Given an identifier, return its latest non-imported MacroDirective 839 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd. 
getLocalMacroDirective(const IdentifierInfo * II)840 MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const { 841 if (!II->hasMacroDefinition()) 842 return nullptr; 843 844 auto *MD = getLocalMacroDirectiveHistory(II); 845 if (!MD || MD->getDefinition().isUndefined()) 846 return nullptr; 847 848 return MD; 849 } 850 getMacroInfo(const IdentifierInfo * II)851 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const { 852 return const_cast<Preprocessor*>(this)->getMacroInfo(II); 853 } 854 getMacroInfo(const IdentifierInfo * II)855 MacroInfo *getMacroInfo(const IdentifierInfo *II) { 856 if (!II->hasMacroDefinition()) 857 return nullptr; 858 if (auto MD = getMacroDefinition(II)) 859 return MD.getMacroInfo(); 860 return nullptr; 861 } 862 863 /// \brief Given an identifier, return the latest non-imported macro 864 /// directive for that identifier. 865 /// 866 /// One can iterate over all previous macro directives from the most recent 867 /// one. 868 MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const; 869 870 /// \brief Add a directive to the macro directive history for this identifier. 871 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD); appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI,SourceLocation Loc)872 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI, 873 SourceLocation Loc) { 874 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc); 875 appendMacroDirective(II, MD); 876 return MD; 877 } appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI)878 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, 879 MacroInfo *MI) { 880 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc()); 881 } 882 /// \brief Set a MacroDirective that was loaded from a PCH file. 883 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *MD); 884 885 /// \brief Register an exported macro for a module and identifier. 
886 ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro, 887 ArrayRef<ModuleMacro *> Overrides, bool &IsNew); 888 ModuleMacro *getModuleMacro(Module *Mod, IdentifierInfo *II); 889 890 /// \brief Get the list of leaf (non-overridden) module macros for a name. getLeafModuleMacros(const IdentifierInfo * II)891 ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const { 892 auto I = LeafModuleMacros.find(II); 893 if (I != LeafModuleMacros.end()) 894 return I->second; 895 return None; 896 } 897 898 /// \{ 899 /// Iterators for the macro history table. Currently defined macros have 900 /// IdentifierInfo::hasMacroDefinition() set and an empty 901 /// MacroInfo::getUndefLoc() at the head of the list. 902 typedef MacroMap::const_iterator macro_iterator; 903 macro_iterator macro_begin(bool IncludeExternalMacros = true) const; 904 macro_iterator macro_end(bool IncludeExternalMacros = true) const; 905 llvm::iterator_range<macro_iterator> 906 macros(bool IncludeExternalMacros = true) const { 907 return llvm::make_range(macro_begin(IncludeExternalMacros), 908 macro_end(IncludeExternalMacros)); 909 } 910 /// \} 911 912 /// \brief Return the name of the macro defined before \p Loc that has 913 /// spelling \p Tokens. If there are multiple macros with same spelling, 914 /// return the last one defined. 915 StringRef getLastMacroWithSpelling(SourceLocation Loc, 916 ArrayRef<TokenValue> Tokens) const; 917 getPredefines()918 const std::string &getPredefines() const { return Predefines; } 919 /// \brief Set the predefines for this Preprocessor. 920 /// 921 /// These predefines are automatically injected when parsing the main file. setPredefines(const char * P)922 void setPredefines(const char *P) { Predefines = P; } setPredefines(StringRef P)923 void setPredefines(StringRef P) { Predefines = P; } 924 925 /// Return information about the specified preprocessor 926 /// identifier token. 
getIdentifierInfo(StringRef Name)927 IdentifierInfo *getIdentifierInfo(StringRef Name) const { 928 return &Identifiers.get(Name); 929 } 930 931 /// \brief Add the specified pragma handler to this preprocessor. 932 /// 933 /// If \p Namespace is non-null, then it is a token required to exist on the 934 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC". 935 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler); AddPragmaHandler(PragmaHandler * Handler)936 void AddPragmaHandler(PragmaHandler *Handler) { 937 AddPragmaHandler(StringRef(), Handler); 938 } 939 940 /// \brief Remove the specific pragma handler from this preprocessor. 941 /// 942 /// If \p Namespace is non-null, then it should be the namespace that 943 /// \p Handler was added to. It is an error to remove a handler that 944 /// has not been registered. 945 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler); RemovePragmaHandler(PragmaHandler * Handler)946 void RemovePragmaHandler(PragmaHandler *Handler) { 947 RemovePragmaHandler(StringRef(), Handler); 948 } 949 950 /// Install empty handlers for all pragmas (making them ignored). 951 void IgnorePragmas(); 952 953 /// \brief Add the specified comment handler to the preprocessor. 954 void addCommentHandler(CommentHandler *Handler); 955 956 /// \brief Remove the specified comment handler. 957 /// 958 /// It is an error to remove a handler that has not been registered. 959 void removeCommentHandler(CommentHandler *Handler); 960 961 /// \brief Set the code completion handler to the given object. setCodeCompletionHandler(CodeCompletionHandler & Handler)962 void setCodeCompletionHandler(CodeCompletionHandler &Handler) { 963 CodeComplete = &Handler; 964 } 965 966 /// \brief Retrieve the current code-completion handler. getCodeCompletionHandler()967 CodeCompletionHandler *getCodeCompletionHandler() const { 968 return CodeComplete; 969 } 970 971 /// \brief Clear out the code completion handler. 
clearCodeCompletionHandler()972 void clearCodeCompletionHandler() { 973 CodeComplete = nullptr; 974 } 975 976 /// \brief Hook used by the lexer to invoke the "natural language" code 977 /// completion point. 978 void CodeCompleteNaturalLanguage(); 979 980 /// \brief Retrieve the preprocessing record, or NULL if there is no 981 /// preprocessing record. getPreprocessingRecord()982 PreprocessingRecord *getPreprocessingRecord() const { return Record; } 983 984 /// \brief Create a new preprocessing record, which will keep track of 985 /// all macro expansions, macro definitions, etc. 986 void createPreprocessingRecord(); 987 988 /// \brief Enter the specified FileID as the main source file, 989 /// which implicitly adds the builtin defines etc. 990 void EnterMainSourceFile(); 991 992 /// \brief Inform the preprocessor callbacks that processing is complete. 993 void EndSourceFile(); 994 995 /// \brief Add a source file to the top of the include stack and 996 /// start lexing tokens from it instead of the current buffer. 997 /// 998 /// Emits a diagnostic, doesn't enter the file, and returns true on error. 999 bool EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir, 1000 SourceLocation Loc); 1001 1002 /// \brief Add a Macro to the top of the include stack and start lexing 1003 /// tokens from it instead of the current buffer. 1004 /// 1005 /// \param Args specifies the tokens input to a function-like macro. 1006 /// \param ILEnd specifies the location of the ')' for a function-like macro 1007 /// or the identifier for an object-like macro. 1008 void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroInfo *Macro, 1009 MacroArgs *Args); 1010 1011 /// \brief Add a "macro" context to the top of the include stack, 1012 /// which will cause the lexer to start returning the specified tokens. 1013 /// 1014 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream 1015 /// will not be subject to further macro expansion. 
/// Otherwise, these tokens will be re-macro-expanded when/if expansion is
/// enabled.
///
/// If \p OwnsTokens is false, this method assumes that the specified stream
/// of tokens has a permanent owner somewhere, so they do not need to be
/// copied. If it is true, it assumes the array of tokens is allocated with
/// \c new[] and must be freed.
void EnterTokenStream(const Token *Toks, unsigned NumToks,
                      bool DisableMacroExpansion, bool OwnsTokens);

/// \brief Pop the current lexer/macro expansion off the top of the lexer
/// stack.
///
/// This should only be used in situations where the current state of the
/// top-of-stack lexer is known.
void RemoveTopOfLexerStack();

/// \brief Start keeping track of lexed tokens so they can be re-lexed.
///
/// From the point that this method is called, and until
/// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
/// keeps track of the lexed tokens so that a subsequent Backtrack() call will
/// make the Preprocessor re-lex the same tokens.
///
/// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
/// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
/// be combined with the EnableBacktrackAtThisPos calls in reverse order.
///
/// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
/// at some point after EnableBacktrackAtThisPos. If you don't, caching of
/// tokens will continue indefinitely.
///
void EnableBacktrackAtThisPos();

/// \brief Disable the last EnableBacktrackAtThisPos call.
void CommitBacktrackedTokens();

/// \brief Make Preprocessor re-lex the tokens that were lexed since
/// EnableBacktrackAtThisPos() was previously called.
void Backtrack();

/// \brief True if EnableBacktrackAtThisPos() was called and
/// caching of tokens is on.
isBacktrackEnabled()1055 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); } 1056 1057 /// \brief Lex the next token for this preprocessor. 1058 void Lex(Token &Result); 1059 1060 void LexAfterModuleImport(Token &Result); 1061 1062 void makeModuleVisible(Module *M, SourceLocation Loc); 1063 getModuleImportLoc(Module * M)1064 SourceLocation getModuleImportLoc(Module *M) const { 1065 return CurSubmoduleState->VisibleModules.getImportLoc(M); 1066 } 1067 1068 /// \brief Lex a string literal, which may be the concatenation of multiple 1069 /// string literals and may even come from macro expansion. 1070 /// \returns true on success, false if a error diagnostic has been generated. LexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)1071 bool LexStringLiteral(Token &Result, std::string &String, 1072 const char *DiagnosticTag, bool AllowMacroExpansion) { 1073 if (AllowMacroExpansion) 1074 Lex(Result); 1075 else 1076 LexUnexpandedToken(Result); 1077 return FinishLexStringLiteral(Result, String, DiagnosticTag, 1078 AllowMacroExpansion); 1079 } 1080 1081 /// \brief Complete the lexing of a string literal where the first token has 1082 /// already been lexed (see LexStringLiteral). 1083 bool FinishLexStringLiteral(Token &Result, std::string &String, 1084 const char *DiagnosticTag, 1085 bool AllowMacroExpansion); 1086 1087 /// \brief Lex a token. If it's a comment, keep lexing until we get 1088 /// something not a comment. 1089 /// 1090 /// This is useful in -E -C mode where comments would foul up preprocessor 1091 /// directive handling. LexNonComment(Token & Result)1092 void LexNonComment(Token &Result) { 1093 do 1094 Lex(Result); 1095 while (Result.getKind() == tok::comment); 1096 } 1097 1098 /// \brief Just like Lex, but disables macro expansion of identifier tokens. LexUnexpandedToken(Token & Result)1099 void LexUnexpandedToken(Token &Result) { 1100 // Disable macro expansion. 
1101 bool OldVal = DisableMacroExpansion; 1102 DisableMacroExpansion = true; 1103 // Lex the token. 1104 Lex(Result); 1105 1106 // Reenable it. 1107 DisableMacroExpansion = OldVal; 1108 } 1109 1110 /// \brief Like LexNonComment, but this disables macro expansion of 1111 /// identifier tokens. LexUnexpandedNonComment(Token & Result)1112 void LexUnexpandedNonComment(Token &Result) { 1113 do 1114 LexUnexpandedToken(Result); 1115 while (Result.getKind() == tok::comment); 1116 } 1117 1118 /// \brief Parses a simple integer literal to get its numeric value. Floating 1119 /// point literals and user defined literals are rejected. Used primarily to 1120 /// handle pragmas that accept integer arguments. 1121 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value); 1122 1123 /// Disables macro expansion everywhere except for preprocessor directives. SetMacroExpansionOnlyInDirectives()1124 void SetMacroExpansionOnlyInDirectives() { 1125 DisableMacroExpansion = true; 1126 MacroExpansionInDirectivesOverride = true; 1127 } 1128 1129 /// \brief Peeks ahead N tokens and returns that token without consuming any 1130 /// tokens. 1131 /// 1132 /// LookAhead(0) returns the next token that would be returned by Lex(), 1133 /// LookAhead(1) returns the token after it, etc. This returns normal 1134 /// tokens after phase 5. As such, it is equivalent to using 1135 /// 'Lex', not 'LexUnexpandedToken'. LookAhead(unsigned N)1136 const Token &LookAhead(unsigned N) { 1137 if (CachedLexPos + N < CachedTokens.size()) 1138 return CachedTokens[CachedLexPos+N]; 1139 else 1140 return PeekAhead(N+1); 1141 } 1142 1143 /// \brief When backtracking is enabled and tokens are cached, 1144 /// this allows to revert a specific number of tokens. 1145 /// 1146 /// Note that the number of tokens being reverted should be up to the last 1147 /// backtrack position, not more. 
RevertCachedTokens(unsigned N)1148 void RevertCachedTokens(unsigned N) { 1149 assert(isBacktrackEnabled() && 1150 "Should only be called when tokens are cached for backtracking"); 1151 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back()) 1152 && "Should revert tokens up to the last backtrack position, not more"); 1153 assert(signed(CachedLexPos) - signed(N) >= 0 && 1154 "Corrupted backtrack positions ?"); 1155 CachedLexPos -= N; 1156 } 1157 1158 /// \brief Enters a token in the token stream to be lexed next. 1159 /// 1160 /// If BackTrack() is called afterwards, the token will remain at the 1161 /// insertion point. EnterToken(const Token & Tok)1162 void EnterToken(const Token &Tok) { 1163 EnterCachingLexMode(); 1164 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok); 1165 } 1166 1167 /// We notify the Preprocessor that if it is caching tokens (because 1168 /// backtrack is enabled) it should replace the most recent cached tokens 1169 /// with the given annotation token. This function has no effect if 1170 /// backtracking is not enabled. 1171 /// 1172 /// Note that the use of this function is just for optimization, so that the 1173 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is 1174 /// invoked. AnnotateCachedTokens(const Token & Tok)1175 void AnnotateCachedTokens(const Token &Tok) { 1176 assert(Tok.isAnnotation() && "Expected annotation token"); 1177 if (CachedLexPos != 0 && isBacktrackEnabled()) 1178 AnnotatePreviousCachedTokens(Tok); 1179 } 1180 1181 /// Get the location of the last cached token, suitable for setting the end 1182 /// location of an annotation token. getLastCachedTokenLocation()1183 SourceLocation getLastCachedTokenLocation() const { 1184 assert(CachedLexPos != 0); 1185 return CachedTokens[CachedLexPos-1].getLastLoc(); 1186 } 1187 1188 /// \brief Replace the last token with an annotation token. 
1189 /// 1190 /// Like AnnotateCachedTokens(), this routine replaces an 1191 /// already-parsed (and resolved) token with an annotation 1192 /// token. However, this routine only replaces the last token with 1193 /// the annotation token; it does not affect any other cached 1194 /// tokens. This function has no effect if backtracking is not 1195 /// enabled. ReplaceLastTokenWithAnnotation(const Token & Tok)1196 void ReplaceLastTokenWithAnnotation(const Token &Tok) { 1197 assert(Tok.isAnnotation() && "Expected annotation token"); 1198 if (CachedLexPos != 0 && isBacktrackEnabled()) 1199 CachedTokens[CachedLexPos-1] = Tok; 1200 } 1201 1202 /// Update the current token to represent the provided 1203 /// identifier, in order to cache an action performed by typo correction. TypoCorrectToken(const Token & Tok)1204 void TypoCorrectToken(const Token &Tok) { 1205 assert(Tok.getIdentifierInfo() && "Expected identifier token"); 1206 if (CachedLexPos != 0 && isBacktrackEnabled()) 1207 CachedTokens[CachedLexPos-1] = Tok; 1208 } 1209 1210 /// \brief Recompute the current lexer kind based on the CurLexer/CurPTHLexer/ 1211 /// CurTokenLexer pointers. 1212 void recomputeCurLexerKind(); 1213 1214 /// \brief Returns true if incremental processing is enabled isIncrementalProcessingEnabled()1215 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; } 1216 1217 /// \brief Enables the incremental processing 1218 void enableIncrementalProcessing(bool value = true) { 1219 IncrementalProcessing = value; 1220 } 1221 1222 /// \brief Specify the point at which code-completion will be performed. 1223 /// 1224 /// \param File the file in which code completion should occur. If 1225 /// this file is included multiple times, code-completion will 1226 /// perform completion the first time it is included. If NULL, this 1227 /// function clears out the code-completion point. 1228 /// 1229 /// \param Line the line at which code completion should occur 1230 /// (1-based). 
1231 /// 1232 /// \param Column the column at which code completion should occur 1233 /// (1-based). 1234 /// 1235 /// \returns true if an error occurred, false otherwise. 1236 bool SetCodeCompletionPoint(const FileEntry *File, 1237 unsigned Line, unsigned Column); 1238 1239 /// \brief Determine if we are performing code completion. isCodeCompletionEnabled()1240 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; } 1241 1242 /// \brief Returns the location of the code-completion point. 1243 /// 1244 /// Returns an invalid location if code-completion is not enabled or the file 1245 /// containing the code-completion point has not been lexed yet. getCodeCompletionLoc()1246 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; } 1247 1248 /// \brief Returns the start location of the file of code-completion point. 1249 /// 1250 /// Returns an invalid location if code-completion is not enabled or the file 1251 /// containing the code-completion point has not been lexed yet. getCodeCompletionFileLoc()1252 SourceLocation getCodeCompletionFileLoc() const { 1253 return CodeCompletionFileLoc; 1254 } 1255 1256 /// \brief Returns true if code-completion is enabled and we have hit the 1257 /// code-completion point. isCodeCompletionReached()1258 bool isCodeCompletionReached() const { return CodeCompletionReached; } 1259 1260 /// \brief Note that we hit the code-completion point. setCodeCompletionReached()1261 void setCodeCompletionReached() { 1262 assert(isCodeCompletionEnabled() && "Code-completion not enabled!"); 1263 CodeCompletionReached = true; 1264 // Silence any diagnostics that occur after we hit the code-completion. 1265 getDiagnostics().setSuppressAllDiagnostics(true); 1266 } 1267 1268 /// \brief The location of the currently-active \#pragma clang 1269 /// arc_cf_code_audited begin. 1270 /// 1271 /// Returns an invalid location if there is no such pragma active. 
getPragmaARCCFCodeAuditedLoc()1272 SourceLocation getPragmaARCCFCodeAuditedLoc() const { 1273 return PragmaARCCFCodeAuditedLoc; 1274 } 1275 1276 /// \brief Set the location of the currently-active \#pragma clang 1277 /// arc_cf_code_audited begin. An invalid location ends the pragma. setPragmaARCCFCodeAuditedLoc(SourceLocation Loc)1278 void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) { 1279 PragmaARCCFCodeAuditedLoc = Loc; 1280 } 1281 1282 /// \brief The location of the currently-active \#pragma clang 1283 /// assume_nonnull begin. 1284 /// 1285 /// Returns an invalid location if there is no such pragma active. getPragmaAssumeNonNullLoc()1286 SourceLocation getPragmaAssumeNonNullLoc() const { 1287 return PragmaAssumeNonNullLoc; 1288 } 1289 1290 /// \brief Set the location of the currently-active \#pragma clang 1291 /// assume_nonnull begin. An invalid location ends the pragma. setPragmaAssumeNonNullLoc(SourceLocation Loc)1292 void setPragmaAssumeNonNullLoc(SourceLocation Loc) { 1293 PragmaAssumeNonNullLoc = Loc; 1294 } 1295 1296 /// \brief Set the directory in which the main file should be considered 1297 /// to have been found, if it is not a real file. setMainFileDir(const DirectoryEntry * Dir)1298 void setMainFileDir(const DirectoryEntry *Dir) { 1299 MainFileDir = Dir; 1300 } 1301 1302 /// \brief Instruct the preprocessor to skip part of the main source file. 1303 /// 1304 /// \param Bytes The number of bytes in the preamble to skip. 1305 /// 1306 /// \param StartOfLine Whether skipping these bytes puts the lexer at the 1307 /// start of a line. setSkipMainFilePreamble(unsigned Bytes,bool StartOfLine)1308 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) { 1309 SkipMainFilePreamble.first = Bytes; 1310 SkipMainFilePreamble.second = StartOfLine; 1311 } 1312 1313 /// Forwarding function for diagnostics. 
This emits a diagnostic at 1314 /// the specified Token's location, translating the token's start 1315 /// position in the current buffer into a SourcePosition object for rendering. Diag(SourceLocation Loc,unsigned DiagID)1316 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const { 1317 return Diags->Report(Loc, DiagID); 1318 } 1319 Diag(const Token & Tok,unsigned DiagID)1320 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const { 1321 return Diags->Report(Tok.getLocation(), DiagID); 1322 } 1323 1324 /// Return the 'spelling' of the token at the given 1325 /// location; does not go up to the spelling location or down to the 1326 /// expansion location. 1327 /// 1328 /// \param buffer A buffer which will be used only if the token requires 1329 /// "cleaning", e.g. if it contains trigraphs or escaped newlines 1330 /// \param invalid If non-null, will be set \c true if an error occurs. 1331 StringRef getSpelling(SourceLocation loc, 1332 SmallVectorImpl<char> &buffer, 1333 bool *invalid = nullptr) const { 1334 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid); 1335 } 1336 1337 /// \brief Return the 'spelling' of the Tok token. 1338 /// 1339 /// The spelling of a token is the characters used to represent the token in 1340 /// the source file after trigraph expansion and escaped-newline folding. In 1341 /// particular, this wants to get the true, uncanonicalized, spelling of 1342 /// things like digraphs, UCNs, etc. 1343 /// 1344 /// \param Invalid If non-null, will be set \c true if an error occurs. 1345 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const { 1346 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid); 1347 } 1348 1349 /// \brief Get the spelling of a token into a preallocated buffer, instead 1350 /// of as an std::string. 1351 /// 1352 /// The caller is required to allocate enough space for the token, which is 1353 /// guaranteed to be at least Tok.getLength() bytes long. 
The length of the 1354 /// actual result is returned. 1355 /// 1356 /// Note that this method may do two possible things: it may either fill in 1357 /// the buffer specified with characters, or it may *change the input pointer* 1358 /// to point to a constant buffer with the data already in it (avoiding a 1359 /// copy). The caller is not allowed to modify the returned buffer pointer 1360 /// if an internal buffer is returned. 1361 unsigned getSpelling(const Token &Tok, const char *&Buffer, 1362 bool *Invalid = nullptr) const { 1363 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid); 1364 } 1365 1366 /// \brief Get the spelling of a token into a SmallVector. 1367 /// 1368 /// Note that the returned StringRef may not point to the 1369 /// supplied buffer if a copy can be avoided. 1370 StringRef getSpelling(const Token &Tok, 1371 SmallVectorImpl<char> &Buffer, 1372 bool *Invalid = nullptr) const; 1373 1374 /// \brief Relex the token at the specified location. 1375 /// \returns true if there was a failure, false on success. 1376 bool getRawToken(SourceLocation Loc, Token &Result, 1377 bool IgnoreWhiteSpace = false) { 1378 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace); 1379 } 1380 1381 /// \brief Given a Token \p Tok that is a numeric constant with length 1, 1382 /// return the character. 1383 char 1384 getSpellingOfSingleCharacterNumericConstant(const Token &Tok, 1385 bool *Invalid = nullptr) const { 1386 assert(Tok.is(tok::numeric_constant) && 1387 Tok.getLength() == 1 && "Called on unsupported token"); 1388 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1"); 1389 1390 // If the token is carrying a literal data pointer, just use it. 1391 if (const char *D = Tok.getLiteralData()) 1392 return *D; 1393 1394 // Otherwise, fall back on getCharacterData, which is slower, but always 1395 // works. 
1396 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid); 1397 } 1398 1399 /// \brief Retrieve the name of the immediate macro expansion. 1400 /// 1401 /// This routine starts from a source location, and finds the name of the 1402 /// macro responsible for its immediate expansion. It looks through any 1403 /// intervening macro argument expansions to compute this. It returns a 1404 /// StringRef that refers to the SourceManager-owned buffer of the source 1405 /// where that macro name is spelled. Thus, the result shouldn't out-live 1406 /// the SourceManager. getImmediateMacroName(SourceLocation Loc)1407 StringRef getImmediateMacroName(SourceLocation Loc) { 1408 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts()); 1409 } 1410 1411 /// \brief Plop the specified string into a scratch buffer and set the 1412 /// specified token's location and length to it. 1413 /// 1414 /// If specified, the source location provides a location of the expansion 1415 /// point of the token. 1416 void CreateString(StringRef Str, Token &Tok, 1417 SourceLocation ExpansionLocStart = SourceLocation(), 1418 SourceLocation ExpansionLocEnd = SourceLocation()); 1419 1420 /// \brief Computes the source location just past the end of the 1421 /// token at this source location. 1422 /// 1423 /// This routine can be used to produce a source location that 1424 /// points just past the end of the token referenced by \p Loc, and 1425 /// is generally used when a diagnostic needs to point just after a 1426 /// token where it expected something different that it received. If 1427 /// the returned source location would not be meaningful (e.g., if 1428 /// it points into a macro), this routine returns an invalid 1429 /// source location. 1430 /// 1431 /// \param Offset an offset from the end of the token, where the source 1432 /// location should refer to. 
The default offset (0) produces a source 1433 /// location pointing just past the end of the token; an offset of 1 produces 1434 /// a source location pointing to the last character in the token, etc. 1435 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) { 1436 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts); 1437 } 1438 1439 /// \brief Returns true if the given MacroID location points at the first 1440 /// token of the macro expansion. 1441 /// 1442 /// \param MacroBegin If non-null and function returns true, it is set to 1443 /// begin location of the macro. 1444 bool isAtStartOfMacroExpansion(SourceLocation loc, 1445 SourceLocation *MacroBegin = nullptr) const { 1446 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts, 1447 MacroBegin); 1448 } 1449 1450 /// \brief Returns true if the given MacroID location points at the last 1451 /// token of the macro expansion. 1452 /// 1453 /// \param MacroEnd If non-null and function returns true, it is set to 1454 /// end location of the macro. 1455 bool isAtEndOfMacroExpansion(SourceLocation loc, 1456 SourceLocation *MacroEnd = nullptr) const { 1457 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd); 1458 } 1459 1460 /// \brief Print the token to stderr, used for debugging. 1461 void DumpToken(const Token &Tok, bool DumpFlags = false) const; 1462 void DumpLocation(SourceLocation Loc) const; 1463 void DumpMacro(const MacroInfo &MI) const; 1464 void dumpMacroInfo(const IdentifierInfo *II); 1465 1466 /// \brief Given a location that specifies the start of a 1467 /// token, return a new location that specifies a character within the token. 
AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char)1468 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, 1469 unsigned Char) const { 1470 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts); 1471 } 1472 1473 /// \brief Increment the counters for the number of token paste operations 1474 /// performed. 1475 /// 1476 /// If fast was specified, this is a 'fast paste' case we handled. IncrementPasteCounter(bool isFast)1477 void IncrementPasteCounter(bool isFast) { 1478 if (isFast) 1479 ++NumFastTokenPaste; 1480 else 1481 ++NumTokenPaste; 1482 } 1483 1484 void PrintStats(); 1485 1486 size_t getTotalMemory() const; 1487 1488 /// When the macro expander pastes together a comment (/##/) in Microsoft 1489 /// mode, this method handles updating the current state, returning the 1490 /// token on the next source line. 1491 void HandleMicrosoftCommentPaste(Token &Tok); 1492 1493 //===--------------------------------------------------------------------===// 1494 // Preprocessor callback methods. These are invoked by a lexer as various 1495 // directives and events are found. 1496 1497 /// Given a tok::raw_identifier token, look up the 1498 /// identifier information for the token and install it into the token, 1499 /// updating the token kind accordingly. 1500 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const; 1501 1502 private: 1503 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons; 1504 1505 public: 1506 1507 /// \brief Specifies the reason for poisoning an identifier. 1508 /// 1509 /// If that identifier is accessed while poisoned, then this reason will be 1510 /// used instead of the default "poisoned" diagnostic. 1511 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID); 1512 1513 /// \brief Display reason for poisoned identifier. 
1514 void HandlePoisonedIdentifier(Token & Tok); 1515 MaybeHandlePoisonedIdentifier(Token & Identifier)1516 void MaybeHandlePoisonedIdentifier(Token & Identifier) { 1517 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) { 1518 if(II->isPoisoned()) { 1519 HandlePoisonedIdentifier(Identifier); 1520 } 1521 } 1522 } 1523 1524 private: 1525 /// Identifiers used for SEH handling in Borland. These are only 1526 /// allowed in particular circumstances 1527 // __except block 1528 IdentifierInfo *Ident__exception_code, 1529 *Ident___exception_code, 1530 *Ident_GetExceptionCode; 1531 // __except filter expression 1532 IdentifierInfo *Ident__exception_info, 1533 *Ident___exception_info, 1534 *Ident_GetExceptionInfo; 1535 // __finally 1536 IdentifierInfo *Ident__abnormal_termination, 1537 *Ident___abnormal_termination, 1538 *Ident_AbnormalTermination; 1539 1540 const char *getCurLexerEndPos(); 1541 1542 public: 1543 void PoisonSEHIdentifiers(bool Poison = true); // Borland 1544 1545 /// \brief Callback invoked when the lexer reads an identifier and has 1546 /// filled in the tokens IdentifierInfo member. 1547 /// 1548 /// This callback potentially macro expands it or turns it into a named 1549 /// token (like 'for'). 1550 /// 1551 /// \returns true if we actually computed a token, false if we need to 1552 /// lex again. 1553 bool HandleIdentifier(Token &Identifier); 1554 1555 1556 /// \brief Callback invoked when the lexer hits the end of the current file. 1557 /// 1558 /// This either returns the EOF token and returns true, or 1559 /// pops a level off the include stack and returns false, at which point the 1560 /// client should call lex again. 1561 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false); 1562 1563 /// \brief Callback invoked when the current TokenLexer hits the end of its 1564 /// token stream. 1565 bool HandleEndOfTokenLexer(Token &Result); 1566 1567 /// \brief Callback invoked when the lexer sees a # token at the start of a 1568 /// line. 
1569 /// 1570 /// This consumes the directive, modifies the lexer/preprocessor state, and 1571 /// advances the lexer(s) so that the next token read is the correct one. 1572 void HandleDirective(Token &Result); 1573 1574 /// \brief Ensure that the next token is a tok::eod token. 1575 /// 1576 /// If not, emit a diagnostic and consume up until the eod. 1577 /// If \p EnableMacros is true, then we consider macros that expand to zero 1578 /// tokens as being ok. 1579 void CheckEndOfDirective(const char *Directive, bool EnableMacros = false); 1580 1581 /// \brief Read and discard all tokens remaining on the current line until 1582 /// the tok::eod token is found. 1583 void DiscardUntilEndOfDirective(); 1584 1585 /// \brief Returns true if the preprocessor has seen a use of 1586 /// __DATE__ or __TIME__ in the file so far. SawDateOrTime()1587 bool SawDateOrTime() const { 1588 return DATELoc != SourceLocation() || TIMELoc != SourceLocation(); 1589 } getCounterValue()1590 unsigned getCounterValue() const { return CounterValue; } setCounterValue(unsigned V)1591 void setCounterValue(unsigned V) { CounterValue = V; } 1592 1593 /// \brief Retrieves the module that we're currently building, if any. 1594 Module *getCurrentModule(); 1595 1596 /// \brief Allocate a new MacroInfo object with the provided SourceLocation. 1597 MacroInfo *AllocateMacroInfo(SourceLocation L); 1598 1599 /// \brief Allocate a new MacroInfo object loaded from an AST file. 1600 MacroInfo *AllocateDeserializedMacroInfo(SourceLocation L, 1601 unsigned SubModuleID); 1602 1603 /// \brief Turn the specified lexer token into a fully checked and spelled 1604 /// filename, e.g. as an operand of \#include. 1605 /// 1606 /// The caller is expected to provide a buffer that is large enough to hold 1607 /// the spelling of the filename, but is also expected to handle the case 1608 /// when this method decides to use a different buffer. 
1609 /// 1610 /// \returns true if the input filename was in <>'s or false if it was 1611 /// in ""'s. 1612 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Filename); 1613 1614 /// \brief Given a "foo" or \<foo> reference, look up the indicated file. 1615 /// 1616 /// Returns null on failure. \p isAngled indicates whether the file 1617 /// reference is for system \#include's or not (i.e. using <> instead of ""). 1618 const FileEntry *LookupFile(SourceLocation FilenameLoc, StringRef Filename, 1619 bool isAngled, const DirectoryLookup *FromDir, 1620 const FileEntry *FromFile, 1621 const DirectoryLookup *&CurDir, 1622 SmallVectorImpl<char> *SearchPath, 1623 SmallVectorImpl<char> *RelativePath, 1624 ModuleMap::KnownHeader *SuggestedModule, 1625 bool SkipCache = false); 1626 1627 /// \brief Get the DirectoryLookup structure used to find the current 1628 /// FileEntry, if CurLexer is non-null and if applicable. 1629 /// 1630 /// This allows us to implement \#include_next and find directory-specific 1631 /// properties. GetCurDirLookup()1632 const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; } 1633 1634 /// \brief Return true if we're in the top-level file, not in a \#include. 1635 bool isInPrimaryFile() const; 1636 1637 /// \brief Handle cases where the \#include name is expanded 1638 /// from a macro as multiple tokens, which need to be glued together. 1639 /// 1640 /// This occurs for code like: 1641 /// \code 1642 /// \#define FOO <x/y.h> 1643 /// \#include FOO 1644 /// \endcode 1645 /// because in this case, "<x/y.h>" is returned as 7 tokens, not one. 1646 /// 1647 /// This code concatenates and consumes tokens up to the '>' token. It 1648 /// returns false if the > was found, otherwise it returns true if it finds 1649 /// and consumes the EOD marker. 
1650 bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer, 1651 SourceLocation &End); 1652 1653 /// \brief Lex an on-off-switch (C99 6.10.6p2) and verify that it is 1654 /// followed by EOD. Return true if the token is not a valid on-off-switch. 1655 bool LexOnOffSwitch(tok::OnOffSwitch &OOS); 1656 1657 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, 1658 bool *ShadowFlag = nullptr); 1659 1660 private: 1661 PushIncludeMacroStack()1662 void PushIncludeMacroStack() { 1663 assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer"); 1664 IncludeMacroStack.emplace_back( 1665 CurLexerKind, CurSubmodule, std::move(CurLexer), std::move(CurPTHLexer), 1666 CurPPLexer, std::move(CurTokenLexer), CurDirLookup); 1667 CurPPLexer = nullptr; 1668 } 1669 PopIncludeMacroStack()1670 void PopIncludeMacroStack() { 1671 CurLexer = std::move(IncludeMacroStack.back().TheLexer); 1672 CurPTHLexer = std::move(IncludeMacroStack.back().ThePTHLexer); 1673 CurPPLexer = IncludeMacroStack.back().ThePPLexer; 1674 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer); 1675 CurDirLookup = IncludeMacroStack.back().TheDirLookup; 1676 CurSubmodule = IncludeMacroStack.back().TheSubmodule; 1677 CurLexerKind = IncludeMacroStack.back().CurLexerKind; 1678 IncludeMacroStack.pop_back(); 1679 } 1680 1681 void PropagateLineStartLeadingSpaceInfo(Token &Result); 1682 1683 void EnterSubmodule(Module *M, SourceLocation ImportLoc); 1684 void LeaveSubmodule(); 1685 1686 /// Update the set of active module macros and ambiguity flag for a module 1687 /// macro name. 1688 void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info); 1689 1690 /// \brief Allocate a new MacroInfo object. 
1691 MacroInfo *AllocateMacroInfo(); 1692 1693 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI, 1694 SourceLocation Loc); 1695 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc); 1696 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc, 1697 bool isPublic); 1698 1699 /// \brief Lex and validate a macro name, which occurs after a 1700 /// \#define or \#undef. 1701 /// 1702 /// \param MacroNameTok Token that represents the name defined or undefined. 1703 /// \param IsDefineUndef Kind if preprocessor directive. 1704 /// \param ShadowFlag Points to flag that is set if macro name shadows 1705 /// a keyword. 1706 /// 1707 /// This emits a diagnostic, sets the token kind to eod, 1708 /// and discards the rest of the macro line if the macro name is invalid. 1709 void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other, 1710 bool *ShadowFlag = nullptr); 1711 1712 /// The ( starting an argument list of a macro definition has just been read. 1713 /// Lex the rest of the arguments and the closing ), updating \p MI with 1714 /// what we learn and saving in \p LastTok the last token read. 1715 /// Return true if an error occurs parsing the arg list. 1716 bool ReadMacroDefinitionArgList(MacroInfo *MI, Token& LastTok); 1717 1718 /// We just read a \#if or related directive and decided that the 1719 /// subsequent tokens are in the \#if'd out portion of the 1720 /// file. Lex the rest of the file, until we see an \#endif. If \p 1721 /// FoundNonSkipPortion is true, then we have already emitted code for part of 1722 /// this \#if directive, so \#else/\#elif blocks should never be entered. If 1723 /// \p FoundElse is false, then \#else directives are ok, if not, then we have 1724 /// already seen one so a \#else directive is a duplicate. When this returns, 1725 /// the caller can lex the first valid token. 
1726 void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, 1727 bool FoundNonSkipPortion, bool FoundElse, 1728 SourceLocation ElseLoc = SourceLocation()); 1729 1730 /// \brief A fast PTH version of SkipExcludedConditionalBlock. 1731 void PTHSkipExcludedConditionalBlock(); 1732 1733 /// \brief Evaluate an integer constant expression that may occur after a 1734 /// \#if or \#elif directive and return it as a bool. 1735 /// 1736 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro. 1737 bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro); 1738 1739 /// \brief Install the standard preprocessor pragmas: 1740 /// \#pragma GCC poison/system_header/dependency and \#pragma once. 1741 void RegisterBuiltinPragmas(); 1742 1743 /// \brief Register builtin macros such as __LINE__ with the identifier table. 1744 void RegisterBuiltinMacros(); 1745 1746 /// If an identifier token is read that is to be expanded as a macro, handle 1747 /// it and return the next token as 'Tok'. If we lexed a token, return true; 1748 /// otherwise the caller should lex again. 1749 bool HandleMacroExpandedIdentifier(Token &Tok, const MacroDefinition &MD); 1750 1751 /// \brief Cache macro expanded tokens for TokenLexers. 1752 // 1753 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 1754 /// going to lex in the cache and when it finishes the tokens are removed 1755 /// from the end of the cache. 1756 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer, 1757 ArrayRef<Token> tokens); 1758 void removeCachedMacroExpandedTokensOfLastLexer(); 1759 friend void TokenLexer::ExpandFunctionArguments(); 1760 1761 /// Determine whether the next preprocessor token to be 1762 /// lexed is a '('. If so, consume the token and return true, if not, this 1763 /// method should have no observable side-effect on the lexed tokens. 
1764 bool isNextPPTokenLParen(); 1765 1766 /// After reading "MACRO(", this method is invoked to read all of the formal 1767 /// arguments specified for the macro invocation. Returns null on error. 1768 MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI, 1769 SourceLocation &ExpansionEnd); 1770 1771 /// \brief If an identifier token is read that is to be expanded 1772 /// as a builtin macro, handle it and return the next token as 'Tok'. 1773 void ExpandBuiltinMacro(Token &Tok); 1774 1775 /// \brief Read a \c _Pragma directive, slice it up, process it, then 1776 /// return the first token after the directive. 1777 /// This assumes that the \c _Pragma token has just been read into \p Tok. 1778 void Handle_Pragma(Token &Tok); 1779 1780 /// \brief Like Handle_Pragma except the pragma text is not enclosed within 1781 /// a string literal. 1782 void HandleMicrosoft__pragma(Token &Tok); 1783 1784 /// \brief Add a lexer to the top of the include stack and 1785 /// start lexing tokens from it instead of the current buffer. 1786 void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir); 1787 1788 /// \brief Add a lexer to the top of the include stack and 1789 /// start getting tokens from it using the PTH cache. 1790 void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir); 1791 1792 /// \brief Set the FileID for the preprocessor predefines. setPredefinesFileID(FileID FID)1793 void setPredefinesFileID(FileID FID) { 1794 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!"); 1795 PredefinesFileID = FID; 1796 } 1797 1798 /// \brief Returns true if we are lexing from a file and not a 1799 /// pragma or a macro. IsFileLexer(const Lexer * L,const PreprocessorLexer * P)1800 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) { 1801 return L ? 
!L->isPragmaLexer() : P != nullptr; 1802 } 1803 IsFileLexer(const IncludeStackInfo & I)1804 static bool IsFileLexer(const IncludeStackInfo& I) { 1805 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer); 1806 } 1807 IsFileLexer()1808 bool IsFileLexer() const { 1809 return IsFileLexer(CurLexer.get(), CurPPLexer); 1810 } 1811 1812 //===--------------------------------------------------------------------===// 1813 // Caching stuff. 1814 void CachingLex(Token &Result); InCachingLexMode()1815 bool InCachingLexMode() const { 1816 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means 1817 // that we are past EOF, not that we are in CachingLex mode. 1818 return !CurPPLexer && !CurTokenLexer && !CurPTHLexer && 1819 !IncludeMacroStack.empty(); 1820 } 1821 void EnterCachingLexMode(); ExitCachingLexMode()1822 void ExitCachingLexMode() { 1823 if (InCachingLexMode()) 1824 RemoveTopOfLexerStack(); 1825 } 1826 const Token &PeekAhead(unsigned N); 1827 void AnnotatePreviousCachedTokens(const Token &Tok); 1828 1829 //===--------------------------------------------------------------------===// 1830 /// Handle*Directive - implement the various preprocessor directives. These 1831 /// should side-effect the current preprocessor object so that the next call 1832 /// to Lex() will return the appropriate token next. 1833 void HandleLineDirective(Token &Tok); 1834 void HandleDigitDirective(Token &Tok); 1835 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning); 1836 void HandleIdentSCCSDirective(Token &Tok); 1837 void HandleMacroPublicDirective(Token &Tok); 1838 void HandleMacroPrivateDirective(Token &Tok); 1839 1840 // File inclusion. 
1841 void HandleIncludeDirective(SourceLocation HashLoc, 1842 Token &Tok, 1843 const DirectoryLookup *LookupFrom = nullptr, 1844 const FileEntry *LookupFromFile = nullptr, 1845 bool isImport = false); 1846 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok); 1847 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok); 1848 void HandleImportDirective(SourceLocation HashLoc, Token &Tok); 1849 void HandleMicrosoftImportDirective(Token &Tok); 1850 1851 public: 1852 // Module inclusion testing. 1853 /// \brief Find the module that owns the source or header file that 1854 /// \p Loc points to. If the location is in a file that was included 1855 /// into a module, or is outside any module, returns nullptr. 1856 Module *getModuleForLocation(SourceLocation Loc); 1857 1858 /// \brief Find the module that contains the specified location, either 1859 /// directly or indirectly. 1860 Module *getModuleContainingLocation(SourceLocation Loc); 1861 1862 private: 1863 // Macro handling. 1864 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterTopLevelIfndef); 1865 void HandleUndefDirective(Token &Tok); 1866 1867 // Conditional Inclusion. 1868 void HandleIfdefDirective(Token &Tok, bool isIfndef, 1869 bool ReadAnyTokensBeforeDirective); 1870 void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective); 1871 void HandleEndifDirective(Token &Tok); 1872 void HandleElseDirective(Token &Tok); 1873 void HandleElifDirective(Token &Tok); 1874 1875 // Pragmas. 
1876 void HandlePragmaDirective(SourceLocation IntroducerLoc, 1877 PragmaIntroducerKind Introducer); 1878 public: 1879 void HandlePragmaOnce(Token &OnceTok); 1880 void HandlePragmaMark(); 1881 void HandlePragmaPoison(Token &PoisonTok); 1882 void HandlePragmaSystemHeader(Token &SysHeaderTok); 1883 void HandlePragmaDependency(Token &DependencyTok); 1884 void HandlePragmaPushMacro(Token &Tok); 1885 void HandlePragmaPopMacro(Token &Tok); 1886 void HandlePragmaIncludeAlias(Token &Tok); 1887 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok); 1888 1889 // Return true and store the first token only if any CommentHandler 1890 // has inserted some tokens and getCommentRetentionState() is false. 1891 bool HandleComment(Token &Token, SourceRange Comment); 1892 1893 /// \brief A macro is used, update information about macros that need unused 1894 /// warnings. 1895 void markMacroAsUsed(MacroInfo *MI); 1896 }; 1897 1898 /// \brief Abstract base class that describes a handler that will receive 1899 /// source ranges for each of the comments encountered in the source file. 1900 class CommentHandler { 1901 public: 1902 virtual ~CommentHandler(); 1903 1904 // The handler shall return true if it has pushed any tokens 1905 // to be read using e.g. EnterToken or EnterTokenStream. 1906 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0; 1907 }; 1908 1909 } // end namespace clang 1910 1911 #endif 1912