1 //===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief Defines the clang::Preprocessor interface. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H 16 #define LLVM_CLANG_LEX_PREPROCESSOR_H 17 18 #include "clang/Basic/Builtins.h" 19 #include "clang/Basic/Diagnostic.h" 20 #include "clang/Basic/IdentifierTable.h" 21 #include "clang/Basic/SourceLocation.h" 22 #include "clang/Lex/Lexer.h" 23 #include "clang/Lex/MacroInfo.h" 24 #include "clang/Lex/ModuleMap.h" 25 #include "clang/Lex/PPCallbacks.h" 26 #include "clang/Lex/PTHLexer.h" 27 #include "clang/Lex/PTHManager.h" 28 #include "clang/Lex/TokenLexer.h" 29 #include "llvm/ADT/ArrayRef.h" 30 #include "llvm/ADT/DenseMap.h" 31 #include "llvm/ADT/IntrusiveRefCntPtr.h" 32 #include "llvm/ADT/SmallPtrSet.h" 33 #include "llvm/ADT/SmallVector.h" 34 #include "llvm/Support/Allocator.h" 35 #include <memory> 36 #include <vector> 37 38 namespace llvm { 39 template<unsigned InternalLen> class SmallString; 40 } 41 42 namespace clang { 43 44 class SourceManager; 45 class ExternalPreprocessorSource; 46 class FileManager; 47 class FileEntry; 48 class HeaderSearch; 49 class PragmaNamespace; 50 class PragmaHandler; 51 class CommentHandler; 52 class ScratchBuffer; 53 class TargetInfo; 54 class PPCallbacks; 55 class CodeCompletionHandler; 56 class DirectoryLookup; 57 class PreprocessingRecord; 58 class ModuleLoader; 59 class PreprocessorOptions; 60 61 /// \brief Stores token information for comparing actual tokens with 62 /// predefined values. Only handles simple tokens and identifiers. 63 class TokenValue { 64 tok::TokenKind Kind; 65 IdentifierInfo *II; 66 67 public: TokenValue(tok::TokenKind Kind)68 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) { 69 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported."); 70 assert(Kind != tok::identifier && 71 "Identifiers should be created by TokenValue(IdentifierInfo *)"); 72 assert(!tok::isLiteral(Kind) && "Literals are not supported."); 73 assert(!tok::isAnnotation(Kind) && "Annotations are not supported."); 74 } TokenValue(IdentifierInfo * II)75 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {} 76 bool operator==(const Token &Tok) const { 77 return Tok.getKind() == Kind && 78 (!II || II == Tok.getIdentifierInfo()); 79 } 80 }; 81 82 /// \brief Engages in a tight little dance with the lexer to efficiently 83 /// preprocess tokens. 84 /// 85 /// Lexers know only about tokens within a single source file, and don't 86 /// know anything about preprocessor-level issues like the \#include stack, 87 /// token expansion, etc. 88 class Preprocessor : public RefCountedBase<Preprocessor> { 89 IntrusiveRefCntPtr<PreprocessorOptions> PPOpts; 90 DiagnosticsEngine *Diags; 91 LangOptions &LangOpts; 92 const TargetInfo *Target; 93 FileManager &FileMgr; 94 SourceManager &SourceMgr; 95 ScratchBuffer *ScratchBuf; 96 HeaderSearch &HeaderInfo; 97 ModuleLoader &TheModuleLoader; 98 99 /// \brief External source of macros. 100 ExternalPreprocessorSource *ExternalSource; 101 102 103 /// An optional PTHManager object used for getting tokens from 104 /// a token cache rather than lexing the original source file. 105 std::unique_ptr<PTHManager> PTH; 106 107 /// A BumpPtrAllocator object used to quickly allocate and release 108 /// objects internal to the Preprocessor. 109 llvm::BumpPtrAllocator BP; 110 111 /// Identifiers for builtin macros and other builtins. 112 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__ 113 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__ 114 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__ 115 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__ 116 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__ 117 IdentifierInfo *Ident__COUNTER__; // __COUNTER__ 118 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma 119 IdentifierInfo *Ident__identifier; // __identifier 120 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__ 121 IdentifierInfo *Ident__has_feature; // __has_feature 122 IdentifierInfo *Ident__has_extension; // __has_extension 123 IdentifierInfo *Ident__has_builtin; // __has_builtin 124 IdentifierInfo *Ident__has_attribute; // __has_attribute 125 IdentifierInfo *Ident__has_include; // __has_include 126 IdentifierInfo *Ident__has_include_next; // __has_include_next 127 IdentifierInfo *Ident__has_warning; // __has_warning 128 IdentifierInfo *Ident__is_identifier; // __is_identifier 129 IdentifierInfo *Ident__building_module; // __building_module 130 IdentifierInfo *Ident__MODULE__; // __MODULE__ 131 132 SourceLocation DATELoc, TIMELoc; 133 unsigned CounterValue; // Next __COUNTER__ value. 134 135 enum { 136 /// \brief Maximum depth of \#includes. 137 MaxAllowedIncludeStackDepth = 200 138 }; 139 140 // State that is set before the preprocessor begins. 141 bool KeepComments : 1; 142 bool KeepMacroComments : 1; 143 bool SuppressIncludeNotFoundError : 1; 144 145 // State that changes while the preprocessor runs: 146 bool InMacroArgs : 1; // True if parsing fn macro invocation args. 147 148 /// Whether the preprocessor owns the header search object. 149 bool OwnsHeaderSearch : 1; 150 151 /// True if macro expansion is disabled. 152 bool DisableMacroExpansion : 1; 153 154 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion) 155 /// when parsing preprocessor directives. 156 bool MacroExpansionInDirectivesOverride : 1; 157 158 class ResetMacroExpansionHelper; 159 160 /// \brief Whether we have already loaded macros from the external source. 161 mutable bool ReadMacrosFromExternalSource : 1; 162 163 /// \brief True if pragmas are enabled. 164 bool PragmasEnabled : 1; 165 166 /// \brief True if the current build action is a preprocessing action. 167 bool PreprocessedOutput : 1; 168 169 /// \brief True if we are currently preprocessing a #if or #elif directive 170 bool ParsingIfOrElifDirective; 171 172 /// \brief True if we are pre-expanding macro arguments. 173 bool InMacroArgPreExpansion; 174 175 /// \brief Mapping/lookup information for all identifiers in 176 /// the program, including program keywords. 177 mutable IdentifierTable Identifiers; 178 179 /// \brief This table contains all the selectors in the program. 180 /// 181 /// Unlike IdentifierTable above, this table *isn't* populated by the 182 /// preprocessor. It is declared/expanded here because its role/lifetime is 183 /// conceptually similar to the IdentifierTable. In addition, the current 184 /// control flow (in clang::ParseAST()), make it convenient to put here. 185 /// 186 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to 187 /// the lifetime of the preprocessor. 188 SelectorTable Selectors; 189 190 /// \brief Information about builtins. 191 Builtin::Context BuiltinInfo; 192 193 /// \brief Tracks all of the pragmas that the client registered 194 /// with this preprocessor. 195 PragmaNamespace *PragmaHandlers; 196 197 /// \brief Tracks all of the comment handlers that the client registered 198 /// with this preprocessor. 199 std::vector<CommentHandler *> CommentHandlers; 200 201 /// \brief True if we want to ignore EOF token and continue later on (thus 202 /// avoid tearing the Lexer and etc. down). 203 bool IncrementalProcessing; 204 205 /// The kind of translation unit we are processing. 206 TranslationUnitKind TUKind; 207 208 /// \brief The code-completion handler. 209 CodeCompletionHandler *CodeComplete; 210 211 /// \brief The file that we're performing code-completion for, if any. 212 const FileEntry *CodeCompletionFile; 213 214 /// \brief The offset in file for the code-completion point. 215 unsigned CodeCompletionOffset; 216 217 /// \brief The location for the code-completion point. This gets instantiated 218 /// when the CodeCompletionFile gets \#include'ed for preprocessing. 219 SourceLocation CodeCompletionLoc; 220 221 /// \brief The start location for the file of the code-completion point. 222 /// 223 /// This gets instantiated when the CodeCompletionFile gets \#include'ed 224 /// for preprocessing. 225 SourceLocation CodeCompletionFileLoc; 226 227 /// \brief The source location of the \c import contextual keyword we just 228 /// lexed, if any. 229 SourceLocation ModuleImportLoc; 230 231 /// \brief The module import path that we're currently processing. 232 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath; 233 234 /// \brief Whether the last token we lexed was an '@'. 235 bool LastTokenWasAt; 236 237 /// \brief Whether the module import expects an identifier next. Otherwise, 238 /// it expects a '.' or ';'. 239 bool ModuleImportExpectsIdentifier; 240 241 /// \brief The source location of the currently-active 242 /// \#pragma clang arc_cf_code_audited begin. 243 SourceLocation PragmaARCCFCodeAuditedLoc; 244 245 /// \brief True if we hit the code-completion point. 246 bool CodeCompletionReached; 247 248 /// \brief The number of bytes that we will initially skip when entering the 249 /// main file, along with a flag that indicates whether skipping this number 250 /// of bytes will place the lexer at the start of a line. 251 /// 252 /// This is used when loading a precompiled preamble. 253 std::pair<unsigned, bool> SkipMainFilePreamble; 254 255 /// \brief The current top of the stack that we're lexing from if 256 /// not expanding a macro and we are lexing directly from source code. 257 /// 258 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null. 259 std::unique_ptr<Lexer> CurLexer; 260 261 /// \brief The current top of stack that we're lexing from if 262 /// not expanding from a macro and we are lexing from a PTH cache. 263 /// 264 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null. 265 std::unique_ptr<PTHLexer> CurPTHLexer; 266 267 /// \brief The current top of the stack what we're lexing from 268 /// if not expanding a macro. 269 /// 270 /// This is an alias for either CurLexer or CurPTHLexer. 271 PreprocessorLexer *CurPPLexer; 272 273 /// \brief Used to find the current FileEntry, if CurLexer is non-null 274 /// and if applicable. 275 /// 276 /// This allows us to implement \#include_next and find directory-specific 277 /// properties. 278 const DirectoryLookup *CurDirLookup; 279 280 /// \brief The current macro we are expanding, if we are expanding a macro. 281 /// 282 /// One of CurLexer and CurTokenLexer must be null. 283 std::unique_ptr<TokenLexer> CurTokenLexer; 284 285 /// \brief The kind of lexer we're currently working with. 286 enum CurLexerKind { 287 CLK_Lexer, 288 CLK_PTHLexer, 289 CLK_TokenLexer, 290 CLK_CachingLexer, 291 CLK_LexAfterModuleImport 292 } CurLexerKind; 293 294 /// \brief If the current lexer is for a submodule that is being built, this 295 /// is that submodule. 296 Module *CurSubmodule; 297 298 /// \brief Keeps track of the stack of files currently 299 /// \#included, and macros currently being expanded from, not counting 300 /// CurLexer/CurTokenLexer. 301 struct IncludeStackInfo { 302 enum CurLexerKind CurLexerKind; 303 Module *TheSubmodule; 304 std::unique_ptr<Lexer> TheLexer; 305 std::unique_ptr<PTHLexer> ThePTHLexer; 306 PreprocessorLexer *ThePPLexer; 307 std::unique_ptr<TokenLexer> TheTokenLexer; 308 const DirectoryLookup *TheDirLookup; 309 310 // The following constructors are completely useless copies of the default 311 // versions, only needed to pacify MSVC. IncludeStackInfoIncludeStackInfo312 IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule, 313 std::unique_ptr<Lexer> &&TheLexer, 314 std::unique_ptr<PTHLexer> &&ThePTHLexer, 315 PreprocessorLexer *ThePPLexer, 316 std::unique_ptr<TokenLexer> &&TheTokenLexer, 317 const DirectoryLookup *TheDirLookup) 318 : CurLexerKind(std::move(CurLexerKind)), 319 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)), 320 ThePTHLexer(std::move(ThePTHLexer)), 321 ThePPLexer(std::move(ThePPLexer)), 322 TheTokenLexer(std::move(TheTokenLexer)), 323 TheDirLookup(std::move(TheDirLookup)) {} IncludeStackInfoIncludeStackInfo324 IncludeStackInfo(IncludeStackInfo &&RHS) 325 : CurLexerKind(std::move(RHS.CurLexerKind)), 326 TheSubmodule(std::move(RHS.TheSubmodule)), 327 TheLexer(std::move(RHS.TheLexer)), 328 ThePTHLexer(std::move(RHS.ThePTHLexer)), 329 ThePPLexer(std::move(RHS.ThePPLexer)), 330 TheTokenLexer(std::move(RHS.TheTokenLexer)), 331 TheDirLookup(std::move(RHS.TheDirLookup)) {} 332 }; 333 std::vector<IncludeStackInfo> IncludeMacroStack; 334 335 /// \brief Actions invoked when some preprocessor activity is 336 /// encountered (e.g. a file is \#included, etc). 337 PPCallbacks *Callbacks; 338 339 struct MacroExpandsInfo { 340 Token Tok; 341 MacroDirective *MD; 342 SourceRange Range; MacroExpandsInfoMacroExpandsInfo343 MacroExpandsInfo(Token Tok, MacroDirective *MD, SourceRange Range) 344 : Tok(Tok), MD(MD), Range(Range) { } 345 }; 346 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks; 347 348 /// For each IdentifierInfo that was associated with a macro, we 349 /// keep a mapping to the history of all macro definitions and #undefs in 350 /// the reverse order (the latest one is in the head of the list). 351 llvm::DenseMap<const IdentifierInfo*, MacroDirective*> Macros; 352 friend class ASTReader; 353 354 /// \brief Macros that we want to warn because they are not used at the end 355 /// of the translation unit. 356 /// 357 /// We store just their SourceLocations instead of 358 /// something like MacroInfo*. The benefit of this is that when we are 359 /// deserializing from PCH, we don't need to deserialize identifier & macros 360 /// just so that we can report that they are unused, we just warn using 361 /// the SourceLocations of this set (that will be filled by the ASTReader). 362 /// We are using SmallPtrSet instead of a vector for faster removal. 363 typedef llvm::SmallPtrSet<SourceLocation, 32> WarnUnusedMacroLocsTy; 364 WarnUnusedMacroLocsTy WarnUnusedMacroLocs; 365 366 /// \brief A "freelist" of MacroArg objects that can be 367 /// reused for quick allocation. 368 MacroArgs *MacroArgCache; 369 friend class MacroArgs; 370 371 /// For each IdentifierInfo used in a \#pragma push_macro directive, 372 /// we keep a MacroInfo stack used to restore the previous macro value. 373 llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo; 374 375 // Various statistics we track for performance analysis. 376 unsigned NumDirectives, NumDefined, NumUndefined, NumPragma; 377 unsigned NumIf, NumElse, NumEndif; 378 unsigned NumEnteredSourceFiles, MaxIncludeStackDepth; 379 unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded; 380 unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste; 381 unsigned NumSkipped; 382 383 /// \brief The predefined macros that preprocessor should use from the 384 /// command line etc. 385 std::string Predefines; 386 387 /// \brief The file ID for the preprocessor predefines. 388 FileID PredefinesFileID; 389 390 /// \{ 391 /// \brief Cache of macro expanders to reduce malloc traffic. 392 enum { TokenLexerCacheSize = 8 }; 393 unsigned NumCachedTokenLexers; 394 TokenLexer *TokenLexerCache[TokenLexerCacheSize]; 395 /// \} 396 397 /// \brief Keeps macro expanded tokens for TokenLexers. 398 // 399 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 400 /// going to lex in the cache and when it finishes the tokens are removed 401 /// from the end of the cache. 402 SmallVector<Token, 16> MacroExpandedTokens; 403 std::vector<std::pair<TokenLexer *, size_t> > MacroExpandingLexersStack; 404 405 /// \brief A record of the macro definitions and expansions that 406 /// occurred during preprocessing. 407 /// 408 /// This is an optional side structure that can be enabled with 409 /// \c createPreprocessingRecord() prior to preprocessing. 410 PreprocessingRecord *Record; 411 412 private: // Cached tokens state. 413 typedef SmallVector<Token, 1> CachedTokensTy; 414 415 /// \brief Cached tokens are stored here when we do backtracking or 416 /// lookahead. They are "lexed" by the CachingLex() method. 417 CachedTokensTy CachedTokens; 418 419 /// \brief The position of the cached token that CachingLex() should 420 /// "lex" next. 421 /// 422 /// If it points beyond the CachedTokens vector, it means that a normal 423 /// Lex() should be invoked. 424 CachedTokensTy::size_type CachedLexPos; 425 426 /// \brief Stack of backtrack positions, allowing nested backtracks. 427 /// 428 /// The EnableBacktrackAtThisPos() method pushes a position to 429 /// indicate where CachedLexPos should be set when the BackTrack() method is 430 /// invoked (at which point the last position is popped). 431 std::vector<CachedTokensTy::size_type> BacktrackPositions; 432 433 struct MacroInfoChain { 434 MacroInfo MI; 435 MacroInfoChain *Next; 436 MacroInfoChain *Prev; 437 }; 438 439 /// MacroInfos are managed as a chain for easy disposal. This is the head 440 /// of that list. 441 MacroInfoChain *MIChainHead; 442 443 /// A "freelist" of MacroInfo objects that can be reused for quick 444 /// allocation. 445 MacroInfoChain *MICache; 446 447 struct DeserializedMacroInfoChain { 448 MacroInfo MI; 449 unsigned OwningModuleID; // MUST be immediately after the MacroInfo object 450 // so it can be accessed by MacroInfo::getOwningModuleID(). 451 DeserializedMacroInfoChain *Next; 452 }; 453 DeserializedMacroInfoChain *DeserialMIChainHead; 454 455 public: 456 Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts, 457 DiagnosticsEngine &diags, LangOptions &opts, 458 SourceManager &SM, HeaderSearch &Headers, 459 ModuleLoader &TheModuleLoader, 460 IdentifierInfoLookup *IILookup = nullptr, 461 bool OwnsHeaderSearch = false, 462 TranslationUnitKind TUKind = TU_Complete); 463 464 ~Preprocessor(); 465 466 /// \brief Initialize the preprocessor using information about the target. 467 /// 468 /// \param Target is owned by the caller and must remain valid for the 469 /// lifetime of the preprocessor. 470 void Initialize(const TargetInfo &Target); 471 472 /// \brief Retrieve the preprocessor options used to initialize this 473 /// preprocessor. getPreprocessorOpts()474 PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; } 475 getDiagnostics()476 DiagnosticsEngine &getDiagnostics() const { return *Diags; } setDiagnostics(DiagnosticsEngine & D)477 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; } 478 getLangOpts()479 const LangOptions &getLangOpts() const { return LangOpts; } getTargetInfo()480 const TargetInfo &getTargetInfo() const { return *Target; } getFileManager()481 FileManager &getFileManager() const { return FileMgr; } getSourceManager()482 SourceManager &getSourceManager() const { return SourceMgr; } getHeaderSearchInfo()483 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; } 484 getIdentifierTable()485 IdentifierTable &getIdentifierTable() { return Identifiers; } getSelectorTable()486 SelectorTable &getSelectorTable() { return Selectors; } getBuiltinInfo()487 Builtin::Context &getBuiltinInfo() { return BuiltinInfo; } getPreprocessorAllocator()488 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; } 489 490 void setPTHManager(PTHManager* pm); 491 getPTHManager()492 PTHManager *getPTHManager() { return PTH.get(); } 493 setExternalSource(ExternalPreprocessorSource * Source)494 void setExternalSource(ExternalPreprocessorSource *Source) { 495 ExternalSource = Source; 496 } 497 getExternalSource()498 ExternalPreprocessorSource *getExternalSource() const { 499 return ExternalSource; 500 } 501 502 /// \brief Retrieve the module loader associated with this preprocessor. getModuleLoader()503 ModuleLoader &getModuleLoader() const { return TheModuleLoader; } 504 hadModuleLoaderFatalFailure()505 bool hadModuleLoaderFatalFailure() const { 506 return TheModuleLoader.HadFatalFailure; 507 } 508 509 /// \brief True if we are currently preprocessing a #if or #elif directive isParsingIfOrElifDirective()510 bool isParsingIfOrElifDirective() const { 511 return ParsingIfOrElifDirective; 512 } 513 514 /// \brief Control whether the preprocessor retains comments in output. SetCommentRetentionState(bool KeepComments,bool KeepMacroComments)515 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) { 516 this->KeepComments = KeepComments | KeepMacroComments; 517 this->KeepMacroComments = KeepMacroComments; 518 } 519 getCommentRetentionState()520 bool getCommentRetentionState() const { return KeepComments; } 521 setPragmasEnabled(bool Enabled)522 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; } getPragmasEnabled()523 bool getPragmasEnabled() const { return PragmasEnabled; } 524 SetSuppressIncludeNotFoundError(bool Suppress)525 void SetSuppressIncludeNotFoundError(bool Suppress) { 526 SuppressIncludeNotFoundError = Suppress; 527 } 528 GetSuppressIncludeNotFoundError()529 bool GetSuppressIncludeNotFoundError() { 530 return SuppressIncludeNotFoundError; 531 } 532 533 /// Sets whether the preprocessor is responsible for producing output or if 534 /// it is producing tokens to be consumed by Parse and Sema. setPreprocessedOutput(bool IsPreprocessedOutput)535 void setPreprocessedOutput(bool IsPreprocessedOutput) { 536 PreprocessedOutput = IsPreprocessedOutput; 537 } 538 539 /// Returns true if the preprocessor is responsible for generating output, 540 /// false if it is producing tokens to be consumed by Parse and Sema. isPreprocessedOutput()541 bool isPreprocessedOutput() const { return PreprocessedOutput; } 542 543 /// \brief Return true if we are lexing directly from the specified lexer. isCurrentLexer(const PreprocessorLexer * L)544 bool isCurrentLexer(const PreprocessorLexer *L) const { 545 return CurPPLexer == L; 546 } 547 548 /// \brief Return the current lexer being lexed from. 549 /// 550 /// Note that this ignores any potentially active macro expansions and _Pragma 551 /// expansions going on at the time. getCurrentLexer()552 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; } 553 554 /// \brief Return the current file lexer being lexed from. 555 /// 556 /// Note that this ignores any potentially active macro expansions and _Pragma 557 /// expansions going on at the time. 558 PreprocessorLexer *getCurrentFileLexer() const; 559 560 /// \brief Returns the FileID for the preprocessor predefines. getPredefinesFileID()561 FileID getPredefinesFileID() const { return PredefinesFileID; } 562 563 /// \{ 564 /// \brief Accessors for preprocessor callbacks. 565 /// 566 /// Note that this class takes ownership of any PPCallbacks object given to 567 /// it. getPPCallbacks()568 PPCallbacks *getPPCallbacks() const { return Callbacks; } addPPCallbacks(PPCallbacks * C)569 void addPPCallbacks(PPCallbacks *C) { 570 if (Callbacks) 571 C = new PPChainedCallbacks(C, Callbacks); 572 Callbacks = C; 573 } 574 /// \} 575 576 /// \brief Given an identifier, return its latest MacroDirective if it is 577 /// \#defined or null if it isn't \#define'd. getMacroDirective(IdentifierInfo * II)578 MacroDirective *getMacroDirective(IdentifierInfo *II) const { 579 if (!II->hasMacroDefinition()) 580 return nullptr; 581 582 MacroDirective *MD = getMacroDirectiveHistory(II); 583 assert(MD->isDefined() && "Macro is undefined!"); 584 return MD; 585 } 586 getMacroInfo(IdentifierInfo * II)587 const MacroInfo *getMacroInfo(IdentifierInfo *II) const { 588 return const_cast<Preprocessor*>(this)->getMacroInfo(II); 589 } 590 getMacroInfo(IdentifierInfo * II)591 MacroInfo *getMacroInfo(IdentifierInfo *II) { 592 if (MacroDirective *MD = getMacroDirective(II)) 593 return MD->getMacroInfo(); 594 return nullptr; 595 } 596 597 /// \brief Given an identifier, return the (probably #undef'd) MacroInfo 598 /// representing the most recent macro definition. 599 /// 600 /// One can iterate over all previous macro definitions from the most recent 601 /// one. This should only be called for identifiers that hadMacroDefinition(). 602 MacroDirective *getMacroDirectiveHistory(const IdentifierInfo *II) const; 603 604 /// \brief Add a directive to the macro directive history for this identifier. 605 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD); appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI,SourceLocation Loc,bool isImported)606 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI, 607 SourceLocation Loc, 608 bool isImported) { 609 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc, isImported); 610 appendMacroDirective(II, MD); 611 return MD; 612 } appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI)613 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI){ 614 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc(), false); 615 } 616 /// \brief Set a MacroDirective that was loaded from a PCH file. 617 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *MD); 618 619 /// \{ 620 /// Iterators for the macro history table. Currently defined macros have 621 /// IdentifierInfo::hasMacroDefinition() set and an empty 622 /// MacroInfo::getUndefLoc() at the head of the list. 623 typedef llvm::DenseMap<const IdentifierInfo *, 624 MacroDirective*>::const_iterator macro_iterator; 625 macro_iterator macro_begin(bool IncludeExternalMacros = true) const; 626 macro_iterator macro_end(bool IncludeExternalMacros = true) const; 627 /// \} 628 629 /// \brief Return the name of the macro defined before \p Loc that has 630 /// spelling \p Tokens. If there are multiple macros with same spelling, 631 /// return the last one defined. 632 StringRef getLastMacroWithSpelling(SourceLocation Loc, 633 ArrayRef<TokenValue> Tokens) const; 634 getPredefines()635 const std::string &getPredefines() const { return Predefines; } 636 /// \brief Set the predefines for this Preprocessor. 637 /// 638 /// These predefines are automatically injected when parsing the main file. setPredefines(const char * P)639 void setPredefines(const char *P) { Predefines = P; } setPredefines(const std::string & P)640 void setPredefines(const std::string &P) { Predefines = P; } 641 642 /// Return information about the specified preprocessor 643 /// identifier token. getIdentifierInfo(StringRef Name)644 IdentifierInfo *getIdentifierInfo(StringRef Name) const { 645 return &Identifiers.get(Name); 646 } 647 648 /// \brief Add the specified pragma handler to this preprocessor. 649 /// 650 /// If \p Namespace is non-null, then it is a token required to exist on the 651 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC". 652 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler); AddPragmaHandler(PragmaHandler * Handler)653 void AddPragmaHandler(PragmaHandler *Handler) { 654 AddPragmaHandler(StringRef(), Handler); 655 } 656 657 /// \brief Remove the specific pragma handler from this preprocessor. 658 /// 659 /// If \p Namespace is non-null, then it should be the namespace that 660 /// \p Handler was added to. It is an error to remove a handler that 661 /// has not been registered. 662 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler); RemovePragmaHandler(PragmaHandler * Handler)663 void RemovePragmaHandler(PragmaHandler *Handler) { 664 RemovePragmaHandler(StringRef(), Handler); 665 } 666 667 /// Install empty handlers for all pragmas (making them ignored). 668 void IgnorePragmas(); 669 670 /// \brief Add the specified comment handler to the preprocessor. 671 void addCommentHandler(CommentHandler *Handler); 672 673 /// \brief Remove the specified comment handler. 674 /// 675 /// It is an error to remove a handler that has not been registered. 676 void removeCommentHandler(CommentHandler *Handler); 677 678 /// \brief Set the code completion handler to the given object. setCodeCompletionHandler(CodeCompletionHandler & Handler)679 void setCodeCompletionHandler(CodeCompletionHandler &Handler) { 680 CodeComplete = &Handler; 681 } 682 683 /// \brief Retrieve the current code-completion handler. getCodeCompletionHandler()684 CodeCompletionHandler *getCodeCompletionHandler() const { 685 return CodeComplete; 686 } 687 688 /// \brief Clear out the code completion handler. clearCodeCompletionHandler()689 void clearCodeCompletionHandler() { 690 CodeComplete = nullptr; 691 } 692 693 /// \brief Hook used by the lexer to invoke the "natural language" code 694 /// completion point. 695 void CodeCompleteNaturalLanguage(); 696 697 /// \brief Retrieve the preprocessing record, or NULL if there is no 698 /// preprocessing record. getPreprocessingRecord()699 PreprocessingRecord *getPreprocessingRecord() const { return Record; } 700 701 /// \brief Create a new preprocessing record, which will keep track of 702 /// all macro expansions, macro definitions, etc. 703 void createPreprocessingRecord(); 704 705 /// \brief Enter the specified FileID as the main source file, 706 /// which implicitly adds the builtin defines etc. 707 void EnterMainSourceFile(); 708 709 /// \brief Inform the preprocessor callbacks that processing is complete. 710 void EndSourceFile(); 711 712 /// \brief Add a source file to the top of the include stack and 713 /// start lexing tokens from it instead of the current buffer. 714 /// 715 /// Emits a diagnostic, doesn't enter the file, and returns true on error. 716 bool EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir, 717 SourceLocation Loc); 718 719 /// \brief Add a Macro to the top of the include stack and start lexing 720 /// tokens from it instead of the current buffer. 721 /// 722 /// \param Args specifies the tokens input to a function-like macro. 723 /// \param ILEnd specifies the location of the ')' for a function-like macro 724 /// or the identifier for an object-like macro. 725 void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroInfo *Macro, 726 MacroArgs *Args); 727 728 /// \brief Add a "macro" context to the top of the include stack, 729 /// which will cause the lexer to start returning the specified tokens. 730 /// 731 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream 732 /// will not be subject to further macro expansion. Otherwise, these tokens 733 /// will be re-macro-expanded when/if expansion is enabled. 734 /// 735 /// If \p OwnsTokens is false, this method assumes that the specified stream 736 /// of tokens has a permanent owner somewhere, so they do not need to be 737 /// copied. If it is true, it assumes the array of tokens is allocated with 738 /// \c new[] and must be freed. 739 void EnterTokenStream(const Token *Toks, unsigned NumToks, 740 bool DisableMacroExpansion, bool OwnsTokens); 741 742 /// \brief Pop the current lexer/macro exp off the top of the lexer stack. 743 /// 744 /// This should only be used in situations where the current state of the 745 /// top-of-stack lexer is known. 746 void RemoveTopOfLexerStack(); 747 748 /// From the point that this method is called, and until 749 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor 750 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will 751 /// make the Preprocessor re-lex the same tokens. 752 /// 753 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can 754 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will 755 /// be combined with the EnableBacktrackAtThisPos calls in reverse order. 756 /// 757 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack 758 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of 759 /// tokens will continue indefinitely. 760 /// 761 void EnableBacktrackAtThisPos(); 762 763 /// \brief Disable the last EnableBacktrackAtThisPos call. 764 void CommitBacktrackedTokens(); 765 766 /// \brief Make Preprocessor re-lex the tokens that were lexed since 767 /// EnableBacktrackAtThisPos() was previously called. 768 void Backtrack(); 769 770 /// \brief True if EnableBacktrackAtThisPos() was called and 771 /// caching of tokens is on. isBacktrackEnabled()772 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); } 773 774 /// \brief Lex the next token for this preprocessor. 775 void Lex(Token &Result); 776 777 void LexAfterModuleImport(Token &Result); 778 779 /// \brief Lex a string literal, which may be the concatenation of multiple 780 /// string literals and may even come from macro expansion. 781 /// \returns true on success, false if a error diagnostic has been generated. LexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)782 bool LexStringLiteral(Token &Result, std::string &String, 783 const char *DiagnosticTag, bool AllowMacroExpansion) { 784 if (AllowMacroExpansion) 785 Lex(Result); 786 else 787 LexUnexpandedToken(Result); 788 return FinishLexStringLiteral(Result, String, DiagnosticTag, 789 AllowMacroExpansion); 790 } 791 792 /// \brief Complete the lexing of a string literal where the first token has 793 /// already been lexed (see LexStringLiteral). 794 bool FinishLexStringLiteral(Token &Result, std::string &String, 795 const char *DiagnosticTag, 796 bool AllowMacroExpansion); 797 798 /// \brief Lex a token. If it's a comment, keep lexing until we get 799 /// something not a comment. 800 /// 801 /// This is useful in -E -C mode where comments would foul up preprocessor 802 /// directive handling. LexNonComment(Token & Result)803 void LexNonComment(Token &Result) { 804 do 805 Lex(Result); 806 while (Result.getKind() == tok::comment); 807 } 808 809 /// \brief Just like Lex, but disables macro expansion of identifier tokens. LexUnexpandedToken(Token & Result)810 void LexUnexpandedToken(Token &Result) { 811 // Disable macro expansion. 812 bool OldVal = DisableMacroExpansion; 813 DisableMacroExpansion = true; 814 // Lex the token. 815 Lex(Result); 816 817 // Reenable it. 818 DisableMacroExpansion = OldVal; 819 } 820 821 /// \brief Like LexNonComment, but this disables macro expansion of 822 /// identifier tokens. LexUnexpandedNonComment(Token & Result)823 void LexUnexpandedNonComment(Token &Result) { 824 do 825 LexUnexpandedToken(Result); 826 while (Result.getKind() == tok::comment); 827 } 828 829 /// \brief Parses a simple integer literal to get its numeric value. Floating 830 /// point literals and user defined literals are rejected. Used primarily to 831 /// handle pragmas that accept integer arguments. 832 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value); 833 834 /// Disables macro expansion everywhere except for preprocessor directives. SetMacroExpansionOnlyInDirectives()835 void SetMacroExpansionOnlyInDirectives() { 836 DisableMacroExpansion = true; 837 MacroExpansionInDirectivesOverride = true; 838 } 839 840 /// \brief Peeks ahead N tokens and returns that token without consuming any 841 /// tokens. 842 /// 843 /// LookAhead(0) returns the next token that would be returned by Lex(), 844 /// LookAhead(1) returns the token after it, etc. This returns normal 845 /// tokens after phase 5. As such, it is equivalent to using 846 /// 'Lex', not 'LexUnexpandedToken'. LookAhead(unsigned N)847 const Token &LookAhead(unsigned N) { 848 if (CachedLexPos + N < CachedTokens.size()) 849 return CachedTokens[CachedLexPos+N]; 850 else 851 return PeekAhead(N+1); 852 } 853 854 /// \brief When backtracking is enabled and tokens are cached, 855 /// this allows to revert a specific number of tokens. 856 /// 857 /// Note that the number of tokens being reverted should be up to the last 858 /// backtrack position, not more. RevertCachedTokens(unsigned N)859 void RevertCachedTokens(unsigned N) { 860 assert(isBacktrackEnabled() && 861 "Should only be called when tokens are cached for backtracking"); 862 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back()) 863 && "Should revert tokens up to the last backtrack position, not more"); 864 assert(signed(CachedLexPos) - signed(N) >= 0 && 865 "Corrupted backtrack positions ?"); 866 CachedLexPos -= N; 867 } 868 869 /// \brief Enters a token in the token stream to be lexed next. 870 /// 871 /// If BackTrack() is called afterwards, the token will remain at the 872 /// insertion point. EnterToken(const Token & Tok)873 void EnterToken(const Token &Tok) { 874 EnterCachingLexMode(); 875 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok); 876 } 877 878 /// We notify the Preprocessor that if it is caching tokens (because 879 /// backtrack is enabled) it should replace the most recent cached tokens 880 /// with the given annotation token. This function has no effect if 881 /// backtracking is not enabled. 882 /// 883 /// Note that the use of this function is just for optimization, so that the 884 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is 885 /// invoked. AnnotateCachedTokens(const Token & Tok)886 void AnnotateCachedTokens(const Token &Tok) { 887 assert(Tok.isAnnotation() && "Expected annotation token"); 888 if (CachedLexPos != 0 && isBacktrackEnabled()) 889 AnnotatePreviousCachedTokens(Tok); 890 } 891 892 /// Get the location of the last cached token, suitable for setting the end 893 /// location of an annotation token. getLastCachedTokenLocation()894 SourceLocation getLastCachedTokenLocation() const { 895 assert(CachedLexPos != 0); 896 return CachedTokens[CachedLexPos-1].getLocation(); 897 } 898 899 /// \brief Replace the last token with an annotation token. 900 /// 901 /// Like AnnotateCachedTokens(), this routine replaces an 902 /// already-parsed (and resolved) token with an annotation 903 /// token. However, this routine only replaces the last token with 904 /// the annotation token; it does not affect any other cached 905 /// tokens. This function has no effect if backtracking is not 906 /// enabled. ReplaceLastTokenWithAnnotation(const Token & Tok)907 void ReplaceLastTokenWithAnnotation(const Token &Tok) { 908 assert(Tok.isAnnotation() && "Expected annotation token"); 909 if (CachedLexPos != 0 && isBacktrackEnabled()) 910 CachedTokens[CachedLexPos-1] = Tok; 911 } 912 913 /// Update the current token to represent the provided 914 /// identifier, in order to cache an action performed by typo correction. TypoCorrectToken(const Token & Tok)915 void TypoCorrectToken(const Token &Tok) { 916 assert(Tok.getIdentifierInfo() && "Expected identifier token"); 917 if (CachedLexPos != 0 && isBacktrackEnabled()) 918 CachedTokens[CachedLexPos-1] = Tok; 919 } 920 921 /// \brief Recompute the current lexer kind based on the CurLexer/CurPTHLexer/ 922 /// CurTokenLexer pointers. 923 void recomputeCurLexerKind(); 924 925 /// \brief Returns true if incremental processing is enabled isIncrementalProcessingEnabled()926 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; } 927 928 /// \brief Enables the incremental processing 929 void enableIncrementalProcessing(bool value = true) { 930 IncrementalProcessing = value; 931 } 932 933 /// \brief Specify the point at which code-completion will be performed. 934 /// 935 /// \param File the file in which code completion should occur. If 936 /// this file is included multiple times, code-completion will 937 /// perform completion the first time it is included. If NULL, this 938 /// function clears out the code-completion point. 939 /// 940 /// \param Line the line at which code completion should occur 941 /// (1-based). 942 /// 943 /// \param Column the column at which code completion should occur 944 /// (1-based). 945 /// 946 /// \returns true if an error occurred, false otherwise. 947 bool SetCodeCompletionPoint(const FileEntry *File, 948 unsigned Line, unsigned Column); 949 950 /// \brief Determine if we are performing code completion. isCodeCompletionEnabled()951 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; } 952 953 /// \brief Returns the location of the code-completion point. 954 /// 955 /// Returns an invalid location if code-completion is not enabled or the file 956 /// containing the code-completion point has not been lexed yet. getCodeCompletionLoc()957 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; } 958 959 /// \brief Returns the start location of the file of code-completion point. 960 /// 961 /// Returns an invalid location if code-completion is not enabled or the file 962 /// containing the code-completion point has not been lexed yet. getCodeCompletionFileLoc()963 SourceLocation getCodeCompletionFileLoc() const { 964 return CodeCompletionFileLoc; 965 } 966 967 /// \brief Returns true if code-completion is enabled and we have hit the 968 /// code-completion point. isCodeCompletionReached()969 bool isCodeCompletionReached() const { return CodeCompletionReached; } 970 971 /// \brief Note that we hit the code-completion point. setCodeCompletionReached()972 void setCodeCompletionReached() { 973 assert(isCodeCompletionEnabled() && "Code-completion not enabled!"); 974 CodeCompletionReached = true; 975 // Silence any diagnostics that occur after we hit the code-completion. 976 getDiagnostics().setSuppressAllDiagnostics(true); 977 } 978 979 /// \brief The location of the currently-active \#pragma clang 980 /// arc_cf_code_audited begin. 981 /// 982 /// Returns an invalid location if there is no such pragma active. getPragmaARCCFCodeAuditedLoc()983 SourceLocation getPragmaARCCFCodeAuditedLoc() const { 984 return PragmaARCCFCodeAuditedLoc; 985 } 986 987 /// \brief Set the location of the currently-active \#pragma clang 988 /// arc_cf_code_audited begin. An invalid location ends the pragma. setPragmaARCCFCodeAuditedLoc(SourceLocation Loc)989 void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) { 990 PragmaARCCFCodeAuditedLoc = Loc; 991 } 992 993 /// \brief Instruct the preprocessor to skip part of the main source file. 994 /// 995 /// \param Bytes The number of bytes in the preamble to skip. 996 /// 997 /// \param StartOfLine Whether skipping these bytes puts the lexer at the 998 /// start of a line. setSkipMainFilePreamble(unsigned Bytes,bool StartOfLine)999 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) { 1000 SkipMainFilePreamble.first = Bytes; 1001 SkipMainFilePreamble.second = StartOfLine; 1002 } 1003 1004 /// Forwarding function for diagnostics. This emits a diagnostic at 1005 /// the specified Token's location, translating the token's start 1006 /// position in the current buffer into a SourcePosition object for rendering. Diag(SourceLocation Loc,unsigned DiagID)1007 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const { 1008 return Diags->Report(Loc, DiagID); 1009 } 1010 Diag(const Token & Tok,unsigned DiagID)1011 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const { 1012 return Diags->Report(Tok.getLocation(), DiagID); 1013 } 1014 1015 /// Return the 'spelling' of the token at the given 1016 /// location; does not go up to the spelling location or down to the 1017 /// expansion location. 1018 /// 1019 /// \param buffer A buffer which will be used only if the token requires 1020 /// "cleaning", e.g. if it contains trigraphs or escaped newlines 1021 /// \param invalid If non-null, will be set \c true if an error occurs. 1022 StringRef getSpelling(SourceLocation loc, 1023 SmallVectorImpl<char> &buffer, 1024 bool *invalid = nullptr) const { 1025 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid); 1026 } 1027 1028 /// \brief Return the 'spelling' of the Tok token. 1029 /// 1030 /// The spelling of a token is the characters used to represent the token in 1031 /// the source file after trigraph expansion and escaped-newline folding. In 1032 /// particular, this wants to get the true, uncanonicalized, spelling of 1033 /// things like digraphs, UCNs, etc. 1034 /// 1035 /// \param Invalid If non-null, will be set \c true if an error occurs. 1036 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const { 1037 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid); 1038 } 1039 1040 /// \brief Get the spelling of a token into a preallocated buffer, instead 1041 /// of as an std::string. 1042 /// 1043 /// The caller is required to allocate enough space for the token, which is 1044 /// guaranteed to be at least Tok.getLength() bytes long. The length of the 1045 /// actual result is returned. 1046 /// 1047 /// Note that this method may do two possible things: it may either fill in 1048 /// the buffer specified with characters, or it may *change the input pointer* 1049 /// to point to a constant buffer with the data already in it (avoiding a 1050 /// copy). The caller is not allowed to modify the returned buffer pointer 1051 /// if an internal buffer is returned. 1052 unsigned getSpelling(const Token &Tok, const char *&Buffer, 1053 bool *Invalid = nullptr) const { 1054 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid); 1055 } 1056 1057 /// \brief Get the spelling of a token into a SmallVector. 1058 /// 1059 /// Note that the returned StringRef may not point to the 1060 /// supplied buffer if a copy can be avoided. 1061 StringRef getSpelling(const Token &Tok, 1062 SmallVectorImpl<char> &Buffer, 1063 bool *Invalid = nullptr) const; 1064 1065 /// \brief Relex the token at the specified location. 1066 /// \returns true if there was a failure, false on success. 1067 bool getRawToken(SourceLocation Loc, Token &Result, 1068 bool IgnoreWhiteSpace = false) { 1069 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace); 1070 } 1071 1072 /// \brief Given a Token \p Tok that is a numeric constant with length 1, 1073 /// return the character. 1074 char 1075 getSpellingOfSingleCharacterNumericConstant(const Token &Tok, 1076 bool *Invalid = nullptr) const { 1077 assert(Tok.is(tok::numeric_constant) && 1078 Tok.getLength() == 1 && "Called on unsupported token"); 1079 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1"); 1080 1081 // If the token is carrying a literal data pointer, just use it. 1082 if (const char *D = Tok.getLiteralData()) 1083 return *D; 1084 1085 // Otherwise, fall back on getCharacterData, which is slower, but always 1086 // works. 1087 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid); 1088 } 1089 1090 /// \brief Retrieve the name of the immediate macro expansion. 1091 /// 1092 /// This routine starts from a source location, and finds the name of the 1093 /// macro responsible for its immediate expansion. It looks through any 1094 /// intervening macro argument expansions to compute this. It returns a 1095 /// StringRef that refers to the SourceManager-owned buffer of the source 1096 /// where that macro name is spelled. Thus, the result shouldn't out-live 1097 /// the SourceManager. getImmediateMacroName(SourceLocation Loc)1098 StringRef getImmediateMacroName(SourceLocation Loc) { 1099 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts()); 1100 } 1101 1102 /// \brief Plop the specified string into a scratch buffer and set the 1103 /// specified token's location and length to it. 1104 /// 1105 /// If specified, the source location provides a location of the expansion 1106 /// point of the token. 1107 void CreateString(StringRef Str, Token &Tok, 1108 SourceLocation ExpansionLocStart = SourceLocation(), 1109 SourceLocation ExpansionLocEnd = SourceLocation()); 1110 1111 /// \brief Computes the source location just past the end of the 1112 /// token at this source location. 1113 /// 1114 /// This routine can be used to produce a source location that 1115 /// points just past the end of the token referenced by \p Loc, and 1116 /// is generally used when a diagnostic needs to point just after a 1117 /// token where it expected something different that it received. If 1118 /// the returned source location would not be meaningful (e.g., if 1119 /// it points into a macro), this routine returns an invalid 1120 /// source location. 1121 /// 1122 /// \param Offset an offset from the end of the token, where the source 1123 /// location should refer to. The default offset (0) produces a source 1124 /// location pointing just past the end of the token; an offset of 1 produces 1125 /// a source location pointing to the last character in the token, etc. 1126 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) { 1127 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts); 1128 } 1129 1130 /// \brief Returns true if the given MacroID location points at the first 1131 /// token of the macro expansion. 1132 /// 1133 /// \param MacroBegin If non-null and function returns true, it is set to 1134 /// begin location of the macro. 1135 bool isAtStartOfMacroExpansion(SourceLocation loc, 1136 SourceLocation *MacroBegin = nullptr) const { 1137 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts, 1138 MacroBegin); 1139 } 1140 1141 /// \brief Returns true if the given MacroID location points at the last 1142 /// token of the macro expansion. 1143 /// 1144 /// \param MacroEnd If non-null and function returns true, it is set to 1145 /// end location of the macro. 1146 bool isAtEndOfMacroExpansion(SourceLocation loc, 1147 SourceLocation *MacroEnd = nullptr) const { 1148 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd); 1149 } 1150 1151 /// \brief Print the token to stderr, used for debugging. 1152 void DumpToken(const Token &Tok, bool DumpFlags = false) const; 1153 void DumpLocation(SourceLocation Loc) const; 1154 void DumpMacro(const MacroInfo &MI) const; 1155 1156 /// \brief Given a location that specifies the start of a 1157 /// token, return a new location that specifies a character within the token. AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char)1158 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, 1159 unsigned Char) const { 1160 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts); 1161 } 1162 1163 /// \brief Increment the counters for the number of token paste operations 1164 /// performed. 1165 /// 1166 /// If fast was specified, this is a 'fast paste' case we handled. IncrementPasteCounter(bool isFast)1167 void IncrementPasteCounter(bool isFast) { 1168 if (isFast) 1169 ++NumFastTokenPaste; 1170 else 1171 ++NumTokenPaste; 1172 } 1173 1174 void PrintStats(); 1175 1176 size_t getTotalMemory() const; 1177 1178 /// When the macro expander pastes together a comment (/##/) in Microsoft 1179 /// mode, this method handles updating the current state, returning the 1180 /// token on the next source line. 1181 void HandleMicrosoftCommentPaste(Token &Tok); 1182 1183 //===--------------------------------------------------------------------===// 1184 // Preprocessor callback methods. These are invoked by a lexer as various 1185 // directives and events are found. 1186 1187 /// Given a tok::raw_identifier token, look up the 1188 /// identifier information for the token and install it into the token, 1189 /// updating the token kind accordingly. 1190 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const; 1191 1192 private: 1193 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons; 1194 1195 public: 1196 1197 /// \brief Specifies the reason for poisoning an identifier. 1198 /// 1199 /// If that identifier is accessed while poisoned, then this reason will be 1200 /// used instead of the default "poisoned" diagnostic. 1201 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID); 1202 1203 /// \brief Display reason for poisoned identifier. 1204 void HandlePoisonedIdentifier(Token & Tok); 1205 MaybeHandlePoisonedIdentifier(Token & Identifier)1206 void MaybeHandlePoisonedIdentifier(Token & Identifier) { 1207 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) { 1208 if(II->isPoisoned()) { 1209 HandlePoisonedIdentifier(Identifier); 1210 } 1211 } 1212 } 1213 1214 private: 1215 /// Identifiers used for SEH handling in Borland. These are only 1216 /// allowed in particular circumstances 1217 // __except block 1218 IdentifierInfo *Ident__exception_code, 1219 *Ident___exception_code, 1220 *Ident_GetExceptionCode; 1221 // __except filter expression 1222 IdentifierInfo *Ident__exception_info, 1223 *Ident___exception_info, 1224 *Ident_GetExceptionInfo; 1225 // __finally 1226 IdentifierInfo *Ident__abnormal_termination, 1227 *Ident___abnormal_termination, 1228 *Ident_AbnormalTermination; 1229 1230 const char *getCurLexerEndPos(); 1231 1232 public: 1233 void PoisonSEHIdentifiers(bool Poison = true); // Borland 1234 1235 /// \brief Callback invoked when the lexer reads an identifier and has 1236 /// filled in the tokens IdentifierInfo member. 1237 /// 1238 /// This callback potentially macro expands it or turns it into a named 1239 /// token (like 'for'). 1240 /// 1241 /// \returns true if we actually computed a token, false if we need to 1242 /// lex again. 1243 bool HandleIdentifier(Token &Identifier); 1244 1245 1246 /// \brief Callback invoked when the lexer hits the end of the current file. 1247 /// 1248 /// This either returns the EOF token and returns true, or 1249 /// pops a level off the include stack and returns false, at which point the 1250 /// client should call lex again. 1251 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false); 1252 1253 /// \brief Callback invoked when the current TokenLexer hits the end of its 1254 /// token stream. 1255 bool HandleEndOfTokenLexer(Token &Result); 1256 1257 /// \brief Callback invoked when the lexer sees a # token at the start of a 1258 /// line. 1259 /// 1260 /// This consumes the directive, modifies the lexer/preprocessor state, and 1261 /// advances the lexer(s) so that the next token read is the correct one. 1262 void HandleDirective(Token &Result); 1263 1264 /// \brief Ensure that the next token is a tok::eod token. 1265 /// 1266 /// If not, emit a diagnostic and consume up until the eod. 1267 /// If \p EnableMacros is true, then we consider macros that expand to zero 1268 /// tokens as being ok. 1269 void CheckEndOfDirective(const char *Directive, bool EnableMacros = false); 1270 1271 /// \brief Read and discard all tokens remaining on the current line until 1272 /// the tok::eod token is found. 1273 void DiscardUntilEndOfDirective(); 1274 1275 /// \brief Returns true if the preprocessor has seen a use of 1276 /// __DATE__ or __TIME__ in the file so far. SawDateOrTime()1277 bool SawDateOrTime() const { 1278 return DATELoc != SourceLocation() || TIMELoc != SourceLocation(); 1279 } getCounterValue()1280 unsigned getCounterValue() const { return CounterValue; } setCounterValue(unsigned V)1281 void setCounterValue(unsigned V) { CounterValue = V; } 1282 1283 /// \brief Retrieves the module that we're currently building, if any. 1284 Module *getCurrentModule(); 1285 1286 /// \brief Allocate a new MacroInfo object with the provided SourceLocation. 1287 MacroInfo *AllocateMacroInfo(SourceLocation L); 1288 1289 /// \brief Allocate a new MacroInfo object loaded from an AST file. 1290 MacroInfo *AllocateDeserializedMacroInfo(SourceLocation L, 1291 unsigned SubModuleID); 1292 1293 /// \brief Turn the specified lexer token into a fully checked and spelled 1294 /// filename, e.g. as an operand of \#include. 1295 /// 1296 /// The caller is expected to provide a buffer that is large enough to hold 1297 /// the spelling of the filename, but is also expected to handle the case 1298 /// when this method decides to use a different buffer. 1299 /// 1300 /// \returns true if the input filename was in <>'s or false if it was 1301 /// in ""'s. 1302 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Filename); 1303 1304 /// \brief Given a "foo" or \<foo> reference, look up the indicated file. 1305 /// 1306 /// Returns null on failure. \p isAngled indicates whether the file 1307 /// reference is for system \#include's or not (i.e. using <> instead of ""). 1308 const FileEntry *LookupFile(SourceLocation FilenameLoc, StringRef Filename, 1309 bool isAngled, const DirectoryLookup *FromDir, 1310 const DirectoryLookup *&CurDir, 1311 SmallVectorImpl<char> *SearchPath, 1312 SmallVectorImpl<char> *RelativePath, 1313 ModuleMap::KnownHeader *SuggestedModule, 1314 bool SkipCache = false); 1315 1316 /// \brief Get the DirectoryLookup structure used to find the current 1317 /// FileEntry, if CurLexer is non-null and if applicable. 1318 /// 1319 /// This allows us to implement \#include_next and find directory-specific 1320 /// properties. GetCurDirLookup()1321 const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; } 1322 1323 /// \brief Return true if we're in the top-level file, not in a \#include. 1324 bool isInPrimaryFile() const; 1325 1326 /// \brief Handle cases where the \#include name is expanded 1327 /// from a macro as multiple tokens, which need to be glued together. 1328 /// 1329 /// This occurs for code like: 1330 /// \code 1331 /// \#define FOO <x/y.h> 1332 /// \#include FOO 1333 /// \endcode 1334 /// because in this case, "<x/y.h>" is returned as 7 tokens, not one. 1335 /// 1336 /// This code concatenates and consumes tokens up to the '>' token. It 1337 /// returns false if the > was found, otherwise it returns true if it finds 1338 /// and consumes the EOD marker. 1339 bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer, 1340 SourceLocation &End); 1341 1342 /// \brief Lex an on-off-switch (C99 6.10.6p2) and verify that it is 1343 /// followed by EOD. Return true if the token is not a valid on-off-switch. 1344 bool LexOnOffSwitch(tok::OnOffSwitch &OOS); 1345 1346 bool CheckMacroName(Token &MacroNameTok, char isDefineUndef); 1347 1348 private: 1349 PushIncludeMacroStack()1350 void PushIncludeMacroStack() { 1351 IncludeMacroStack.push_back(IncludeStackInfo( 1352 CurLexerKind, CurSubmodule, std::move(CurLexer), std::move(CurPTHLexer), 1353 CurPPLexer, std::move(CurTokenLexer), CurDirLookup)); 1354 CurPPLexer = nullptr; 1355 } 1356 PopIncludeMacroStack()1357 void PopIncludeMacroStack() { 1358 CurLexer = std::move(IncludeMacroStack.back().TheLexer); 1359 CurPTHLexer = std::move(IncludeMacroStack.back().ThePTHLexer); 1360 CurPPLexer = IncludeMacroStack.back().ThePPLexer; 1361 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer); 1362 CurDirLookup = IncludeMacroStack.back().TheDirLookup; 1363 CurSubmodule = IncludeMacroStack.back().TheSubmodule; 1364 CurLexerKind = IncludeMacroStack.back().CurLexerKind; 1365 IncludeMacroStack.pop_back(); 1366 } 1367 1368 void PropagateLineStartLeadingSpaceInfo(Token &Result); 1369 1370 /// \brief Allocate a new MacroInfo object. 1371 MacroInfo *AllocateMacroInfo(); 1372 1373 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI, 1374 SourceLocation Loc, 1375 bool isImported); 1376 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc); 1377 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc, 1378 bool isPublic); 1379 1380 /// \brief Release the specified MacroInfo for re-use. 1381 /// 1382 /// This memory will be reused for allocating new MacroInfo objects. 1383 void ReleaseMacroInfo(MacroInfo* MI); 1384 1385 /// \brief Lex and validate a macro name, which occurs after a 1386 /// \#define or \#undef. 1387 /// 1388 /// This emits a diagnostic, sets the token kind to eod, 1389 /// and discards the rest of the macro line if the macro name is invalid. 1390 void ReadMacroName(Token &MacroNameTok, char isDefineUndef = 0); 1391 1392 /// The ( starting an argument list of a macro definition has just been read. 1393 /// Lex the rest of the arguments and the closing ), updating \p MI with 1394 /// what we learn and saving in \p LastTok the last token read. 1395 /// Return true if an error occurs parsing the arg list. 1396 bool ReadMacroDefinitionArgList(MacroInfo *MI, Token& LastTok); 1397 1398 /// We just read a \#if or related directive and decided that the 1399 /// subsequent tokens are in the \#if'd out portion of the 1400 /// file. Lex the rest of the file, until we see an \#endif. If \p 1401 /// FoundNonSkipPortion is true, then we have already emitted code for part of 1402 /// this \#if directive, so \#else/\#elif blocks should never be entered. If 1403 /// \p FoundElse is false, then \#else directives are ok, if not, then we have 1404 /// already seen one so a \#else directive is a duplicate. When this returns, 1405 /// the caller can lex the first valid token. 1406 void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, 1407 bool FoundNonSkipPortion, bool FoundElse, 1408 SourceLocation ElseLoc = SourceLocation()); 1409 1410 /// \brief A fast PTH version of SkipExcludedConditionalBlock. 1411 void PTHSkipExcludedConditionalBlock(); 1412 1413 /// \brief Evaluate an integer constant expression that may occur after a 1414 /// \#if or \#elif directive and return it as a bool. 1415 /// 1416 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro. 1417 bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro); 1418 1419 /// \brief Install the standard preprocessor pragmas: 1420 /// \#pragma GCC poison/system_header/dependency and \#pragma once. 1421 void RegisterBuiltinPragmas(); 1422 1423 /// \brief Register builtin macros such as __LINE__ with the identifier table. 1424 void RegisterBuiltinMacros(); 1425 1426 /// If an identifier token is read that is to be expanded as a macro, handle 1427 /// it and return the next token as 'Tok'. If we lexed a token, return true; 1428 /// otherwise the caller should lex again. 1429 bool HandleMacroExpandedIdentifier(Token &Tok, MacroDirective *MD); 1430 1431 /// \brief Cache macro expanded tokens for TokenLexers. 1432 // 1433 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 1434 /// going to lex in the cache and when it finishes the tokens are removed 1435 /// from the end of the cache. 1436 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer, 1437 ArrayRef<Token> tokens); 1438 void removeCachedMacroExpandedTokensOfLastLexer(); 1439 friend void TokenLexer::ExpandFunctionArguments(); 1440 1441 /// Determine whether the next preprocessor token to be 1442 /// lexed is a '('. If so, consume the token and return true, if not, this 1443 /// method should have no observable side-effect on the lexed tokens. 1444 bool isNextPPTokenLParen(); 1445 1446 /// After reading "MACRO(", this method is invoked to read all of the formal 1447 /// arguments specified for the macro invocation. Returns null on error. 1448 MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI, 1449 SourceLocation &ExpansionEnd); 1450 1451 /// \brief If an identifier token is read that is to be expanded 1452 /// as a builtin macro, handle it and return the next token as 'Tok'. 1453 void ExpandBuiltinMacro(Token &Tok); 1454 1455 /// \brief Read a \c _Pragma directive, slice it up, process it, then 1456 /// return the first token after the directive. 1457 /// This assumes that the \c _Pragma token has just been read into \p Tok. 1458 void Handle_Pragma(Token &Tok); 1459 1460 /// \brief Like Handle_Pragma except the pragma text is not enclosed within 1461 /// a string literal. 1462 void HandleMicrosoft__pragma(Token &Tok); 1463 1464 /// \brief Add a lexer to the top of the include stack and 1465 /// start lexing tokens from it instead of the current buffer. 1466 void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir); 1467 1468 /// \brief Add a lexer to the top of the include stack and 1469 /// start getting tokens from it using the PTH cache. 1470 void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir); 1471 1472 /// \brief Set the FileID for the preprocessor predefines. setPredefinesFileID(FileID FID)1473 void setPredefinesFileID(FileID FID) { 1474 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!"); 1475 PredefinesFileID = FID; 1476 } 1477 1478 /// \brief Returns true if we are lexing from a file and not a 1479 /// pragma or a macro. IsFileLexer(const Lexer * L,const PreprocessorLexer * P)1480 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) { 1481 return L ? !L->isPragmaLexer() : P != nullptr; 1482 } 1483 IsFileLexer(const IncludeStackInfo & I)1484 static bool IsFileLexer(const IncludeStackInfo& I) { 1485 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer); 1486 } 1487 IsFileLexer()1488 bool IsFileLexer() const { 1489 return IsFileLexer(CurLexer.get(), CurPPLexer); 1490 } 1491 1492 //===--------------------------------------------------------------------===// 1493 // Caching stuff. 1494 void CachingLex(Token &Result); InCachingLexMode()1495 bool InCachingLexMode() const { 1496 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means 1497 // that we are past EOF, not that we are in CachingLex mode. 1498 return !CurPPLexer && !CurTokenLexer && !CurPTHLexer && 1499 !IncludeMacroStack.empty(); 1500 } 1501 void EnterCachingLexMode(); ExitCachingLexMode()1502 void ExitCachingLexMode() { 1503 if (InCachingLexMode()) 1504 RemoveTopOfLexerStack(); 1505 } 1506 const Token &PeekAhead(unsigned N); 1507 void AnnotatePreviousCachedTokens(const Token &Tok); 1508 1509 //===--------------------------------------------------------------------===// 1510 /// Handle*Directive - implement the various preprocessor directives. These 1511 /// should side-effect the current preprocessor object so that the next call 1512 /// to Lex() will return the appropriate token next. 1513 void HandleLineDirective(Token &Tok); 1514 void HandleDigitDirective(Token &Tok); 1515 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning); 1516 void HandleIdentSCCSDirective(Token &Tok); 1517 void HandleMacroPublicDirective(Token &Tok); 1518 void HandleMacroPrivateDirective(Token &Tok); 1519 1520 // File inclusion. 1521 void HandleIncludeDirective(SourceLocation HashLoc, 1522 Token &Tok, 1523 const DirectoryLookup *LookupFrom = nullptr, 1524 bool isImport = false); 1525 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok); 1526 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok); 1527 void HandleImportDirective(SourceLocation HashLoc, Token &Tok); 1528 void HandleMicrosoftImportDirective(Token &Tok); 1529 1530 // Module inclusion testing. 1531 /// \brief Find the module for the source or header file that \p FilenameLoc 1532 /// points to. 1533 Module *getModuleForLocation(SourceLocation FilenameLoc); 1534 1535 // Macro handling. 1536 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterTopLevelIfndef); 1537 void HandleUndefDirective(Token &Tok); 1538 1539 // Conditional Inclusion. 1540 void HandleIfdefDirective(Token &Tok, bool isIfndef, 1541 bool ReadAnyTokensBeforeDirective); 1542 void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective); 1543 void HandleEndifDirective(Token &Tok); 1544 void HandleElseDirective(Token &Tok); 1545 void HandleElifDirective(Token &Tok); 1546 1547 // Pragmas. 1548 void HandlePragmaDirective(SourceLocation IntroducerLoc, 1549 PragmaIntroducerKind Introducer); 1550 public: 1551 void HandlePragmaOnce(Token &OnceTok); 1552 void HandlePragmaMark(); 1553 void HandlePragmaPoison(Token &PoisonTok); 1554 void HandlePragmaSystemHeader(Token &SysHeaderTok); 1555 void HandlePragmaDependency(Token &DependencyTok); 1556 void HandlePragmaPushMacro(Token &Tok); 1557 void HandlePragmaPopMacro(Token &Tok); 1558 void HandlePragmaIncludeAlias(Token &Tok); 1559 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok); 1560 1561 // Return true and store the first token only if any CommentHandler 1562 // has inserted some tokens and getCommentRetentionState() is false. 1563 bool HandleComment(Token &Token, SourceRange Comment); 1564 1565 /// \brief A macro is used, update information about macros that need unused 1566 /// warnings. 1567 void markMacroAsUsed(MacroInfo *MI); 1568 }; 1569 1570 /// \brief Abstract base class that describes a handler that will receive 1571 /// source ranges for each of the comments encountered in the source file. 1572 class CommentHandler { 1573 public: 1574 virtual ~CommentHandler(); 1575 1576 // The handler shall return true if it has pushed any tokens 1577 // to be read using e.g. EnterToken or EnterTokenStream. 1578 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0; 1579 }; 1580 1581 } // end namespace clang 1582 1583 #endif 1584