• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2  //
3  //                     The LLVM Compiler Infrastructure
4  //
5  // This file is distributed under the University of Illinois Open Source
6  // License. See LICENSE.TXT for details.
7  //
8  //===----------------------------------------------------------------------===//
9  //
10  //  This file implements the Preprocessor interface.
11  //
12  //===----------------------------------------------------------------------===//
13  //
14  // Options to support:
15  //   -H       - Print the name of each header file used.
16  //   -d[DNI] - Dump various things.
17  //   -fworking-directory - #line's with preprocessor's working dir.
18  //   -fpreprocessed
19  //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20  //   -W*
21  //   -w
22  //
23  // Messages to emit:
24  //   "Multiple include guards may be useful for:\n"
25  //
26  //===----------------------------------------------------------------------===//
27  
28  #include "clang/Lex/Preprocessor.h"
29  #include "clang/Basic/FileManager.h"
30  #include "clang/Basic/FileSystemStatCache.h"
31  #include "clang/Basic/SourceManager.h"
32  #include "clang/Basic/TargetInfo.h"
33  #include "clang/Lex/CodeCompletionHandler.h"
34  #include "clang/Lex/ExternalPreprocessorSource.h"
35  #include "clang/Lex/HeaderSearch.h"
36  #include "clang/Lex/LexDiagnostic.h"
37  #include "clang/Lex/LiteralSupport.h"
38  #include "clang/Lex/MacroArgs.h"
39  #include "clang/Lex/MacroInfo.h"
40  #include "clang/Lex/ModuleLoader.h"
41  #include "clang/Lex/Pragma.h"
42  #include "clang/Lex/PreprocessingRecord.h"
43  #include "clang/Lex/PreprocessorOptions.h"
44  #include "clang/Lex/ScratchBuffer.h"
45  #include "llvm/ADT/APFloat.h"
46  #include "llvm/ADT/STLExtras.h"
47  #include "llvm/ADT/SmallString.h"
48  #include "llvm/ADT/StringExtras.h"
49  #include "llvm/Support/Capacity.h"
50  #include "llvm/Support/ConvertUTF.h"
51  #include "llvm/Support/MemoryBuffer.h"
52  #include "llvm/Support/raw_ostream.h"
53  using namespace clang;
54  
55  //===----------------------------------------------------------------------===//
~ExternalPreprocessorSource()56  ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
57  
Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,DiagnosticsEngine & diags,LangOptions & opts,SourceManager & SM,HeaderSearch & Headers,ModuleLoader & TheModuleLoader,IdentifierInfoLookup * IILookup,bool OwnsHeaders,TranslationUnitKind TUKind)58  Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
59                             DiagnosticsEngine &diags, LangOptions &opts,
60                             SourceManager &SM, HeaderSearch &Headers,
61                             ModuleLoader &TheModuleLoader,
62                             IdentifierInfoLookup *IILookup, bool OwnsHeaders,
63                             TranslationUnitKind TUKind)
64      : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr),
65        FileMgr(Headers.getFileMgr()), SourceMgr(SM),
66        ScratchBuf(new ScratchBuffer(SourceMgr)),HeaderInfo(Headers),
67        TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
68        Identifiers(opts, IILookup),
69        PragmaHandlers(new PragmaNamespace(StringRef())),
70        IncrementalProcessing(false), TUKind(TUKind),
71        CodeComplete(nullptr), CodeCompletionFile(nullptr),
72        CodeCompletionOffset(0), LastTokenWasAt(false),
73        ModuleImportExpectsIdentifier(false), CodeCompletionReached(0),
74        MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
75        CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), CurSubmodule(nullptr),
76        Callbacks(nullptr), MacroArgCache(nullptr), Record(nullptr),
77        MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
78    OwnsHeaderSearch = OwnsHeaders;
79  
80    CounterValue = 0; // __COUNTER__ starts at 0.
81  
82    // Clear stats.
83    NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
84    NumIf = NumElse = NumEndif = 0;
85    NumEnteredSourceFiles = 0;
86    NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
87    NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
88    MaxIncludeStackDepth = 0;
89    NumSkipped = 0;
90  
91    // Default to discarding comments.
92    KeepComments = false;
93    KeepMacroComments = false;
94    SuppressIncludeNotFoundError = false;
95  
96    // Macro expansion is enabled.
97    DisableMacroExpansion = false;
98    MacroExpansionInDirectivesOverride = false;
99    InMacroArgs = false;
100    InMacroArgPreExpansion = false;
101    NumCachedTokenLexers = 0;
102    PragmasEnabled = true;
103    ParsingIfOrElifDirective = false;
104    PreprocessedOutput = false;
105  
106    CachedLexPos = 0;
107  
108    // We haven't read anything from the external source.
109    ReadMacrosFromExternalSource = false;
110  
111    // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
112    // This gets unpoisoned where it is allowed.
113    (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
114    SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
115  
116    // Initialize the pragma handlers.
117    RegisterBuiltinPragmas();
118  
119    // Initialize builtin macros like __LINE__ and friends.
120    RegisterBuiltinMacros();
121  
122    if(LangOpts.Borland) {
123      Ident__exception_info        = getIdentifierInfo("_exception_info");
124      Ident___exception_info       = getIdentifierInfo("__exception_info");
125      Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
126      Ident__exception_code        = getIdentifierInfo("_exception_code");
127      Ident___exception_code       = getIdentifierInfo("__exception_code");
128      Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
129      Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
130      Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
131      Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
132    } else {
133      Ident__exception_info = Ident__exception_code = nullptr;
134      Ident__abnormal_termination = Ident___exception_info = nullptr;
135      Ident___exception_code = Ident___abnormal_termination = nullptr;
136      Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
137      Ident_AbnormalTermination = nullptr;
138    }
139  }
140  
~Preprocessor()141  Preprocessor::~Preprocessor() {
142    assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
143  
144    IncludeMacroStack.clear();
145  
146    // Destroy any macro definitions.
147    while (MacroInfoChain *I = MIChainHead) {
148      MIChainHead = I->Next;
149      I->~MacroInfoChain();
150    }
151  
152    // Free any cached macro expanders.
153    // This populates MacroArgCache, so all TokenLexers need to be destroyed
154    // before the code below that frees up the MacroArgCache list.
155    std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
156    CurTokenLexer.reset();
157  
158    while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
159      DeserialMIChainHead = I->Next;
160      I->~DeserializedMacroInfoChain();
161    }
162  
163    // Free any cached MacroArgs.
164    for (MacroArgs *ArgList = MacroArgCache; ArgList;)
165      ArgList = ArgList->deallocate();
166  
167    // Delete the header search info, if we own it.
168    if (OwnsHeaderSearch)
169      delete &HeaderInfo;
170  }
171  
Initialize(const TargetInfo & Target)172  void Preprocessor::Initialize(const TargetInfo &Target) {
173    assert((!this->Target || this->Target == &Target) &&
174           "Invalid override of target information");
175    this->Target = &Target;
176  
177    // Initialize information about built-ins.
178    BuiltinInfo.InitializeTarget(Target);
179    HeaderInfo.setTarget(Target);
180  }
181  
InitializeForModelFile()182  void Preprocessor::InitializeForModelFile() {
183    NumEnteredSourceFiles = 0;
184  
185    // Reset pragmas
186    PragmaHandlersBackup = std::move(PragmaHandlers);
187    PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
188    RegisterBuiltinPragmas();
189  
190    // Reset PredefinesFileID
191    PredefinesFileID = FileID();
192  }
193  
FinalizeForModelFile()194  void Preprocessor::FinalizeForModelFile() {
195    NumEnteredSourceFiles = 1;
196  
197    PragmaHandlers = std::move(PragmaHandlersBackup);
198  }
199  
setPTHManager(PTHManager * pm)200  void Preprocessor::setPTHManager(PTHManager* pm) {
201    PTH.reset(pm);
202    FileMgr.addStatCache(PTH->createStatCache());
203  }
204  
DumpToken(const Token & Tok,bool DumpFlags) const205  void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
206    llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
207                 << getSpelling(Tok) << "'";
208  
209    if (!DumpFlags) return;
210  
211    llvm::errs() << "\t";
212    if (Tok.isAtStartOfLine())
213      llvm::errs() << " [StartOfLine]";
214    if (Tok.hasLeadingSpace())
215      llvm::errs() << " [LeadingSpace]";
216    if (Tok.isExpandDisabled())
217      llvm::errs() << " [ExpandDisabled]";
218    if (Tok.needsCleaning()) {
219      const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
220      llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
221                   << "']";
222    }
223  
224    llvm::errs() << "\tLoc=<";
225    DumpLocation(Tok.getLocation());
226    llvm::errs() << ">";
227  }
228  
DumpLocation(SourceLocation Loc) const229  void Preprocessor::DumpLocation(SourceLocation Loc) const {
230    Loc.dump(SourceMgr);
231  }
232  
DumpMacro(const MacroInfo & MI) const233  void Preprocessor::DumpMacro(const MacroInfo &MI) const {
234    llvm::errs() << "MACRO: ";
235    for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
236      DumpToken(MI.getReplacementToken(i));
237      llvm::errs() << "  ";
238    }
239    llvm::errs() << "\n";
240  }
241  
PrintStats()242  void Preprocessor::PrintStats() {
243    llvm::errs() << "\n*** Preprocessor Stats:\n";
244    llvm::errs() << NumDirectives << " directives found:\n";
245    llvm::errs() << "  " << NumDefined << " #define.\n";
246    llvm::errs() << "  " << NumUndefined << " #undef.\n";
247    llvm::errs() << "  #include/#include_next/#import:\n";
248    llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
249    llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
250    llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
251    llvm::errs() << "  " << NumElse << " #else/#elif.\n";
252    llvm::errs() << "  " << NumEndif << " #endif.\n";
253    llvm::errs() << "  " << NumPragma << " #pragma.\n";
254    llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
255  
256    llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
257               << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
258               << NumFastMacroExpanded << " on the fast path.\n";
259    llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
260               << " token paste (##) operations performed, "
261               << NumFastTokenPaste << " on the fast path.\n";
262  
263    llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
264  
265    llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
266    llvm::errs() << "\n  Macro Expanded Tokens: "
267                 << llvm::capacity_in_bytes(MacroExpandedTokens);
268    llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
269    llvm::errs() << "\n  Macros: " << llvm::capacity_in_bytes(Macros);
270    llvm::errs() << "\n  #pragma push_macro Info: "
271                 << llvm::capacity_in_bytes(PragmaPushMacroInfo);
272    llvm::errs() << "\n  Poison Reasons: "
273                 << llvm::capacity_in_bytes(PoisonReasons);
274    llvm::errs() << "\n  Comment Handlers: "
275                 << llvm::capacity_in_bytes(CommentHandlers) << "\n";
276  }
277  
278  Preprocessor::macro_iterator
macro_begin(bool IncludeExternalMacros) const279  Preprocessor::macro_begin(bool IncludeExternalMacros) const {
280    if (IncludeExternalMacros && ExternalSource &&
281        !ReadMacrosFromExternalSource) {
282      ReadMacrosFromExternalSource = true;
283      ExternalSource->ReadDefinedMacros();
284    }
285  
286    return Macros.begin();
287  }
288  
getTotalMemory() const289  size_t Preprocessor::getTotalMemory() const {
290    return BP.getTotalMemory()
291      + llvm::capacity_in_bytes(MacroExpandedTokens)
292      + Predefines.capacity() /* Predefines buffer. */
293      + llvm::capacity_in_bytes(Macros)
294      + llvm::capacity_in_bytes(PragmaPushMacroInfo)
295      + llvm::capacity_in_bytes(PoisonReasons)
296      + llvm::capacity_in_bytes(CommentHandlers);
297  }
298  
299  Preprocessor::macro_iterator
macro_end(bool IncludeExternalMacros) const300  Preprocessor::macro_end(bool IncludeExternalMacros) const {
301    if (IncludeExternalMacros && ExternalSource &&
302        !ReadMacrosFromExternalSource) {
303      ReadMacrosFromExternalSource = true;
304      ExternalSource->ReadDefinedMacros();
305    }
306  
307    return Macros.end();
308  }
309  
310  /// \brief Compares macro tokens with a specified token value sequence.
MacroDefinitionEquals(const MacroInfo * MI,ArrayRef<TokenValue> Tokens)311  static bool MacroDefinitionEquals(const MacroInfo *MI,
312                                    ArrayRef<TokenValue> Tokens) {
313    return Tokens.size() == MI->getNumTokens() &&
314        std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
315  }
316  
getLastMacroWithSpelling(SourceLocation Loc,ArrayRef<TokenValue> Tokens) const317  StringRef Preprocessor::getLastMacroWithSpelling(
318                                      SourceLocation Loc,
319                                      ArrayRef<TokenValue> Tokens) const {
320    SourceLocation BestLocation;
321    StringRef BestSpelling;
322    for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
323         I != E; ++I) {
324      const MacroDirective::DefInfo
325        Def = I->second->findDirectiveAtLoc(Loc, SourceMgr);
326      if (!Def || !Def.getMacroInfo())
327        continue;
328      if (!Def.getMacroInfo()->isObjectLike())
329        continue;
330      if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
331        continue;
332      SourceLocation Location = Def.getLocation();
333      // Choose the macro defined latest.
334      if (BestLocation.isInvalid() ||
335          (Location.isValid() &&
336           SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
337        BestLocation = Location;
338        BestSpelling = I->first->getName();
339      }
340    }
341    return BestSpelling;
342  }
343  
recomputeCurLexerKind()344  void Preprocessor::recomputeCurLexerKind() {
345    if (CurLexer)
346      CurLexerKind = CLK_Lexer;
347    else if (CurPTHLexer)
348      CurLexerKind = CLK_PTHLexer;
349    else if (CurTokenLexer)
350      CurLexerKind = CLK_TokenLexer;
351    else
352      CurLexerKind = CLK_CachingLexer;
353  }
354  
SetCodeCompletionPoint(const FileEntry * File,unsigned CompleteLine,unsigned CompleteColumn)355  bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
356                                            unsigned CompleteLine,
357                                            unsigned CompleteColumn) {
358    assert(File);
359    assert(CompleteLine && CompleteColumn && "Starts from 1:1");
360    assert(!CodeCompletionFile && "Already set");
361  
362    using llvm::MemoryBuffer;
363  
364    // Load the actual file's contents.
365    bool Invalid = false;
366    const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
367    if (Invalid)
368      return true;
369  
370    // Find the byte position of the truncation point.
371    const char *Position = Buffer->getBufferStart();
372    for (unsigned Line = 1; Line < CompleteLine; ++Line) {
373      for (; *Position; ++Position) {
374        if (*Position != '\r' && *Position != '\n')
375          continue;
376  
377        // Eat \r\n or \n\r as a single line.
378        if ((Position[1] == '\r' || Position[1] == '\n') &&
379            Position[0] != Position[1])
380          ++Position;
381        ++Position;
382        break;
383      }
384    }
385  
386    Position += CompleteColumn - 1;
387  
388    // If pointing inside the preamble, adjust the position at the beginning of
389    // the file after the preamble.
390    if (SkipMainFilePreamble.first &&
391        SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
392      if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
393        Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
394    }
395  
396    if (Position > Buffer->getBufferEnd())
397      Position = Buffer->getBufferEnd();
398  
399    CodeCompletionFile = File;
400    CodeCompletionOffset = Position - Buffer->getBufferStart();
401  
402    std::unique_ptr<MemoryBuffer> NewBuffer =
403        MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
404                                            Buffer->getBufferIdentifier());
405    char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
406    char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
407    *NewPos = '\0';
408    std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
409    SourceMgr.overrideFileContents(File, std::move(NewBuffer));
410  
411    return false;
412  }
413  
CodeCompleteNaturalLanguage()414  void Preprocessor::CodeCompleteNaturalLanguage() {
415    if (CodeComplete)
416      CodeComplete->CodeCompleteNaturalLanguage();
417    setCodeCompletionReached();
418  }
419  
420  /// getSpelling - This method is used to get the spelling of a token into a
421  /// SmallVector. Note that the returned StringRef may not point to the
422  /// supplied buffer if a copy can be avoided.
getSpelling(const Token & Tok,SmallVectorImpl<char> & Buffer,bool * Invalid) const423  StringRef Preprocessor::getSpelling(const Token &Tok,
424                                            SmallVectorImpl<char> &Buffer,
425                                            bool *Invalid) const {
426    // NOTE: this has to be checked *before* testing for an IdentifierInfo.
427    if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
428      // Try the fast path.
429      if (const IdentifierInfo *II = Tok.getIdentifierInfo())
430        return II->getName();
431    }
432  
433    // Resize the buffer if we need to copy into it.
434    if (Tok.needsCleaning())
435      Buffer.resize(Tok.getLength());
436  
437    const char *Ptr = Buffer.data();
438    unsigned Len = getSpelling(Tok, Ptr, Invalid);
439    return StringRef(Ptr, Len);
440  }
441  
442  /// CreateString - Plop the specified string into a scratch buffer and return a
443  /// location for it.  If specified, the source location provides a source
444  /// location for the token.
CreateString(StringRef Str,Token & Tok,SourceLocation ExpansionLocStart,SourceLocation ExpansionLocEnd)445  void Preprocessor::CreateString(StringRef Str, Token &Tok,
446                                  SourceLocation ExpansionLocStart,
447                                  SourceLocation ExpansionLocEnd) {
448    Tok.setLength(Str.size());
449  
450    const char *DestPtr;
451    SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
452  
453    if (ExpansionLocStart.isValid())
454      Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
455                                         ExpansionLocEnd, Str.size());
456    Tok.setLocation(Loc);
457  
458    // If this is a raw identifier or a literal token, set the pointer data.
459    if (Tok.is(tok::raw_identifier))
460      Tok.setRawIdentifierData(DestPtr);
461    else if (Tok.isLiteral())
462      Tok.setLiteralData(DestPtr);
463  }
464  
getCurrentModule()465  Module *Preprocessor::getCurrentModule() {
466    if (getLangOpts().CurrentModule.empty())
467      return nullptr;
468  
469    return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
470  }
471  
472  //===----------------------------------------------------------------------===//
473  // Preprocessor Initialization Methods
474  //===----------------------------------------------------------------------===//
475  
476  
477  /// EnterMainSourceFile - Enter the specified FileID as the main source file,
478  /// which implicitly adds the builtin defines etc.
EnterMainSourceFile()479  void Preprocessor::EnterMainSourceFile() {
480    // We do not allow the preprocessor to reenter the main file.  Doing so will
481    // cause FileID's to accumulate information from both runs (e.g. #line
482    // information) and predefined macros aren't guaranteed to be set properly.
483    assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
484    FileID MainFileID = SourceMgr.getMainFileID();
485  
486    // If MainFileID is loaded it means we loaded an AST file, no need to enter
487    // a main file.
488    if (!SourceMgr.isLoadedFileID(MainFileID)) {
489      // Enter the main file source buffer.
490      EnterSourceFile(MainFileID, nullptr, SourceLocation());
491  
492      // If we've been asked to skip bytes in the main file (e.g., as part of a
493      // precompiled preamble), do so now.
494      if (SkipMainFilePreamble.first > 0)
495        CurLexer->SkipBytes(SkipMainFilePreamble.first,
496                            SkipMainFilePreamble.second);
497  
498      // Tell the header info that the main file was entered.  If the file is later
499      // #imported, it won't be re-entered.
500      if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
501        HeaderInfo.IncrementIncludeCount(FE);
502    }
503  
504    // Preprocess Predefines to populate the initial preprocessor state.
505    std::unique_ptr<llvm::MemoryBuffer> SB =
506      llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
507    assert(SB && "Cannot create predefined source buffer");
508    FileID FID = SourceMgr.createFileID(std::move(SB));
509    assert(!FID.isInvalid() && "Could not create FileID for predefines?");
510    setPredefinesFileID(FID);
511  
512    // Start parsing the predefines.
513    EnterSourceFile(FID, nullptr, SourceLocation());
514  }
515  
EndSourceFile()516  void Preprocessor::EndSourceFile() {
517    // Notify the client that we reached the end of the source file.
518    if (Callbacks)
519      Callbacks->EndOfMainFile();
520  }
521  
522  //===----------------------------------------------------------------------===//
523  // Lexer Event Handling.
524  //===----------------------------------------------------------------------===//
525  
526  /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
527  /// identifier information for the token and install it into the token,
528  /// updating the token kind accordingly.
LookUpIdentifierInfo(Token & Identifier) const529  IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
530    assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
531  
532    // Look up this token, see if it is a macro, or if it is a language keyword.
533    IdentifierInfo *II;
534    if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
535      // No cleaning needed, just use the characters from the lexed buffer.
536      II = getIdentifierInfo(Identifier.getRawIdentifier());
537    } else {
538      // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
539      SmallString<64> IdentifierBuffer;
540      StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
541  
542      if (Identifier.hasUCN()) {
543        SmallString<64> UCNIdentifierBuffer;
544        expandUCNs(UCNIdentifierBuffer, CleanedStr);
545        II = getIdentifierInfo(UCNIdentifierBuffer);
546      } else {
547        II = getIdentifierInfo(CleanedStr);
548      }
549    }
550  
551    // Update the token info (identifier info and appropriate token kind).
552    Identifier.setIdentifierInfo(II);
553    Identifier.setKind(II->getTokenID());
554  
555    return II;
556  }
557  
SetPoisonReason(IdentifierInfo * II,unsigned DiagID)558  void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
559    PoisonReasons[II] = DiagID;
560  }
561  
PoisonSEHIdentifiers(bool Poison)562  void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
563    assert(Ident__exception_code && Ident__exception_info);
564    assert(Ident___exception_code && Ident___exception_info);
565    Ident__exception_code->setIsPoisoned(Poison);
566    Ident___exception_code->setIsPoisoned(Poison);
567    Ident_GetExceptionCode->setIsPoisoned(Poison);
568    Ident__exception_info->setIsPoisoned(Poison);
569    Ident___exception_info->setIsPoisoned(Poison);
570    Ident_GetExceptionInfo->setIsPoisoned(Poison);
571    Ident__abnormal_termination->setIsPoisoned(Poison);
572    Ident___abnormal_termination->setIsPoisoned(Poison);
573    Ident_AbnormalTermination->setIsPoisoned(Poison);
574  }
575  
HandlePoisonedIdentifier(Token & Identifier)576  void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
577    assert(Identifier.getIdentifierInfo() &&
578           "Can't handle identifiers without identifier info!");
579    llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
580      PoisonReasons.find(Identifier.getIdentifierInfo());
581    if(it == PoisonReasons.end())
582      Diag(Identifier, diag::err_pp_used_poisoned_id);
583    else
584      Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
585  }
586  
587  /// HandleIdentifier - This callback is invoked when the lexer reads an
588  /// identifier.  This callback looks up the identifier in the map and/or
589  /// potentially macro expands it or turns it into a named token (like 'for').
590  ///
591  /// Note that callers of this method are guarded by checking the
592  /// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
593  /// IdentifierInfo methods that compute these properties will need to change to
594  /// match.
HandleIdentifier(Token & Identifier)595  bool Preprocessor::HandleIdentifier(Token &Identifier) {
596    assert(Identifier.getIdentifierInfo() &&
597           "Can't handle identifiers without identifier info!");
598  
599    IdentifierInfo &II = *Identifier.getIdentifierInfo();
600  
601    // If the information about this identifier is out of date, update it from
602    // the external source.
603    // We have to treat __VA_ARGS__ in a special way, since it gets
604    // serialized with isPoisoned = true, but our preprocessor may have
605    // unpoisoned it if we're defining a C99 macro.
606    if (II.isOutOfDate()) {
607      bool CurrentIsPoisoned = false;
608      if (&II == Ident__VA_ARGS__)
609        CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
610  
611      ExternalSource->updateOutOfDateIdentifier(II);
612      Identifier.setKind(II.getTokenID());
613  
614      if (&II == Ident__VA_ARGS__)
615        II.setIsPoisoned(CurrentIsPoisoned);
616    }
617  
618    // If this identifier was poisoned, and if it was not produced from a macro
619    // expansion, emit an error.
620    if (II.isPoisoned() && CurPPLexer) {
621      HandlePoisonedIdentifier(Identifier);
622    }
623  
624    // If this is a macro to be expanded, do it.
625    if (MacroDirective *MD = getMacroDirective(&II)) {
626      MacroInfo *MI = MD->getMacroInfo();
627      if (!DisableMacroExpansion) {
628        if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
629          // C99 6.10.3p10: If the preprocessing token immediately after the
630          // macro name isn't a '(', this macro should not be expanded.
631          if (!MI->isFunctionLike() || isNextPPTokenLParen())
632            return HandleMacroExpandedIdentifier(Identifier, MD);
633        } else {
634          // C99 6.10.3.4p2 says that a disabled macro may never again be
635          // expanded, even if it's in a context where it could be expanded in the
636          // future.
637          Identifier.setFlag(Token::DisableExpand);
638          if (MI->isObjectLike() || isNextPPTokenLParen())
639            Diag(Identifier, diag::pp_disabled_macro_expansion);
640        }
641      }
642    }
643  
644    // If this identifier is a keyword in C++11, produce a warning. Don't warn if
645    // we're not considering macro expansion, since this identifier might be the
646    // name of a macro.
647    // FIXME: This warning is disabled in cases where it shouldn't be, like
648    //   "#define constexpr constexpr", "int constexpr;"
649    if (II.isCXX11CompatKeyword() && !DisableMacroExpansion) {
650      Diag(Identifier, diag::warn_cxx11_keyword) << II.getName();
651      // Don't diagnose this keyword again in this translation unit.
652      II.setIsCXX11CompatKeyword(false);
653    }
654  
655    // C++ 2.11p2: If this is an alternative representation of a C++ operator,
656    // then we act as if it is the actual operator and not the textual
657    // representation of it.
658    if (II.isCPlusPlusOperatorKeyword())
659      Identifier.setIdentifierInfo(nullptr);
660  
661    // If this is an extension token, diagnose its use.
662    // We avoid diagnosing tokens that originate from macro definitions.
663    // FIXME: This warning is disabled in cases where it shouldn't be,
664    // like "#define TY typeof", "TY(1) x".
665    if (II.isExtensionToken() && !DisableMacroExpansion)
666      Diag(Identifier, diag::ext_token_used);
667  
668    // If this is the 'import' contextual keyword following an '@', note
669    // that the next token indicates a module name.
670    //
671    // Note that we do not treat 'import' as a contextual
672    // keyword when we're in a caching lexer, because caching lexers only get
673    // used in contexts where import declarations are disallowed.
674    if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
675        !DisableMacroExpansion &&
676        (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
677        CurLexerKind != CLK_CachingLexer) {
678      ModuleImportLoc = Identifier.getLocation();
679      ModuleImportPath.clear();
680      ModuleImportExpectsIdentifier = true;
681      CurLexerKind = CLK_LexAfterModuleImport;
682    }
683    return true;
684  }
685  
Lex(Token & Result)686  void Preprocessor::Lex(Token &Result) {
687    // We loop here until a lex function retuns a token; this avoids recursion.
688    bool ReturnedToken;
689    do {
690      switch (CurLexerKind) {
691      case CLK_Lexer:
692        ReturnedToken = CurLexer->Lex(Result);
693        break;
694      case CLK_PTHLexer:
695        ReturnedToken = CurPTHLexer->Lex(Result);
696        break;
697      case CLK_TokenLexer:
698        ReturnedToken = CurTokenLexer->Lex(Result);
699        break;
700      case CLK_CachingLexer:
701        CachingLex(Result);
702        ReturnedToken = true;
703        break;
704      case CLK_LexAfterModuleImport:
705        LexAfterModuleImport(Result);
706        ReturnedToken = true;
707        break;
708      }
709    } while (!ReturnedToken);
710  
711    LastTokenWasAt = Result.is(tok::at);
712  }
713  
714  
715  /// \brief Lex a token following the 'import' contextual keyword.
716  ///
LexAfterModuleImport(Token & Result)717  void Preprocessor::LexAfterModuleImport(Token &Result) {
718    // Figure out what kind of lexer we actually have.
719    recomputeCurLexerKind();
720  
721    // Lex the next token.
722    Lex(Result);
723  
724    // The token sequence
725    //
726    //   import identifier (. identifier)*
727    //
728    // indicates a module import directive. We already saw the 'import'
729    // contextual keyword, so now we're looking for the identifiers.
730    if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
731      // We expected to see an identifier here, and we did; continue handling
732      // identifiers.
733      ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
734                                                Result.getLocation()));
735      ModuleImportExpectsIdentifier = false;
736      CurLexerKind = CLK_LexAfterModuleImport;
737      return;
738    }
739  
740    // If we're expecting a '.' or a ';', and we got a '.', then wait until we
741    // see the next identifier.
742    if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
743      ModuleImportExpectsIdentifier = true;
744      CurLexerKind = CLK_LexAfterModuleImport;
745      return;
746    }
747  
748    // If we have a non-empty module path, load the named module.
749    if (!ModuleImportPath.empty()) {
750      Module *Imported = nullptr;
751      if (getLangOpts().Modules)
752        Imported = TheModuleLoader.loadModule(ModuleImportLoc,
753                                              ModuleImportPath,
754                                              Module::MacrosVisible,
755                                              /*IsIncludeDirective=*/false);
756      if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
757        Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
758    }
759  }
760  
FinishLexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)761  bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
762                                            const char *DiagnosticTag,
763                                            bool AllowMacroExpansion) {
764    // We need at least one string literal.
765    if (Result.isNot(tok::string_literal)) {
766      Diag(Result, diag::err_expected_string_literal)
767        << /*Source='in...'*/0 << DiagnosticTag;
768      return false;
769    }
770  
771    // Lex string literal tokens, optionally with macro expansion.
772    SmallVector<Token, 4> StrToks;
773    do {
774      StrToks.push_back(Result);
775  
776      if (Result.hasUDSuffix())
777        Diag(Result, diag::err_invalid_string_udl);
778  
779      if (AllowMacroExpansion)
780        Lex(Result);
781      else
782        LexUnexpandedToken(Result);
783    } while (Result.is(tok::string_literal));
784  
785    // Concatenate and parse the strings.
786    StringLiteralParser Literal(StrToks, *this);
787    assert(Literal.isAscii() && "Didn't allow wide strings in");
788  
789    if (Literal.hadError)
790      return false;
791  
792    if (Literal.Pascal) {
793      Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
794        << /*Source='in...'*/0 << DiagnosticTag;
795      return false;
796    }
797  
798    String = Literal.GetString();
799    return true;
800  }
801  
parseSimpleIntegerLiteral(Token & Tok,uint64_t & Value)802  bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
803    assert(Tok.is(tok::numeric_constant));
804    SmallString<8> IntegerBuffer;
805    bool NumberInvalid = false;
806    StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
807    if (NumberInvalid)
808      return false;
809    NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
810    if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
811      return false;
812    llvm::APInt APVal(64, 0);
813    if (Literal.GetIntegerValue(APVal))
814      return false;
815    Lex(Tok);
816    Value = APVal.getLimitedValue();
817    return true;
818  }
819  
addCommentHandler(CommentHandler * Handler)820  void Preprocessor::addCommentHandler(CommentHandler *Handler) {
821    assert(Handler && "NULL comment handler");
822    assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
823           CommentHandlers.end() && "Comment handler already registered");
824    CommentHandlers.push_back(Handler);
825  }
826  
removeCommentHandler(CommentHandler * Handler)827  void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
828    std::vector<CommentHandler *>::iterator Pos
829    = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
830    assert(Pos != CommentHandlers.end() && "Comment handler not registered");
831    CommentHandlers.erase(Pos);
832  }
833  
HandleComment(Token & result,SourceRange Comment)834  bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
835    bool AnyPendingTokens = false;
836    for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
837         HEnd = CommentHandlers.end();
838         H != HEnd; ++H) {
839      if ((*H)->HandleComment(*this, Comment))
840        AnyPendingTokens = true;
841    }
842    if (!AnyPendingTokens || getCommentRetentionState())
843      return false;
844    Lex(result);
845    return true;
846  }
847  
~ModuleLoader()848  ModuleLoader::~ModuleLoader() { }
849  
~CommentHandler()850  CommentHandler::~CommentHandler() { }
851  
~CodeCompletionHandler()852  CodeCompletionHandler::~CodeCompletionHandler() { }
853  
createPreprocessingRecord()854  void Preprocessor::createPreprocessingRecord() {
855    if (Record)
856      return;
857  
858    Record = new PreprocessingRecord(getSourceManager());
859    addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
860  }
861