• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 //   -H       - Print the name of each header file used.
16 //   -d[DNI] - Dump various things.
17 //   -fworking-directory - #line's with preprocessor's working dir.
18 //   -fpreprocessed
19 //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 //   -W*
21 //   -w
22 //
23 // Messages to emit:
24 //   "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/PTHManager.h"
42 #include "clang/Lex/Pragma.h"
43 #include "clang/Lex/PreprocessingRecord.h"
44 #include "clang/Lex/PreprocessorOptions.h"
45 #include "clang/Lex/ScratchBuffer.h"
46 #include "llvm/ADT/APFloat.h"
47 #include "llvm/ADT/STLExtras.h"
48 #include "llvm/ADT/SmallString.h"
49 #include "llvm/ADT/StringExtras.h"
50 #include "llvm/Support/Capacity.h"
51 #include "llvm/Support/ConvertUTF.h"
52 #include "llvm/Support/MemoryBuffer.h"
53 #include "llvm/Support/raw_ostream.h"
54 using namespace clang;
55 
56 //===----------------------------------------------------------------------===//
~ExternalPreprocessorSource()57 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
58 
Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,DiagnosticsEngine & diags,LangOptions & opts,SourceManager & SM,HeaderSearch & Headers,ModuleLoader & TheModuleLoader,IdentifierInfoLookup * IILookup,bool OwnsHeaders,TranslationUnitKind TUKind)59 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
60                            DiagnosticsEngine &diags, LangOptions &opts,
61                            SourceManager &SM, HeaderSearch &Headers,
62                            ModuleLoader &TheModuleLoader,
63                            IdentifierInfoLookup *IILookup, bool OwnsHeaders,
64                            TranslationUnitKind TUKind)
65     : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr),
66       AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
67       ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
68       TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
69       Identifiers(opts, IILookup),
70       PragmaHandlers(new PragmaNamespace(StringRef())),
71       IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr),
72       CodeCompletionFile(nullptr), CodeCompletionOffset(0),
73       LastTokenWasAt(false), ModuleImportExpectsIdentifier(false),
74       CodeCompletionReached(0), MainFileDir(nullptr),
75       SkipMainFilePreamble(0, true), CurPPLexer(nullptr), CurDirLookup(nullptr),
76       CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), Callbacks(nullptr),
77       CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr),
78       Record(nullptr), MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
79   OwnsHeaderSearch = OwnsHeaders;
80 
81   CounterValue = 0; // __COUNTER__ starts at 0.
82 
83   // Clear stats.
84   NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
85   NumIf = NumElse = NumEndif = 0;
86   NumEnteredSourceFiles = 0;
87   NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
88   NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
89   MaxIncludeStackDepth = 0;
90   NumSkipped = 0;
91 
92   // Default to discarding comments.
93   KeepComments = false;
94   KeepMacroComments = false;
95   SuppressIncludeNotFoundError = false;
96 
97   // Macro expansion is enabled.
98   DisableMacroExpansion = false;
99   MacroExpansionInDirectivesOverride = false;
100   InMacroArgs = false;
101   InMacroArgPreExpansion = false;
102   NumCachedTokenLexers = 0;
103   PragmasEnabled = true;
104   ParsingIfOrElifDirective = false;
105   PreprocessedOutput = false;
106 
107   CachedLexPos = 0;
108 
109   // We haven't read anything from the external source.
110   ReadMacrosFromExternalSource = false;
111 
112   // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
113   // This gets unpoisoned where it is allowed.
114   (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
115   SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
116 
117   // Initialize the pragma handlers.
118   RegisterBuiltinPragmas();
119 
120   // Initialize builtin macros like __LINE__ and friends.
121   RegisterBuiltinMacros();
122 
123   if(LangOpts.Borland) {
124     Ident__exception_info        = getIdentifierInfo("_exception_info");
125     Ident___exception_info       = getIdentifierInfo("__exception_info");
126     Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
127     Ident__exception_code        = getIdentifierInfo("_exception_code");
128     Ident___exception_code       = getIdentifierInfo("__exception_code");
129     Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
130     Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
131     Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
132     Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
133   } else {
134     Ident__exception_info = Ident__exception_code = nullptr;
135     Ident__abnormal_termination = Ident___exception_info = nullptr;
136     Ident___exception_code = Ident___abnormal_termination = nullptr;
137     Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
138     Ident_AbnormalTermination = nullptr;
139   }
140 }
141 
~Preprocessor()142 Preprocessor::~Preprocessor() {
143   assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
144 
145   IncludeMacroStack.clear();
146 
147   // Destroy any macro definitions.
148   while (MacroInfoChain *I = MIChainHead) {
149     MIChainHead = I->Next;
150     I->~MacroInfoChain();
151   }
152 
153   // Free any cached macro expanders.
154   // This populates MacroArgCache, so all TokenLexers need to be destroyed
155   // before the code below that frees up the MacroArgCache list.
156   std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
157   CurTokenLexer.reset();
158 
159   while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
160     DeserialMIChainHead = I->Next;
161     I->~DeserializedMacroInfoChain();
162   }
163 
164   // Free any cached MacroArgs.
165   for (MacroArgs *ArgList = MacroArgCache; ArgList;)
166     ArgList = ArgList->deallocate();
167 
168   // Delete the header search info, if we own it.
169   if (OwnsHeaderSearch)
170     delete &HeaderInfo;
171 }
172 
Initialize(const TargetInfo & Target,const TargetInfo * AuxTarget)173 void Preprocessor::Initialize(const TargetInfo &Target,
174                               const TargetInfo *AuxTarget) {
175   assert((!this->Target || this->Target == &Target) &&
176          "Invalid override of target information");
177   this->Target = &Target;
178 
179   assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
180          "Invalid override of aux target information.");
181   this->AuxTarget = AuxTarget;
182 
183   // Initialize information about built-ins.
184   BuiltinInfo.InitializeTarget(Target, AuxTarget);
185   HeaderInfo.setTarget(Target);
186 }
187 
InitializeForModelFile()188 void Preprocessor::InitializeForModelFile() {
189   NumEnteredSourceFiles = 0;
190 
191   // Reset pragmas
192   PragmaHandlersBackup = std::move(PragmaHandlers);
193   PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
194   RegisterBuiltinPragmas();
195 
196   // Reset PredefinesFileID
197   PredefinesFileID = FileID();
198 }
199 
FinalizeForModelFile()200 void Preprocessor::FinalizeForModelFile() {
201   NumEnteredSourceFiles = 1;
202 
203   PragmaHandlers = std::move(PragmaHandlersBackup);
204 }
205 
setPTHManager(PTHManager * pm)206 void Preprocessor::setPTHManager(PTHManager* pm) {
207   PTH.reset(pm);
208   FileMgr.addStatCache(PTH->createStatCache());
209 }
210 
DumpToken(const Token & Tok,bool DumpFlags) const211 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
212   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
213                << getSpelling(Tok) << "'";
214 
215   if (!DumpFlags) return;
216 
217   llvm::errs() << "\t";
218   if (Tok.isAtStartOfLine())
219     llvm::errs() << " [StartOfLine]";
220   if (Tok.hasLeadingSpace())
221     llvm::errs() << " [LeadingSpace]";
222   if (Tok.isExpandDisabled())
223     llvm::errs() << " [ExpandDisabled]";
224   if (Tok.needsCleaning()) {
225     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
226     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
227                  << "']";
228   }
229 
230   llvm::errs() << "\tLoc=<";
231   DumpLocation(Tok.getLocation());
232   llvm::errs() << ">";
233 }
234 
DumpLocation(SourceLocation Loc) const235 void Preprocessor::DumpLocation(SourceLocation Loc) const {
236   Loc.dump(SourceMgr);
237 }
238 
DumpMacro(const MacroInfo & MI) const239 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
240   llvm::errs() << "MACRO: ";
241   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
242     DumpToken(MI.getReplacementToken(i));
243     llvm::errs() << "  ";
244   }
245   llvm::errs() << "\n";
246 }
247 
PrintStats()248 void Preprocessor::PrintStats() {
249   llvm::errs() << "\n*** Preprocessor Stats:\n";
250   llvm::errs() << NumDirectives << " directives found:\n";
251   llvm::errs() << "  " << NumDefined << " #define.\n";
252   llvm::errs() << "  " << NumUndefined << " #undef.\n";
253   llvm::errs() << "  #include/#include_next/#import:\n";
254   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
255   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
256   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
257   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
258   llvm::errs() << "  " << NumEndif << " #endif.\n";
259   llvm::errs() << "  " << NumPragma << " #pragma.\n";
260   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
261 
262   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
263              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
264              << NumFastMacroExpanded << " on the fast path.\n";
265   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
266              << " token paste (##) operations performed, "
267              << NumFastTokenPaste << " on the fast path.\n";
268 
269   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
270 
271   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
272   llvm::errs() << "\n  Macro Expanded Tokens: "
273                << llvm::capacity_in_bytes(MacroExpandedTokens);
274   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
275   // FIXME: List information for all submodules.
276   llvm::errs() << "\n  Macros: "
277                << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
278   llvm::errs() << "\n  #pragma push_macro Info: "
279                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
280   llvm::errs() << "\n  Poison Reasons: "
281                << llvm::capacity_in_bytes(PoisonReasons);
282   llvm::errs() << "\n  Comment Handlers: "
283                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
284 }
285 
286 Preprocessor::macro_iterator
macro_begin(bool IncludeExternalMacros) const287 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
288   if (IncludeExternalMacros && ExternalSource &&
289       !ReadMacrosFromExternalSource) {
290     ReadMacrosFromExternalSource = true;
291     ExternalSource->ReadDefinedMacros();
292   }
293 
294   // Make sure we cover all macros in visible modules.
295   for (const ModuleMacro &Macro : ModuleMacros)
296     CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
297 
298   return CurSubmoduleState->Macros.begin();
299 }
300 
getTotalMemory() const301 size_t Preprocessor::getTotalMemory() const {
302   return BP.getTotalMemory()
303     + llvm::capacity_in_bytes(MacroExpandedTokens)
304     + Predefines.capacity() /* Predefines buffer. */
305     // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
306     // and ModuleMacros.
307     + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
308     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
309     + llvm::capacity_in_bytes(PoisonReasons)
310     + llvm::capacity_in_bytes(CommentHandlers);
311 }
312 
313 Preprocessor::macro_iterator
macro_end(bool IncludeExternalMacros) const314 Preprocessor::macro_end(bool IncludeExternalMacros) const {
315   if (IncludeExternalMacros && ExternalSource &&
316       !ReadMacrosFromExternalSource) {
317     ReadMacrosFromExternalSource = true;
318     ExternalSource->ReadDefinedMacros();
319   }
320 
321   return CurSubmoduleState->Macros.end();
322 }
323 
324 /// \brief Compares macro tokens with a specified token value sequence.
MacroDefinitionEquals(const MacroInfo * MI,ArrayRef<TokenValue> Tokens)325 static bool MacroDefinitionEquals(const MacroInfo *MI,
326                                   ArrayRef<TokenValue> Tokens) {
327   return Tokens.size() == MI->getNumTokens() &&
328       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
329 }
330 
getLastMacroWithSpelling(SourceLocation Loc,ArrayRef<TokenValue> Tokens) const331 StringRef Preprocessor::getLastMacroWithSpelling(
332                                     SourceLocation Loc,
333                                     ArrayRef<TokenValue> Tokens) const {
334   SourceLocation BestLocation;
335   StringRef BestSpelling;
336   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
337        I != E; ++I) {
338     const MacroDirective::DefInfo
339       Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
340     if (!Def || !Def.getMacroInfo())
341       continue;
342     if (!Def.getMacroInfo()->isObjectLike())
343       continue;
344     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
345       continue;
346     SourceLocation Location = Def.getLocation();
347     // Choose the macro defined latest.
348     if (BestLocation.isInvalid() ||
349         (Location.isValid() &&
350          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
351       BestLocation = Location;
352       BestSpelling = I->first->getName();
353     }
354   }
355   return BestSpelling;
356 }
357 
recomputeCurLexerKind()358 void Preprocessor::recomputeCurLexerKind() {
359   if (CurLexer)
360     CurLexerKind = CLK_Lexer;
361   else if (CurPTHLexer)
362     CurLexerKind = CLK_PTHLexer;
363   else if (CurTokenLexer)
364     CurLexerKind = CLK_TokenLexer;
365   else
366     CurLexerKind = CLK_CachingLexer;
367 }
368 
SetCodeCompletionPoint(const FileEntry * File,unsigned CompleteLine,unsigned CompleteColumn)369 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
370                                           unsigned CompleteLine,
371                                           unsigned CompleteColumn) {
372   assert(File);
373   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
374   assert(!CodeCompletionFile && "Already set");
375 
376   using llvm::MemoryBuffer;
377 
378   // Load the actual file's contents.
379   bool Invalid = false;
380   const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
381   if (Invalid)
382     return true;
383 
384   // Find the byte position of the truncation point.
385   const char *Position = Buffer->getBufferStart();
386   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
387     for (; *Position; ++Position) {
388       if (*Position != '\r' && *Position != '\n')
389         continue;
390 
391       // Eat \r\n or \n\r as a single line.
392       if ((Position[1] == '\r' || Position[1] == '\n') &&
393           Position[0] != Position[1])
394         ++Position;
395       ++Position;
396       break;
397     }
398   }
399 
400   Position += CompleteColumn - 1;
401 
402   // If pointing inside the preamble, adjust the position at the beginning of
403   // the file after the preamble.
404   if (SkipMainFilePreamble.first &&
405       SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
406     if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
407       Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
408   }
409 
410   if (Position > Buffer->getBufferEnd())
411     Position = Buffer->getBufferEnd();
412 
413   CodeCompletionFile = File;
414   CodeCompletionOffset = Position - Buffer->getBufferStart();
415 
416   std::unique_ptr<MemoryBuffer> NewBuffer =
417       MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
418                                           Buffer->getBufferIdentifier());
419   char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
420   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
421   *NewPos = '\0';
422   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
423   SourceMgr.overrideFileContents(File, std::move(NewBuffer));
424 
425   return false;
426 }
427 
CodeCompleteNaturalLanguage()428 void Preprocessor::CodeCompleteNaturalLanguage() {
429   if (CodeComplete)
430     CodeComplete->CodeCompleteNaturalLanguage();
431   setCodeCompletionReached();
432 }
433 
434 /// getSpelling - This method is used to get the spelling of a token into a
435 /// SmallVector. Note that the returned StringRef may not point to the
436 /// supplied buffer if a copy can be avoided.
getSpelling(const Token & Tok,SmallVectorImpl<char> & Buffer,bool * Invalid) const437 StringRef Preprocessor::getSpelling(const Token &Tok,
438                                           SmallVectorImpl<char> &Buffer,
439                                           bool *Invalid) const {
440   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
441   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
442     // Try the fast path.
443     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
444       return II->getName();
445   }
446 
447   // Resize the buffer if we need to copy into it.
448   if (Tok.needsCleaning())
449     Buffer.resize(Tok.getLength());
450 
451   const char *Ptr = Buffer.data();
452   unsigned Len = getSpelling(Tok, Ptr, Invalid);
453   return StringRef(Ptr, Len);
454 }
455 
456 /// CreateString - Plop the specified string into a scratch buffer and return a
457 /// location for it.  If specified, the source location provides a source
458 /// location for the token.
CreateString(StringRef Str,Token & Tok,SourceLocation ExpansionLocStart,SourceLocation ExpansionLocEnd)459 void Preprocessor::CreateString(StringRef Str, Token &Tok,
460                                 SourceLocation ExpansionLocStart,
461                                 SourceLocation ExpansionLocEnd) {
462   Tok.setLength(Str.size());
463 
464   const char *DestPtr;
465   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
466 
467   if (ExpansionLocStart.isValid())
468     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
469                                        ExpansionLocEnd, Str.size());
470   Tok.setLocation(Loc);
471 
472   // If this is a raw identifier or a literal token, set the pointer data.
473   if (Tok.is(tok::raw_identifier))
474     Tok.setRawIdentifierData(DestPtr);
475   else if (Tok.isLiteral())
476     Tok.setLiteralData(DestPtr);
477 }
478 
getCurrentModule()479 Module *Preprocessor::getCurrentModule() {
480   if (getLangOpts().CurrentModule.empty())
481     return nullptr;
482 
483   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
484 }
485 
486 //===----------------------------------------------------------------------===//
487 // Preprocessor Initialization Methods
488 //===----------------------------------------------------------------------===//
489 
490 
491 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
492 /// which implicitly adds the builtin defines etc.
EnterMainSourceFile()493 void Preprocessor::EnterMainSourceFile() {
494   // We do not allow the preprocessor to reenter the main file.  Doing so will
495   // cause FileID's to accumulate information from both runs (e.g. #line
496   // information) and predefined macros aren't guaranteed to be set properly.
497   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
498   FileID MainFileID = SourceMgr.getMainFileID();
499 
500   // If MainFileID is loaded it means we loaded an AST file, no need to enter
501   // a main file.
502   if (!SourceMgr.isLoadedFileID(MainFileID)) {
503     // Enter the main file source buffer.
504     EnterSourceFile(MainFileID, nullptr, SourceLocation());
505 
506     // If we've been asked to skip bytes in the main file (e.g., as part of a
507     // precompiled preamble), do so now.
508     if (SkipMainFilePreamble.first > 0)
509       CurLexer->SkipBytes(SkipMainFilePreamble.first,
510                           SkipMainFilePreamble.second);
511 
512     // Tell the header info that the main file was entered.  If the file is later
513     // #imported, it won't be re-entered.
514     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
515       HeaderInfo.IncrementIncludeCount(FE);
516   }
517 
518   // Preprocess Predefines to populate the initial preprocessor state.
519   std::unique_ptr<llvm::MemoryBuffer> SB =
520     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
521   assert(SB && "Cannot create predefined source buffer");
522   FileID FID = SourceMgr.createFileID(std::move(SB));
523   assert(FID.isValid() && "Could not create FileID for predefines?");
524   setPredefinesFileID(FID);
525 
526   // Start parsing the predefines.
527   EnterSourceFile(FID, nullptr, SourceLocation());
528 }
529 
EndSourceFile()530 void Preprocessor::EndSourceFile() {
531   // Notify the client that we reached the end of the source file.
532   if (Callbacks)
533     Callbacks->EndOfMainFile();
534 }
535 
536 //===----------------------------------------------------------------------===//
537 // Lexer Event Handling.
538 //===----------------------------------------------------------------------===//
539 
540 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
541 /// identifier information for the token and install it into the token,
542 /// updating the token kind accordingly.
LookUpIdentifierInfo(Token & Identifier) const543 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
544   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
545 
546   // Look up this token, see if it is a macro, or if it is a language keyword.
547   IdentifierInfo *II;
548   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
549     // No cleaning needed, just use the characters from the lexed buffer.
550     II = getIdentifierInfo(Identifier.getRawIdentifier());
551   } else {
552     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
553     SmallString<64> IdentifierBuffer;
554     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
555 
556     if (Identifier.hasUCN()) {
557       SmallString<64> UCNIdentifierBuffer;
558       expandUCNs(UCNIdentifierBuffer, CleanedStr);
559       II = getIdentifierInfo(UCNIdentifierBuffer);
560     } else {
561       II = getIdentifierInfo(CleanedStr);
562     }
563   }
564 
565   // Update the token info (identifier info and appropriate token kind).
566   Identifier.setIdentifierInfo(II);
567   Identifier.setKind(II->getTokenID());
568 
569   return II;
570 }
571 
SetPoisonReason(IdentifierInfo * II,unsigned DiagID)572 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
573   PoisonReasons[II] = DiagID;
574 }
575 
PoisonSEHIdentifiers(bool Poison)576 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
577   assert(Ident__exception_code && Ident__exception_info);
578   assert(Ident___exception_code && Ident___exception_info);
579   Ident__exception_code->setIsPoisoned(Poison);
580   Ident___exception_code->setIsPoisoned(Poison);
581   Ident_GetExceptionCode->setIsPoisoned(Poison);
582   Ident__exception_info->setIsPoisoned(Poison);
583   Ident___exception_info->setIsPoisoned(Poison);
584   Ident_GetExceptionInfo->setIsPoisoned(Poison);
585   Ident__abnormal_termination->setIsPoisoned(Poison);
586   Ident___abnormal_termination->setIsPoisoned(Poison);
587   Ident_AbnormalTermination->setIsPoisoned(Poison);
588 }
589 
HandlePoisonedIdentifier(Token & Identifier)590 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
591   assert(Identifier.getIdentifierInfo() &&
592          "Can't handle identifiers without identifier info!");
593   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
594     PoisonReasons.find(Identifier.getIdentifierInfo());
595   if(it == PoisonReasons.end())
596     Diag(Identifier, diag::err_pp_used_poisoned_id);
597   else
598     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
599 }
600 
601 /// \brief Returns a diagnostic message kind for reporting a future keyword as
602 /// appropriate for the identifier and specified language.
getFutureCompatDiagKind(const IdentifierInfo & II,const LangOptions & LangOpts)603 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
604                                           const LangOptions &LangOpts) {
605   assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
606 
607   if (LangOpts.CPlusPlus)
608     return llvm::StringSwitch<diag::kind>(II.getName())
609 #define CXX11_KEYWORD(NAME, FLAGS)                                             \
610         .Case(#NAME, diag::warn_cxx11_keyword)
611 #include "clang/Basic/TokenKinds.def"
612         ;
613 
614   llvm_unreachable(
615       "Keyword not known to come from a newer Standard or proposed Standard");
616 }
617 
/// HandleIdentifier - This callback is invoked when the lexer reads an
/// identifier.  This callback looks up the identifier in the map and/or
/// potentially macro expands it or turns it into a named token (like 'for').
///
/// Note that callers of this method are guarded by checking the
/// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
/// IdentifierInfo methods that compute these properties will need to change to
/// match.
///
/// \param Identifier the identifier token; it must already carry an
/// IdentifierInfo.  Its kind, flags and identifier info may be updated.
///
/// \returns the result of HandleMacroExpandedIdentifier() when the identifier
/// is macro-expanded, and true in every other case.
bool Preprocessor::HandleIdentifier(Token &Identifier) {
  assert(Identifier.getIdentifierInfo() &&
         "Can't handle identifiers without identifier info!");

  IdentifierInfo &II = *Identifier.getIdentifierInfo();

  // If the information about this identifier is out of date, update it from
  // the external source.
  // We have to treat __VA_ARGS__ in a special way, since it gets
  // serialized with isPoisoned = true, but our preprocessor may have
  // unpoisoned it if we're defining a C99 macro.
  if (II.isOutOfDate()) {
    // Remember the current poison state of __VA_ARGS__ so it can be put back
    // after the update.
    bool CurrentIsPoisoned = false;
    if (&II == Ident__VA_ARGS__)
      CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();

    // NOTE(review): ExternalSource is dereferenced without a null check;
    // presumably an out-of-date identifier implies an external source
    // exists -- confirm.
    ExternalSource->updateOutOfDateIdentifier(II);
    Identifier.setKind(II.getTokenID());

    if (&II == Ident__VA_ARGS__)
      II.setIsPoisoned(CurrentIsPoisoned);
  }

  // If this identifier was poisoned, and if it was not produced from a macro
  // expansion, emit an error.
  if (II.isPoisoned() && CurPPLexer) {
    HandlePoisonedIdentifier(Identifier);
  }

  // If this is a macro to be expanded, do it.
  if (MacroDefinition MD = getMacroDefinition(&II)) {
    auto *MI = MD.getMacroInfo();
    assert(MI && "macro definition with no macro info?");
    if (!DisableMacroExpansion) {
      if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
        // C99 6.10.3p10: If the preprocessing token immediately after the
        // macro name isn't a '(', this macro should not be expanded.
        if (!MI->isFunctionLike() || isNextPPTokenLParen())
          return HandleMacroExpandedIdentifier(Identifier, MD);
      } else {
        // C99 6.10.3.4p2 says that a disabled macro may never again be
        // expanded, even if it's in a context where it could be expanded in the
        // future.
        Identifier.setFlag(Token::DisableExpand);
        // Only diagnose when the identifier would otherwise have been
        // expanded: an object-like macro, or a function-like one followed by
        // '('.
        if (MI->isObjectLike() || isNextPPTokenLParen())
          Diag(Identifier, diag::pp_disabled_macro_expansion);
      }
    }
  }

  // If this identifier is a keyword in a newer Standard or proposed Standard,
  // produce a warning. Don't warn if we're not considering macro expansion,
  // since this identifier might be the name of a macro.
  // FIXME: This warning is disabled in cases where it shouldn't be, like
  //   "#define constexpr constexpr", "int constexpr;"
  if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
    Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
        << II.getName();
    // Don't diagnose this keyword again in this translation unit.
    II.setIsFutureCompatKeyword(false);
  }

  // C++ 2.11p2: If this is an alternative representation of a C++ operator,
  // then we act as if it is the actual operator and not the textual
  // representation of it.
  if (II.isCPlusPlusOperatorKeyword())
    Identifier.setIdentifierInfo(nullptr);

  // If this is an extension token, diagnose its use.
  // We avoid diagnosing tokens that originate from macro definitions.
  // FIXME: This warning is disabled in cases where it shouldn't be,
  // like "#define TY typeof", "TY(1) x".
  if (II.isExtensionToken() && !DisableMacroExpansion)
    Diag(Identifier, diag::ext_token_used);

  // If this is the 'import' contextual keyword following an '@', note
  // that the next token indicates a module name.
  //
  // Note that we do not treat 'import' as a contextual
  // keyword when we're in a caching lexer, because caching lexers only get
  // used in contexts where import declarations are disallowed.
  if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
      !DisableMacroExpansion &&
      (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
      CurLexerKind != CLK_CachingLexer) {
    // Record where the import starts and switch the lexer kind so that the
    // following tokens go through LexAfterModuleImport().
    ModuleImportLoc = Identifier.getLocation();
    ModuleImportPath.clear();
    ModuleImportExpectsIdentifier = true;
    CurLexerKind = CLK_LexAfterModuleImport;
  }
  return true;
}
718 
Lex(Token & Result)719 void Preprocessor::Lex(Token &Result) {
720   // We loop here until a lex function returns a token; this avoids recursion.
721   bool ReturnedToken;
722   do {
723     switch (CurLexerKind) {
724     case CLK_Lexer:
725       ReturnedToken = CurLexer->Lex(Result);
726       break;
727     case CLK_PTHLexer:
728       ReturnedToken = CurPTHLexer->Lex(Result);
729       break;
730     case CLK_TokenLexer:
731       ReturnedToken = CurTokenLexer->Lex(Result);
732       break;
733     case CLK_CachingLexer:
734       CachingLex(Result);
735       ReturnedToken = true;
736       break;
737     case CLK_LexAfterModuleImport:
738       LexAfterModuleImport(Result);
739       ReturnedToken = true;
740       break;
741     }
742   } while (!ReturnedToken);
743 
744   LastTokenWasAt = Result.is(tok::at);
745 }
746 
747 
748 /// \brief Lex a token following the 'import' contextual keyword.
749 ///
LexAfterModuleImport(Token & Result)750 void Preprocessor::LexAfterModuleImport(Token &Result) {
751   // Figure out what kind of lexer we actually have.
752   recomputeCurLexerKind();
753 
754   // Lex the next token.
755   Lex(Result);
756 
757   // The token sequence
758   //
759   //   import identifier (. identifier)*
760   //
761   // indicates a module import directive. We already saw the 'import'
762   // contextual keyword, so now we're looking for the identifiers.
763   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
764     // We expected to see an identifier here, and we did; continue handling
765     // identifiers.
766     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
767                                               Result.getLocation()));
768     ModuleImportExpectsIdentifier = false;
769     CurLexerKind = CLK_LexAfterModuleImport;
770     return;
771   }
772 
773   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
774   // see the next identifier.
775   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
776     ModuleImportExpectsIdentifier = true;
777     CurLexerKind = CLK_LexAfterModuleImport;
778     return;
779   }
780 
781   // If we have a non-empty module path, load the named module.
782   if (!ModuleImportPath.empty()) {
783     Module *Imported = nullptr;
784     if (getLangOpts().Modules) {
785       Imported = TheModuleLoader.loadModule(ModuleImportLoc,
786                                             ModuleImportPath,
787                                             Module::Hidden,
788                                             /*IsIncludeDirective=*/false);
789       if (Imported)
790         makeModuleVisible(Imported, ModuleImportLoc);
791     }
792     if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
793       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
794   }
795 }
796 
makeModuleVisible(Module * M,SourceLocation Loc)797 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
798   CurSubmoduleState->VisibleModules.setVisible(
799       M, Loc, [](Module *) {},
800       [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
801         // FIXME: Include the path in the diagnostic.
802         // FIXME: Include the import location for the conflicting module.
803         Diag(ModuleImportLoc, diag::warn_module_conflict)
804             << Path[0]->getFullModuleName()
805             << Conflict->getFullModuleName()
806             << Message;
807       });
808 
809   // Add this module to the imports list of the currently-built submodule.
810   if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
811     BuildingSubmoduleStack.back().M->Imports.insert(M);
812 }
813 
FinishLexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)814 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
815                                           const char *DiagnosticTag,
816                                           bool AllowMacroExpansion) {
817   // We need at least one string literal.
818   if (Result.isNot(tok::string_literal)) {
819     Diag(Result, diag::err_expected_string_literal)
820       << /*Source='in...'*/0 << DiagnosticTag;
821     return false;
822   }
823 
824   // Lex string literal tokens, optionally with macro expansion.
825   SmallVector<Token, 4> StrToks;
826   do {
827     StrToks.push_back(Result);
828 
829     if (Result.hasUDSuffix())
830       Diag(Result, diag::err_invalid_string_udl);
831 
832     if (AllowMacroExpansion)
833       Lex(Result);
834     else
835       LexUnexpandedToken(Result);
836   } while (Result.is(tok::string_literal));
837 
838   // Concatenate and parse the strings.
839   StringLiteralParser Literal(StrToks, *this);
840   assert(Literal.isAscii() && "Didn't allow wide strings in");
841 
842   if (Literal.hadError)
843     return false;
844 
845   if (Literal.Pascal) {
846     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
847       << /*Source='in...'*/0 << DiagnosticTag;
848     return false;
849   }
850 
851   String = Literal.GetString();
852   return true;
853 }
854 
parseSimpleIntegerLiteral(Token & Tok,uint64_t & Value)855 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
856   assert(Tok.is(tok::numeric_constant));
857   SmallString<8> IntegerBuffer;
858   bool NumberInvalid = false;
859   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
860   if (NumberInvalid)
861     return false;
862   NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
863   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
864     return false;
865   llvm::APInt APVal(64, 0);
866   if (Literal.GetIntegerValue(APVal))
867     return false;
868   Lex(Tok);
869   Value = APVal.getLimitedValue();
870   return true;
871 }
872 
addCommentHandler(CommentHandler * Handler)873 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
874   assert(Handler && "NULL comment handler");
875   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
876          CommentHandlers.end() && "Comment handler already registered");
877   CommentHandlers.push_back(Handler);
878 }
879 
removeCommentHandler(CommentHandler * Handler)880 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
881   std::vector<CommentHandler *>::iterator Pos
882   = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
883   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
884   CommentHandlers.erase(Pos);
885 }
886 
HandleComment(Token & result,SourceRange Comment)887 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
888   bool AnyPendingTokens = false;
889   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
890        HEnd = CommentHandlers.end();
891        H != HEnd; ++H) {
892     if ((*H)->HandleComment(*this, Comment))
893       AnyPendingTokens = true;
894   }
895   if (!AnyPendingTokens || getCommentRetentionState())
896     return false;
897   Lex(result);
898   return true;
899 }
900 
~ModuleLoader()901 ModuleLoader::~ModuleLoader() { }
902 
~CommentHandler()903 CommentHandler::~CommentHandler() { }
904 
~CodeCompletionHandler()905 CodeCompletionHandler::~CodeCompletionHandler() { }
906 
createPreprocessingRecord()907 void Preprocessor::createPreprocessingRecord() {
908   if (Record)
909     return;
910 
911   Record = new PreprocessingRecord(getSourceManager());
912   addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
913 }
914