1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 // -H - Print the name of each header file used.
16 // -d[DNI] - Dump various things.
17 // -fworking-directory - #line's with preprocessor's working dir.
18 // -fpreprocessed
19 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 // -W*
21 // -w
22 //
23 // Messages to emit:
24 // "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/PTHManager.h"
42 #include "clang/Lex/Pragma.h"
43 #include "clang/Lex/PreprocessingRecord.h"
44 #include "clang/Lex/PreprocessorOptions.h"
45 #include "clang/Lex/ScratchBuffer.h"
46 #include "llvm/ADT/APFloat.h"
47 #include "llvm/ADT/STLExtras.h"
48 #include "llvm/ADT/SmallString.h"
49 #include "llvm/ADT/StringExtras.h"
50 #include "llvm/Support/Capacity.h"
51 #include "llvm/Support/ConvertUTF.h"
52 #include "llvm/Support/MemoryBuffer.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <utility>
55 using namespace clang;
56
57 template class llvm::Registry<clang::PragmaHandler>;
58
59 //===----------------------------------------------------------------------===//
~ExternalPreprocessorSource()60 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
61
Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,DiagnosticsEngine & diags,LangOptions & opts,SourceManager & SM,HeaderSearch & Headers,ModuleLoader & TheModuleLoader,IdentifierInfoLookup * IILookup,bool OwnsHeaders,TranslationUnitKind TUKind)62 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
63 DiagnosticsEngine &diags, LangOptions &opts,
64 SourceManager &SM, HeaderSearch &Headers,
65 ModuleLoader &TheModuleLoader,
66 IdentifierInfoLookup *IILookup, bool OwnsHeaders,
67 TranslationUnitKind TUKind)
68 : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr),
69 AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
70 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
71 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
72 Identifiers(opts, IILookup),
73 PragmaHandlers(new PragmaNamespace(StringRef())),
74 IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr),
75 CodeCompletionFile(nullptr), CodeCompletionOffset(0),
76 LastTokenWasAt(false), ModuleImportExpectsIdentifier(false),
77 CodeCompletionReached(0), MainFileDir(nullptr),
78 SkipMainFilePreamble(0, true), CurPPLexer(nullptr), CurDirLookup(nullptr),
79 CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), Callbacks(nullptr),
80 CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr),
81 Record(nullptr), MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
82 OwnsHeaderSearch = OwnsHeaders;
83
84 CounterValue = 0; // __COUNTER__ starts at 0.
85
86 // Clear stats.
87 NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
88 NumIf = NumElse = NumEndif = 0;
89 NumEnteredSourceFiles = 0;
90 NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
91 NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
92 MaxIncludeStackDepth = 0;
93 NumSkipped = 0;
94
95 // Default to discarding comments.
96 KeepComments = false;
97 KeepMacroComments = false;
98 SuppressIncludeNotFoundError = false;
99
100 // Macro expansion is enabled.
101 DisableMacroExpansion = false;
102 MacroExpansionInDirectivesOverride = false;
103 InMacroArgs = false;
104 InMacroArgPreExpansion = false;
105 NumCachedTokenLexers = 0;
106 PragmasEnabled = true;
107 ParsingIfOrElifDirective = false;
108 PreprocessedOutput = false;
109
110 CachedLexPos = 0;
111
112 // We haven't read anything from the external source.
113 ReadMacrosFromExternalSource = false;
114
115 // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
116 // This gets unpoisoned where it is allowed.
117 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
118 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
119
120 // Initialize the pragma handlers.
121 RegisterBuiltinPragmas();
122
123 // Initialize builtin macros like __LINE__ and friends.
124 RegisterBuiltinMacros();
125
126 if(LangOpts.Borland) {
127 Ident__exception_info = getIdentifierInfo("_exception_info");
128 Ident___exception_info = getIdentifierInfo("__exception_info");
129 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation");
130 Ident__exception_code = getIdentifierInfo("_exception_code");
131 Ident___exception_code = getIdentifierInfo("__exception_code");
132 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode");
133 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination");
134 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
135 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination");
136 } else {
137 Ident__exception_info = Ident__exception_code = nullptr;
138 Ident__abnormal_termination = Ident___exception_info = nullptr;
139 Ident___exception_code = Ident___abnormal_termination = nullptr;
140 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
141 Ident_AbnormalTermination = nullptr;
142 }
143 }
144
~Preprocessor()145 Preprocessor::~Preprocessor() {
146 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
147
148 IncludeMacroStack.clear();
149
150 // Destroy any macro definitions.
151 while (MacroInfoChain *I = MIChainHead) {
152 MIChainHead = I->Next;
153 I->~MacroInfoChain();
154 }
155
156 // Free any cached macro expanders.
157 // This populates MacroArgCache, so all TokenLexers need to be destroyed
158 // before the code below that frees up the MacroArgCache list.
159 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
160 CurTokenLexer.reset();
161
162 while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
163 DeserialMIChainHead = I->Next;
164 I->~DeserializedMacroInfoChain();
165 }
166
167 // Free any cached MacroArgs.
168 for (MacroArgs *ArgList = MacroArgCache; ArgList;)
169 ArgList = ArgList->deallocate();
170
171 // Delete the header search info, if we own it.
172 if (OwnsHeaderSearch)
173 delete &HeaderInfo;
174 }
175
Initialize(const TargetInfo & Target,const TargetInfo * AuxTarget)176 void Preprocessor::Initialize(const TargetInfo &Target,
177 const TargetInfo *AuxTarget) {
178 assert((!this->Target || this->Target == &Target) &&
179 "Invalid override of target information");
180 this->Target = &Target;
181
182 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
183 "Invalid override of aux target information.");
184 this->AuxTarget = AuxTarget;
185
186 // Initialize information about built-ins.
187 BuiltinInfo.InitializeTarget(Target, AuxTarget);
188 HeaderInfo.setTarget(Target);
189 }
190
InitializeForModelFile()191 void Preprocessor::InitializeForModelFile() {
192 NumEnteredSourceFiles = 0;
193
194 // Reset pragmas
195 PragmaHandlersBackup = std::move(PragmaHandlers);
196 PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
197 RegisterBuiltinPragmas();
198
199 // Reset PredefinesFileID
200 PredefinesFileID = FileID();
201 }
202
FinalizeForModelFile()203 void Preprocessor::FinalizeForModelFile() {
204 NumEnteredSourceFiles = 1;
205
206 PragmaHandlers = std::move(PragmaHandlersBackup);
207 }
208
setPTHManager(PTHManager * pm)209 void Preprocessor::setPTHManager(PTHManager* pm) {
210 PTH.reset(pm);
211 FileMgr.addStatCache(PTH->createStatCache());
212 }
213
DumpToken(const Token & Tok,bool DumpFlags) const214 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
215 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
216 << getSpelling(Tok) << "'";
217
218 if (!DumpFlags) return;
219
220 llvm::errs() << "\t";
221 if (Tok.isAtStartOfLine())
222 llvm::errs() << " [StartOfLine]";
223 if (Tok.hasLeadingSpace())
224 llvm::errs() << " [LeadingSpace]";
225 if (Tok.isExpandDisabled())
226 llvm::errs() << " [ExpandDisabled]";
227 if (Tok.needsCleaning()) {
228 const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
229 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
230 << "']";
231 }
232
233 llvm::errs() << "\tLoc=<";
234 DumpLocation(Tok.getLocation());
235 llvm::errs() << ">";
236 }
237
DumpLocation(SourceLocation Loc) const238 void Preprocessor::DumpLocation(SourceLocation Loc) const {
239 Loc.dump(SourceMgr);
240 }
241
DumpMacro(const MacroInfo & MI) const242 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
243 llvm::errs() << "MACRO: ";
244 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
245 DumpToken(MI.getReplacementToken(i));
246 llvm::errs() << " ";
247 }
248 llvm::errs() << "\n";
249 }
250
PrintStats()251 void Preprocessor::PrintStats() {
252 llvm::errs() << "\n*** Preprocessor Stats:\n";
253 llvm::errs() << NumDirectives << " directives found:\n";
254 llvm::errs() << " " << NumDefined << " #define.\n";
255 llvm::errs() << " " << NumUndefined << " #undef.\n";
256 llvm::errs() << " #include/#include_next/#import:\n";
257 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
258 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
259 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
260 llvm::errs() << " " << NumElse << " #else/#elif.\n";
261 llvm::errs() << " " << NumEndif << " #endif.\n";
262 llvm::errs() << " " << NumPragma << " #pragma.\n";
263 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
264
265 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
266 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
267 << NumFastMacroExpanded << " on the fast path.\n";
268 llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
269 << " token paste (##) operations performed, "
270 << NumFastTokenPaste << " on the fast path.\n";
271
272 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
273
274 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
275 llvm::errs() << "\n Macro Expanded Tokens: "
276 << llvm::capacity_in_bytes(MacroExpandedTokens);
277 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
278 // FIXME: List information for all submodules.
279 llvm::errs() << "\n Macros: "
280 << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
281 llvm::errs() << "\n #pragma push_macro Info: "
282 << llvm::capacity_in_bytes(PragmaPushMacroInfo);
283 llvm::errs() << "\n Poison Reasons: "
284 << llvm::capacity_in_bytes(PoisonReasons);
285 llvm::errs() << "\n Comment Handlers: "
286 << llvm::capacity_in_bytes(CommentHandlers) << "\n";
287 }
288
289 Preprocessor::macro_iterator
macro_begin(bool IncludeExternalMacros) const290 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
291 if (IncludeExternalMacros && ExternalSource &&
292 !ReadMacrosFromExternalSource) {
293 ReadMacrosFromExternalSource = true;
294 ExternalSource->ReadDefinedMacros();
295 }
296
297 // Make sure we cover all macros in visible modules.
298 for (const ModuleMacro &Macro : ModuleMacros)
299 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
300
301 return CurSubmoduleState->Macros.begin();
302 }
303
getTotalMemory() const304 size_t Preprocessor::getTotalMemory() const {
305 return BP.getTotalMemory()
306 + llvm::capacity_in_bytes(MacroExpandedTokens)
307 + Predefines.capacity() /* Predefines buffer. */
308 // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
309 // and ModuleMacros.
310 + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
311 + llvm::capacity_in_bytes(PragmaPushMacroInfo)
312 + llvm::capacity_in_bytes(PoisonReasons)
313 + llvm::capacity_in_bytes(CommentHandlers);
314 }
315
316 Preprocessor::macro_iterator
macro_end(bool IncludeExternalMacros) const317 Preprocessor::macro_end(bool IncludeExternalMacros) const {
318 if (IncludeExternalMacros && ExternalSource &&
319 !ReadMacrosFromExternalSource) {
320 ReadMacrosFromExternalSource = true;
321 ExternalSource->ReadDefinedMacros();
322 }
323
324 return CurSubmoduleState->Macros.end();
325 }
326
327 /// \brief Compares macro tokens with a specified token value sequence.
MacroDefinitionEquals(const MacroInfo * MI,ArrayRef<TokenValue> Tokens)328 static bool MacroDefinitionEquals(const MacroInfo *MI,
329 ArrayRef<TokenValue> Tokens) {
330 return Tokens.size() == MI->getNumTokens() &&
331 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
332 }
333
getLastMacroWithSpelling(SourceLocation Loc,ArrayRef<TokenValue> Tokens) const334 StringRef Preprocessor::getLastMacroWithSpelling(
335 SourceLocation Loc,
336 ArrayRef<TokenValue> Tokens) const {
337 SourceLocation BestLocation;
338 StringRef BestSpelling;
339 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
340 I != E; ++I) {
341 const MacroDirective::DefInfo
342 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
343 if (!Def || !Def.getMacroInfo())
344 continue;
345 if (!Def.getMacroInfo()->isObjectLike())
346 continue;
347 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
348 continue;
349 SourceLocation Location = Def.getLocation();
350 // Choose the macro defined latest.
351 if (BestLocation.isInvalid() ||
352 (Location.isValid() &&
353 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
354 BestLocation = Location;
355 BestSpelling = I->first->getName();
356 }
357 }
358 return BestSpelling;
359 }
360
recomputeCurLexerKind()361 void Preprocessor::recomputeCurLexerKind() {
362 if (CurLexer)
363 CurLexerKind = CLK_Lexer;
364 else if (CurPTHLexer)
365 CurLexerKind = CLK_PTHLexer;
366 else if (CurTokenLexer)
367 CurLexerKind = CLK_TokenLexer;
368 else
369 CurLexerKind = CLK_CachingLexer;
370 }
371
SetCodeCompletionPoint(const FileEntry * File,unsigned CompleteLine,unsigned CompleteColumn)372 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
373 unsigned CompleteLine,
374 unsigned CompleteColumn) {
375 assert(File);
376 assert(CompleteLine && CompleteColumn && "Starts from 1:1");
377 assert(!CodeCompletionFile && "Already set");
378
379 using llvm::MemoryBuffer;
380
381 // Load the actual file's contents.
382 bool Invalid = false;
383 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
384 if (Invalid)
385 return true;
386
387 // Find the byte position of the truncation point.
388 const char *Position = Buffer->getBufferStart();
389 for (unsigned Line = 1; Line < CompleteLine; ++Line) {
390 for (; *Position; ++Position) {
391 if (*Position != '\r' && *Position != '\n')
392 continue;
393
394 // Eat \r\n or \n\r as a single line.
395 if ((Position[1] == '\r' || Position[1] == '\n') &&
396 Position[0] != Position[1])
397 ++Position;
398 ++Position;
399 break;
400 }
401 }
402
403 Position += CompleteColumn - 1;
404
405 // If pointing inside the preamble, adjust the position at the beginning of
406 // the file after the preamble.
407 if (SkipMainFilePreamble.first &&
408 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
409 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
410 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
411 }
412
413 if (Position > Buffer->getBufferEnd())
414 Position = Buffer->getBufferEnd();
415
416 CodeCompletionFile = File;
417 CodeCompletionOffset = Position - Buffer->getBufferStart();
418
419 std::unique_ptr<MemoryBuffer> NewBuffer =
420 MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
421 Buffer->getBufferIdentifier());
422 char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
423 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
424 *NewPos = '\0';
425 std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
426 SourceMgr.overrideFileContents(File, std::move(NewBuffer));
427
428 return false;
429 }
430
CodeCompleteNaturalLanguage()431 void Preprocessor::CodeCompleteNaturalLanguage() {
432 if (CodeComplete)
433 CodeComplete->CodeCompleteNaturalLanguage();
434 setCodeCompletionReached();
435 }
436
437 /// getSpelling - This method is used to get the spelling of a token into a
438 /// SmallVector. Note that the returned StringRef may not point to the
439 /// supplied buffer if a copy can be avoided.
getSpelling(const Token & Tok,SmallVectorImpl<char> & Buffer,bool * Invalid) const440 StringRef Preprocessor::getSpelling(const Token &Tok,
441 SmallVectorImpl<char> &Buffer,
442 bool *Invalid) const {
443 // NOTE: this has to be checked *before* testing for an IdentifierInfo.
444 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
445 // Try the fast path.
446 if (const IdentifierInfo *II = Tok.getIdentifierInfo())
447 return II->getName();
448 }
449
450 // Resize the buffer if we need to copy into it.
451 if (Tok.needsCleaning())
452 Buffer.resize(Tok.getLength());
453
454 const char *Ptr = Buffer.data();
455 unsigned Len = getSpelling(Tok, Ptr, Invalid);
456 return StringRef(Ptr, Len);
457 }
458
459 /// CreateString - Plop the specified string into a scratch buffer and return a
460 /// location for it. If specified, the source location provides a source
461 /// location for the token.
CreateString(StringRef Str,Token & Tok,SourceLocation ExpansionLocStart,SourceLocation ExpansionLocEnd)462 void Preprocessor::CreateString(StringRef Str, Token &Tok,
463 SourceLocation ExpansionLocStart,
464 SourceLocation ExpansionLocEnd) {
465 Tok.setLength(Str.size());
466
467 const char *DestPtr;
468 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
469
470 if (ExpansionLocStart.isValid())
471 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
472 ExpansionLocEnd, Str.size());
473 Tok.setLocation(Loc);
474
475 // If this is a raw identifier or a literal token, set the pointer data.
476 if (Tok.is(tok::raw_identifier))
477 Tok.setRawIdentifierData(DestPtr);
478 else if (Tok.isLiteral())
479 Tok.setLiteralData(DestPtr);
480 }
481
getCurrentModule()482 Module *Preprocessor::getCurrentModule() {
483 if (!getLangOpts().CompilingModule)
484 return nullptr;
485
486 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
487 }
488
489 //===----------------------------------------------------------------------===//
490 // Preprocessor Initialization Methods
491 //===----------------------------------------------------------------------===//
492
493
494 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
495 /// which implicitly adds the builtin defines etc.
EnterMainSourceFile()496 void Preprocessor::EnterMainSourceFile() {
497 // We do not allow the preprocessor to reenter the main file. Doing so will
498 // cause FileID's to accumulate information from both runs (e.g. #line
499 // information) and predefined macros aren't guaranteed to be set properly.
500 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
501 FileID MainFileID = SourceMgr.getMainFileID();
502
503 // If MainFileID is loaded it means we loaded an AST file, no need to enter
504 // a main file.
505 if (!SourceMgr.isLoadedFileID(MainFileID)) {
506 // Enter the main file source buffer.
507 EnterSourceFile(MainFileID, nullptr, SourceLocation());
508
509 // If we've been asked to skip bytes in the main file (e.g., as part of a
510 // precompiled preamble), do so now.
511 if (SkipMainFilePreamble.first > 0)
512 CurLexer->SkipBytes(SkipMainFilePreamble.first,
513 SkipMainFilePreamble.second);
514
515 // Tell the header info that the main file was entered. If the file is later
516 // #imported, it won't be re-entered.
517 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
518 HeaderInfo.IncrementIncludeCount(FE);
519 }
520
521 // Preprocess Predefines to populate the initial preprocessor state.
522 std::unique_ptr<llvm::MemoryBuffer> SB =
523 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
524 assert(SB && "Cannot create predefined source buffer");
525 FileID FID = SourceMgr.createFileID(std::move(SB));
526 assert(FID.isValid() && "Could not create FileID for predefines?");
527 setPredefinesFileID(FID);
528
529 // Start parsing the predefines.
530 EnterSourceFile(FID, nullptr, SourceLocation());
531 }
532
EndSourceFile()533 void Preprocessor::EndSourceFile() {
534 // Notify the client that we reached the end of the source file.
535 if (Callbacks)
536 Callbacks->EndOfMainFile();
537 }
538
539 //===----------------------------------------------------------------------===//
540 // Lexer Event Handling.
541 //===----------------------------------------------------------------------===//
542
543 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
544 /// identifier information for the token and install it into the token,
545 /// updating the token kind accordingly.
LookUpIdentifierInfo(Token & Identifier) const546 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
547 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
548
549 // Look up this token, see if it is a macro, or if it is a language keyword.
550 IdentifierInfo *II;
551 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
552 // No cleaning needed, just use the characters from the lexed buffer.
553 II = getIdentifierInfo(Identifier.getRawIdentifier());
554 } else {
555 // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
556 SmallString<64> IdentifierBuffer;
557 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
558
559 if (Identifier.hasUCN()) {
560 SmallString<64> UCNIdentifierBuffer;
561 expandUCNs(UCNIdentifierBuffer, CleanedStr);
562 II = getIdentifierInfo(UCNIdentifierBuffer);
563 } else {
564 II = getIdentifierInfo(CleanedStr);
565 }
566 }
567
568 // Update the token info (identifier info and appropriate token kind).
569 Identifier.setIdentifierInfo(II);
570 Identifier.setKind(II->getTokenID());
571
572 return II;
573 }
574
SetPoisonReason(IdentifierInfo * II,unsigned DiagID)575 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
576 PoisonReasons[II] = DiagID;
577 }
578
PoisonSEHIdentifiers(bool Poison)579 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
580 assert(Ident__exception_code && Ident__exception_info);
581 assert(Ident___exception_code && Ident___exception_info);
582 Ident__exception_code->setIsPoisoned(Poison);
583 Ident___exception_code->setIsPoisoned(Poison);
584 Ident_GetExceptionCode->setIsPoisoned(Poison);
585 Ident__exception_info->setIsPoisoned(Poison);
586 Ident___exception_info->setIsPoisoned(Poison);
587 Ident_GetExceptionInfo->setIsPoisoned(Poison);
588 Ident__abnormal_termination->setIsPoisoned(Poison);
589 Ident___abnormal_termination->setIsPoisoned(Poison);
590 Ident_AbnormalTermination->setIsPoisoned(Poison);
591 }
592
HandlePoisonedIdentifier(Token & Identifier)593 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
594 assert(Identifier.getIdentifierInfo() &&
595 "Can't handle identifiers without identifier info!");
596 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
597 PoisonReasons.find(Identifier.getIdentifierInfo());
598 if(it == PoisonReasons.end())
599 Diag(Identifier, diag::err_pp_used_poisoned_id);
600 else
601 Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
602 }
603
604 /// \brief Returns a diagnostic message kind for reporting a future keyword as
605 /// appropriate for the identifier and specified language.
getFutureCompatDiagKind(const IdentifierInfo & II,const LangOptions & LangOpts)606 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
607 const LangOptions &LangOpts) {
608 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
609
610 if (LangOpts.CPlusPlus)
611 return llvm::StringSwitch<diag::kind>(II.getName())
612 #define CXX11_KEYWORD(NAME, FLAGS) \
613 .Case(#NAME, diag::warn_cxx11_keyword)
614 #include "clang/Basic/TokenKinds.def"
615 ;
616
617 llvm_unreachable(
618 "Keyword not known to come from a newer Standard or proposed Standard");
619 }
620
621 /// HandleIdentifier - This callback is invoked when the lexer reads an
622 /// identifier. This callback looks up the identifier in the map and/or
623 /// potentially macro expands it or turns it into a named token (like 'for').
624 ///
625 /// Note that callers of this method are guarded by checking the
626 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
627 /// IdentifierInfo methods that compute these properties will need to change to
628 /// match.
HandleIdentifier(Token & Identifier)629 bool Preprocessor::HandleIdentifier(Token &Identifier) {
630 assert(Identifier.getIdentifierInfo() &&
631 "Can't handle identifiers without identifier info!");
632
633 IdentifierInfo &II = *Identifier.getIdentifierInfo();
634
635 // If the information about this identifier is out of date, update it from
636 // the external source.
637 // We have to treat __VA_ARGS__ in a special way, since it gets
638 // serialized with isPoisoned = true, but our preprocessor may have
639 // unpoisoned it if we're defining a C99 macro.
640 if (II.isOutOfDate()) {
641 bool CurrentIsPoisoned = false;
642 if (&II == Ident__VA_ARGS__)
643 CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
644
645 ExternalSource->updateOutOfDateIdentifier(II);
646 Identifier.setKind(II.getTokenID());
647
648 if (&II == Ident__VA_ARGS__)
649 II.setIsPoisoned(CurrentIsPoisoned);
650 }
651
652 // If this identifier was poisoned, and if it was not produced from a macro
653 // expansion, emit an error.
654 if (II.isPoisoned() && CurPPLexer) {
655 HandlePoisonedIdentifier(Identifier);
656 }
657
658 // If this is a macro to be expanded, do it.
659 if (MacroDefinition MD = getMacroDefinition(&II)) {
660 auto *MI = MD.getMacroInfo();
661 assert(MI && "macro definition with no macro info?");
662 if (!DisableMacroExpansion) {
663 if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
664 // C99 6.10.3p10: If the preprocessing token immediately after the
665 // macro name isn't a '(', this macro should not be expanded.
666 if (!MI->isFunctionLike() || isNextPPTokenLParen())
667 return HandleMacroExpandedIdentifier(Identifier, MD);
668 } else {
669 // C99 6.10.3.4p2 says that a disabled macro may never again be
670 // expanded, even if it's in a context where it could be expanded in the
671 // future.
672 Identifier.setFlag(Token::DisableExpand);
673 if (MI->isObjectLike() || isNextPPTokenLParen())
674 Diag(Identifier, diag::pp_disabled_macro_expansion);
675 }
676 }
677 }
678
679 // If this identifier is a keyword in a newer Standard or proposed Standard,
680 // produce a warning. Don't warn if we're not considering macro expansion,
681 // since this identifier might be the name of a macro.
682 // FIXME: This warning is disabled in cases where it shouldn't be, like
683 // "#define constexpr constexpr", "int constexpr;"
684 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
685 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
686 << II.getName();
687 // Don't diagnose this keyword again in this translation unit.
688 II.setIsFutureCompatKeyword(false);
689 }
690
691 // C++ 2.11p2: If this is an alternative representation of a C++ operator,
692 // then we act as if it is the actual operator and not the textual
693 // representation of it.
694 if (II.isCPlusPlusOperatorKeyword())
695 Identifier.setIdentifierInfo(nullptr);
696
697 // If this is an extension token, diagnose its use.
698 // We avoid diagnosing tokens that originate from macro definitions.
699 // FIXME: This warning is disabled in cases where it shouldn't be,
700 // like "#define TY typeof", "TY(1) x".
701 if (II.isExtensionToken() && !DisableMacroExpansion)
702 Diag(Identifier, diag::ext_token_used);
703
704 // If this is the 'import' contextual keyword following an '@', note
705 // that the next token indicates a module name.
706 //
707 // Note that we do not treat 'import' as a contextual
708 // keyword when we're in a caching lexer, because caching lexers only get
709 // used in contexts where import declarations are disallowed.
710 if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
711 !DisableMacroExpansion &&
712 (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
713 CurLexerKind != CLK_CachingLexer) {
714 ModuleImportLoc = Identifier.getLocation();
715 ModuleImportPath.clear();
716 ModuleImportExpectsIdentifier = true;
717 CurLexerKind = CLK_LexAfterModuleImport;
718 }
719 return true;
720 }
721
Lex(Token & Result)722 void Preprocessor::Lex(Token &Result) {
723 // We loop here until a lex function returns a token; this avoids recursion.
724 bool ReturnedToken;
725 do {
726 switch (CurLexerKind) {
727 case CLK_Lexer:
728 ReturnedToken = CurLexer->Lex(Result);
729 break;
730 case CLK_PTHLexer:
731 ReturnedToken = CurPTHLexer->Lex(Result);
732 break;
733 case CLK_TokenLexer:
734 ReturnedToken = CurTokenLexer->Lex(Result);
735 break;
736 case CLK_CachingLexer:
737 CachingLex(Result);
738 ReturnedToken = true;
739 break;
740 case CLK_LexAfterModuleImport:
741 LexAfterModuleImport(Result);
742 ReturnedToken = true;
743 break;
744 }
745 } while (!ReturnedToken);
746
747 LastTokenWasAt = Result.is(tok::at);
748 }
749
750
751 /// \brief Lex a token following the 'import' contextual keyword.
752 ///
LexAfterModuleImport(Token & Result)753 void Preprocessor::LexAfterModuleImport(Token &Result) {
754 // Figure out what kind of lexer we actually have.
755 recomputeCurLexerKind();
756
757 // Lex the next token.
758 Lex(Result);
759
760 // The token sequence
761 //
762 // import identifier (. identifier)*
763 //
764 // indicates a module import directive. We already saw the 'import'
765 // contextual keyword, so now we're looking for the identifiers.
766 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
767 // We expected to see an identifier here, and we did; continue handling
768 // identifiers.
769 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
770 Result.getLocation()));
771 ModuleImportExpectsIdentifier = false;
772 CurLexerKind = CLK_LexAfterModuleImport;
773 return;
774 }
775
776 // If we're expecting a '.' or a ';', and we got a '.', then wait until we
777 // see the next identifier.
778 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
779 ModuleImportExpectsIdentifier = true;
780 CurLexerKind = CLK_LexAfterModuleImport;
781 return;
782 }
783
784 // If we have a non-empty module path, load the named module.
785 if (!ModuleImportPath.empty()) {
786 Module *Imported = nullptr;
787 if (getLangOpts().Modules) {
788 Imported = TheModuleLoader.loadModule(ModuleImportLoc,
789 ModuleImportPath,
790 Module::Hidden,
791 /*IsIncludeDirective=*/false);
792 if (Imported)
793 makeModuleVisible(Imported, ModuleImportLoc);
794 }
795 if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
796 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
797 }
798 }
799
makeModuleVisible(Module * M,SourceLocation Loc)800 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
801 CurSubmoduleState->VisibleModules.setVisible(
802 M, Loc, [](Module *) {},
803 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
804 // FIXME: Include the path in the diagnostic.
805 // FIXME: Include the import location for the conflicting module.
806 Diag(ModuleImportLoc, diag::warn_module_conflict)
807 << Path[0]->getFullModuleName()
808 << Conflict->getFullModuleName()
809 << Message;
810 });
811
812 // Add this module to the imports list of the currently-built submodule.
813 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
814 BuildingSubmoduleStack.back().M->Imports.insert(M);
815 }
816
FinishLexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)817 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
818 const char *DiagnosticTag,
819 bool AllowMacroExpansion) {
820 // We need at least one string literal.
821 if (Result.isNot(tok::string_literal)) {
822 Diag(Result, diag::err_expected_string_literal)
823 << /*Source='in...'*/0 << DiagnosticTag;
824 return false;
825 }
826
827 // Lex string literal tokens, optionally with macro expansion.
828 SmallVector<Token, 4> StrToks;
829 do {
830 StrToks.push_back(Result);
831
832 if (Result.hasUDSuffix())
833 Diag(Result, diag::err_invalid_string_udl);
834
835 if (AllowMacroExpansion)
836 Lex(Result);
837 else
838 LexUnexpandedToken(Result);
839 } while (Result.is(tok::string_literal));
840
841 // Concatenate and parse the strings.
842 StringLiteralParser Literal(StrToks, *this);
843 assert(Literal.isAscii() && "Didn't allow wide strings in");
844
845 if (Literal.hadError)
846 return false;
847
848 if (Literal.Pascal) {
849 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
850 << /*Source='in...'*/0 << DiagnosticTag;
851 return false;
852 }
853
854 String = Literal.GetString();
855 return true;
856 }
857
parseSimpleIntegerLiteral(Token & Tok,uint64_t & Value)858 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
859 assert(Tok.is(tok::numeric_constant));
860 SmallString<8> IntegerBuffer;
861 bool NumberInvalid = false;
862 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
863 if (NumberInvalid)
864 return false;
865 NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
866 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
867 return false;
868 llvm::APInt APVal(64, 0);
869 if (Literal.GetIntegerValue(APVal))
870 return false;
871 Lex(Tok);
872 Value = APVal.getLimitedValue();
873 return true;
874 }
875
addCommentHandler(CommentHandler * Handler)876 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
877 assert(Handler && "NULL comment handler");
878 assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
879 CommentHandlers.end() && "Comment handler already registered");
880 CommentHandlers.push_back(Handler);
881 }
882
removeCommentHandler(CommentHandler * Handler)883 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
884 std::vector<CommentHandler *>::iterator Pos
885 = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
886 assert(Pos != CommentHandlers.end() && "Comment handler not registered");
887 CommentHandlers.erase(Pos);
888 }
889
HandleComment(Token & result,SourceRange Comment)890 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
891 bool AnyPendingTokens = false;
892 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
893 HEnd = CommentHandlers.end();
894 H != HEnd; ++H) {
895 if ((*H)->HandleComment(*this, Comment))
896 AnyPendingTokens = true;
897 }
898 if (!AnyPendingTokens || getCommentRetentionState())
899 return false;
900 Lex(result);
901 return true;
902 }
903
~ModuleLoader()904 ModuleLoader::~ModuleLoader() { }
905
~CommentHandler()906 CommentHandler::~CommentHandler() { }
907
~CodeCompletionHandler()908 CodeCompletionHandler::~CodeCompletionHandler() { }
909
createPreprocessingRecord()910 void Preprocessor::createPreprocessingRecord() {
911 if (Record)
912 return;
913
914 Record = new PreprocessingRecord(getSourceManager());
915 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
916 }
917