1 //===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the PTHLexer interface.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "clang/Lex/PTHLexer.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/FileSystemStatCache.h"
17 #include "clang/Basic/IdentifierTable.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "clang/Lex/LexDiagnostic.h"
20 #include "clang/Lex/PTHManager.h"
21 #include "clang/Lex/Preprocessor.h"
22 #include "clang/Lex/Token.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/StringMap.h"
25 #include "llvm/Support/EndianStream.h"
26 #include "llvm/Support/MemoryBuffer.h"
27 #include "llvm/Support/OnDiskHashTable.h"
28 #include <memory>
29 #include <system_error>
30 using namespace clang;
31
/// Size in bytes of one serialized token record in the PTH token buffer.
/// PTHLexer::Lex decodes each record as three little-endian 32-bit words: a
/// packed kind/flags/length word, an identifier ID (or spelling offset for
/// literals), and the token's offset within its file.
static const unsigned StoredTokenSize = 3 * sizeof(uint32_t);
33
34 //===----------------------------------------------------------------------===//
35 // PTHLexer methods.
36 //===----------------------------------------------------------------------===//
37
PTHLexer(Preprocessor & PP,FileID FID,const unsigned char * D,const unsigned char * ppcond,PTHManager & PM)38 PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
39 const unsigned char *ppcond, PTHManager &PM)
40 : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(nullptr),
41 PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
42
43 FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
44 }
45
Lex(Token & Tok)46 bool PTHLexer::Lex(Token& Tok) {
47 //===--------------------------------------==//
48 // Read the raw token data.
49 //===--------------------------------------==//
50 using namespace llvm::support;
51
52 // Shadow CurPtr into an automatic variable.
53 const unsigned char *CurPtrShadow = CurPtr;
54
55 // Read in the data for the token.
56 unsigned Word0 = endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
57 uint32_t IdentifierID =
58 endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
59 uint32_t FileOffset =
60 endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
61
62 tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF);
63 Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF);
64 uint32_t Len = Word0 >> 16;
65
66 CurPtr = CurPtrShadow;
67
68 //===--------------------------------------==//
69 // Construct the token itself.
70 //===--------------------------------------==//
71
72 Tok.startToken();
73 Tok.setKind(TKind);
74 Tok.setFlag(TFlags);
75 assert(!LexingRawMode);
76 Tok.setLocation(FileStartLoc.getLocWithOffset(FileOffset));
77 Tok.setLength(Len);
78
79 // Handle identifiers.
80 if (Tok.isLiteral()) {
81 Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID));
82 }
83 else if (IdentifierID) {
84 MIOpt.ReadToken();
85 IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1);
86
87 Tok.setIdentifierInfo(II);
88
89 // Change the kind of this identifier to the appropriate token kind, e.g.
90 // turning "for" into a keyword.
91 Tok.setKind(II->getTokenID());
92
93 if (II->isHandleIdentifierCase())
94 return PP->HandleIdentifier(Tok);
95
96 return true;
97 }
98
99 //===--------------------------------------==//
100 // Process the token.
101 //===--------------------------------------==//
102 if (TKind == tok::eof) {
103 // Save the end-of-file token.
104 EofToken = Tok;
105
106 assert(!ParsingPreprocessorDirective);
107 assert(!LexingRawMode);
108
109 return LexEndOfFile(Tok);
110 }
111
112 if (TKind == tok::hash && Tok.isAtStartOfLine()) {
113 LastHashTokPtr = CurPtr - StoredTokenSize;
114 assert(!LexingRawMode);
115 PP->HandleDirective(Tok);
116
117 return false;
118 }
119
120 if (TKind == tok::eod) {
121 assert(ParsingPreprocessorDirective);
122 ParsingPreprocessorDirective = false;
123 return true;
124 }
125
126 MIOpt.ReadToken();
127 return true;
128 }
129
LexEndOfFile(Token & Result)130 bool PTHLexer::LexEndOfFile(Token &Result) {
131 // If we hit the end of the file while parsing a preprocessor directive,
132 // end the preprocessor directive first. The next token returned will
133 // then be the end of file.
134 if (ParsingPreprocessorDirective) {
135 ParsingPreprocessorDirective = false; // Done parsing the "line".
136 return true; // Have a token.
137 }
138
139 assert(!LexingRawMode);
140
141 // If we are in a #if directive, emit an error.
142 while (!ConditionalStack.empty()) {
143 if (PP->getCodeCompletionFileLoc() != FileStartLoc)
144 PP->Diag(ConditionalStack.back().IfLoc,
145 diag::err_pp_unterminated_conditional);
146 ConditionalStack.pop_back();
147 }
148
149 // Finally, let the preprocessor handle this.
150 return PP->HandleEndOfFile(Result);
151 }
152
153 // FIXME: We can just grab the last token instead of storing a copy
154 // into EofToken.
getEOF(Token & Tok)155 void PTHLexer::getEOF(Token& Tok) {
156 assert(EofToken.is(tok::eof));
157 Tok = EofToken;
158 }
159
DiscardToEndOfLine()160 void PTHLexer::DiscardToEndOfLine() {
161 assert(ParsingPreprocessorDirective && ParsingFilename == false &&
162 "Must be in a preprocessing directive!");
163
164 // We assume that if the preprocessor wishes to discard to the end of
165 // the line that it also means to end the current preprocessor directive.
166 ParsingPreprocessorDirective = false;
167
168 // Skip tokens by only peeking at their token kind and the flags.
169 // We don't need to actually reconstruct full tokens from the token buffer.
170 // This saves some copies and it also reduces IdentifierInfo* lookup.
171 const unsigned char* p = CurPtr;
172 while (1) {
173 // Read the token kind. Are we at the end of the file?
174 tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
175 if (x == tok::eof) break;
176
177 // Read the token flags. Are we at the start of the next line?
178 Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
179 if (y & Token::StartOfLine) break;
180
181 // Skip to the next token.
182 p += StoredTokenSize;
183 }
184
185 CurPtr = p;
186 }
187
188 /// SkipBlock - Used by Preprocessor to skip the current conditional block.
SkipBlock()189 bool PTHLexer::SkipBlock() {
190 using namespace llvm::support;
191 assert(CurPPCondPtr && "No cached PP conditional information.");
192 assert(LastHashTokPtr && "No known '#' token.");
193
194 const unsigned char *HashEntryI = nullptr;
195 uint32_t TableIdx;
196
197 do {
198 // Read the token offset from the side-table.
199 uint32_t Offset = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);
200
201 // Read the target table index from the side-table.
202 TableIdx = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);
203
204 // Compute the actual memory address of the '#' token data for this entry.
205 HashEntryI = TokBuf + Offset;
206
207 // Optmization: "Sibling jumping". #if...#else...#endif blocks can
208 // contain nested blocks. In the side-table we can jump over these
209 // nested blocks instead of doing a linear search if the next "sibling"
210 // entry is not at a location greater than LastHashTokPtr.
211 if (HashEntryI < LastHashTokPtr && TableIdx) {
212 // In the side-table we are still at an entry for a '#' token that
213 // is earlier than the last one we saw. Check if the location we would
214 // stride gets us closer.
215 const unsigned char* NextPPCondPtr =
216 PPCond + TableIdx*(sizeof(uint32_t)*2);
217 assert(NextPPCondPtr >= CurPPCondPtr);
218 // Read where we should jump to.
219 const unsigned char *HashEntryJ =
220 TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
221
222 if (HashEntryJ <= LastHashTokPtr) {
223 // Jump directly to the next entry in the side table.
224 HashEntryI = HashEntryJ;
225 TableIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
226 CurPPCondPtr = NextPPCondPtr;
227 }
228 }
229 }
230 while (HashEntryI < LastHashTokPtr);
231 assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
232 assert(TableIdx && "No jumping from #endifs.");
233
234 // Update our side-table iterator.
235 const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
236 assert(NextPPCondPtr >= CurPPCondPtr);
237 CurPPCondPtr = NextPPCondPtr;
238
239 // Read where we should jump to.
240 HashEntryI =
241 TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
242 uint32_t NextIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
243
244 // By construction NextIdx will be zero if this is a #endif. This is useful
245 // to know to obviate lexing another token.
246 bool isEndif = NextIdx == 0;
247
248 // This case can occur when we see something like this:
249 //
250 // #if ...
251 // /* a comment or nothing */
252 // #elif
253 //
254 // If we are skipping the first #if block it will be the case that CurPtr
255 // already points 'elif'. Just return.
256
257 if (CurPtr > HashEntryI) {
258 assert(CurPtr == HashEntryI + StoredTokenSize);
259 // Did we reach a #endif? If so, go ahead and consume that token as well.
260 if (isEndif)
261 CurPtr += StoredTokenSize * 2;
262 else
263 LastHashTokPtr = HashEntryI;
264
265 return isEndif;
266 }
267
268 // Otherwise, we need to advance. Update CurPtr to point to the '#' token.
269 CurPtr = HashEntryI;
270
271 // Update the location of the last observed '#'. This is useful if we
272 // are skipping multiple blocks.
273 LastHashTokPtr = CurPtr;
274
275 // Skip the '#' token.
276 assert(((tok::TokenKind)*CurPtr) == tok::hash);
277 CurPtr += StoredTokenSize;
278
279 // Did we reach a #endif? If so, go ahead and consume that token as well.
280 if (isEndif) {
281 CurPtr += StoredTokenSize * 2;
282 }
283
284 return isEndif;
285 }
286
getSourceLocation()287 SourceLocation PTHLexer::getSourceLocation() {
288 // getSourceLocation is not on the hot path. It is used to get the location
289 // of the next token when transitioning back to this lexer when done
290 // handling a #included file. Just read the necessary data from the token
291 // data buffer to construct the SourceLocation object.
292 // NOTE: This is a virtual function; hence it is defined out-of-line.
293 using namespace llvm::support;
294
295 const unsigned char *OffsetPtr = CurPtr + (StoredTokenSize - 4);
296 uint32_t Offset = endian::readNext<uint32_t, little, aligned>(OffsetPtr);
297 return FileStartLoc.getLocWithOffset(Offset);
298 }
299
300 //===----------------------------------------------------------------------===//
301 // PTH file lookup: map from strings to file data.
302 //===----------------------------------------------------------------------===//
303
304 /// PTHFileLookup - This internal data structure is used by the PTHManager
305 /// to map from FileEntry objects managed by FileManager to offsets within
306 /// the PTH file.
307 namespace {
/// Immutable pair of offsets describing where one file's cached data lives:
/// the offset of its token records and the offset of its
/// preprocessor-conditional table.
class PTHFileData {
public:
  PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
      : TokenDataOffset(tokenOff), PPCondTableOffset(ppCondOff) {}

  /// Offset of the file's token data.
  uint32_t getTokenOffset() const { return TokenDataOffset; }

  /// Offset of the file's preprocessor-conditional table.
  uint32_t getPPCondOffset() const { return PPCondTableOffset; }

private:
  const uint32_t TokenDataOffset;
  const uint32_t PPCondTableOffset;
};
318
319
320 class PTHFileLookupCommonTrait {
321 public:
322 typedef std::pair<unsigned char, const char*> internal_key_type;
323 typedef unsigned hash_value_type;
324 typedef unsigned offset_type;
325
ComputeHash(internal_key_type x)326 static hash_value_type ComputeHash(internal_key_type x) {
327 return llvm::HashString(x.second);
328 }
329
330 static std::pair<unsigned, unsigned>
ReadKeyDataLength(const unsigned char * & d)331 ReadKeyDataLength(const unsigned char*& d) {
332 using namespace llvm::support;
333 unsigned keyLen =
334 (unsigned)endian::readNext<uint16_t, little, unaligned>(d);
335 unsigned dataLen = (unsigned) *(d++);
336 return std::make_pair(keyLen, dataLen);
337 }
338
ReadKey(const unsigned char * d,unsigned)339 static internal_key_type ReadKey(const unsigned char* d, unsigned) {
340 unsigned char k = *(d++); // Read the entry kind.
341 return std::make_pair(k, (const char*) d);
342 }
343 };
344
345 class PTHFileLookupTrait : public PTHFileLookupCommonTrait {
346 public:
347 typedef const FileEntry* external_key_type;
348 typedef PTHFileData data_type;
349
GetInternalKey(const FileEntry * FE)350 static internal_key_type GetInternalKey(const FileEntry* FE) {
351 return std::make_pair((unsigned char) 0x1, FE->getName());
352 }
353
EqualKey(internal_key_type a,internal_key_type b)354 static bool EqualKey(internal_key_type a, internal_key_type b) {
355 return a.first == b.first && strcmp(a.second, b.second) == 0;
356 }
357
ReadData(const internal_key_type & k,const unsigned char * d,unsigned)358 static PTHFileData ReadData(const internal_key_type& k,
359 const unsigned char* d, unsigned) {
360 assert(k.first == 0x1 && "Only file lookups can match!");
361 using namespace llvm::support;
362 uint32_t x = endian::readNext<uint32_t, little, unaligned>(d);
363 uint32_t y = endian::readNext<uint32_t, little, unaligned>(d);
364 return PTHFileData(x, y);
365 }
366 };
367
368 class PTHStringLookupTrait {
369 public:
370 typedef uint32_t data_type;
371 typedef const std::pair<const char*, unsigned> external_key_type;
372 typedef external_key_type internal_key_type;
373 typedef uint32_t hash_value_type;
374 typedef unsigned offset_type;
375
EqualKey(const internal_key_type & a,const internal_key_type & b)376 static bool EqualKey(const internal_key_type& a,
377 const internal_key_type& b) {
378 return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0
379 : false;
380 }
381
ComputeHash(const internal_key_type & a)382 static hash_value_type ComputeHash(const internal_key_type& a) {
383 return llvm::HashString(StringRef(a.first, a.second));
384 }
385
386 // This hopefully will just get inlined and removed by the optimizer.
387 static const internal_key_type&
GetInternalKey(const external_key_type & x)388 GetInternalKey(const external_key_type& x) { return x; }
389
390 static std::pair<unsigned, unsigned>
ReadKeyDataLength(const unsigned char * & d)391 ReadKeyDataLength(const unsigned char*& d) {
392 using namespace llvm::support;
393 return std::make_pair(
394 (unsigned)endian::readNext<uint16_t, little, unaligned>(d),
395 sizeof(uint32_t));
396 }
397
398 static std::pair<const char*, unsigned>
ReadKey(const unsigned char * d,unsigned n)399 ReadKey(const unsigned char* d, unsigned n) {
400 assert(n >= 2 && d[n-1] == '\0');
401 return std::make_pair((const char*) d, n-1);
402 }
403
ReadData(const internal_key_type & k,const unsigned char * d,unsigned)404 static uint32_t ReadData(const internal_key_type& k, const unsigned char* d,
405 unsigned) {
406 using namespace llvm::support;
407 return endian::readNext<uint32_t, little, unaligned>(d);
408 }
409 };
410
411 } // end anonymous namespace
412
413 typedef llvm::OnDiskChainedHashTable<PTHFileLookupTrait> PTHFileLookup;
414 typedef llvm::OnDiskChainedHashTable<PTHStringLookupTrait> PTHStringIdLookup;
415
416 //===----------------------------------------------------------------------===//
417 // PTHManager methods.
418 //===----------------------------------------------------------------------===//
419
PTHManager(const llvm::MemoryBuffer * buf,void * fileLookup,const unsigned char * idDataTable,IdentifierInfo ** perIDCache,void * stringIdLookup,unsigned numIds,const unsigned char * spellingBase,const char * originalSourceFile)420 PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
421 const unsigned char* idDataTable,
422 IdentifierInfo** perIDCache,
423 void* stringIdLookup, unsigned numIds,
424 const unsigned char* spellingBase,
425 const char* originalSourceFile)
426 : Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
427 IdDataTable(idDataTable), StringIdLookup(stringIdLookup),
428 NumIds(numIds), PP(nullptr), SpellingBase(spellingBase),
429 OriginalSourceFile(originalSourceFile) {}
430
~PTHManager()431 PTHManager::~PTHManager() {
432 delete Buf;
433 delete (PTHFileLookup*) FileLookup;
434 delete (PTHStringIdLookup*) StringIdLookup;
435 free(PerIDCache);
436 }
437
InvalidPTH(DiagnosticsEngine & Diags,const char * Msg)438 static void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) {
439 Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0")) << Msg;
440 }
441
Create(const std::string & file,DiagnosticsEngine & Diags)442 PTHManager *PTHManager::Create(const std::string &file,
443 DiagnosticsEngine &Diags) {
444 // Memory map the PTH file.
445 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
446 llvm::MemoryBuffer::getFile(file);
447
448 if (!FileOrErr) {
449 // FIXME: Add ec.message() to this diag.
450 Diags.Report(diag::err_invalid_pth_file) << file;
451 return nullptr;
452 }
453 std::unique_ptr<llvm::MemoryBuffer> File = std::move(FileOrErr.get());
454
455 using namespace llvm::support;
456
457 // Get the buffer ranges and check if there are at least three 32-bit
458 // words at the end of the file.
459 const unsigned char *BufBeg = (const unsigned char*)File->getBufferStart();
460 const unsigned char *BufEnd = (const unsigned char*)File->getBufferEnd();
461
462 // Check the prologue of the file.
463 if ((BufEnd - BufBeg) < (signed)(sizeof("cfe-pth") + 4 + 4) ||
464 memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth")) != 0) {
465 Diags.Report(diag::err_invalid_pth_file) << file;
466 return nullptr;
467 }
468
469 // Read the PTH version.
470 const unsigned char *p = BufBeg + (sizeof("cfe-pth"));
471 unsigned Version = endian::readNext<uint32_t, little, aligned>(p);
472
473 if (Version < PTHManager::Version) {
474 InvalidPTH(Diags,
475 Version < PTHManager::Version
476 ? "PTH file uses an older PTH format that is no longer supported"
477 : "PTH file uses a newer PTH format that cannot be read");
478 return nullptr;
479 }
480
481 // Compute the address of the index table at the end of the PTH file.
482 const unsigned char *PrologueOffset = p;
483
484 if (PrologueOffset >= BufEnd) {
485 Diags.Report(diag::err_invalid_pth_file) << file;
486 return nullptr;
487 }
488
489 // Construct the file lookup table. This will be used for mapping from
490 // FileEntry*'s to cached tokens.
491 const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2;
492 const unsigned char *FileTable =
493 BufBeg + endian::readNext<uint32_t, little, aligned>(FileTableOffset);
494
495 if (!(FileTable > BufBeg && FileTable < BufEnd)) {
496 Diags.Report(diag::err_invalid_pth_file) << file;
497 return nullptr; // FIXME: Proper error diagnostic?
498 }
499
500 std::unique_ptr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
501
502 // Warn if the PTH file is empty. We still want to create a PTHManager
503 // as the PTH could be used with -include-pth.
504 if (FL->isEmpty())
505 InvalidPTH(Diags, "PTH file contains no cached source data");
506
507 // Get the location of the table mapping from persistent ids to the
508 // data needed to reconstruct identifiers.
509 const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0;
510 const unsigned char *IData =
511 BufBeg + endian::readNext<uint32_t, little, aligned>(IDTableOffset);
512
513 if (!(IData >= BufBeg && IData < BufEnd)) {
514 Diags.Report(diag::err_invalid_pth_file) << file;
515 return nullptr;
516 }
517
518 // Get the location of the hashtable mapping between strings and
519 // persistent IDs.
520 const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1;
521 const unsigned char *StringIdTable =
522 BufBeg + endian::readNext<uint32_t, little, aligned>(StringIdTableOffset);
523 if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) {
524 Diags.Report(diag::err_invalid_pth_file) << file;
525 return nullptr;
526 }
527
528 std::unique_ptr<PTHStringIdLookup> SL(
529 PTHStringIdLookup::Create(StringIdTable, BufBeg));
530
531 // Get the location of the spelling cache.
532 const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3;
533 const unsigned char *spellingBase =
534 BufBeg + endian::readNext<uint32_t, little, aligned>(spellingBaseOffset);
535 if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) {
536 Diags.Report(diag::err_invalid_pth_file) << file;
537 return nullptr;
538 }
539
540 // Get the number of IdentifierInfos and pre-allocate the identifier cache.
541 uint32_t NumIds = endian::readNext<uint32_t, little, aligned>(IData);
542
543 // Pre-allocate the persistent ID -> IdentifierInfo* cache. We use calloc()
544 // so that we in the best case only zero out memory once when the OS returns
545 // us new pages.
546 IdentifierInfo **PerIDCache = nullptr;
547
548 if (NumIds) {
549 PerIDCache = (IdentifierInfo**)calloc(NumIds, sizeof(*PerIDCache));
550 if (!PerIDCache) {
551 InvalidPTH(Diags, "Could not allocate memory for processing PTH file");
552 return nullptr;
553 }
554 }
555
556 // Compute the address of the original source file.
557 const unsigned char* originalSourceBase = PrologueOffset + sizeof(uint32_t)*4;
558 unsigned len =
559 endian::readNext<uint16_t, little, unaligned>(originalSourceBase);
560 if (!len) originalSourceBase = nullptr;
561
562 // Create the new PTHManager.
563 return new PTHManager(File.release(), FL.release(), IData, PerIDCache,
564 SL.release(), NumIds, spellingBase,
565 (const char *)originalSourceBase);
566 }
567
LazilyCreateIdentifierInfo(unsigned PersistentID)568 IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
569 using namespace llvm::support;
570 // Look in the PTH file for the string data for the IdentifierInfo object.
571 const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
572 const unsigned char *IDData =
573 (const unsigned char *)Buf->getBufferStart() +
574 endian::readNext<uint32_t, little, aligned>(TableEntry);
575 assert(IDData < (const unsigned char*)Buf->getBufferEnd());
576
577 // Allocate the object.
578 std::pair<IdentifierInfo,const unsigned char*> *Mem =
579 Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
580
581 Mem->second = IDData;
582 assert(IDData[0] != '\0');
583 IdentifierInfo *II = new ((void*) Mem) IdentifierInfo();
584
585 // Store the new IdentifierInfo in the cache.
586 PerIDCache[PersistentID] = II;
587 assert(II->getNameStart() && II->getNameStart()[0] != '\0');
588 return II;
589 }
590
get(StringRef Name)591 IdentifierInfo* PTHManager::get(StringRef Name) {
592 PTHStringIdLookup& SL = *((PTHStringIdLookup*)StringIdLookup);
593 // Double check our assumption that the last character isn't '\0'.
594 assert(Name.empty() || Name.back() != '\0');
595 PTHStringIdLookup::iterator I = SL.find(std::make_pair(Name.data(),
596 Name.size()));
597 if (I == SL.end()) // No identifier found?
598 return nullptr;
599
600 // Match found. Return the identifier!
601 assert(*I > 0);
602 return GetIdentifierInfo(*I-1);
603 }
604
CreateLexer(FileID FID)605 PTHLexer *PTHManager::CreateLexer(FileID FID) {
606 const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID);
607 if (!FE)
608 return nullptr;
609
610 using namespace llvm::support;
611
612 // Lookup the FileEntry object in our file lookup data structure. It will
613 // return a variant that indicates whether or not there is an offset within
614 // the PTH file that contains cached tokens.
615 PTHFileLookup& PFL = *((PTHFileLookup*)FileLookup);
616 PTHFileLookup::iterator I = PFL.find(FE);
617
618 if (I == PFL.end()) // No tokens available?
619 return nullptr;
620
621 const PTHFileData& FileData = *I;
622
623 const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
624 // Compute the offset of the token data within the buffer.
625 const unsigned char* data = BufStart + FileData.getTokenOffset();
626
627 // Get the location of pp-conditional table.
628 const unsigned char* ppcond = BufStart + FileData.getPPCondOffset();
629 uint32_t Len = endian::readNext<uint32_t, little, aligned>(ppcond);
630 if (Len == 0) ppcond = nullptr;
631
632 assert(PP && "No preprocessor set yet!");
633 return new PTHLexer(*PP, FID, data, ppcond, *this);
634 }
635
636 //===----------------------------------------------------------------------===//
637 // 'stat' caching.
638 //===----------------------------------------------------------------------===//
639
640 namespace {
641 class PTHStatData {
642 public:
643 const bool HasData;
644 uint64_t Size;
645 time_t ModTime;
646 llvm::sys::fs::UniqueID UniqueID;
647 bool IsDirectory;
648
PTHStatData(uint64_t Size,time_t ModTime,llvm::sys::fs::UniqueID UniqueID,bool IsDirectory)649 PTHStatData(uint64_t Size, time_t ModTime, llvm::sys::fs::UniqueID UniqueID,
650 bool IsDirectory)
651 : HasData(true), Size(Size), ModTime(ModTime), UniqueID(UniqueID),
652 IsDirectory(IsDirectory) {}
653
PTHStatData()654 PTHStatData() : HasData(false) {}
655 };
656
657 class PTHStatLookupTrait : public PTHFileLookupCommonTrait {
658 public:
659 typedef const char* external_key_type; // const char*
660 typedef PTHStatData data_type;
661
GetInternalKey(const char * path)662 static internal_key_type GetInternalKey(const char *path) {
663 // The key 'kind' doesn't matter here because it is ignored in EqualKey.
664 return std::make_pair((unsigned char) 0x0, path);
665 }
666
EqualKey(internal_key_type a,internal_key_type b)667 static bool EqualKey(internal_key_type a, internal_key_type b) {
668 // When doing 'stat' lookups we don't care about the kind of 'a' and 'b',
669 // just the paths.
670 return strcmp(a.second, b.second) == 0;
671 }
672
ReadData(const internal_key_type & k,const unsigned char * d,unsigned)673 static data_type ReadData(const internal_key_type& k, const unsigned char* d,
674 unsigned) {
675
676 if (k.first /* File or Directory */) {
677 bool IsDirectory = true;
678 if (k.first == 0x1 /* File */) {
679 IsDirectory = false;
680 d += 4 * 2; // Skip the first 2 words.
681 }
682
683 using namespace llvm::support;
684
685 uint64_t File = endian::readNext<uint64_t, little, unaligned>(d);
686 uint64_t Device = endian::readNext<uint64_t, little, unaligned>(d);
687 llvm::sys::fs::UniqueID UniqueID(File, Device);
688 time_t ModTime = endian::readNext<uint64_t, little, unaligned>(d);
689 uint64_t Size = endian::readNext<uint64_t, little, unaligned>(d);
690 return data_type(Size, ModTime, UniqueID, IsDirectory);
691 }
692
693 // Negative stat. Don't read anything.
694 return data_type();
695 }
696 };
697
698 class PTHStatCache : public FileSystemStatCache {
699 typedef llvm::OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy;
700 CacheTy Cache;
701
702 public:
PTHStatCache(PTHFileLookup & FL)703 PTHStatCache(PTHFileLookup &FL) :
704 Cache(FL.getNumBuckets(), FL.getNumEntries(), FL.getBuckets(),
705 FL.getBase()) {}
706
getStat(const char * Path,FileData & Data,bool isFile,std::unique_ptr<vfs::File> * F,vfs::FileSystem & FS)707 LookupResult getStat(const char *Path, FileData &Data, bool isFile,
708 std::unique_ptr<vfs::File> *F,
709 vfs::FileSystem &FS) override {
710 // Do the lookup for the file's data in the PTH file.
711 CacheTy::iterator I = Cache.find(Path);
712
713 // If we don't get a hit in the PTH file just forward to 'stat'.
714 if (I == Cache.end())
715 return statChained(Path, Data, isFile, F, FS);
716
717 const PTHStatData &D = *I;
718
719 if (!D.HasData)
720 return CacheMissing;
721
722 Data.Name = Path;
723 Data.Size = D.Size;
724 Data.ModTime = D.ModTime;
725 Data.UniqueID = D.UniqueID;
726 Data.IsDirectory = D.IsDirectory;
727 Data.IsNamedPipe = false;
728 Data.InPCH = true;
729
730 return CacheExists;
731 }
732 };
733 } // end anonymous namespace
734
createStatCache()735 FileSystemStatCache *PTHManager::createStatCache() {
736 return new PTHStatCache(*((PTHFileLookup*) FileLookup));
737 }
738