• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the PTHLexer interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Lex/PTHLexer.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/FileSystemStatCache.h"
17 #include "clang/Basic/IdentifierTable.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "clang/Lex/LexDiagnostic.h"
20 #include "clang/Lex/PTHManager.h"
21 #include "clang/Lex/Preprocessor.h"
22 #include "clang/Lex/Token.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/StringMap.h"
25 #include "llvm/Support/EndianStream.h"
26 #include "llvm/Support/MemoryBuffer.h"
27 #include "llvm/Support/OnDiskHashTable.h"
28 #include <memory>
29 #include <system_error>
30 using namespace clang;
31 
// Size in bytes of one serialized token record in the PTH token buffer.  As
// decoded by PTHLexer::Lex, a record is one packed 32-bit word holding the
// token kind (1 byte), flags (1 byte) and length (2 bytes), followed by a
// 32-bit identifier ID / spelling offset and a 32-bit file offset.
static const unsigned StoredTokenSize =
    /*kind*/ 1 + /*flags*/ 1 + /*length*/ 2 + /*id*/ 4 + /*file offset*/ 4;
33 
34 //===----------------------------------------------------------------------===//
35 // PTHLexer methods.
36 //===----------------------------------------------------------------------===//
37 
PTHLexer(Preprocessor & PP,FileID FID,const unsigned char * D,const unsigned char * ppcond,PTHManager & PM)38 PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
39                    const unsigned char *ppcond, PTHManager &PM)
40   : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(nullptr),
41     PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
42 
43   FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
44 }
45 
Lex(Token & Tok)46 bool PTHLexer::Lex(Token& Tok) {
47   //===--------------------------------------==//
48   // Read the raw token data.
49   //===--------------------------------------==//
50   using namespace llvm::support;
51 
52   // Shadow CurPtr into an automatic variable.
53   const unsigned char *CurPtrShadow = CurPtr;
54 
55   // Read in the data for the token.
56   unsigned Word0 = endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
57   uint32_t IdentifierID =
58       endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
59   uint32_t FileOffset =
60       endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
61 
62   tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF);
63   Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF);
64   uint32_t Len = Word0 >> 16;
65 
66   CurPtr = CurPtrShadow;
67 
68   //===--------------------------------------==//
69   // Construct the token itself.
70   //===--------------------------------------==//
71 
72   Tok.startToken();
73   Tok.setKind(TKind);
74   Tok.setFlag(TFlags);
75   assert(!LexingRawMode);
76   Tok.setLocation(FileStartLoc.getLocWithOffset(FileOffset));
77   Tok.setLength(Len);
78 
79   // Handle identifiers.
80   if (Tok.isLiteral()) {
81     Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID));
82   }
83   else if (IdentifierID) {
84     MIOpt.ReadToken();
85     IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1);
86 
87     Tok.setIdentifierInfo(II);
88 
89     // Change the kind of this identifier to the appropriate token kind, e.g.
90     // turning "for" into a keyword.
91     Tok.setKind(II->getTokenID());
92 
93     if (II->isHandleIdentifierCase())
94       return PP->HandleIdentifier(Tok);
95 
96     return true;
97   }
98 
99   //===--------------------------------------==//
100   // Process the token.
101   //===--------------------------------------==//
102   if (TKind == tok::eof) {
103     // Save the end-of-file token.
104     EofToken = Tok;
105 
106     assert(!ParsingPreprocessorDirective);
107     assert(!LexingRawMode);
108 
109     return LexEndOfFile(Tok);
110   }
111 
112   if (TKind == tok::hash && Tok.isAtStartOfLine()) {
113     LastHashTokPtr = CurPtr - StoredTokenSize;
114     assert(!LexingRawMode);
115     PP->HandleDirective(Tok);
116 
117     return false;
118   }
119 
120   if (TKind == tok::eod) {
121     assert(ParsingPreprocessorDirective);
122     ParsingPreprocessorDirective = false;
123     return true;
124   }
125 
126   MIOpt.ReadToken();
127   return true;
128 }
129 
LexEndOfFile(Token & Result)130 bool PTHLexer::LexEndOfFile(Token &Result) {
131   // If we hit the end of the file while parsing a preprocessor directive,
132   // end the preprocessor directive first.  The next token returned will
133   // then be the end of file.
134   if (ParsingPreprocessorDirective) {
135     ParsingPreprocessorDirective = false; // Done parsing the "line".
136     return true;  // Have a token.
137   }
138 
139   assert(!LexingRawMode);
140 
141   // If we are in a #if directive, emit an error.
142   while (!ConditionalStack.empty()) {
143     if (PP->getCodeCompletionFileLoc() != FileStartLoc)
144       PP->Diag(ConditionalStack.back().IfLoc,
145                diag::err_pp_unterminated_conditional);
146     ConditionalStack.pop_back();
147   }
148 
149   // Finally, let the preprocessor handle this.
150   return PP->HandleEndOfFile(Result);
151 }
152 
153 // FIXME: We can just grab the last token instead of storing a copy
154 // into EofToken.
getEOF(Token & Tok)155 void PTHLexer::getEOF(Token& Tok) {
156   assert(EofToken.is(tok::eof));
157   Tok = EofToken;
158 }
159 
DiscardToEndOfLine()160 void PTHLexer::DiscardToEndOfLine() {
161   assert(ParsingPreprocessorDirective && ParsingFilename == false &&
162          "Must be in a preprocessing directive!");
163 
164   // We assume that if the preprocessor wishes to discard to the end of
165   // the line that it also means to end the current preprocessor directive.
166   ParsingPreprocessorDirective = false;
167 
168   // Skip tokens by only peeking at their token kind and the flags.
169   // We don't need to actually reconstruct full tokens from the token buffer.
170   // This saves some copies and it also reduces IdentifierInfo* lookup.
171   const unsigned char* p = CurPtr;
172   while (1) {
173     // Read the token kind.  Are we at the end of the file?
174     tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
175     if (x == tok::eof) break;
176 
177     // Read the token flags.  Are we at the start of the next line?
178     Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
179     if (y & Token::StartOfLine) break;
180 
181     // Skip to the next token.
182     p += StoredTokenSize;
183   }
184 
185   CurPtr = p;
186 }
187 
188 /// SkipBlock - Used by Preprocessor to skip the current conditional block.
SkipBlock()189 bool PTHLexer::SkipBlock() {
190   using namespace llvm::support;
191   assert(CurPPCondPtr && "No cached PP conditional information.");
192   assert(LastHashTokPtr && "No known '#' token.");
193 
194   const unsigned char *HashEntryI = nullptr;
195   uint32_t TableIdx;
196 
197   do {
198     // Read the token offset from the side-table.
199     uint32_t Offset = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);
200 
201     // Read the target table index from the side-table.
202     TableIdx = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);
203 
204     // Compute the actual memory address of the '#' token data for this entry.
205     HashEntryI = TokBuf + Offset;
206 
207     // Optmization: "Sibling jumping".  #if...#else...#endif blocks can
208     //  contain nested blocks.  In the side-table we can jump over these
209     //  nested blocks instead of doing a linear search if the next "sibling"
210     //  entry is not at a location greater than LastHashTokPtr.
211     if (HashEntryI < LastHashTokPtr && TableIdx) {
212       // In the side-table we are still at an entry for a '#' token that
213       // is earlier than the last one we saw.  Check if the location we would
214       // stride gets us closer.
215       const unsigned char* NextPPCondPtr =
216         PPCond + TableIdx*(sizeof(uint32_t)*2);
217       assert(NextPPCondPtr >= CurPPCondPtr);
218       // Read where we should jump to.
219       const unsigned char *HashEntryJ =
220           TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
221 
222       if (HashEntryJ <= LastHashTokPtr) {
223         // Jump directly to the next entry in the side table.
224         HashEntryI = HashEntryJ;
225         TableIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
226         CurPPCondPtr = NextPPCondPtr;
227       }
228     }
229   }
230   while (HashEntryI < LastHashTokPtr);
231   assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
232   assert(TableIdx && "No jumping from #endifs.");
233 
234   // Update our side-table iterator.
235   const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
236   assert(NextPPCondPtr >= CurPPCondPtr);
237   CurPPCondPtr = NextPPCondPtr;
238 
239   // Read where we should jump to.
240   HashEntryI =
241       TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
242   uint32_t NextIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
243 
244   // By construction NextIdx will be zero if this is a #endif.  This is useful
245   // to know to obviate lexing another token.
246   bool isEndif = NextIdx == 0;
247 
248   // This case can occur when we see something like this:
249   //
250   //  #if ...
251   //   /* a comment or nothing */
252   //  #elif
253   //
254   // If we are skipping the first #if block it will be the case that CurPtr
255   // already points 'elif'.  Just return.
256 
257   if (CurPtr > HashEntryI) {
258     assert(CurPtr == HashEntryI + StoredTokenSize);
259     // Did we reach a #endif?  If so, go ahead and consume that token as well.
260     if (isEndif)
261       CurPtr += StoredTokenSize * 2;
262     else
263       LastHashTokPtr = HashEntryI;
264 
265     return isEndif;
266   }
267 
268   // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
269   CurPtr = HashEntryI;
270 
271   // Update the location of the last observed '#'.  This is useful if we
272   // are skipping multiple blocks.
273   LastHashTokPtr = CurPtr;
274 
275   // Skip the '#' token.
276   assert(((tok::TokenKind)*CurPtr) == tok::hash);
277   CurPtr += StoredTokenSize;
278 
279   // Did we reach a #endif?  If so, go ahead and consume that token as well.
280   if (isEndif) {
281     CurPtr += StoredTokenSize * 2;
282   }
283 
284   return isEndif;
285 }
286 
getSourceLocation()287 SourceLocation PTHLexer::getSourceLocation() {
288   // getSourceLocation is not on the hot path.  It is used to get the location
289   // of the next token when transitioning back to this lexer when done
290   // handling a #included file.  Just read the necessary data from the token
291   // data buffer to construct the SourceLocation object.
292   // NOTE: This is a virtual function; hence it is defined out-of-line.
293   using namespace llvm::support;
294 
295   const unsigned char *OffsetPtr = CurPtr + (StoredTokenSize - 4);
296   uint32_t Offset = endian::readNext<uint32_t, little, aligned>(OffsetPtr);
297   return FileStartLoc.getLocWithOffset(Offset);
298 }
299 
300 //===----------------------------------------------------------------------===//
301 // PTH file lookup: map from strings to file data.
302 //===----------------------------------------------------------------------===//
303 
304 /// PTHFileLookup - This internal data structure is used by the PTHManager
305 ///  to map from FileEntry objects managed by FileManager to offsets within
306 ///  the PTH file.
307 namespace {
/// PTHFileData - Immutable pair of offsets describing one file in the PTH
/// data: where its token data starts and where its preprocessor-conditional
/// table starts.
class PTHFileData {
public:
  PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
    : TokenOff(tokenOff), PPCondOff(ppCondOff) {}

  /// Offset of the file's token data.
  uint32_t getTokenOffset() const { return TokenOff; }

  /// Offset of the file's preprocessor-conditional table.
  uint32_t getPPCondOffset() const { return PPCondOff; }

private:
  const uint32_t TokenOff;
  const uint32_t PPCondOff;
};
318 
319 
320 class PTHFileLookupCommonTrait {
321 public:
322   typedef std::pair<unsigned char, const char*> internal_key_type;
323   typedef unsigned hash_value_type;
324   typedef unsigned offset_type;
325 
ComputeHash(internal_key_type x)326   static hash_value_type ComputeHash(internal_key_type x) {
327     return llvm::HashString(x.second);
328   }
329 
330   static std::pair<unsigned, unsigned>
ReadKeyDataLength(const unsigned char * & d)331   ReadKeyDataLength(const unsigned char*& d) {
332     using namespace llvm::support;
333     unsigned keyLen =
334         (unsigned)endian::readNext<uint16_t, little, unaligned>(d);
335     unsigned dataLen = (unsigned) *(d++);
336     return std::make_pair(keyLen, dataLen);
337   }
338 
ReadKey(const unsigned char * d,unsigned)339   static internal_key_type ReadKey(const unsigned char* d, unsigned) {
340     unsigned char k = *(d++); // Read the entry kind.
341     return std::make_pair(k, (const char*) d);
342   }
343 };
344 
345 class PTHFileLookupTrait : public PTHFileLookupCommonTrait {
346 public:
347   typedef const FileEntry* external_key_type;
348   typedef PTHFileData      data_type;
349 
GetInternalKey(const FileEntry * FE)350   static internal_key_type GetInternalKey(const FileEntry* FE) {
351     return std::make_pair((unsigned char) 0x1, FE->getName());
352   }
353 
EqualKey(internal_key_type a,internal_key_type b)354   static bool EqualKey(internal_key_type a, internal_key_type b) {
355     return a.first == b.first && strcmp(a.second, b.second) == 0;
356   }
357 
ReadData(const internal_key_type & k,const unsigned char * d,unsigned)358   static PTHFileData ReadData(const internal_key_type& k,
359                               const unsigned char* d, unsigned) {
360     assert(k.first == 0x1 && "Only file lookups can match!");
361     using namespace llvm::support;
362     uint32_t x = endian::readNext<uint32_t, little, unaligned>(d);
363     uint32_t y = endian::readNext<uint32_t, little, unaligned>(d);
364     return PTHFileData(x, y);
365   }
366 };
367 
368 class PTHStringLookupTrait {
369 public:
370   typedef uint32_t data_type;
371   typedef const std::pair<const char*, unsigned> external_key_type;
372   typedef external_key_type internal_key_type;
373   typedef uint32_t hash_value_type;
374   typedef unsigned offset_type;
375 
EqualKey(const internal_key_type & a,const internal_key_type & b)376   static bool EqualKey(const internal_key_type& a,
377                        const internal_key_type& b) {
378     return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0
379                                   : false;
380   }
381 
ComputeHash(const internal_key_type & a)382   static hash_value_type ComputeHash(const internal_key_type& a) {
383     return llvm::HashString(StringRef(a.first, a.second));
384   }
385 
386   // This hopefully will just get inlined and removed by the optimizer.
387   static const internal_key_type&
GetInternalKey(const external_key_type & x)388   GetInternalKey(const external_key_type& x) { return x; }
389 
390   static std::pair<unsigned, unsigned>
ReadKeyDataLength(const unsigned char * & d)391   ReadKeyDataLength(const unsigned char*& d) {
392     using namespace llvm::support;
393     return std::make_pair(
394         (unsigned)endian::readNext<uint16_t, little, unaligned>(d),
395         sizeof(uint32_t));
396   }
397 
398   static std::pair<const char*, unsigned>
ReadKey(const unsigned char * d,unsigned n)399   ReadKey(const unsigned char* d, unsigned n) {
400       assert(n >= 2 && d[n-1] == '\0');
401       return std::make_pair((const char*) d, n-1);
402     }
403 
ReadData(const internal_key_type & k,const unsigned char * d,unsigned)404   static uint32_t ReadData(const internal_key_type& k, const unsigned char* d,
405                            unsigned) {
406     using namespace llvm::support;
407     return endian::readNext<uint32_t, little, unaligned>(d);
408   }
409 };
410 
411 } // end anonymous namespace
412 
413 typedef llvm::OnDiskChainedHashTable<PTHFileLookupTrait>   PTHFileLookup;
414 typedef llvm::OnDiskChainedHashTable<PTHStringLookupTrait> PTHStringIdLookup;
415 
416 //===----------------------------------------------------------------------===//
417 // PTHManager methods.
418 //===----------------------------------------------------------------------===//
419 
PTHManager(const llvm::MemoryBuffer * buf,void * fileLookup,const unsigned char * idDataTable,IdentifierInfo ** perIDCache,void * stringIdLookup,unsigned numIds,const unsigned char * spellingBase,const char * originalSourceFile)420 PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
421                        const unsigned char* idDataTable,
422                        IdentifierInfo** perIDCache,
423                        void* stringIdLookup, unsigned numIds,
424                        const unsigned char* spellingBase,
425                        const char* originalSourceFile)
426 : Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
427   IdDataTable(idDataTable), StringIdLookup(stringIdLookup),
428   NumIds(numIds), PP(nullptr), SpellingBase(spellingBase),
429   OriginalSourceFile(originalSourceFile) {}
430 
~PTHManager()431 PTHManager::~PTHManager() {
432   delete Buf;
433   delete (PTHFileLookup*) FileLookup;
434   delete (PTHStringIdLookup*) StringIdLookup;
435   free(PerIDCache);
436 }
437 
InvalidPTH(DiagnosticsEngine & Diags,const char * Msg)438 static void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) {
439   Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0")) << Msg;
440 }
441 
Create(const std::string & file,DiagnosticsEngine & Diags)442 PTHManager *PTHManager::Create(const std::string &file,
443                                DiagnosticsEngine &Diags) {
444   // Memory map the PTH file.
445   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
446       llvm::MemoryBuffer::getFile(file);
447 
448   if (!FileOrErr) {
449     // FIXME: Add ec.message() to this diag.
450     Diags.Report(diag::err_invalid_pth_file) << file;
451     return nullptr;
452   }
453   std::unique_ptr<llvm::MemoryBuffer> File = std::move(FileOrErr.get());
454 
455   using namespace llvm::support;
456 
457   // Get the buffer ranges and check if there are at least three 32-bit
458   // words at the end of the file.
459   const unsigned char *BufBeg = (const unsigned char*)File->getBufferStart();
460   const unsigned char *BufEnd = (const unsigned char*)File->getBufferEnd();
461 
462   // Check the prologue of the file.
463   if ((BufEnd - BufBeg) < (signed)(sizeof("cfe-pth") + 4 + 4) ||
464       memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth")) != 0) {
465     Diags.Report(diag::err_invalid_pth_file) << file;
466     return nullptr;
467   }
468 
469   // Read the PTH version.
470   const unsigned char *p = BufBeg + (sizeof("cfe-pth"));
471   unsigned Version = endian::readNext<uint32_t, little, aligned>(p);
472 
473   if (Version < PTHManager::Version) {
474     InvalidPTH(Diags,
475         Version < PTHManager::Version
476         ? "PTH file uses an older PTH format that is no longer supported"
477         : "PTH file uses a newer PTH format that cannot be read");
478     return nullptr;
479   }
480 
481   // Compute the address of the index table at the end of the PTH file.
482   const unsigned char *PrologueOffset = p;
483 
484   if (PrologueOffset >= BufEnd) {
485     Diags.Report(diag::err_invalid_pth_file) << file;
486     return nullptr;
487   }
488 
489   // Construct the file lookup table.  This will be used for mapping from
490   // FileEntry*'s to cached tokens.
491   const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2;
492   const unsigned char *FileTable =
493       BufBeg + endian::readNext<uint32_t, little, aligned>(FileTableOffset);
494 
495   if (!(FileTable > BufBeg && FileTable < BufEnd)) {
496     Diags.Report(diag::err_invalid_pth_file) << file;
497     return nullptr; // FIXME: Proper error diagnostic?
498   }
499 
500   std::unique_ptr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
501 
502   // Warn if the PTH file is empty.  We still want to create a PTHManager
503   // as the PTH could be used with -include-pth.
504   if (FL->isEmpty())
505     InvalidPTH(Diags, "PTH file contains no cached source data");
506 
507   // Get the location of the table mapping from persistent ids to the
508   // data needed to reconstruct identifiers.
509   const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0;
510   const unsigned char *IData =
511       BufBeg + endian::readNext<uint32_t, little, aligned>(IDTableOffset);
512 
513   if (!(IData >= BufBeg && IData < BufEnd)) {
514     Diags.Report(diag::err_invalid_pth_file) << file;
515     return nullptr;
516   }
517 
518   // Get the location of the hashtable mapping between strings and
519   // persistent IDs.
520   const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1;
521   const unsigned char *StringIdTable =
522       BufBeg + endian::readNext<uint32_t, little, aligned>(StringIdTableOffset);
523   if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) {
524     Diags.Report(diag::err_invalid_pth_file) << file;
525     return nullptr;
526   }
527 
528   std::unique_ptr<PTHStringIdLookup> SL(
529       PTHStringIdLookup::Create(StringIdTable, BufBeg));
530 
531   // Get the location of the spelling cache.
532   const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3;
533   const unsigned char *spellingBase =
534       BufBeg + endian::readNext<uint32_t, little, aligned>(spellingBaseOffset);
535   if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) {
536     Diags.Report(diag::err_invalid_pth_file) << file;
537     return nullptr;
538   }
539 
540   // Get the number of IdentifierInfos and pre-allocate the identifier cache.
541   uint32_t NumIds = endian::readNext<uint32_t, little, aligned>(IData);
542 
543   // Pre-allocate the persistent ID -> IdentifierInfo* cache.  We use calloc()
544   // so that we in the best case only zero out memory once when the OS returns
545   // us new pages.
546   IdentifierInfo **PerIDCache = nullptr;
547 
548   if (NumIds) {
549     PerIDCache = (IdentifierInfo**)calloc(NumIds, sizeof(*PerIDCache));
550     if (!PerIDCache) {
551       InvalidPTH(Diags, "Could not allocate memory for processing PTH file");
552       return nullptr;
553     }
554   }
555 
556   // Compute the address of the original source file.
557   const unsigned char* originalSourceBase = PrologueOffset + sizeof(uint32_t)*4;
558   unsigned len =
559       endian::readNext<uint16_t, little, unaligned>(originalSourceBase);
560   if (!len) originalSourceBase = nullptr;
561 
562   // Create the new PTHManager.
563   return new PTHManager(File.release(), FL.release(), IData, PerIDCache,
564                         SL.release(), NumIds, spellingBase,
565                         (const char *)originalSourceBase);
566 }
567 
LazilyCreateIdentifierInfo(unsigned PersistentID)568 IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
569   using namespace llvm::support;
570   // Look in the PTH file for the string data for the IdentifierInfo object.
571   const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
572   const unsigned char *IDData =
573       (const unsigned char *)Buf->getBufferStart() +
574       endian::readNext<uint32_t, little, aligned>(TableEntry);
575   assert(IDData < (const unsigned char*)Buf->getBufferEnd());
576 
577   // Allocate the object.
578   std::pair<IdentifierInfo,const unsigned char*> *Mem =
579     Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
580 
581   Mem->second = IDData;
582   assert(IDData[0] != '\0');
583   IdentifierInfo *II = new ((void*) Mem) IdentifierInfo();
584 
585   // Store the new IdentifierInfo in the cache.
586   PerIDCache[PersistentID] = II;
587   assert(II->getNameStart() && II->getNameStart()[0] != '\0');
588   return II;
589 }
590 
get(StringRef Name)591 IdentifierInfo* PTHManager::get(StringRef Name) {
592   PTHStringIdLookup& SL = *((PTHStringIdLookup*)StringIdLookup);
593   // Double check our assumption that the last character isn't '\0'.
594   assert(Name.empty() || Name.back() != '\0');
595   PTHStringIdLookup::iterator I = SL.find(std::make_pair(Name.data(),
596                                                          Name.size()));
597   if (I == SL.end()) // No identifier found?
598     return nullptr;
599 
600   // Match found.  Return the identifier!
601   assert(*I > 0);
602   return GetIdentifierInfo(*I-1);
603 }
604 
CreateLexer(FileID FID)605 PTHLexer *PTHManager::CreateLexer(FileID FID) {
606   const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID);
607   if (!FE)
608     return nullptr;
609 
610   using namespace llvm::support;
611 
612   // Lookup the FileEntry object in our file lookup data structure.  It will
613   // return a variant that indicates whether or not there is an offset within
614   // the PTH file that contains cached tokens.
615   PTHFileLookup& PFL = *((PTHFileLookup*)FileLookup);
616   PTHFileLookup::iterator I = PFL.find(FE);
617 
618   if (I == PFL.end()) // No tokens available?
619     return nullptr;
620 
621   const PTHFileData& FileData = *I;
622 
623   const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
624   // Compute the offset of the token data within the buffer.
625   const unsigned char* data = BufStart + FileData.getTokenOffset();
626 
627   // Get the location of pp-conditional table.
628   const unsigned char* ppcond = BufStart + FileData.getPPCondOffset();
629   uint32_t Len = endian::readNext<uint32_t, little, aligned>(ppcond);
630   if (Len == 0) ppcond = nullptr;
631 
632   assert(PP && "No preprocessor set yet!");
633   return new PTHLexer(*PP, FID, data, ppcond, *this);
634 }
635 
636 //===----------------------------------------------------------------------===//
637 // 'stat' caching.
638 //===----------------------------------------------------------------------===//
639 
640 namespace {
641 class PTHStatData {
642 public:
643   const bool HasData;
644   uint64_t Size;
645   time_t ModTime;
646   llvm::sys::fs::UniqueID UniqueID;
647   bool IsDirectory;
648 
PTHStatData(uint64_t Size,time_t ModTime,llvm::sys::fs::UniqueID UniqueID,bool IsDirectory)649   PTHStatData(uint64_t Size, time_t ModTime, llvm::sys::fs::UniqueID UniqueID,
650               bool IsDirectory)
651       : HasData(true), Size(Size), ModTime(ModTime), UniqueID(UniqueID),
652         IsDirectory(IsDirectory) {}
653 
PTHStatData()654   PTHStatData() : HasData(false) {}
655 };
656 
657 class PTHStatLookupTrait : public PTHFileLookupCommonTrait {
658 public:
659   typedef const char* external_key_type;  // const char*
660   typedef PTHStatData data_type;
661 
GetInternalKey(const char * path)662   static internal_key_type GetInternalKey(const char *path) {
663     // The key 'kind' doesn't matter here because it is ignored in EqualKey.
664     return std::make_pair((unsigned char) 0x0, path);
665   }
666 
EqualKey(internal_key_type a,internal_key_type b)667   static bool EqualKey(internal_key_type a, internal_key_type b) {
668     // When doing 'stat' lookups we don't care about the kind of 'a' and 'b',
669     // just the paths.
670     return strcmp(a.second, b.second) == 0;
671   }
672 
ReadData(const internal_key_type & k,const unsigned char * d,unsigned)673   static data_type ReadData(const internal_key_type& k, const unsigned char* d,
674                             unsigned) {
675 
676     if (k.first /* File or Directory */) {
677       bool IsDirectory = true;
678       if (k.first == 0x1 /* File */) {
679         IsDirectory = false;
680         d += 4 * 2; // Skip the first 2 words.
681       }
682 
683       using namespace llvm::support;
684 
685       uint64_t File = endian::readNext<uint64_t, little, unaligned>(d);
686       uint64_t Device = endian::readNext<uint64_t, little, unaligned>(d);
687       llvm::sys::fs::UniqueID UniqueID(File, Device);
688       time_t ModTime = endian::readNext<uint64_t, little, unaligned>(d);
689       uint64_t Size = endian::readNext<uint64_t, little, unaligned>(d);
690       return data_type(Size, ModTime, UniqueID, IsDirectory);
691     }
692 
693     // Negative stat.  Don't read anything.
694     return data_type();
695   }
696 };
697 
698 class PTHStatCache : public FileSystemStatCache {
699   typedef llvm::OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy;
700   CacheTy Cache;
701 
702 public:
PTHStatCache(PTHFileLookup & FL)703   PTHStatCache(PTHFileLookup &FL) :
704     Cache(FL.getNumBuckets(), FL.getNumEntries(), FL.getBuckets(),
705           FL.getBase()) {}
706 
getStat(const char * Path,FileData & Data,bool isFile,std::unique_ptr<vfs::File> * F,vfs::FileSystem & FS)707   LookupResult getStat(const char *Path, FileData &Data, bool isFile,
708                        std::unique_ptr<vfs::File> *F,
709                        vfs::FileSystem &FS) override {
710     // Do the lookup for the file's data in the PTH file.
711     CacheTy::iterator I = Cache.find(Path);
712 
713     // If we don't get a hit in the PTH file just forward to 'stat'.
714     if (I == Cache.end())
715       return statChained(Path, Data, isFile, F, FS);
716 
717     const PTHStatData &D = *I;
718 
719     if (!D.HasData)
720       return CacheMissing;
721 
722     Data.Name = Path;
723     Data.Size = D.Size;
724     Data.ModTime = D.ModTime;
725     Data.UniqueID = D.UniqueID;
726     Data.IsDirectory = D.IsDirectory;
727     Data.IsNamedPipe = false;
728     Data.InPCH = true;
729 
730     return CacheExists;
731   }
732 };
733 } // end anonymous namespace
734 
createStatCache()735 FileSystemStatCache *PTHManager::createStatCache() {
736   return new PTHStatCache(*((PTHFileLookup*) FileLookup));
737 }
738