1 //===--- FileIndex.cpp - Indexes for files. ------------------------ C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "FileIndex.h"
10 #include "CollectMacros.h"
11 #include "ParsedAST.h"
12 #include "SymbolCollector.h"
13 #include "index/CanonicalIncludes.h"
14 #include "index/Index.h"
15 #include "index/MemIndex.h"
16 #include "index/Merge.h"
17 #include "index/Ref.h"
18 #include "index/Relation.h"
19 #include "index/Serialization.h"
20 #include "index/Symbol.h"
21 #include "index/SymbolID.h"
22 #include "index/SymbolOrigin.h"
23 #include "index/dex/Dex.h"
24 #include "support/Logger.h"
25 #include "support/MemoryTree.h"
26 #include "support/Path.h"
27 #include "clang/AST/ASTContext.h"
28 #include "clang/Index/IndexingAction.h"
29 #include "clang/Index/IndexingOptions.h"
30 #include "clang/Lex/MacroInfo.h"
31 #include "clang/Lex/Preprocessor.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/Optional.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/StringMap.h"
36 #include "llvm/ADT/StringRef.h"
37 #include "llvm/Support/Error.h"
38 #include <algorithm>
39 #include <memory>
40 #include <tuple>
41 #include <utility>
42 #include <vector>
43
44 namespace clang {
45 namespace clangd {
46 namespace {
47
indexSymbols(ASTContext & AST,std::shared_ptr<Preprocessor> PP,llvm::ArrayRef<Decl * > DeclsToIndex,const MainFileMacros * MacroRefsToIndex,const CanonicalIncludes & Includes,bool IsIndexMainAST,llvm::StringRef Version,bool CollectMainFileRefs)48 SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
49 llvm::ArrayRef<Decl *> DeclsToIndex,
50 const MainFileMacros *MacroRefsToIndex,
51 const CanonicalIncludes &Includes, bool IsIndexMainAST,
52 llvm::StringRef Version, bool CollectMainFileRefs) {
53 SymbolCollector::Options CollectorOpts;
54 CollectorOpts.CollectIncludePath = true;
55 CollectorOpts.Includes = &Includes;
56 CollectorOpts.CountReferences = false;
57 CollectorOpts.Origin = SymbolOrigin::Dynamic;
58 CollectorOpts.CollectMainFileRefs = CollectMainFileRefs;
59
60 index::IndexingOptions IndexOpts;
61 // We only need declarations, because we don't count references.
62 IndexOpts.SystemSymbolFilter =
63 index::IndexingOptions::SystemSymbolFilterKind::DeclarationsOnly;
64 IndexOpts.IndexFunctionLocals = false;
65 if (IsIndexMainAST) {
66 // We only collect refs when indexing main AST.
67 CollectorOpts.RefFilter = RefKind::All;
68 // Comments for main file can always be obtained from sema, do not store
69 // them in the index.
70 CollectorOpts.StoreAllDocumentation = false;
71 } else {
72 IndexOpts.IndexMacrosInPreprocessor = true;
73 CollectorOpts.CollectMacro = true;
74 CollectorOpts.StoreAllDocumentation = true;
75 }
76
77 SymbolCollector Collector(std::move(CollectorOpts));
78 Collector.setPreprocessor(PP);
79 if (MacroRefsToIndex)
80 Collector.handleMacros(*MacroRefsToIndex);
81 index::indexTopLevelDecls(AST, *PP, DeclsToIndex, Collector, IndexOpts);
82
83 const auto &SM = AST.getSourceManager();
84 const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
85 std::string FileName =
86 std::string(MainFileEntry ? MainFileEntry->getName() : "");
87
88 auto Syms = Collector.takeSymbols();
89 auto Refs = Collector.takeRefs();
90 auto Relations = Collector.takeRelations();
91
92 vlog("indexed {0} AST for {1} version {2}:\n"
93 " symbol slab: {3} symbols, {4} bytes\n"
94 " ref slab: {5} symbols, {6} refs, {7} bytes\n"
95 " relations slab: {8} relations, {9} bytes",
96 IsIndexMainAST ? "file" : "preamble", FileName, Version, Syms.size(),
97 Syms.bytes(), Refs.size(), Refs.numRefs(), Refs.bytes(),
98 Relations.size(), Relations.bytes());
99 return std::make_tuple(std::move(Syms), std::move(Refs),
100 std::move(Relations));
101 }
102
103 // We keep only the node "U" and its edges. Any node other than "U" will be
104 // empty in the resultant graph.
getSubGraph(llvm::StringRef URI,const IncludeGraph & FullGraph)105 IncludeGraph getSubGraph(llvm::StringRef URI, const IncludeGraph &FullGraph) {
106 IncludeGraph IG;
107
108 auto Entry = IG.try_emplace(URI).first;
109 auto &Node = Entry->getValue();
110 Node = FullGraph.lookup(Entry->getKey());
111 Node.URI = Entry->getKey();
112
113 // URIs inside nodes must point into the keys of the same IncludeGraph.
114 for (auto &Include : Node.DirectIncludes) {
115 auto I = IG.try_emplace(Include).first;
116 I->getValue().URI = I->getKey();
117 Include = I->getKey();
118 }
119 return IG;
120 }
121 } // namespace
122
FileShardedIndex(IndexFileIn Input)123 FileShardedIndex::FileShardedIndex(IndexFileIn Input)
124 : Index(std::move(Input)) {
125 // Used to build RelationSlabs.
126 llvm::DenseMap<SymbolID, FileShard *> SymbolIDToFile;
127
128 // Attribute each Symbol to both their declaration and definition locations.
129 if (Index.Symbols) {
130 for (const auto &S : *Index.Symbols) {
131 auto It = Shards.try_emplace(S.CanonicalDeclaration.FileURI);
132 It.first->getValue().Symbols.insert(&S);
133 SymbolIDToFile[S.ID] = &It.first->getValue();
134 // Only bother if definition file is different than declaration file.
135 if (S.Definition &&
136 S.Definition.FileURI != S.CanonicalDeclaration.FileURI) {
137 auto It = Shards.try_emplace(S.Definition.FileURI);
138 It.first->getValue().Symbols.insert(&S);
139 }
140 }
141 }
142 // Attribute references into each file they occured in.
143 if (Index.Refs) {
144 for (const auto &SymRefs : *Index.Refs) {
145 for (const auto &R : SymRefs.second) {
146 const auto It = Shards.try_emplace(R.Location.FileURI);
147 It.first->getValue().Refs.insert(&R);
148 RefToSymID[&R] = SymRefs.first;
149 }
150 }
151 }
152 // The Subject and/or Object shards might be part of multiple TUs. In
153 // such cases there will be a race and the last TU to write the shard
154 // will win and all the other relations will be lost. To avoid this,
155 // we store relations in both shards. A race might still happen if the
156 // same translation unit produces different relations under different
157 // configurations, but that's something clangd doesn't handle in general.
158 if (Index.Relations) {
159 for (const auto &R : *Index.Relations) {
160 // FIXME: RelationSlab shouldn't contain dangling relations.
161 FileShard *SubjectFile = SymbolIDToFile.lookup(R.Subject);
162 FileShard *ObjectFile = SymbolIDToFile.lookup(R.Object);
163 if (SubjectFile)
164 SubjectFile->Relations.insert(&R);
165 if (ObjectFile && ObjectFile != SubjectFile)
166 ObjectFile->Relations.insert(&R);
167 }
168 }
169 // Store only the direct includes of a file in a shard.
170 if (Index.Sources) {
171 const auto &FullGraph = *Index.Sources;
172 for (const auto &It : FullGraph) {
173 auto ShardIt = Shards.try_emplace(It.first());
174 ShardIt.first->getValue().IG = getSubGraph(It.first(), FullGraph);
175 }
176 }
177 }
getAllSources() const178 std::vector<llvm::StringRef> FileShardedIndex::getAllSources() const {
179 // It should be enough to construct a vector with {Shards.keys().begin(),
180 // Shards.keys().end()} but MSVC fails to compile that.
181 std::vector<PathRef> Result;
182 Result.reserve(Shards.size());
183 for (auto Key : Shards.keys())
184 Result.push_back(Key);
185 return Result;
186 }
187
188 llvm::Optional<IndexFileIn>
getShard(llvm::StringRef Uri) const189 FileShardedIndex::getShard(llvm::StringRef Uri) const {
190 auto It = Shards.find(Uri);
191 if (It == Shards.end())
192 return llvm::None;
193
194 IndexFileIn IF;
195 IF.Sources = It->getValue().IG;
196 IF.Cmd = Index.Cmd;
197
198 SymbolSlab::Builder SymB;
199 for (const auto *S : It->getValue().Symbols)
200 SymB.insert(*S);
201 IF.Symbols = std::move(SymB).build();
202
203 RefSlab::Builder RefB;
204 for (const auto *Ref : It->getValue().Refs) {
205 auto SID = RefToSymID.lookup(Ref);
206 RefB.insert(SID, *Ref);
207 }
208 IF.Refs = std::move(RefB).build();
209
210 RelationSlab::Builder RelB;
211 for (const auto *Rel : It->getValue().Relations) {
212 RelB.insert(*Rel);
213 }
214 IF.Relations = std::move(RelB).build();
215 // Explicit move here is needed by some compilers.
216 return std::move(IF);
217 }
218
indexMainDecls(ParsedAST & AST,bool CollectMainFileRefs)219 SlabTuple indexMainDecls(ParsedAST &AST, bool CollectMainFileRefs) {
220 return indexSymbols(
221 AST.getASTContext(), AST.getPreprocessorPtr(),
222 AST.getLocalTopLevelDecls(), &AST.getMacros(), AST.getCanonicalIncludes(),
223 /*IsIndexMainAST=*/true, AST.version(), CollectMainFileRefs);
224 }
225
indexHeaderSymbols(llvm::StringRef Version,ASTContext & AST,std::shared_ptr<Preprocessor> PP,const CanonicalIncludes & Includes)226 SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST,
227 std::shared_ptr<Preprocessor> PP,
228 const CanonicalIncludes &Includes) {
229 std::vector<Decl *> DeclsToIndex(
230 AST.getTranslationUnitDecl()->decls().begin(),
231 AST.getTranslationUnitDecl()->decls().end());
232 return indexSymbols(AST, std::move(PP), DeclsToIndex,
233 /*MainFileMacros=*/nullptr, Includes,
234 /*IsIndexMainAST=*/false, Version,
235 /*CollectMainFileRefs=*/false);
236 }
237
update(llvm::StringRef Key,std::unique_ptr<SymbolSlab> Symbols,std::unique_ptr<RefSlab> Refs,std::unique_ptr<RelationSlab> Relations,bool CountReferences)238 void FileSymbols::update(llvm::StringRef Key,
239 std::unique_ptr<SymbolSlab> Symbols,
240 std::unique_ptr<RefSlab> Refs,
241 std::unique_ptr<RelationSlab> Relations,
242 bool CountReferences) {
243 std::lock_guard<std::mutex> Lock(Mutex);
244 ++Version;
245 if (!Symbols)
246 SymbolsSnapshot.erase(Key);
247 else
248 SymbolsSnapshot[Key] = std::move(Symbols);
249 if (!Refs) {
250 RefsSnapshot.erase(Key);
251 } else {
252 RefSlabAndCountReferences Item;
253 Item.CountReferences = CountReferences;
254 Item.Slab = std::move(Refs);
255 RefsSnapshot[Key] = std::move(Item);
256 }
257 if (!Relations)
258 RelationsSnapshot.erase(Key);
259 else
260 RelationsSnapshot[Key] = std::move(Relations);
261 }
262
263 std::unique_ptr<SymbolIndex>
buildIndex(IndexType Type,DuplicateHandling DuplicateHandle,size_t * Version)264 FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle,
265 size_t *Version) {
266 std::vector<std::shared_ptr<SymbolSlab>> SymbolSlabs;
267 std::vector<std::shared_ptr<RefSlab>> RefSlabs;
268 std::vector<std::shared_ptr<RelationSlab>> RelationSlabs;
269 std::vector<RefSlab *> MainFileRefs;
270 {
271 std::lock_guard<std::mutex> Lock(Mutex);
272 for (const auto &FileAndSymbols : SymbolsSnapshot)
273 SymbolSlabs.push_back(FileAndSymbols.second);
274 for (const auto &FileAndRefs : RefsSnapshot) {
275 RefSlabs.push_back(FileAndRefs.second.Slab);
276 if (FileAndRefs.second.CountReferences)
277 MainFileRefs.push_back(RefSlabs.back().get());
278 }
279 for (const auto &FileAndRelations : RelationsSnapshot)
280 RelationSlabs.push_back(FileAndRelations.second);
281
282 if (Version)
283 *Version = this->Version;
284 }
285 std::vector<const Symbol *> AllSymbols;
286 std::vector<Symbol> SymsStorage;
287 switch (DuplicateHandle) {
288 case DuplicateHandling::Merge: {
289 llvm::DenseMap<SymbolID, Symbol> Merged;
290 for (const auto &Slab : SymbolSlabs) {
291 for (const auto &Sym : *Slab) {
292 assert(Sym.References == 0 &&
293 "Symbol with non-zero references sent to FileSymbols");
294 auto I = Merged.try_emplace(Sym.ID, Sym);
295 if (!I.second)
296 I.first->second = mergeSymbol(I.first->second, Sym);
297 }
298 }
299 for (const RefSlab *Refs : MainFileRefs)
300 for (const auto &Sym : *Refs) {
301 auto It = Merged.find(Sym.first);
302 // This might happen while background-index is still running.
303 if (It == Merged.end())
304 continue;
305 It->getSecond().References += Sym.second.size();
306 }
307 SymsStorage.reserve(Merged.size());
308 for (auto &Sym : Merged) {
309 SymsStorage.push_back(std::move(Sym.second));
310 AllSymbols.push_back(&SymsStorage.back());
311 }
312 break;
313 }
314 case DuplicateHandling::PickOne: {
315 llvm::DenseSet<SymbolID> AddedSymbols;
316 for (const auto &Slab : SymbolSlabs)
317 for (const auto &Sym : *Slab) {
318 assert(Sym.References == 0 &&
319 "Symbol with non-zero references sent to FileSymbols");
320 if (AddedSymbols.insert(Sym.ID).second)
321 AllSymbols.push_back(&Sym);
322 }
323 break;
324 }
325 }
326
327 std::vector<Ref> RefsStorage; // Contiguous ranges for each SymbolID.
328 llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> AllRefs;
329 {
330 llvm::DenseMap<SymbolID, llvm::SmallVector<Ref, 4>> MergedRefs;
331 size_t Count = 0;
332 for (const auto &RefSlab : RefSlabs)
333 for (const auto &Sym : *RefSlab) {
334 MergedRefs[Sym.first].append(Sym.second.begin(), Sym.second.end());
335 Count += Sym.second.size();
336 }
337 RefsStorage.reserve(Count);
338 AllRefs.reserve(MergedRefs.size());
339 for (auto &Sym : MergedRefs) {
340 auto &SymRefs = Sym.second;
341 // Sorting isn't required, but yields more stable results over rebuilds.
342 llvm::sort(SymRefs);
343 llvm::copy(SymRefs, back_inserter(RefsStorage));
344 AllRefs.try_emplace(
345 Sym.first,
346 llvm::ArrayRef<Ref>(&RefsStorage[RefsStorage.size() - SymRefs.size()],
347 SymRefs.size()));
348 }
349 }
350
351 std::vector<Relation> AllRelations;
352 for (const auto &RelationSlab : RelationSlabs) {
353 for (const auto &R : *RelationSlab)
354 AllRelations.push_back(R);
355 }
356 // Sort relations and remove duplicates that could arise due to
357 // relations being stored in both the shards containing their
358 // subject and object.
359 llvm::sort(AllRelations);
360 AllRelations.erase(std::unique(AllRelations.begin(), AllRelations.end()),
361 AllRelations.end());
362
363 size_t StorageSize =
364 RefsStorage.size() * sizeof(Ref) + SymsStorage.size() * sizeof(Symbol);
365 for (const auto &Slab : SymbolSlabs)
366 StorageSize += Slab->bytes();
367 for (const auto &RefSlab : RefSlabs)
368 StorageSize += RefSlab->bytes();
369
370 // Index must keep the slabs and contiguous ranges alive.
371 switch (Type) {
372 case IndexType::Light:
373 return std::make_unique<MemIndex>(
374 llvm::make_pointee_range(AllSymbols), std::move(AllRefs),
375 std::move(AllRelations),
376 std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),
377 std::move(RefsStorage), std::move(SymsStorage)),
378 StorageSize);
379 case IndexType::Heavy:
380 return std::make_unique<dex::Dex>(
381 llvm::make_pointee_range(AllSymbols), std::move(AllRefs),
382 std::move(AllRelations),
383 std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),
384 std::move(RefsStorage), std::move(SymsStorage)),
385 StorageSize);
386 }
387 llvm_unreachable("Unknown clangd::IndexType");
388 }
389
profile(MemoryTree & MT) const390 void FileSymbols::profile(MemoryTree &MT) const {
391 std::lock_guard<std::mutex> Lock(Mutex);
392 for (const auto &SymSlab : SymbolsSnapshot) {
393 MT.detail(SymSlab.first())
394 .child("symbols")
395 .addUsage(SymSlab.second->bytes());
396 }
397 for (const auto &RefSlab : RefsSnapshot) {
398 MT.detail(RefSlab.first())
399 .child("references")
400 .addUsage(RefSlab.second.Slab->bytes());
401 }
402 for (const auto &RelSlab : RelationsSnapshot) {
403 MT.detail(RelSlab.first())
404 .child("relations")
405 .addUsage(RelSlab.second->bytes());
406 }
407 }
408
FileIndex(bool UseDex,bool CollectMainFileRefs)409 FileIndex::FileIndex(bool UseDex, bool CollectMainFileRefs)
410 : MergedIndex(&MainFileIndex, &PreambleIndex), UseDex(UseDex),
411 CollectMainFileRefs(CollectMainFileRefs),
412 PreambleIndex(std::make_unique<MemIndex>()),
413 MainFileIndex(std::make_unique<MemIndex>()) {}
414
updatePreamble(PathRef Path,llvm::StringRef Version,ASTContext & AST,std::shared_ptr<Preprocessor> PP,const CanonicalIncludes & Includes)415 void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,
416 ASTContext &AST,
417 std::shared_ptr<Preprocessor> PP,
418 const CanonicalIncludes &Includes) {
419 IndexFileIn IF;
420 std::tie(IF.Symbols, std::ignore, IF.Relations) =
421 indexHeaderSymbols(Version, AST, std::move(PP), Includes);
422 FileShardedIndex ShardedIndex(std::move(IF));
423 for (auto Uri : ShardedIndex.getAllSources()) {
424 auto IF = ShardedIndex.getShard(Uri);
425 // We are using the key received from ShardedIndex, so it should always
426 // exist.
427 assert(IF);
428 PreambleSymbols.update(
429 Uri, std::make_unique<SymbolSlab>(std::move(*IF->Symbols)),
430 std::make_unique<RefSlab>(),
431 std::make_unique<RelationSlab>(std::move(*IF->Relations)),
432 /*CountReferences=*/false);
433 }
434 size_t IndexVersion = 0;
435 auto NewIndex =
436 PreambleSymbols.buildIndex(UseDex ? IndexType::Heavy : IndexType::Light,
437 DuplicateHandling::PickOne, &IndexVersion);
438 {
439 std::lock_guard<std::mutex> Lock(UpdateIndexMu);
440 if (IndexVersion <= PreambleIndexVersion) {
441 // We lost the race, some other thread built a later version.
442 return;
443 }
444 PreambleIndexVersion = IndexVersion;
445 PreambleIndex.reset(std::move(NewIndex));
446 vlog(
447 "Build dynamic index for header symbols with estimated memory usage of "
448 "{0} bytes",
449 PreambleIndex.estimateMemoryUsage());
450 }
451 }
452
updateMain(PathRef Path,ParsedAST & AST)453 void FileIndex::updateMain(PathRef Path, ParsedAST &AST) {
454 auto Contents = indexMainDecls(AST, CollectMainFileRefs);
455 MainFileSymbols.update(
456 Path, std::make_unique<SymbolSlab>(std::move(std::get<0>(Contents))),
457 std::make_unique<RefSlab>(std::move(std::get<1>(Contents))),
458 std::make_unique<RelationSlab>(std::move(std::get<2>(Contents))),
459 /*CountReferences=*/true);
460 size_t IndexVersion = 0;
461 auto NewIndex = MainFileSymbols.buildIndex(
462 IndexType::Light, DuplicateHandling::Merge, &IndexVersion);
463 {
464 std::lock_guard<std::mutex> Lock(UpdateIndexMu);
465 if (IndexVersion <= MainIndexVersion) {
466 // We lost the race, some other thread built a later version.
467 return;
468 }
469 MainIndexVersion = IndexVersion;
470 MainFileIndex.reset(std::move(NewIndex));
471 vlog(
472 "Build dynamic index for main-file symbols with estimated memory usage "
473 "of {0} bytes",
474 MainFileIndex.estimateMemoryUsage());
475 }
476 }
477
profile(MemoryTree & MT) const478 void FileIndex::profile(MemoryTree &MT) const {
479 PreambleSymbols.profile(MT.child("preamble").child("slabs"));
480 MT.child("preamble")
481 .child("index")
482 .addUsage(PreambleIndex.estimateMemoryUsage());
483 MainFileSymbols.profile(MT.child("main_file").child("slabs"));
484 MT.child("main_file")
485 .child("index")
486 .addUsage(MainFileIndex.estimateMemoryUsage());
487 }
488 } // namespace clangd
489 } // namespace clang
490