//===--- FileIndex.cpp - Indexes for files. ------------------------ C++-*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "FileIndex.h" #include "CollectMacros.h" #include "ParsedAST.h" #include "SymbolCollector.h" #include "index/CanonicalIncludes.h" #include "index/Index.h" #include "index/MemIndex.h" #include "index/Merge.h" #include "index/Ref.h" #include "index/Relation.h" #include "index/Serialization.h" #include "index/Symbol.h" #include "index/SymbolID.h" #include "index/SymbolOrigin.h" #include "index/dex/Dex.h" #include "support/Logger.h" #include "support/MemoryTree.h" #include "support/Path.h" #include "clang/AST/ASTContext.h" #include "clang/Index/IndexingAction.h" #include "clang/Index/IndexingOptions.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" #include #include #include #include #include namespace clang { namespace clangd { namespace { SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr PP, llvm::ArrayRef DeclsToIndex, const MainFileMacros *MacroRefsToIndex, const CanonicalIncludes &Includes, bool IsIndexMainAST, llvm::StringRef Version, bool CollectMainFileRefs) { SymbolCollector::Options CollectorOpts; CollectorOpts.CollectIncludePath = true; CollectorOpts.Includes = &Includes; CollectorOpts.CountReferences = false; CollectorOpts.Origin = SymbolOrigin::Dynamic; CollectorOpts.CollectMainFileRefs = CollectMainFileRefs; index::IndexingOptions IndexOpts; // We only need declarations, because we don't count references. IndexOpts.SystemSymbolFilter = index::IndexingOptions::SystemSymbolFilterKind::DeclarationsOnly; IndexOpts.IndexFunctionLocals = false; if (IsIndexMainAST) { // We only collect refs when indexing main AST. CollectorOpts.RefFilter = RefKind::All; // Comments for main file can always be obtained from sema, do not store // them in the index. CollectorOpts.StoreAllDocumentation = false; } else { IndexOpts.IndexMacrosInPreprocessor = true; CollectorOpts.CollectMacro = true; CollectorOpts.StoreAllDocumentation = true; } SymbolCollector Collector(std::move(CollectorOpts)); Collector.setPreprocessor(PP); if (MacroRefsToIndex) Collector.handleMacros(*MacroRefsToIndex); index::indexTopLevelDecls(AST, *PP, DeclsToIndex, Collector, IndexOpts); const auto &SM = AST.getSourceManager(); const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID()); std::string FileName = std::string(MainFileEntry ? MainFileEntry->getName() : ""); auto Syms = Collector.takeSymbols(); auto Refs = Collector.takeRefs(); auto Relations = Collector.takeRelations(); vlog("indexed {0} AST for {1} version {2}:\n" " symbol slab: {3} symbols, {4} bytes\n" " ref slab: {5} symbols, {6} refs, {7} bytes\n" " relations slab: {8} relations, {9} bytes", IsIndexMainAST ? "file" : "preamble", FileName, Version, Syms.size(), Syms.bytes(), Refs.size(), Refs.numRefs(), Refs.bytes(), Relations.size(), Relations.bytes()); return std::make_tuple(std::move(Syms), std::move(Refs), std::move(Relations)); } // We keep only the node "U" and its edges. Any node other than "U" will be // empty in the resultant graph. IncludeGraph getSubGraph(llvm::StringRef URI, const IncludeGraph &FullGraph) { IncludeGraph IG; auto Entry = IG.try_emplace(URI).first; auto &Node = Entry->getValue(); Node = FullGraph.lookup(Entry->getKey()); Node.URI = Entry->getKey(); // URIs inside nodes must point into the keys of the same IncludeGraph. for (auto &Include : Node.DirectIncludes) { auto I = IG.try_emplace(Include).first; I->getValue().URI = I->getKey(); Include = I->getKey(); } return IG; } } // namespace FileShardedIndex::FileShardedIndex(IndexFileIn Input) : Index(std::move(Input)) { // Used to build RelationSlabs. llvm::DenseMap SymbolIDToFile; // Attribute each Symbol to both their declaration and definition locations. if (Index.Symbols) { for (const auto &S : *Index.Symbols) { auto It = Shards.try_emplace(S.CanonicalDeclaration.FileURI); It.first->getValue().Symbols.insert(&S); SymbolIDToFile[S.ID] = &It.first->getValue(); // Only bother if definition file is different than declaration file. if (S.Definition && S.Definition.FileURI != S.CanonicalDeclaration.FileURI) { auto It = Shards.try_emplace(S.Definition.FileURI); It.first->getValue().Symbols.insert(&S); } } } // Attribute references into each file they occured in. if (Index.Refs) { for (const auto &SymRefs : *Index.Refs) { for (const auto &R : SymRefs.second) { const auto It = Shards.try_emplace(R.Location.FileURI); It.first->getValue().Refs.insert(&R); RefToSymID[&R] = SymRefs.first; } } } // The Subject and/or Object shards might be part of multiple TUs. In // such cases there will be a race and the last TU to write the shard // will win and all the other relations will be lost. To avoid this, // we store relations in both shards. A race might still happen if the // same translation unit produces different relations under different // configurations, but that's something clangd doesn't handle in general. if (Index.Relations) { for (const auto &R : *Index.Relations) { // FIXME: RelationSlab shouldn't contain dangling relations. FileShard *SubjectFile = SymbolIDToFile.lookup(R.Subject); FileShard *ObjectFile = SymbolIDToFile.lookup(R.Object); if (SubjectFile) SubjectFile->Relations.insert(&R); if (ObjectFile && ObjectFile != SubjectFile) ObjectFile->Relations.insert(&R); } } // Store only the direct includes of a file in a shard. if (Index.Sources) { const auto &FullGraph = *Index.Sources; for (const auto &It : FullGraph) { auto ShardIt = Shards.try_emplace(It.first()); ShardIt.first->getValue().IG = getSubGraph(It.first(), FullGraph); } } } std::vector FileShardedIndex::getAllSources() const { // It should be enough to construct a vector with {Shards.keys().begin(), // Shards.keys().end()} but MSVC fails to compile that. std::vector Result; Result.reserve(Shards.size()); for (auto Key : Shards.keys()) Result.push_back(Key); return Result; } llvm::Optional FileShardedIndex::getShard(llvm::StringRef Uri) const { auto It = Shards.find(Uri); if (It == Shards.end()) return llvm::None; IndexFileIn IF; IF.Sources = It->getValue().IG; IF.Cmd = Index.Cmd; SymbolSlab::Builder SymB; for (const auto *S : It->getValue().Symbols) SymB.insert(*S); IF.Symbols = std::move(SymB).build(); RefSlab::Builder RefB; for (const auto *Ref : It->getValue().Refs) { auto SID = RefToSymID.lookup(Ref); RefB.insert(SID, *Ref); } IF.Refs = std::move(RefB).build(); RelationSlab::Builder RelB; for (const auto *Rel : It->getValue().Relations) { RelB.insert(*Rel); } IF.Relations = std::move(RelB).build(); // Explicit move here is needed by some compilers. return std::move(IF); } SlabTuple indexMainDecls(ParsedAST &AST, bool CollectMainFileRefs) { return indexSymbols( AST.getASTContext(), AST.getPreprocessorPtr(), AST.getLocalTopLevelDecls(), &AST.getMacros(), AST.getCanonicalIncludes(), /*IsIndexMainAST=*/true, AST.version(), CollectMainFileRefs); } SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST, std::shared_ptr PP, const CanonicalIncludes &Includes) { std::vector DeclsToIndex( AST.getTranslationUnitDecl()->decls().begin(), AST.getTranslationUnitDecl()->decls().end()); return indexSymbols(AST, std::move(PP), DeclsToIndex, /*MainFileMacros=*/nullptr, Includes, /*IsIndexMainAST=*/false, Version, /*CollectMainFileRefs=*/false); } void FileSymbols::update(llvm::StringRef Key, std::unique_ptr Symbols, std::unique_ptr Refs, std::unique_ptr Relations, bool CountReferences) { std::lock_guard Lock(Mutex); ++Version; if (!Symbols) SymbolsSnapshot.erase(Key); else SymbolsSnapshot[Key] = std::move(Symbols); if (!Refs) { RefsSnapshot.erase(Key); } else { RefSlabAndCountReferences Item; Item.CountReferences = CountReferences; Item.Slab = std::move(Refs); RefsSnapshot[Key] = std::move(Item); } if (!Relations) RelationsSnapshot.erase(Key); else RelationsSnapshot[Key] = std::move(Relations); } std::unique_ptr FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle, size_t *Version) { std::vector> SymbolSlabs; std::vector> RefSlabs; std::vector> RelationSlabs; std::vector MainFileRefs; { std::lock_guard Lock(Mutex); for (const auto &FileAndSymbols : SymbolsSnapshot) SymbolSlabs.push_back(FileAndSymbols.second); for (const auto &FileAndRefs : RefsSnapshot) { RefSlabs.push_back(FileAndRefs.second.Slab); if (FileAndRefs.second.CountReferences) MainFileRefs.push_back(RefSlabs.back().get()); } for (const auto &FileAndRelations : RelationsSnapshot) RelationSlabs.push_back(FileAndRelations.second); if (Version) *Version = this->Version; } std::vector AllSymbols; std::vector SymsStorage; switch (DuplicateHandle) { case DuplicateHandling::Merge: { llvm::DenseMap Merged; for (const auto &Slab : SymbolSlabs) { for (const auto &Sym : *Slab) { assert(Sym.References == 0 && "Symbol with non-zero references sent to FileSymbols"); auto I = Merged.try_emplace(Sym.ID, Sym); if (!I.second) I.first->second = mergeSymbol(I.first->second, Sym); } } for (const RefSlab *Refs : MainFileRefs) for (const auto &Sym : *Refs) { auto It = Merged.find(Sym.first); // This might happen while background-index is still running. if (It == Merged.end()) continue; It->getSecond().References += Sym.second.size(); } SymsStorage.reserve(Merged.size()); for (auto &Sym : Merged) { SymsStorage.push_back(std::move(Sym.second)); AllSymbols.push_back(&SymsStorage.back()); } break; } case DuplicateHandling::PickOne: { llvm::DenseSet AddedSymbols; for (const auto &Slab : SymbolSlabs) for (const auto &Sym : *Slab) { assert(Sym.References == 0 && "Symbol with non-zero references sent to FileSymbols"); if (AddedSymbols.insert(Sym.ID).second) AllSymbols.push_back(&Sym); } break; } } std::vector RefsStorage; // Contiguous ranges for each SymbolID. llvm::DenseMap> AllRefs; { llvm::DenseMap> MergedRefs; size_t Count = 0; for (const auto &RefSlab : RefSlabs) for (const auto &Sym : *RefSlab) { MergedRefs[Sym.first].append(Sym.second.begin(), Sym.second.end()); Count += Sym.second.size(); } RefsStorage.reserve(Count); AllRefs.reserve(MergedRefs.size()); for (auto &Sym : MergedRefs) { auto &SymRefs = Sym.second; // Sorting isn't required, but yields more stable results over rebuilds. llvm::sort(SymRefs); llvm::copy(SymRefs, back_inserter(RefsStorage)); AllRefs.try_emplace( Sym.first, llvm::ArrayRef(&RefsStorage[RefsStorage.size() - SymRefs.size()], SymRefs.size())); } } std::vector AllRelations; for (const auto &RelationSlab : RelationSlabs) { for (const auto &R : *RelationSlab) AllRelations.push_back(R); } // Sort relations and remove duplicates that could arise due to // relations being stored in both the shards containing their // subject and object. llvm::sort(AllRelations); AllRelations.erase(std::unique(AllRelations.begin(), AllRelations.end()), AllRelations.end()); size_t StorageSize = RefsStorage.size() * sizeof(Ref) + SymsStorage.size() * sizeof(Symbol); for (const auto &Slab : SymbolSlabs) StorageSize += Slab->bytes(); for (const auto &RefSlab : RefSlabs) StorageSize += RefSlab->bytes(); // Index must keep the slabs and contiguous ranges alive. switch (Type) { case IndexType::Light: return std::make_unique( llvm::make_pointee_range(AllSymbols), std::move(AllRefs), std::move(AllRelations), std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs), std::move(RefsStorage), std::move(SymsStorage)), StorageSize); case IndexType::Heavy: return std::make_unique( llvm::make_pointee_range(AllSymbols), std::move(AllRefs), std::move(AllRelations), std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs), std::move(RefsStorage), std::move(SymsStorage)), StorageSize); } llvm_unreachable("Unknown clangd::IndexType"); } void FileSymbols::profile(MemoryTree &MT) const { std::lock_guard Lock(Mutex); for (const auto &SymSlab : SymbolsSnapshot) { MT.detail(SymSlab.first()) .child("symbols") .addUsage(SymSlab.second->bytes()); } for (const auto &RefSlab : RefsSnapshot) { MT.detail(RefSlab.first()) .child("references") .addUsage(RefSlab.second.Slab->bytes()); } for (const auto &RelSlab : RelationsSnapshot) { MT.detail(RelSlab.first()) .child("relations") .addUsage(RelSlab.second->bytes()); } } FileIndex::FileIndex(bool UseDex, bool CollectMainFileRefs) : MergedIndex(&MainFileIndex, &PreambleIndex), UseDex(UseDex), CollectMainFileRefs(CollectMainFileRefs), PreambleIndex(std::make_unique()), MainFileIndex(std::make_unique()) {} void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version, ASTContext &AST, std::shared_ptr PP, const CanonicalIncludes &Includes) { IndexFileIn IF; std::tie(IF.Symbols, std::ignore, IF.Relations) = indexHeaderSymbols(Version, AST, std::move(PP), Includes); FileShardedIndex ShardedIndex(std::move(IF)); for (auto Uri : ShardedIndex.getAllSources()) { auto IF = ShardedIndex.getShard(Uri); // We are using the key received from ShardedIndex, so it should always // exist. assert(IF); PreambleSymbols.update( Uri, std::make_unique(std::move(*IF->Symbols)), std::make_unique(), std::make_unique(std::move(*IF->Relations)), /*CountReferences=*/false); } size_t IndexVersion = 0; auto NewIndex = PreambleSymbols.buildIndex(UseDex ? IndexType::Heavy : IndexType::Light, DuplicateHandling::PickOne, &IndexVersion); { std::lock_guard Lock(UpdateIndexMu); if (IndexVersion <= PreambleIndexVersion) { // We lost the race, some other thread built a later version. return; } PreambleIndexVersion = IndexVersion; PreambleIndex.reset(std::move(NewIndex)); vlog( "Build dynamic index for header symbols with estimated memory usage of " "{0} bytes", PreambleIndex.estimateMemoryUsage()); } } void FileIndex::updateMain(PathRef Path, ParsedAST &AST) { auto Contents = indexMainDecls(AST, CollectMainFileRefs); MainFileSymbols.update( Path, std::make_unique(std::move(std::get<0>(Contents))), std::make_unique(std::move(std::get<1>(Contents))), std::make_unique(std::move(std::get<2>(Contents))), /*CountReferences=*/true); size_t IndexVersion = 0; auto NewIndex = MainFileSymbols.buildIndex( IndexType::Light, DuplicateHandling::Merge, &IndexVersion); { std::lock_guard Lock(UpdateIndexMu); if (IndexVersion <= MainIndexVersion) { // We lost the race, some other thread built a later version. return; } MainIndexVersion = IndexVersion; MainFileIndex.reset(std::move(NewIndex)); vlog( "Build dynamic index for main-file symbols with estimated memory usage " "of {0} bytes", MainFileIndex.estimateMemoryUsage()); } } void FileIndex::profile(MemoryTree &MT) const { PreambleSymbols.profile(MT.child("preamble").child("slabs")); MT.child("preamble") .child("index") .addUsage(PreambleIndex.estimateMemoryUsage()); MainFileSymbols.profile(MT.child("main_file").child("slabs")); MT.child("main_file") .child("index") .addUsage(MainFileIndex.estimateMemoryUsage()); } } // namespace clangd } // namespace clang