1 //===--- FileIndex.h - Index for files. ---------------------------- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // FileIndex implements SymbolIndex for symbols from a set of files. Symbols are 10 // maintained at source-file granularity (e.g. with ASTs), and files can be 11 // updated dynamically. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H 16 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H 17 18 #include "Headers.h" 19 #include "Index.h" 20 #include "MemIndex.h" 21 #include "Merge.h" 22 #include "index/CanonicalIncludes.h" 23 #include "index/Ref.h" 24 #include "index/Relation.h" 25 #include "index/Serialization.h" 26 #include "index/Symbol.h" 27 #include "support/MemoryTree.h" 28 #include "support/Path.h" 29 #include "clang/Lex/Preprocessor.h" 30 #include "clang/Tooling/CompilationDatabase.h" 31 #include "llvm/ADT/DenseSet.h" 32 #include "llvm/ADT/Optional.h" 33 #include "llvm/ADT/StringMap.h" 34 #include "llvm/ADT/StringRef.h" 35 #include <memory> 36 #include <vector> 37 38 namespace clang { 39 class ASTContext; 40 namespace clangd { 41 class ParsedAST; 42 43 /// Select between in-memory index implementations, which have tradeoffs. 44 enum class IndexType { 45 // MemIndex is trivially cheap to build, but has poor query performance. 46 Light, 47 // Dex is relatively expensive to build and uses more memory, but is fast. 48 Heavy, 49 }; 50 51 /// How to handle duplicated symbols across multiple files. 52 enum class DuplicateHandling { 53 // Pick a random symbol. Less accurate but faster. 54 PickOne, 55 // Merge symbols. More accurate but slower. 56 Merge, 57 }; 58 59 /// A container of slabs associated with a key. It can be updated at key 60 /// granularity, replacing all slabs belonging to a key with a new set. Keys are 61 /// usually file paths/uris. 62 /// 63 /// This implements snapshot semantics. Each update will create a new snapshot 64 /// for all slabs of the Key. Snapshots are managed with shared pointers that 65 /// are shared between this class and the users. For each key, this class only 66 /// stores a pointer pointing to the newest snapshot, and an outdated snapshot 67 /// is deleted by the last owner of the snapshot, either this class or the 68 /// symbol index. 69 /// 70 /// The snapshot semantics keeps critical sections minimal since we only need 71 /// locking when we swap or obtain references to snapshots. 72 class FileSymbols { 73 public: 74 /// Updates all slabs associated with the \p Key. 75 /// If either is nullptr, corresponding data for \p Key will be removed. 76 /// If CountReferences is true, \p Refs will be used for counting references 77 /// during merging. 78 void update(llvm::StringRef Key, std::unique_ptr<SymbolSlab> Symbols, 79 std::unique_ptr<RefSlab> Refs, 80 std::unique_ptr<RelationSlab> Relations, bool CountReferences); 81 82 /// The index keeps the slabs alive. 83 /// Will count Symbol::References based on number of references in the main 84 /// files, while building the index with DuplicateHandling::Merge option. 85 /// Version is populated with an increasing sequence counter. 86 std::unique_ptr<SymbolIndex> 87 buildIndex(IndexType, 88 DuplicateHandling DuplicateHandle = DuplicateHandling::PickOne, 89 size_t *Version = nullptr); 90 91 void profile(MemoryTree &MT) const; 92 93 private: 94 struct RefSlabAndCountReferences { 95 std::shared_ptr<RefSlab> Slab; 96 bool CountReferences = false; 97 }; 98 mutable std::mutex Mutex; 99 100 size_t Version = 0; 101 llvm::StringMap<std::shared_ptr<SymbolSlab>> SymbolsSnapshot; 102 llvm::StringMap<RefSlabAndCountReferences> RefsSnapshot; 103 llvm::StringMap<std::shared_ptr<RelationSlab>> RelationsSnapshot; 104 }; 105 106 /// This manages symbols from files and an in-memory index on all symbols. 107 /// FIXME: Expose an interface to remove files that are closed. 108 class FileIndex : public MergedIndex { 109 public: 110 FileIndex(bool UseDex = true, bool CollectMainFileRefs = false); 111 112 /// Update preamble symbols of file \p Path with all declarations in \p AST 113 /// and macros in \p PP. 114 void updatePreamble(PathRef Path, llvm::StringRef Version, ASTContext &AST, 115 std::shared_ptr<Preprocessor> PP, 116 const CanonicalIncludes &Includes); 117 118 /// Update symbols and references from main file \p Path with 119 /// `indexMainDecls`. 120 void updateMain(PathRef Path, ParsedAST &AST); 121 122 void profile(MemoryTree &MT) const; 123 124 private: 125 bool UseDex; // FIXME: this should be always on. 126 bool CollectMainFileRefs; 127 128 // Contains information from each file's preamble only. Symbols and relations 129 // are sharded per declaration file to deduplicate multiple symbols and reduce 130 // memory usage. 131 // Missing information: 132 // - symbol refs (these are always "from the main file") 133 // - definition locations in the main file 134 // 135 // Note that we store only one version of a header, hence symbols appearing in 136 // different PP states will be missing. 137 FileSymbols PreambleSymbols; 138 SwapIndex PreambleIndex; 139 140 // Contains information from each file's main AST. 141 // These are updated frequently (on file change), but are relatively small. 142 // Mostly contains: 143 // - refs to symbols declared in the preamble and referenced from main 144 // - symbols declared both in the main file and the preamble 145 // (Note that symbols *only* in the main file are not indexed). 146 FileSymbols MainFileSymbols; 147 SwapIndex MainFileIndex; 148 149 // While both the FileIndex and SwapIndex are threadsafe, we need to track 150 // versions to ensure that we don't overwrite newer indexes with older ones. 151 std::mutex UpdateIndexMu; 152 unsigned MainIndexVersion = 0; 153 unsigned PreambleIndexVersion = 0; 154 }; 155 156 using SlabTuple = std::tuple<SymbolSlab, RefSlab, RelationSlab>; 157 158 /// Retrieves symbols and refs of local top level decls in \p AST (i.e. 159 /// `AST.getLocalTopLevelDecls()`). 160 /// Exposed to assist in unit tests. 161 SlabTuple indexMainDecls(ParsedAST &AST, bool CollectMainFileRefs = false); 162 163 /// Index declarations from \p AST and macros from \p PP that are declared in 164 /// included headers. 165 SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST, 166 std::shared_ptr<Preprocessor> PP, 167 const CanonicalIncludes &Includes); 168 169 /// Takes slabs coming from a TU (multiple files) and shards them per 170 /// declaration location. 171 struct FileShardedIndex { 172 /// \p HintPath is used to convert file URIs stored in symbols into absolute 173 /// paths. 174 explicit FileShardedIndex(IndexFileIn Input); 175 176 /// Returns uris for all files that has a shard. 177 std::vector<llvm::StringRef> getAllSources() const; 178 179 /// Generates index shard for the \p Uri. Note that this function results in 180 /// a copy of all the relevant data. 181 /// Returned index will always have Symbol/Refs/Relation Slabs set, even if 182 /// they are empty. 183 llvm::Optional<IndexFileIn> getShard(llvm::StringRef Uri) const; 184 185 private: 186 // Contains all the information that belongs to a single file. 187 struct FileShard { 188 // Either declared or defined in the file. 189 llvm::DenseSet<const Symbol *> Symbols; 190 // Reference occurs in the file. 191 llvm::DenseSet<const Ref *> Refs; 192 // Subject is declared in the file. 193 llvm::DenseSet<const Relation *> Relations; 194 // Contains edges for only the direct includes. 195 IncludeGraph IG; 196 }; 197 198 // Keeps all the information alive. 199 const IndexFileIn Index; 200 // Mapping from URIs to slab information. 201 llvm::StringMap<FileShard> Shards; 202 // Used to build RefSlabs. 203 llvm::DenseMap<const Ref *, SymbolID> RefToSymID; 204 }; 205 206 } // namespace clangd 207 } // namespace clang 208 209 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H 210