1 //===--- SymbolCollector.h ---------------------------------------*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_COLLECTOR_H 9 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_COLLECTOR_H 10 11 #include "CanonicalIncludes.h" 12 #include "CollectMacros.h" 13 #include "Index.h" 14 #include "SymbolOrigin.h" 15 #include "clang/AST/ASTContext.h" 16 #include "clang/AST/Decl.h" 17 #include "clang/Basic/SourceLocation.h" 18 #include "clang/Basic/SourceManager.h" 19 #include "clang/Index/IndexDataConsumer.h" 20 #include "clang/Index/IndexSymbol.h" 21 #include "clang/Sema/CodeCompleteConsumer.h" 22 #include "llvm/ADT/DenseMap.h" 23 #include "llvm/Support/Regex.h" 24 #include <functional> 25 26 namespace clang { 27 namespace clangd { 28 29 /// Collect declarations (symbols) from an AST. 30 /// It collects most declarations except: 31 /// - Implicit declarations 32 /// - Anonymous declarations (anonymous enum/class/struct, etc) 33 /// - Declarations in anonymous namespaces in headers 34 /// - Local declarations (in function bodies, blocks, etc) 35 /// - Template specializations 36 /// - Library-specific private declarations (e.g. private declaration generated 37 /// by protobuf compiler) 38 /// 39 /// References to main-file symbols are not collected. 40 /// 41 /// See also shouldCollectSymbol(...). 42 /// 43 /// Clients (e.g. clangd) can use SymbolCollector together with 44 /// index::indexTopLevelDecls to retrieve all symbols when the source file is 45 /// changed. 46 class SymbolCollector : public index::IndexDataConsumer { 47 public: 48 struct Options { 49 /// When symbol paths cannot be resolved to absolute paths (e.g. files in 50 /// VFS that does not have absolute path), combine the fallback directory 51 /// with symbols' paths to get absolute paths. This must be an absolute 52 /// path. 53 std::string FallbackDir; 54 bool CollectIncludePath = false; 55 /// If set, this is used to map symbol #include path to a potentially 56 /// different #include path. 57 const CanonicalIncludes *Includes = nullptr; 58 // Populate the Symbol.References field. 59 bool CountReferences = false; 60 /// The symbol ref kinds that will be collected. 61 /// If not set, SymbolCollector will not collect refs. 62 /// Note that references of namespace decls are not collected, as they 63 /// contribute large part of the index, and they are less useful compared 64 /// with other decls. 65 RefKind RefFilter = RefKind::Unknown; 66 /// If set to true, SymbolCollector will collect all refs (from main file 67 /// and included headers); otherwise, only refs from main file will be 68 /// collected. 69 /// This flag is only meaningful when RefFilter is set. 70 bool RefsInHeaders = false; 71 // Every symbol collected will be stamped with this origin. 72 SymbolOrigin Origin = SymbolOrigin::Unknown; 73 /// Collect macros. 74 /// Note that SymbolCollector must be run with preprocessor in order to 75 /// collect macros. For example, `indexTopLevelDecls` will not index any 76 /// macro even if this is true. 77 bool CollectMacro = false; 78 /// Collect symbols local to main-files, such as static functions 79 /// and symbols inside an anonymous namespace. 80 bool CollectMainFileSymbols = true; 81 /// Collect references to main-file symbols. 82 bool CollectMainFileRefs = false; 83 /// If set to true, SymbolCollector will collect doc for all symbols. 84 /// Note that documents of symbols being indexed for completion will always 85 /// be collected regardless of this option. 86 bool StoreAllDocumentation = false; 87 /// If this is set, only collect symbols/references from a file if 88 /// `FileFilter(SM, FID)` is true. If not set, all files are indexed. 89 std::function<bool(const SourceManager &, FileID)> FileFilter = nullptr; 90 }; 91 92 SymbolCollector(Options Opts); 93 94 /// Returns true is \p ND should be collected. 95 static bool shouldCollectSymbol(const NamedDecl &ND, const ASTContext &ASTCtx, 96 const Options &Opts, bool IsMainFileSymbol); 97 98 void initialize(ASTContext &Ctx) override; 99 setPreprocessor(std::shared_ptr<Preprocessor> PP)100 void setPreprocessor(std::shared_ptr<Preprocessor> PP) override { 101 this->PP = std::move(PP); 102 } 103 104 bool 105 handleDeclOccurrence(const Decl *D, index::SymbolRoleSet Roles, 106 ArrayRef<index::SymbolRelation> Relations, 107 SourceLocation Loc, 108 index::IndexDataConsumer::ASTNodeInfo ASTNode) override; 109 110 bool handleMacroOccurrence(const IdentifierInfo *Name, const MacroInfo *MI, 111 index::SymbolRoleSet Roles, 112 SourceLocation Loc) override; 113 114 void handleMacros(const MainFileMacros &MacroRefsToIndex); 115 takeSymbols()116 SymbolSlab takeSymbols() { return std::move(Symbols).build(); } takeRefs()117 RefSlab takeRefs() { return std::move(Refs).build(); } takeRelations()118 RelationSlab takeRelations() { return std::move(Relations).build(); } 119 120 /// Returns true if we are interested in references and declarations from \p 121 /// FID. If this function return false, bodies of functions inside those files 122 /// will be skipped to decrease indexing time. 123 bool shouldIndexFile(FileID FID); 124 125 void finish() override; 126 127 private: 128 const Symbol *addDeclaration(const NamedDecl &, SymbolID, 129 bool IsMainFileSymbol); 130 void addDefinition(const NamedDecl &, const Symbol &DeclSymbol); 131 void processRelations(const NamedDecl &ND, const SymbolID &ID, 132 ArrayRef<index::SymbolRelation> Relations); 133 134 llvm::Optional<std::string> getIncludeHeader(const Symbol &S, FileID); 135 bool isSelfContainedHeader(FileID); 136 // Heuristically headers that only want to be included via an umbrella. 137 static bool isDontIncludeMeHeader(llvm::StringRef); 138 139 // All Symbols collected from the AST. 140 SymbolSlab::Builder Symbols; 141 // File IDs for Symbol.IncludeHeaders. 142 // The final spelling is calculated in finish(). 143 llvm::DenseMap<SymbolID, FileID> IncludeFiles; 144 void setIncludeLocation(const Symbol &S, SourceLocation); 145 // Indexed macros, to be erased if they turned out to be include guards. 146 llvm::DenseSet<const IdentifierInfo *> IndexedMacros; 147 // All refs collected from the AST. It includes: 148 // 1) symbols declared in the preamble and referenced from the main file ( 149 // which is not a header), or 150 // 2) symbols declared and referenced from the main file (which is a header) 151 RefSlab::Builder Refs; 152 // All relations collected from the AST. 153 RelationSlab::Builder Relations; 154 ASTContext *ASTCtx; 155 std::shared_ptr<Preprocessor> PP; 156 std::shared_ptr<GlobalCodeCompletionAllocator> CompletionAllocator; 157 std::unique_ptr<CodeCompletionTUInfo> CompletionTUInfo; 158 Options Opts; 159 struct SymbolRef { 160 SourceLocation Loc; 161 index::SymbolRoleSet Roles; 162 const Decl *Container; 163 }; 164 // Symbols referenced from the current TU, flushed on finish(). 165 llvm::DenseSet<const NamedDecl *> ReferencedDecls; 166 llvm::DenseSet<const IdentifierInfo *> ReferencedMacros; 167 llvm::DenseMap<const NamedDecl *, std::vector<SymbolRef>> DeclRefs; 168 llvm::DenseMap<SymbolID, std::vector<SymbolRef>> MacroRefs; 169 // Maps canonical declaration provided by clang to canonical declaration for 170 // an index symbol, if clangd prefers a different declaration than that 171 // provided by clang. For example, friend declaration might be considered 172 // canonical by clang but should not be considered canonical in the index 173 // unless it's a definition. 174 llvm::DenseMap<const Decl *, const Decl *> CanonicalDecls; 175 // Cache whether to index a file or not. 176 llvm::DenseMap<FileID, bool> FilesToIndexCache; 177 llvm::DenseMap<FileID, bool> HeaderIsSelfContainedCache; 178 }; 179 180 } // namespace clangd 181 } // namespace clang 182 183 #endif 184