1 //===--- Symbol.h ------------------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H
10 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H
11
12 #include "SymbolID.h"
13 #include "SymbolLocation.h"
14 #include "SymbolOrigin.h"
15 #include "clang/Index/IndexSymbol.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/StringSaver.h"
18
19 namespace clang {
20 namespace clangd {
21
22 /// The class presents a C++ symbol, e.g. class, function.
23 ///
24 /// WARNING: Symbols do not own much of their underlying data - typically
25 /// strings are owned by a SymbolSlab. They should be treated as non-owning
26 /// references. Copies are shallow.
27 ///
28 /// When adding new unowned data fields to Symbol, remember to update:
29 /// - SymbolSlab::Builder in Index.cpp, to copy them to the slab's storage.
30 /// - mergeSymbol in Merge.cpp, to properly combine two Symbols.
31 ///
32 /// A fully documented symbol can be split as:
33 /// size_type std::map<k, t>::count(const K& key) const
34 /// | Return | Scope |Name| Signature |
35 /// We split up these components to allow display flexibility later.
36 struct Symbol {
37 /// The ID of the symbol.
38 SymbolID ID;
39 /// The symbol information, like symbol kind.
40 index::SymbolInfo SymInfo = index::SymbolInfo();
41 /// The unqualified name of the symbol, e.g. "bar" (for ns::bar).
42 llvm::StringRef Name;
43 /// The containing namespace. e.g. "" (global), "ns::" (top-level namespace).
44 llvm::StringRef Scope;
45 /// The location of the symbol's definition, if one was found.
46 /// This just covers the symbol name (e.g. without class/function body).
47 SymbolLocation Definition;
48 /// The location of the preferred declaration of the symbol.
49 /// This just covers the symbol name.
50 /// This may be the same as Definition.
51 ///
52 /// A C++ symbol may have multiple declarations, and we pick one to prefer.
53 /// * For classes, the canonical declaration should be the definition.
54 /// * For non-inline functions, the canonical declaration typically appears
55 /// in the ".h" file corresponding to the definition.
56 SymbolLocation CanonicalDeclaration;
57 /// The number of translation units that reference this symbol from their main
58 /// file. This number is only meaningful if aggregated in an index.
59 unsigned References = 0;
60 /// Where this symbol came from. Usually an index provides a constant value.
61 SymbolOrigin Origin = SymbolOrigin::Unknown;
62 /// A brief description of the symbol that can be appended in the completion
63 /// candidate list. For example, "(X x, Y y) const" is a function signature.
64 /// Only set when the symbol is indexed for completion.
65 llvm::StringRef Signature;
66 /// Argument list in human-readable format, will be displayed to help
67 /// disambiguate between different specializations of a template. Empty for
68 /// non-specializations. Example: "<int, bool, 3>"
69 llvm::StringRef TemplateSpecializationArgs;
70 /// What to insert when completing this symbol, after the symbol name.
71 /// This is in LSP snippet syntax (e.g. "({$0})" for a no-args function).
72 /// (When snippets are disabled, the symbol name alone is used).
73 /// Only set when the symbol is indexed for completion.
74 llvm::StringRef CompletionSnippetSuffix;
75 /// Documentation including comment for the symbol declaration.
76 llvm::StringRef Documentation;
77 /// Type when this symbol is used in an expression. (Short display form).
78 /// e.g. return type of a function, or type of a variable.
79 /// Only set when the symbol is indexed for completion.
80 llvm::StringRef ReturnType;
81
82 /// Raw representation of the OpaqueType of the symbol, used for scoring
83 /// purposes.
84 /// Only set when the symbol is indexed for completion.
85 llvm::StringRef Type;
86
87 struct IncludeHeaderWithReferences {
88 IncludeHeaderWithReferences() = default;
89
IncludeHeaderWithReferencesSymbol::IncludeHeaderWithReferences90 IncludeHeaderWithReferences(llvm::StringRef IncludeHeader,
91 unsigned References)
92 : IncludeHeader(IncludeHeader), References(References) {}
93
94 /// This can be either a URI of the header to be #include'd
95 /// for this symbol, or a literal header quoted with <> or "" that is
96 /// suitable to be included directly. When it is a URI, the exact #include
97 /// path needs to be calculated according to the URI scheme.
98 ///
99 /// Note that the include header is a canonical include for the symbol and
100 /// can be different from FileURI in the CanonicalDeclaration.
101 llvm::StringRef IncludeHeader = "";
102 /// The number of translation units that reference this symbol and include
103 /// this header. This number is only meaningful if aggregated in an index.
104 unsigned References = 0;
105 };
106 /// One Symbol can potentially be included via different headers.
107 /// - If we haven't seen a definition, this covers all declarations.
108 /// - If we have seen a definition, this covers declarations visible from
109 /// any definition.
110 /// Only set when the symbol is indexed for completion.
111 llvm::SmallVector<IncludeHeaderWithReferences, 1> IncludeHeaders;
112
113 enum SymbolFlag : uint8_t {
114 None = 0,
115 /// Whether or not this symbol is meant to be used for the code completion.
116 /// See also isIndexedForCodeCompletion().
117 /// Note that we don't store completion information (signature, snippet,
118 /// type, includes) if the symbol is not indexed for code completion.
119 IndexedForCodeCompletion = 1 << 0,
120 /// Indicates if the symbol is deprecated.
121 Deprecated = 1 << 1,
122 /// Symbol is an implementation detail.
123 ImplementationDetail = 1 << 2,
124 /// Symbol is visible to other files (not e.g. a static helper function).
125 VisibleOutsideFile = 1 << 3,
126 };
127
128 SymbolFlag Flags = SymbolFlag::None;
129 /// FIXME: also add deprecation message and fixit?
130 };
131
132 inline Symbol::SymbolFlag operator|(Symbol::SymbolFlag A,
133 Symbol::SymbolFlag B) {
134 return static_cast<Symbol::SymbolFlag>(static_cast<uint8_t>(A) |
135 static_cast<uint8_t>(B));
136 }
137 inline Symbol::SymbolFlag &operator|=(Symbol::SymbolFlag &A,
138 Symbol::SymbolFlag B) {
139 return A = A | B;
140 }
141
/// Stream-insertion operator for a Symbol. Only declared in this header; the
/// output format is determined by the out-of-line definition.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S);
/// Stream-insertion operator for a Symbol::SymbolFlag value. Only declared in
/// this header; the output format is determined by the out-of-line definition.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Symbol::SymbolFlag);
144
145 /// Invokes Callback with each StringRef& contained in the Symbol.
146 /// Useful for deduplicating backing strings.
visitStrings(Symbol & S,const Callback & CB)147 template <typename Callback> void visitStrings(Symbol &S, const Callback &CB) {
148 CB(S.Name);
149 CB(S.Scope);
150 CB(S.TemplateSpecializationArgs);
151 CB(S.Signature);
152 CB(S.CompletionSnippetSuffix);
153 CB(S.Documentation);
154 CB(S.ReturnType);
155 CB(S.Type);
156 auto RawCharPointerCB = [&CB](const char *&P) {
157 llvm::StringRef S(P);
158 CB(S);
159 assert(!S.data()[S.size()] && "Visited StringRef must be null-terminated");
160 P = S.data();
161 };
162 RawCharPointerCB(S.CanonicalDeclaration.FileURI);
163 RawCharPointerCB(S.Definition.FileURI);
164
165 for (auto &Include : S.IncludeHeaders)
166 CB(Include.IncludeHeader);
167 }
168
/// Computes query-independent quality score for a Symbol.
/// This currently falls in the range [1, ln(#indexed documents)].
/// Only declared in this header; the scoring itself is defined out of line.
/// FIXME: this should probably be split into symbol -> signals
/// and signals -> score, so it can be reused for Sema completions.
float quality(const Symbol &S);
174
175 /// An immutable symbol container that stores a set of symbols.
176 /// The container will maintain the lifetime of the symbols.
177 class SymbolSlab {
178 public:
179 using const_iterator = std::vector<Symbol>::const_iterator;
180 using iterator = const_iterator;
181 using value_type = Symbol;
182
183 SymbolSlab() = default;
184
begin()185 const_iterator begin() const { return Symbols.begin(); }
end()186 const_iterator end() const { return Symbols.end(); }
187 const_iterator find(const SymbolID &SymID) const;
188
189 using size_type = size_t;
size()190 size_type size() const { return Symbols.size(); }
empty()191 bool empty() const { return Symbols.empty(); }
192 // Estimates the total memory usage.
bytes()193 size_t bytes() const {
194 return sizeof(*this) + Arena.getTotalMemory() +
195 Symbols.capacity() * sizeof(Symbol);
196 }
197
198 /// SymbolSlab::Builder is a mutable container that can 'freeze' to
199 /// SymbolSlab. The frozen SymbolSlab will use less memory.
200 class Builder {
201 public:
Builder()202 Builder() : UniqueStrings(Arena) {}
203
204 /// Adds a symbol, overwriting any existing one with the same ID.
205 /// This is a deep copy: underlying strings will be owned by the slab.
206 void insert(const Symbol &S);
207
208 /// Removes the symbol with an ID, if it exists.
erase(const SymbolID & ID)209 void erase(const SymbolID &ID) { Symbols.erase(ID); }
210
211 /// Returns the symbol with an ID, if it exists. Valid until insert/remove.
find(const SymbolID & ID)212 const Symbol *find(const SymbolID &ID) {
213 auto I = Symbols.find(ID);
214 return I == Symbols.end() ? nullptr : &I->second;
215 }
216
217 /// Consumes the builder to finalize the slab.
218 SymbolSlab build() &&;
219
220 private:
221 llvm::BumpPtrAllocator Arena;
222 /// Intern table for strings. Contents are on the arena.
223 llvm::UniqueStringSaver UniqueStrings;
224 /// Values are indices into Symbols vector.
225 llvm::DenseMap<SymbolID, Symbol> Symbols;
226 };
227
228 private:
SymbolSlab(llvm::BumpPtrAllocator Arena,std::vector<Symbol> Symbols)229 SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector<Symbol> Symbols)
230 : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {}
231
232 llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not.
233 std::vector<Symbol> Symbols; // Sorted by SymbolID to allow lookup.
234 };
235
236 } // namespace clangd
237 } // namespace clang
238
239 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H
240