//===--- Symbol.h ------------------------------------------------*- C++-*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H #include "SymbolID.h" #include "SymbolLocation.h" #include "SymbolOrigin.h" #include "clang/Index/IndexSymbol.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/StringSaver.h" namespace clang { namespace clangd { /// The class presents a C++ symbol, e.g. class, function. /// /// WARNING: Symbols do not own much of their underlying data - typically /// strings are owned by a SymbolSlab. They should be treated as non-owning /// references. Copies are shallow. /// /// When adding new unowned data fields to Symbol, remember to update: /// - SymbolSlab::Builder in Index.cpp, to copy them to the slab's storage. /// - mergeSymbol in Merge.cpp, to properly combine two Symbols. /// /// A fully documented symbol can be split as: /// size_type std::map::count(const K& key) const /// | Return | Scope |Name| Signature | /// We split up these components to allow display flexibility later. struct Symbol { /// The ID of the symbol. SymbolID ID; /// The symbol information, like symbol kind. index::SymbolInfo SymInfo = index::SymbolInfo(); /// The unqualified name of the symbol, e.g. "bar" (for ns::bar). llvm::StringRef Name; /// The containing namespace. e.g. "" (global), "ns::" (top-level namespace). llvm::StringRef Scope; /// The location of the symbol's definition, if one was found. /// This just covers the symbol name (e.g. without class/function body). SymbolLocation Definition; /// The location of the preferred declaration of the symbol. /// This just covers the symbol name. /// This may be the same as Definition. /// /// A C++ symbol may have multiple declarations, and we pick one to prefer. /// * For classes, the canonical declaration should be the definition. /// * For non-inline functions, the canonical declaration typically appears /// in the ".h" file corresponding to the definition. SymbolLocation CanonicalDeclaration; /// The number of translation units that reference this symbol from their main /// file. This number is only meaningful if aggregated in an index. unsigned References = 0; /// Where this symbol came from. Usually an index provides a constant value. SymbolOrigin Origin = SymbolOrigin::Unknown; /// A brief description of the symbol that can be appended in the completion /// candidate list. For example, "(X x, Y y) const" is a function signature. /// Only set when the symbol is indexed for completion. llvm::StringRef Signature; /// Argument list in human-readable format, will be displayed to help /// disambiguate between different specializations of a template. Empty for /// non-specializations. Example: "" llvm::StringRef TemplateSpecializationArgs; /// What to insert when completing this symbol, after the symbol name. /// This is in LSP snippet syntax (e.g. "({$0})" for a no-args function). /// (When snippets are disabled, the symbol name alone is used). /// Only set when the symbol is indexed for completion. llvm::StringRef CompletionSnippetSuffix; /// Documentation including comment for the symbol declaration. llvm::StringRef Documentation; /// Type when this symbol is used in an expression. (Short display form). /// e.g. return type of a function, or type of a variable. /// Only set when the symbol is indexed for completion. llvm::StringRef ReturnType; /// Raw representation of the OpaqueType of the symbol, used for scoring /// purposes. /// Only set when the symbol is indexed for completion. llvm::StringRef Type; struct IncludeHeaderWithReferences { IncludeHeaderWithReferences() = default; IncludeHeaderWithReferences(llvm::StringRef IncludeHeader, unsigned References) : IncludeHeader(IncludeHeader), References(References) {} /// This can be either a URI of the header to be #include'd /// for this symbol, or a literal header quoted with <> or "" that is /// suitable to be included directly. When it is a URI, the exact #include /// path needs to be calculated according to the URI scheme. /// /// Note that the include header is a canonical include for the symbol and /// can be different from FileURI in the CanonicalDeclaration. llvm::StringRef IncludeHeader = ""; /// The number of translation units that reference this symbol and include /// this header. This number is only meaningful if aggregated in an index. unsigned References = 0; }; /// One Symbol can potentially be included via different headers. /// - If we haven't seen a definition, this covers all declarations. /// - If we have seen a definition, this covers declarations visible from /// any definition. /// Only set when the symbol is indexed for completion. llvm::SmallVector IncludeHeaders; enum SymbolFlag : uint8_t { None = 0, /// Whether or not this symbol is meant to be used for the code completion. /// See also isIndexedForCodeCompletion(). /// Note that we don't store completion information (signature, snippet, /// type, includes) if the symbol is not indexed for code completion. IndexedForCodeCompletion = 1 << 0, /// Indicates if the symbol is deprecated. Deprecated = 1 << 1, /// Symbol is an implementation detail. ImplementationDetail = 1 << 2, /// Symbol is visible to other files (not e.g. a static helper function). VisibleOutsideFile = 1 << 3, }; SymbolFlag Flags = SymbolFlag::None; /// FIXME: also add deprecation message and fixit? }; inline Symbol::SymbolFlag operator|(Symbol::SymbolFlag A, Symbol::SymbolFlag B) { return static_cast(static_cast(A) | static_cast(B)); } inline Symbol::SymbolFlag &operator|=(Symbol::SymbolFlag &A, Symbol::SymbolFlag B) { return A = A | B; } llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S); llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Symbol::SymbolFlag); /// Invokes Callback with each StringRef& contained in the Symbol. /// Useful for deduplicating backing strings. template void visitStrings(Symbol &S, const Callback &CB) { CB(S.Name); CB(S.Scope); CB(S.TemplateSpecializationArgs); CB(S.Signature); CB(S.CompletionSnippetSuffix); CB(S.Documentation); CB(S.ReturnType); CB(S.Type); auto RawCharPointerCB = [&CB](const char *&P) { llvm::StringRef S(P); CB(S); assert(!S.data()[S.size()] && "Visited StringRef must be null-terminated"); P = S.data(); }; RawCharPointerCB(S.CanonicalDeclaration.FileURI); RawCharPointerCB(S.Definition.FileURI); for (auto &Include : S.IncludeHeaders) CB(Include.IncludeHeader); } /// Computes query-independent quality score for a Symbol. /// This currently falls in the range [1, ln(#indexed documents)]. /// FIXME: this should probably be split into symbol -> signals /// and signals -> score, so it can be reused for Sema completions. float quality(const Symbol &S); /// An immutable symbol container that stores a set of symbols. /// The container will maintain the lifetime of the symbols. class SymbolSlab { public: using const_iterator = std::vector::const_iterator; using iterator = const_iterator; using value_type = Symbol; SymbolSlab() = default; const_iterator begin() const { return Symbols.begin(); } const_iterator end() const { return Symbols.end(); } const_iterator find(const SymbolID &SymID) const; using size_type = size_t; size_type size() const { return Symbols.size(); } bool empty() const { return Symbols.empty(); } // Estimates the total memory usage. size_t bytes() const { return sizeof(*this) + Arena.getTotalMemory() + Symbols.capacity() * sizeof(Symbol); } /// SymbolSlab::Builder is a mutable container that can 'freeze' to /// SymbolSlab. The frozen SymbolSlab will use less memory. class Builder { public: Builder() : UniqueStrings(Arena) {} /// Adds a symbol, overwriting any existing one with the same ID. /// This is a deep copy: underlying strings will be owned by the slab. void insert(const Symbol &S); /// Removes the symbol with an ID, if it exists. void erase(const SymbolID &ID) { Symbols.erase(ID); } /// Returns the symbol with an ID, if it exists. Valid until insert/remove. const Symbol *find(const SymbolID &ID) { auto I = Symbols.find(ID); return I == Symbols.end() ? nullptr : &I->second; } /// Consumes the builder to finalize the slab. SymbolSlab build() &&; private: llvm::BumpPtrAllocator Arena; /// Intern table for strings. Contents are on the arena. llvm::UniqueStringSaver UniqueStrings; /// Values are indices into Symbols vector. llvm::DenseMap Symbols; }; private: SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector Symbols) : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {} llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not. std::vector Symbols; // Sorted by SymbolID to allow lookup. }; } // namespace clangd } // namespace clang #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H