1 //===--- Quality.h - Ranking alternatives for ambiguous queries --*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// Some operations such as code completion produce a set of candidates. 10 /// Usually the user can choose between them, but we should put the best options 11 /// at the top (they're easier to select, and more likely to be seen). 12 /// 13 /// This file defines building blocks for ranking candidates. 14 /// It's used by the features directly and also in the implementation of 15 /// indexes, as indexes also need to heuristically limit their results. 16 /// 17 /// The facilities here are: 18 /// - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString 19 /// These are structured in a way that they can be debugged, and are fairly 20 /// consistent regardless of the source. 21 /// - compute scores from scoring signals. These are suitable for sorting. 22 /// - sorting utilities like the TopN container. 23 /// These could be split up further to isolate dependencies if we care. 24 /// 25 //===----------------------------------------------------------------------===// 26 27 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H 28 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H 29 30 #include "ExpectedTypes.h" 31 #include "FileDistance.h" 32 #include "clang/Sema/CodeCompleteConsumer.h" 33 #include "llvm/ADT/ArrayRef.h" 34 #include "llvm/ADT/StringRef.h" 35 #include "llvm/ADT/StringSet.h" 36 #include <algorithm> 37 #include <functional> 38 #include <vector> 39 40 namespace llvm { 41 class raw_ostream; 42 } // namespace llvm 43 44 namespace clang { 45 class CodeCompletionResult; 46 47 namespace clangd { 48 49 struct Symbol; 50 class URIDistance; 51 52 // Signals structs are designed to be aggregated from 0 or more sources. 53 // A default instance has neutral signals, and sources are merged into it. 54 // They can be dumped for debugging, and evaluate()d into a score. 55 56 /// Attributes of a symbol that affect how much we like it. 57 struct SymbolQualitySignals { 58 bool Deprecated = false; 59 bool ReservedName = false; // __foo, _Foo are usually implementation details. 60 // FIXME: make these findable once user types _. 61 bool ImplementationDetail = false; 62 unsigned References = 0; 63 64 enum SymbolCategory { 65 Unknown = 0, 66 Variable, 67 Macro, 68 Type, 69 Function, 70 Constructor, 71 Destructor, 72 Namespace, 73 Keyword, 74 Operator, 75 } Category = Unknown; 76 77 void merge(const CodeCompletionResult &SemaCCResult); 78 void merge(const Symbol &IndexResult); 79 80 // Condense these signals down to a single number, higher is better. 81 float evaluateHeuristics() const; 82 }; 83 llvm::raw_ostream &operator<<(llvm::raw_ostream &, 84 const SymbolQualitySignals &); 85 86 /// Attributes of a symbol-query pair that affect how much we like it. 87 struct SymbolRelevanceSignals { 88 /// The name of the symbol (for ContextWords). Must be explicitly assigned. 89 llvm::StringRef Name; 90 /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned. 91 float NameMatch = 1; 92 /// Lowercase words relevant to the context (e.g. near the completion point). 93 llvm::StringSet<>* ContextWords = nullptr; 94 bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private). 95 /// Whether fixits needs to be applied for that completion or not. 96 bool NeedsFixIts = false; 97 bool InBaseClass = false; // A member from base class of the accessed class. 98 99 URIDistance *FileProximityMatch = nullptr; 100 /// These are used to calculate proximity between the index symbol and the 101 /// query. 102 llvm::StringRef SymbolURI; 103 /// FIXME: unify with index proximity score - signals should be 104 /// source-independent. 105 /// Proximity between best declaration and the query. [0-1], 1 is closest. 106 float SemaFileProximityScore = 0; 107 108 // Scope proximity is only considered (both index and sema) when this is set. 109 ScopeDistance *ScopeProximityMatch = nullptr; 110 llvm::Optional<llvm::StringRef> SymbolScope; 111 // A symbol from sema should be accessible from the current scope. 112 bool SemaSaysInScope = false; 113 114 // An approximate measure of where we expect the symbol to be used. 115 enum AccessibleScope { 116 FunctionScope, 117 ClassScope, 118 FileScope, 119 GlobalScope, 120 } Scope = GlobalScope; 121 122 enum QueryType { 123 CodeComplete, 124 Generic, 125 } Query = Generic; 126 127 CodeCompletionContext::Kind Context = CodeCompletionContext::CCC_Other; 128 129 // Whether symbol is an instance member of a class. 130 bool IsInstanceMember = false; 131 132 // Whether clang provided a preferred type in the completion context. 133 bool HadContextType = false; 134 // Whether a source completion item or a symbol had a type information. 135 bool HadSymbolType = false; 136 // Whether the item matches the type expected in the completion context. 137 bool TypeMatchesPreferred = false; 138 139 /// Length of the unqualified partial name of Symbol typed in 140 /// CompletionPrefix. 141 unsigned FilterLength = 0; 142 143 /// Set of derived signals computed by calculateDerivedSignals(). Must not be 144 /// set explicitly. 145 struct DerivedSignals { 146 /// Whether Name contains some word from context. 147 bool NameMatchesContext = false; 148 /// Min distance between SymbolURI and all the headers included by the TU. 149 unsigned FileProximityDistance = FileDistance::Unreachable; 150 /// Min distance between SymbolScope and all the available scopes. 151 unsigned ScopeProximityDistance = FileDistance::Unreachable; 152 }; 153 154 DerivedSignals calculateDerivedSignals() const; 155 156 void merge(const CodeCompletionResult &SemaResult); 157 void merge(const Symbol &IndexResult); 158 159 // Condense these signals down to a single number, higher is better. 160 float evaluateHeuristics() const; 161 }; 162 llvm::raw_ostream &operator<<(llvm::raw_ostream &, 163 const SymbolRelevanceSignals &); 164 165 /// Combine symbol quality and relevance into a single score. 166 float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance); 167 168 /// Same semantics as CodeComplete::Score. Quality score and Relevance score 169 /// have been removed since DecisionForest cannot assign individual scores to 170 /// Quality and Relevance signals. 171 struct DecisionForestScores { 172 float Total = 0.f; 173 float ExcludingName = 0.f; 174 }; 175 176 DecisionForestScores 177 evaluateDecisionForest(const SymbolQualitySignals &Quality, 178 const SymbolRelevanceSignals &Relevance, float Base); 179 180 /// TopN<T> is a lossy container that preserves only the "best" N elements. 181 template <typename T, typename Compare = std::greater<T>> class TopN { 182 public: 183 using value_type = T; 184 TopN(size_t N, Compare Greater = Compare()) N(N)185 : N(N), Greater(std::move(Greater)) {} 186 187 // Adds a candidate to the set. 188 // Returns true if a candidate was dropped to get back under N. push(value_type && V)189 bool push(value_type &&V) { 190 bool Dropped = false; 191 if (Heap.size() >= N) { 192 Dropped = true; 193 if (N > 0 && Greater(V, Heap.front())) { 194 std::pop_heap(Heap.begin(), Heap.end(), Greater); 195 Heap.back() = std::move(V); 196 std::push_heap(Heap.begin(), Heap.end(), Greater); 197 } 198 } else { 199 Heap.push_back(std::move(V)); 200 std::push_heap(Heap.begin(), Heap.end(), Greater); 201 } 202 assert(Heap.size() <= N); 203 assert(std::is_heap(Heap.begin(), Heap.end(), Greater)); 204 return Dropped; 205 } 206 207 // Returns candidates from best to worst. items()208 std::vector<value_type> items() && { 209 std::sort_heap(Heap.begin(), Heap.end(), Greater); 210 assert(Heap.size() <= N); 211 return std::move(Heap); 212 } 213 214 private: 215 const size_t N; 216 std::vector<value_type> Heap; // Min-heap, comparator is Greater. 217 Compare Greater; 218 }; 219 220 /// Returns a string that sorts in the same order as (-Score, Tiebreak), for 221 /// LSP. (The highest score compares smallest so it sorts at the top). 222 std::string sortText(float Score, llvm::StringRef Tiebreak = ""); 223 224 struct SignatureQualitySignals { 225 uint32_t NumberOfParameters = 0; 226 uint32_t NumberOfOptionalParameters = 0; 227 CodeCompleteConsumer::OverloadCandidate::CandidateKind Kind = 228 CodeCompleteConsumer::OverloadCandidate::CandidateKind::CK_Function; 229 }; 230 llvm::raw_ostream &operator<<(llvm::raw_ostream &, 231 const SignatureQualitySignals &); 232 233 } // namespace clangd 234 } // namespace clang 235 236 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H 237