1 //===--- Merge.cpp -----------------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "Merge.h"
10 #include "index/Symbol.h"
11 #include "index/SymbolLocation.h"
12 #include "index/SymbolOrigin.h"
13 #include "support/Logger.h"
14 #include "support/Trace.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSet.h"
18 #include "llvm/Support/raw_ostream.h"
19 #include <algorithm>
20 #include <iterator>
21
22 namespace clang {
23 namespace clangd {
24
25 // FIXME: Deleted symbols in dirty files are still returned (from Static).
// To identify and eliminate these, we should:
27 // - find the generating file from each Symbol which is Static-only
28 // - ask Dynamic if it has that file (needs new SymbolIndex method)
29 // - if so, drop the Symbol.
fuzzyFind(const FuzzyFindRequest & Req,llvm::function_ref<void (const Symbol &)> Callback) const30 bool MergedIndex::fuzzyFind(
31 const FuzzyFindRequest &Req,
32 llvm::function_ref<void(const Symbol &)> Callback) const {
33 // We can't step through both sources in parallel. So:
34 // 1) query all dynamic symbols, slurping results into a slab
35 // 2) query the static symbols, for each one:
36 // a) if it's not in the dynamic slab, yield it directly
37 // b) if it's in the dynamic slab, merge it and yield the result
38 // 3) now yield all the dynamic symbols we haven't processed.
39 trace::Span Tracer("MergedIndex fuzzyFind");
40 bool More = false; // We'll be incomplete if either source was.
41 SymbolSlab::Builder DynB;
42 unsigned DynamicCount = 0;
43 unsigned StaticCount = 0;
44 unsigned MergedCount = 0;
45 More |= Dynamic->fuzzyFind(Req, [&](const Symbol &S) {
46 ++DynamicCount;
47 DynB.insert(S);
48 });
49 SymbolSlab Dyn = std::move(DynB).build();
50
51 llvm::DenseSet<SymbolID> SeenDynamicSymbols;
52 More |= Static->fuzzyFind(Req, [&](const Symbol &S) {
53 auto DynS = Dyn.find(S.ID);
54 ++StaticCount;
55 if (DynS == Dyn.end())
56 return Callback(S);
57 ++MergedCount;
58 SeenDynamicSymbols.insert(S.ID);
59 Callback(mergeSymbol(*DynS, S));
60 });
61 SPAN_ATTACH(Tracer, "dynamic", DynamicCount);
62 SPAN_ATTACH(Tracer, "static", StaticCount);
63 SPAN_ATTACH(Tracer, "merged", MergedCount);
64 for (const Symbol &S : Dyn)
65 if (!SeenDynamicSymbols.count(S.ID))
66 Callback(S);
67 return More;
68 }
69
lookup(const LookupRequest & Req,llvm::function_ref<void (const Symbol &)> Callback) const70 void MergedIndex::lookup(
71 const LookupRequest &Req,
72 llvm::function_ref<void(const Symbol &)> Callback) const {
73 trace::Span Tracer("MergedIndex lookup");
74 SymbolSlab::Builder B;
75
76 Dynamic->lookup(Req, [&](const Symbol &S) { B.insert(S); });
77
78 auto RemainingIDs = Req.IDs;
79 Static->lookup(Req, [&](const Symbol &S) {
80 const Symbol *Sym = B.find(S.ID);
81 RemainingIDs.erase(S.ID);
82 if (!Sym)
83 Callback(S);
84 else
85 Callback(mergeSymbol(*Sym, S));
86 });
87 for (const auto &ID : RemainingIDs)
88 if (const Symbol *Sym = B.find(ID))
89 Callback(*Sym);
90 }
91
refs(const RefsRequest & Req,llvm::function_ref<void (const Ref &)> Callback) const92 bool MergedIndex::refs(const RefsRequest &Req,
93 llvm::function_ref<void(const Ref &)> Callback) const {
94 trace::Span Tracer("MergedIndex refs");
95 bool More = false;
96 uint32_t Remaining =
97 Req.Limit.getValueOr(std::numeric_limits<uint32_t>::max());
98 // We don't want duplicated refs from the static/dynamic indexes,
99 // and we can't reliably deduplicate them because offsets may differ slightly.
100 // We consider the dynamic index authoritative and report all its refs,
101 // and only report static index refs from other files.
102 //
103 // FIXME: The heuristic fails if the dynamic index contains a file, but all
104 // refs were removed (we will report stale ones from the static index).
105 // Ultimately we should explicit check which index has the file instead.
106 llvm::StringSet<> DynamicIndexFileURIs;
107 More |= Dynamic->refs(Req, [&](const Ref &O) {
108 DynamicIndexFileURIs.insert(O.Location.FileURI);
109 Callback(O);
110 assert(Remaining != 0);
111 --Remaining;
112 });
113 if (Remaining == 0 && More)
114 return More;
115 // We return less than Req.Limit if static index returns more refs for dirty
116 // files.
117 bool StaticHadMore = Static->refs(Req, [&](const Ref &O) {
118 if (DynamicIndexFileURIs.count(O.Location.FileURI))
119 return; // ignore refs that have been seen from dynamic index.
120 if (Remaining == 0) {
121 More = true;
122 return;
123 }
124 --Remaining;
125 Callback(O);
126 });
127 return More || StaticHadMore;
128 }
129
relations(const RelationsRequest & Req,llvm::function_ref<void (const SymbolID &,const Symbol &)> Callback) const130 void MergedIndex::relations(
131 const RelationsRequest &Req,
132 llvm::function_ref<void(const SymbolID &, const Symbol &)> Callback) const {
133 uint32_t Remaining =
134 Req.Limit.getValueOr(std::numeric_limits<uint32_t>::max());
135 // Return results from both indexes but avoid duplicates.
136 // We might return stale relations from the static index;
137 // we don't currently have a good way of identifying them.
138 llvm::DenseSet<std::pair<SymbolID, SymbolID>> SeenRelations;
139 Dynamic->relations(Req, [&](const SymbolID &Subject, const Symbol &Object) {
140 Callback(Subject, Object);
141 SeenRelations.insert(std::make_pair(Subject, Object.ID));
142 --Remaining;
143 });
144 if (Remaining == 0)
145 return;
146 Static->relations(Req, [&](const SymbolID &Subject, const Symbol &Object) {
147 if (Remaining > 0 &&
148 !SeenRelations.count(std::make_pair(Subject, Object.ID))) {
149 --Remaining;
150 Callback(Subject, Object);
151 }
152 });
153 }
154
155 // Returns true if \p L is (strictly) preferred to \p R (e.g. by file paths). If
156 // neither is preferred, this returns false.
prefer(const SymbolLocation & L,const SymbolLocation & R)157 bool prefer(const SymbolLocation &L, const SymbolLocation &R) {
158 if (!L)
159 return false;
160 if (!R)
161 return true;
162 auto HasCodeGenSuffix = [](const SymbolLocation &Loc) {
163 constexpr static const char *CodegenSuffixes[] = {".proto"};
164 return std::any_of(std::begin(CodegenSuffixes), std::end(CodegenSuffixes),
165 [&](llvm::StringRef Suffix) {
166 return llvm::StringRef(Loc.FileURI).endswith(Suffix);
167 });
168 };
169 return HasCodeGenSuffix(L) && !HasCodeGenSuffix(R);
170 }
171
// Combines two records describing the same symbol (identical IDs) into one.
// One input is chosen as the base; fields the base leaves empty are filled in
// from the other input, and reference counts, origins and flags are
// accumulated from both.
Symbol mergeSymbol(const Symbol &L, const Symbol &R) {
  assert(L.ID == R.ID);
  // Information from TUs that saw the definition is preferred.
  // For classes that is the definition itself; for functions it is hopefully
  // the header declaration. On a tie (both or neither saw it), L stays
  // preferred over R.
  const bool PreferR = R.Definition && !L.Definition;
  // Include headers are fully merged only when both inputs agree on having
  // (or lacking) a definition file; otherwise we merely add up the reference
  // counts of includes the two inputs have in common.
  assert(L.Definition.FileURI && R.Definition.FileURI);
  const bool LHasDefinitionFile = *L.Definition.FileURI != '\0';
  const bool RHasDefinitionFile = *R.Definition.FileURI != '\0';
  const bool MergeIncludes = LHasDefinitionFile == RHasDefinitionFile;

  const Symbol &Preferred = PreferR ? R : L;
  const Symbol &Other = PreferR ? L : R; // The less-preferred input.
  Symbol Merged = Preferred;             // The result we are building up.

  // Take a location from \p Other only when it is strictly preferred.
  if (prefer(Other.CanonicalDeclaration, Merged.CanonicalDeclaration))
    Merged.CanonicalDeclaration = Other.CanonicalDeclaration;
  if (prefer(Other.Definition, Merged.Definition))
    Merged.Definition = Other.Definition;

  Merged.References += Other.References;

  // Fill in textual fields the preferred symbol left empty.
  if (Merged.Signature.empty())
    Merged.Signature = Other.Signature;
  if (Merged.CompletionSnippetSuffix.empty())
    Merged.CompletionSnippetSuffix = Other.CompletionSnippetSuffix;
  if (Merged.Documentation.empty()) {
    // A class that has a definition but no documentation must not inherit
    // documentation from the other input: that one is often a non-canonical
    // bare forward declaration that merely follows an unrelated comment.
    const auto Kind = Merged.SymInfo.Kind;
    const bool IsClass = Kind == index::SymbolKind::Class ||
                         Kind == index::SymbolKind::Struct ||
                         Kind == index::SymbolKind::Union;
    const bool IsDefinedClass = IsClass && Merged.Definition;
    if (!IsDefinedClass)
      Merged.Documentation = Other.Documentation;
  }
  if (Merged.ReturnType.empty())
    Merged.ReturnType = Other.ReturnType;
  if (Merged.Type.empty())
    Merged.Type = Other.Type;

  // Combine include headers: shared headers get their reference counts added
  // up; headers known only to \p Other are appended when MergeIncludes holds.
  for (const auto &OtherInc : Other.IncludeHeaders) {
    auto Existing = llvm::find_if(Merged.IncludeHeaders, [&](const auto &Inc) {
      return Inc.IncludeHeader == OtherInc.IncludeHeader;
    });
    if (Existing != Merged.IncludeHeaders.end())
      Existing->References += OtherInc.References;
    else if (MergeIncludes)
      Merged.IncludeHeaders.emplace_back(OtherInc.IncludeHeader,
                                         OtherInc.References);
  }

  Merged.Origin |= Other.Origin | SymbolOrigin::Merge;
  Merged.Flags |= Other.Flags;
  return Merged;
}
228
229 } // namespace clangd
230 } // namespace clang
231