1 //===--- CodeComplete.cpp ----------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Code completion has several moving parts:
10 // - AST-based completions are provided using the completion hooks in Sema.
11 // - external completions are retrieved from the index (using hints from Sema)
12 // - the two sources overlap, and must be merged and overloads bundled
13 // - results must be scored and ranked (see Quality.h) before rendering
14 //
15 // Signature help works in a similar way as code completion, but it is simpler:
16 // it's purely AST-based, and there are few candidates.
17 //
18 //===----------------------------------------------------------------------===//
19
20 #include "CodeComplete.h"
21 #include "AST.h"
22 #include "CodeCompletionStrings.h"
23 #include "Compiler.h"
24 #include "Diagnostics.h"
25 #include "ExpectedTypes.h"
26 #include "FileDistance.h"
27 #include "FuzzyMatch.h"
28 #include "Headers.h"
29 #include "Hover.h"
30 #include "Preamble.h"
31 #include "Protocol.h"
32 #include "Quality.h"
33 #include "SourceCode.h"
34 #include "TUScheduler.h"
35 #include "URI.h"
36 #include "index/Index.h"
37 #include "index/Symbol.h"
38 #include "index/SymbolOrigin.h"
39 #include "support/Logger.h"
40 #include "support/Threading.h"
41 #include "support/ThreadsafeFS.h"
42 #include "support/Trace.h"
43 #include "clang/AST/Decl.h"
44 #include "clang/AST/DeclBase.h"
45 #include "clang/Basic/CharInfo.h"
46 #include "clang/Basic/LangOptions.h"
47 #include "clang/Basic/SourceLocation.h"
48 #include "clang/Basic/TokenKinds.h"
49 #include "clang/Format/Format.h"
50 #include "clang/Frontend/CompilerInstance.h"
51 #include "clang/Frontend/FrontendActions.h"
52 #include "clang/Lex/ExternalPreprocessorSource.h"
53 #include "clang/Lex/Lexer.h"
54 #include "clang/Lex/Preprocessor.h"
55 #include "clang/Lex/PreprocessorOptions.h"
56 #include "clang/Sema/CodeCompleteConsumer.h"
57 #include "clang/Sema/DeclSpec.h"
58 #include "clang/Sema/Sema.h"
59 #include "llvm/ADT/ArrayRef.h"
60 #include "llvm/ADT/None.h"
61 #include "llvm/ADT/Optional.h"
62 #include "llvm/ADT/SmallVector.h"
63 #include "llvm/ADT/StringExtras.h"
64 #include "llvm/ADT/StringRef.h"
65 #include "llvm/Support/Compiler.h"
66 #include "llvm/Support/Debug.h"
67 #include "llvm/Support/Error.h"
68 #include "llvm/Support/Format.h"
69 #include "llvm/Support/FormatVariadic.h"
70 #include "llvm/Support/ScopedPrinter.h"
71 #include <algorithm>
72 #include <iterator>
73
// We log detailed candidate information here if you run with
// -debug-only=CodeComplete (the value must match DEBUG_TYPE below).
75 #define DEBUG_TYPE "CodeComplete"
76
77 namespace clang {
78 namespace clangd {
79 namespace {
80
toCompletionItemKind(index::SymbolKind Kind)81 CompletionItemKind toCompletionItemKind(index::SymbolKind Kind) {
82 using SK = index::SymbolKind;
83 switch (Kind) {
84 case SK::Unknown:
85 return CompletionItemKind::Missing;
86 case SK::Module:
87 case SK::Namespace:
88 case SK::NamespaceAlias:
89 return CompletionItemKind::Module;
90 case SK::Macro:
91 return CompletionItemKind::Text;
92 case SK::Enum:
93 return CompletionItemKind::Enum;
94 case SK::Struct:
95 return CompletionItemKind::Struct;
96 case SK::Class:
97 case SK::Protocol:
98 case SK::Extension:
99 case SK::Union:
100 return CompletionItemKind::Class;
101 case SK::TypeAlias:
102 // We use the same kind as the VSCode C++ extension.
103 // FIXME: pick a better option when we have one.
104 return CompletionItemKind::Interface;
105 case SK::Using:
106 return CompletionItemKind::Reference;
107 case SK::Function:
108 case SK::ConversionFunction:
109 return CompletionItemKind::Function;
110 case SK::Variable:
111 case SK::Parameter:
112 case SK::NonTypeTemplateParm:
113 return CompletionItemKind::Variable;
114 case SK::Field:
115 return CompletionItemKind::Field;
116 case SK::EnumConstant:
117 return CompletionItemKind::EnumMember;
118 case SK::InstanceMethod:
119 case SK::ClassMethod:
120 case SK::StaticMethod:
121 case SK::Destructor:
122 return CompletionItemKind::Method;
123 case SK::InstanceProperty:
124 case SK::ClassProperty:
125 case SK::StaticProperty:
126 return CompletionItemKind::Property;
127 case SK::Constructor:
128 return CompletionItemKind::Constructor;
129 case SK::TemplateTypeParm:
130 case SK::TemplateTemplateParm:
131 return CompletionItemKind::TypeParameter;
132 }
133 llvm_unreachable("Unhandled clang::index::SymbolKind.");
134 }
135
136 CompletionItemKind
toCompletionItemKind(CodeCompletionResult::ResultKind ResKind,const NamedDecl * Decl,CodeCompletionContext::Kind CtxKind)137 toCompletionItemKind(CodeCompletionResult::ResultKind ResKind,
138 const NamedDecl *Decl,
139 CodeCompletionContext::Kind CtxKind) {
140 if (Decl)
141 return toCompletionItemKind(index::getSymbolInfo(Decl).Kind);
142 if (CtxKind == CodeCompletionContext::CCC_IncludedFile)
143 return CompletionItemKind::File;
144 switch (ResKind) {
145 case CodeCompletionResult::RK_Declaration:
146 llvm_unreachable("RK_Declaration without Decl");
147 case CodeCompletionResult::RK_Keyword:
148 return CompletionItemKind::Keyword;
149 case CodeCompletionResult::RK_Macro:
150 return CompletionItemKind::Text; // unfortunately, there's no 'Macro'
151 // completion items in LSP.
152 case CodeCompletionResult::RK_Pattern:
153 return CompletionItemKind::Snippet;
154 }
155 llvm_unreachable("Unhandled CodeCompletionResult::ResultKind.");
156 }
157
158 // Identifier code completion result.
159 struct RawIdentifier {
160 llvm::StringRef Name;
161 unsigned References; // # of usages in file.
162 };
163
164 /// A code completion result, in clang-native form.
165 /// It may be promoted to a CompletionItem if it's among the top-ranked results.
166 struct CompletionCandidate {
167 llvm::StringRef Name; // Used for filtering and sorting.
168 // We may have a result from Sema, from the index, or both.
169 const CodeCompletionResult *SemaResult = nullptr;
170 const Symbol *IndexResult = nullptr;
171 const RawIdentifier *IdentifierResult = nullptr;
172 llvm::SmallVector<llvm::StringRef, 1> RankedIncludeHeaders;
173
174 // Returns a token identifying the overload set this is part of.
175 // 0 indicates it's not part of any overload set.
overloadSetclang::clangd::__anon4117fc060111::CompletionCandidate176 size_t overloadSet(const CodeCompleteOptions &Opts, llvm::StringRef FileName,
177 IncludeInserter *Inserter) const {
178 if (!Opts.BundleOverloads.getValueOr(false))
179 return 0;
180
181 // Depending on the index implementation, we can see different header
182 // strings (literal or URI) mapping to the same file. We still want to
183 // bundle those, so we must resolve the header to be included here.
184 std::string HeaderForHash;
185 if (Inserter)
186 if (auto Header = headerToInsertIfAllowed(Opts))
187 if (auto HeaderFile = toHeaderFile(*Header, FileName))
188 if (auto Spelled =
189 Inserter->calculateIncludePath(*HeaderFile, FileName))
190 HeaderForHash = *Spelled;
191
192 llvm::SmallString<256> Scratch;
193 if (IndexResult) {
194 switch (IndexResult->SymInfo.Kind) {
195 case index::SymbolKind::ClassMethod:
196 case index::SymbolKind::InstanceMethod:
197 case index::SymbolKind::StaticMethod:
198 #ifndef NDEBUG
199 llvm_unreachable("Don't expect members from index in code completion");
200 #else
201 LLVM_FALLTHROUGH;
202 #endif
203 case index::SymbolKind::Function:
204 // We can't group overloads together that need different #includes.
205 // This could break #include insertion.
206 return llvm::hash_combine(
207 (IndexResult->Scope + IndexResult->Name).toStringRef(Scratch),
208 HeaderForHash);
209 default:
210 return 0;
211 }
212 }
213 if (SemaResult) {
214 // We need to make sure we're consistent with the IndexResult case!
215 const NamedDecl *D = SemaResult->Declaration;
216 if (!D || !D->isFunctionOrFunctionTemplate())
217 return 0;
218 {
219 llvm::raw_svector_ostream OS(Scratch);
220 D->printQualifiedName(OS);
221 }
222 return llvm::hash_combine(Scratch, HeaderForHash);
223 }
224 assert(IdentifierResult);
225 return 0;
226 }
227
228 // The best header to include if include insertion is allowed.
229 llvm::Optional<llvm::StringRef>
headerToInsertIfAllowedclang::clangd::__anon4117fc060111::CompletionCandidate230 headerToInsertIfAllowed(const CodeCompleteOptions &Opts) const {
231 if (Opts.InsertIncludes == CodeCompleteOptions::NeverInsert ||
232 RankedIncludeHeaders.empty())
233 return None;
234 if (SemaResult && SemaResult->Declaration) {
235 // Avoid inserting new #include if the declaration is found in the current
236 // file e.g. the symbol is forward declared.
237 auto &SM = SemaResult->Declaration->getASTContext().getSourceManager();
238 for (const Decl *RD : SemaResult->Declaration->redecls())
239 if (SM.isInMainFile(SM.getExpansionLoc(RD->getBeginLoc())))
240 return None;
241 }
242 return RankedIncludeHeaders[0];
243 }
244
245 using Bundle = llvm::SmallVector<CompletionCandidate, 4>;
246 };
247 using ScoredBundle =
248 std::pair<CompletionCandidate::Bundle, CodeCompletion::Scores>;
249 struct ScoredBundleGreater {
operator ()clang::clangd::__anon4117fc060111::ScoredBundleGreater250 bool operator()(const ScoredBundle &L, const ScoredBundle &R) {
251 if (L.second.Total != R.second.Total)
252 return L.second.Total > R.second.Total;
253 return L.first.front().Name <
254 R.first.front().Name; // Earlier name is better.
255 }
256 };
257
258 // Assembles a code completion out of a bundle of >=1 completion candidates.
259 // Many of the expensive strings are only computed at this point, once we know
260 // the candidate bundle is going to be returned.
261 //
262 // Many fields are the same for all candidates in a bundle (e.g. name), and are
263 // computed from the first candidate, in the constructor.
264 // Others vary per candidate, so add() must be called for remaining candidates.
265 struct CodeCompletionBuilder {
CodeCompletionBuilderclang::clangd::__anon4117fc060111::CodeCompletionBuilder266 CodeCompletionBuilder(ASTContext *ASTCtx, const CompletionCandidate &C,
267 CodeCompletionString *SemaCCS,
268 llvm::ArrayRef<std::string> QueryScopes,
269 const IncludeInserter &Includes,
270 llvm::StringRef FileName,
271 CodeCompletionContext::Kind ContextKind,
272 const CodeCompleteOptions &Opts,
273 bool IsUsingDeclaration, tok::TokenKind NextTokenKind)
274 : ASTCtx(ASTCtx), ExtractDocumentation(Opts.IncludeComments),
275 EnableFunctionArgSnippets(Opts.EnableFunctionArgSnippets),
276 IsUsingDeclaration(IsUsingDeclaration), NextTokenKind(NextTokenKind) {
277 add(C, SemaCCS);
278 if (C.SemaResult) {
279 assert(ASTCtx);
280 Completion.Origin |= SymbolOrigin::AST;
281 Completion.Name = std::string(llvm::StringRef(SemaCCS->getTypedText()));
282 if (Completion.Scope.empty()) {
283 if ((C.SemaResult->Kind == CodeCompletionResult::RK_Declaration) ||
284 (C.SemaResult->Kind == CodeCompletionResult::RK_Pattern))
285 if (const auto *D = C.SemaResult->getDeclaration())
286 if (const auto *ND = dyn_cast<NamedDecl>(D))
287 Completion.Scope = std::string(
288 splitQualifiedName(printQualifiedName(*ND)).first);
289 }
290 Completion.Kind = toCompletionItemKind(
291 C.SemaResult->Kind, C.SemaResult->Declaration, ContextKind);
292 // Sema could provide more info on whether the completion was a file or
293 // folder.
294 if (Completion.Kind == CompletionItemKind::File &&
295 Completion.Name.back() == '/')
296 Completion.Kind = CompletionItemKind::Folder;
297 for (const auto &FixIt : C.SemaResult->FixIts) {
298 Completion.FixIts.push_back(toTextEdit(
299 FixIt, ASTCtx->getSourceManager(), ASTCtx->getLangOpts()));
300 }
301 llvm::sort(Completion.FixIts, [](const TextEdit &X, const TextEdit &Y) {
302 return std::tie(X.range.start.line, X.range.start.character) <
303 std::tie(Y.range.start.line, Y.range.start.character);
304 });
305 Completion.Deprecated |=
306 (C.SemaResult->Availability == CXAvailability_Deprecated);
307 }
308 if (C.IndexResult) {
309 Completion.Origin |= C.IndexResult->Origin;
310 if (Completion.Scope.empty())
311 Completion.Scope = std::string(C.IndexResult->Scope);
312 if (Completion.Kind == CompletionItemKind::Missing)
313 Completion.Kind = toCompletionItemKind(C.IndexResult->SymInfo.Kind);
314 if (Completion.Name.empty())
315 Completion.Name = std::string(C.IndexResult->Name);
316 // If the completion was visible to Sema, no qualifier is needed. This
317 // avoids unneeded qualifiers in cases like with `using ns::X`.
318 if (Completion.RequiredQualifier.empty() && !C.SemaResult) {
319 llvm::StringRef ShortestQualifier = C.IndexResult->Scope;
320 for (llvm::StringRef Scope : QueryScopes) {
321 llvm::StringRef Qualifier = C.IndexResult->Scope;
322 if (Qualifier.consume_front(Scope) &&
323 Qualifier.size() < ShortestQualifier.size())
324 ShortestQualifier = Qualifier;
325 }
326 Completion.RequiredQualifier = std::string(ShortestQualifier);
327 }
328 Completion.Deprecated |= (C.IndexResult->Flags & Symbol::Deprecated);
329 }
330 if (C.IdentifierResult) {
331 Completion.Origin |= SymbolOrigin::Identifier;
332 Completion.Kind = CompletionItemKind::Text;
333 Completion.Name = std::string(C.IdentifierResult->Name);
334 }
335
336 // Turn absolute path into a literal string that can be #included.
337 auto Inserted = [&](llvm::StringRef Header)
338 -> llvm::Expected<std::pair<std::string, bool>> {
339 auto ResolvedDeclaring =
340 URI::resolve(C.IndexResult->CanonicalDeclaration.FileURI, FileName);
341 if (!ResolvedDeclaring)
342 return ResolvedDeclaring.takeError();
343 auto ResolvedInserted = toHeaderFile(Header, FileName);
344 if (!ResolvedInserted)
345 return ResolvedInserted.takeError();
346 auto Spelled = Includes.calculateIncludePath(*ResolvedInserted, FileName);
347 if (!Spelled)
348 return error("Header not on include path");
349 return std::make_pair(
350 std::move(*Spelled),
351 Includes.shouldInsertInclude(*ResolvedDeclaring, *ResolvedInserted));
352 };
353 bool ShouldInsert = C.headerToInsertIfAllowed(Opts).hasValue();
354 // Calculate include paths and edits for all possible headers.
355 for (const auto &Inc : C.RankedIncludeHeaders) {
356 if (auto ToInclude = Inserted(Inc)) {
357 CodeCompletion::IncludeCandidate Include;
358 Include.Header = ToInclude->first;
359 if (ToInclude->second && ShouldInsert)
360 Include.Insertion = Includes.insert(ToInclude->first);
361 Completion.Includes.push_back(std::move(Include));
362 } else
363 log("Failed to generate include insertion edits for adding header "
364 "(FileURI='{0}', IncludeHeader='{1}') into {2}: {3}",
365 C.IndexResult->CanonicalDeclaration.FileURI, Inc, FileName,
366 ToInclude.takeError());
367 }
368 // Prefer includes that do not need edits (i.e. already exist).
369 std::stable_partition(Completion.Includes.begin(),
370 Completion.Includes.end(),
371 [](const CodeCompletion::IncludeCandidate &I) {
372 return !I.Insertion.hasValue();
373 });
374 }
375
addclang::clangd::__anon4117fc060111::CodeCompletionBuilder376 void add(const CompletionCandidate &C, CodeCompletionString *SemaCCS) {
377 assert(bool(C.SemaResult) == bool(SemaCCS));
378 Bundled.emplace_back();
379 BundledEntry &S = Bundled.back();
380 if (C.SemaResult) {
381 bool IsPattern = C.SemaResult->Kind == CodeCompletionResult::RK_Pattern;
382 getSignature(*SemaCCS, &S.Signature, &S.SnippetSuffix,
383 &Completion.RequiredQualifier, IsPattern);
384 S.ReturnType = getReturnType(*SemaCCS);
385 } else if (C.IndexResult) {
386 S.Signature = std::string(C.IndexResult->Signature);
387 S.SnippetSuffix = std::string(C.IndexResult->CompletionSnippetSuffix);
388 S.ReturnType = std::string(C.IndexResult->ReturnType);
389 }
390 if (ExtractDocumentation && !Completion.Documentation) {
391 auto SetDoc = [&](llvm::StringRef Doc) {
392 if (!Doc.empty()) {
393 Completion.Documentation.emplace();
394 parseDocumentation(Doc, *Completion.Documentation);
395 }
396 };
397 if (C.IndexResult) {
398 SetDoc(C.IndexResult->Documentation);
399 } else if (C.SemaResult) {
400 SetDoc(getDocComment(*ASTCtx, *C.SemaResult,
401 /*CommentsFromHeader=*/false));
402 }
403 }
404 }
405
buildclang::clangd::__anon4117fc060111::CodeCompletionBuilder406 CodeCompletion build() {
407 Completion.ReturnType = summarizeReturnType();
408 Completion.Signature = summarizeSignature();
409 Completion.SnippetSuffix = summarizeSnippet();
410 Completion.BundleSize = Bundled.size();
411 return std::move(Completion);
412 }
413
414 private:
415 struct BundledEntry {
416 std::string SnippetSuffix;
417 std::string Signature;
418 std::string ReturnType;
419 };
420
421 // If all BundledEntries have the same value for a property, return it.
422 template <std::string BundledEntry::*Member>
onlyValueclang::clangd::__anon4117fc060111::CodeCompletionBuilder423 const std::string *onlyValue() const {
424 auto B = Bundled.begin(), E = Bundled.end();
425 for (auto I = B + 1; I != E; ++I)
426 if (I->*Member != B->*Member)
427 return nullptr;
428 return &(B->*Member);
429 }
430
onlyValueclang::clangd::__anon4117fc060111::CodeCompletionBuilder431 template <bool BundledEntry::*Member> const bool *onlyValue() const {
432 auto B = Bundled.begin(), E = Bundled.end();
433 for (auto I = B + 1; I != E; ++I)
434 if (I->*Member != B->*Member)
435 return nullptr;
436 return &(B->*Member);
437 }
438
summarizeReturnTypeclang::clangd::__anon4117fc060111::CodeCompletionBuilder439 std::string summarizeReturnType() const {
440 if (auto *RT = onlyValue<&BundledEntry::ReturnType>())
441 return *RT;
442 return "";
443 }
444
summarizeSnippetclang::clangd::__anon4117fc060111::CodeCompletionBuilder445 std::string summarizeSnippet() const {
446 if (IsUsingDeclaration)
447 return "";
448 // Suppress function argument snippets if args are already present.
449 if ((Completion.Kind == CompletionItemKind::Function ||
450 Completion.Kind == CompletionItemKind::Method ||
451 Completion.Kind == CompletionItemKind::Constructor) &&
452 NextTokenKind == tok::l_paren)
453 return "";
454 auto *Snippet = onlyValue<&BundledEntry::SnippetSuffix>();
455 if (!Snippet)
456 // All bundles are function calls.
457 // FIXME(ibiryukov): sometimes add template arguments to a snippet, e.g.
458 // we need to complete 'forward<$1>($0)'.
459 return "($0)";
460 if (EnableFunctionArgSnippets)
461 return *Snippet;
462
463 // Replace argument snippets with a simplified pattern.
464 if (Snippet->empty())
465 return "";
466 if (Completion.Kind == CompletionItemKind::Function ||
467 Completion.Kind == CompletionItemKind::Method) {
468 // Functions snippets can be of 2 types:
469 // - containing only function arguments, e.g.
470 // foo(${1:int p1}, ${2:int p2});
471 // We transform this pattern to '($0)' or '()'.
472 // - template arguments and function arguments, e.g.
473 // foo<${1:class}>(${2:int p1}).
474 // We transform this pattern to '<$1>()$0' or '<$0>()'.
475
476 bool EmptyArgs = llvm::StringRef(*Snippet).endswith("()");
477 if (Snippet->front() == '<')
478 return EmptyArgs ? "<$1>()$0" : "<$1>($0)";
479 if (Snippet->front() == '(')
480 return EmptyArgs ? "()" : "($0)";
481 return *Snippet; // Not an arg snippet?
482 }
483 // 'CompletionItemKind::Interface' matches template type aliases.
484 if (Completion.Kind == CompletionItemKind::Interface ||
485 Completion.Kind == CompletionItemKind::Class) {
486 if (Snippet->front() != '<')
487 return *Snippet; // Not an arg snippet?
488
489 // Classes and template using aliases can only have template arguments,
490 // e.g. Foo<${1:class}>.
491 if (llvm::StringRef(*Snippet).endswith("<>"))
492 return "<>"; // can happen with defaulted template arguments.
493 return "<$0>";
494 }
495 return *Snippet;
496 }
497
summarizeSignatureclang::clangd::__anon4117fc060111::CodeCompletionBuilder498 std::string summarizeSignature() const {
499 if (auto *Signature = onlyValue<&BundledEntry::Signature>())
500 return *Signature;
501 // All bundles are function calls.
502 return "(…)";
503 }
504
505 // ASTCtx can be nullptr if not run with sema.
506 ASTContext *ASTCtx;
507 CodeCompletion Completion;
508 llvm::SmallVector<BundledEntry, 1> Bundled;
509 bool ExtractDocumentation;
510 bool EnableFunctionArgSnippets;
511 // No snippets will be generated for using declarations and when the function
512 // arguments are already present.
513 bool IsUsingDeclaration;
514 tok::TokenKind NextTokenKind;
515 };
516
517 // Determine the symbol ID for a Sema code completion result, if possible.
getSymbolID(const CodeCompletionResult & R,const SourceManager & SM)518 SymbolID getSymbolID(const CodeCompletionResult &R, const SourceManager &SM) {
519 switch (R.Kind) {
520 case CodeCompletionResult::RK_Declaration:
521 case CodeCompletionResult::RK_Pattern: {
522 // Computing USR caches linkage, which may change after code completion.
523 if (hasUnstableLinkage(R.Declaration))
524 return {};
525 return clang::clangd::getSymbolID(R.Declaration);
526 }
527 case CodeCompletionResult::RK_Macro:
528 return clang::clangd::getSymbolID(R.Macro->getName(), R.MacroDefInfo, SM);
529 case CodeCompletionResult::RK_Keyword:
530 return {};
531 }
532 llvm_unreachable("unknown CodeCompletionResult kind");
533 }
534
535 // Scopes of the partial identifier we're trying to complete.
536 // It is used when we query the index for more completion results.
537 struct SpecifiedScope {
538 // The scopes we should look in, determined by Sema.
539 //
540 // If the qualifier was fully resolved, we look for completions in these
541 // scopes; if there is an unresolved part of the qualifier, it should be
542 // resolved within these scopes.
543 //
544 // Examples of qualified completion:
545 //
546 // "::vec" => {""}
547 // "using namespace std; ::vec^" => {"", "std::"}
548 // "namespace ns {using namespace std;} ns::^" => {"ns::", "std::"}
549 // "std::vec^" => {""} // "std" unresolved
550 //
551 // Examples of unqualified completion:
552 //
553 // "vec^" => {""}
554 // "using namespace std; vec^" => {"", "std::"}
555 // "using namespace std; namespace ns { vec^ }" => {"ns::", "std::", ""}
556 //
557 // "" for global namespace, "ns::" for normal namespace.
558 std::vector<std::string> AccessibleScopes;
559 // The full scope qualifier as typed by the user (without the leading "::").
560 // Set if the qualifier is not fully resolved by Sema.
561 llvm::Optional<std::string> UnresolvedQualifier;
562
563 // Construct scopes being queried in indexes. The results are deduplicated.
564 // This method format the scopes to match the index request representation.
scopesForIndexQueryclang::clangd::__anon4117fc060111::SpecifiedScope565 std::vector<std::string> scopesForIndexQuery() {
566 std::set<std::string> Results;
567 for (llvm::StringRef AS : AccessibleScopes)
568 Results.insert(
569 (AS + (UnresolvedQualifier ? *UnresolvedQualifier : "")).str());
570 return {Results.begin(), Results.end()};
571 }
572 };
573
574 // Get all scopes that will be queried in indexes and whether symbols from
575 // any scope is allowed. The first scope in the list is the preferred scope
576 // (e.g. enclosing namespace).
577 std::pair<std::vector<std::string>, bool>
getQueryScopes(CodeCompletionContext & CCContext,const Sema & CCSema,const CompletionPrefix & HeuristicPrefix,const CodeCompleteOptions & Opts)578 getQueryScopes(CodeCompletionContext &CCContext, const Sema &CCSema,
579 const CompletionPrefix &HeuristicPrefix,
580 const CodeCompleteOptions &Opts) {
581 SpecifiedScope Scopes;
582 for (auto *Context : CCContext.getVisitedContexts()) {
583 if (isa<TranslationUnitDecl>(Context))
584 Scopes.AccessibleScopes.push_back(""); // global namespace
585 else if (isa<NamespaceDecl>(Context))
586 Scopes.AccessibleScopes.push_back(printNamespaceScope(*Context));
587 }
588
589 const CXXScopeSpec *SemaSpecifier =
590 CCContext.getCXXScopeSpecifier().getValueOr(nullptr);
591 // Case 1: unqualified completion.
592 if (!SemaSpecifier) {
593 // Case 2 (exception): sema saw no qualifier, but there appears to be one!
594 // This can happen e.g. in incomplete macro expansions. Use heuristics.
595 if (!HeuristicPrefix.Qualifier.empty()) {
596 vlog("Sema said no scope specifier, but we saw {0} in the source code",
597 HeuristicPrefix.Qualifier);
598 StringRef SpelledSpecifier = HeuristicPrefix.Qualifier;
599 if (SpelledSpecifier.consume_front("::"))
600 Scopes.AccessibleScopes = {""};
601 Scopes.UnresolvedQualifier = std::string(SpelledSpecifier);
602 return {Scopes.scopesForIndexQuery(), false};
603 }
604 // The enclosing namespace must be first, it gets a quality boost.
605 std::vector<std::string> EnclosingAtFront;
606 std::string EnclosingScope = printNamespaceScope(*CCSema.CurContext);
607 EnclosingAtFront.push_back(EnclosingScope);
608 for (auto &S : Scopes.scopesForIndexQuery()) {
609 if (EnclosingScope != S)
610 EnclosingAtFront.push_back(std::move(S));
611 }
612 // Allow AllScopes completion as there is no explicit scope qualifier.
613 return {EnclosingAtFront, Opts.AllScopes};
614 }
615 // Case 3: sema saw and resolved a scope qualifier.
616 if (SemaSpecifier && SemaSpecifier->isValid())
617 return {Scopes.scopesForIndexQuery(), false};
618
619 // Case 4: There was a qualifier, and Sema didn't resolve it.
620 Scopes.AccessibleScopes.push_back(""); // Make sure global scope is included.
621 llvm::StringRef SpelledSpecifier = Lexer::getSourceText(
622 CharSourceRange::getCharRange(SemaSpecifier->getRange()),
623 CCSema.SourceMgr, clang::LangOptions());
624 if (SpelledSpecifier.consume_front("::"))
625 Scopes.AccessibleScopes = {""};
626 Scopes.UnresolvedQualifier = std::string(SpelledSpecifier);
627 // Sema excludes the trailing "::".
628 if (!Scopes.UnresolvedQualifier->empty())
629 *Scopes.UnresolvedQualifier += "::";
630
631 return {Scopes.scopesForIndexQuery(), false};
632 }
633
634 // Should we perform index-based completion in a context of the specified kind?
635 // FIXME: consider allowing completion, but restricting the result types.
contextAllowsIndex(enum CodeCompletionContext::Kind K)636 bool contextAllowsIndex(enum CodeCompletionContext::Kind K) {
637 switch (K) {
638 case CodeCompletionContext::CCC_TopLevel:
639 case CodeCompletionContext::CCC_ObjCInterface:
640 case CodeCompletionContext::CCC_ObjCImplementation:
641 case CodeCompletionContext::CCC_ObjCIvarList:
642 case CodeCompletionContext::CCC_ClassStructUnion:
643 case CodeCompletionContext::CCC_Statement:
644 case CodeCompletionContext::CCC_Expression:
645 case CodeCompletionContext::CCC_ObjCMessageReceiver:
646 case CodeCompletionContext::CCC_EnumTag:
647 case CodeCompletionContext::CCC_UnionTag:
648 case CodeCompletionContext::CCC_ClassOrStructTag:
649 case CodeCompletionContext::CCC_ObjCProtocolName:
650 case CodeCompletionContext::CCC_Namespace:
651 case CodeCompletionContext::CCC_Type:
652 case CodeCompletionContext::CCC_ParenthesizedExpression:
653 case CodeCompletionContext::CCC_ObjCInterfaceName:
654 case CodeCompletionContext::CCC_ObjCCategoryName:
655 case CodeCompletionContext::CCC_Symbol:
656 case CodeCompletionContext::CCC_SymbolOrNewName:
657 return true;
658 case CodeCompletionContext::CCC_OtherWithMacros:
659 case CodeCompletionContext::CCC_DotMemberAccess:
660 case CodeCompletionContext::CCC_ArrowMemberAccess:
661 case CodeCompletionContext::CCC_ObjCPropertyAccess:
662 case CodeCompletionContext::CCC_MacroName:
663 case CodeCompletionContext::CCC_MacroNameUse:
664 case CodeCompletionContext::CCC_PreprocessorExpression:
665 case CodeCompletionContext::CCC_PreprocessorDirective:
666 case CodeCompletionContext::CCC_SelectorName:
667 case CodeCompletionContext::CCC_TypeQualifiers:
668 case CodeCompletionContext::CCC_ObjCInstanceMessage:
669 case CodeCompletionContext::CCC_ObjCClassMessage:
670 case CodeCompletionContext::CCC_IncludedFile:
671 // FIXME: Provide identifier based completions for the following contexts:
672 case CodeCompletionContext::CCC_Other: // Be conservative.
673 case CodeCompletionContext::CCC_NaturalLanguage:
674 case CodeCompletionContext::CCC_Recovery:
675 case CodeCompletionContext::CCC_NewName:
676 return false;
677 }
678 llvm_unreachable("unknown code completion context");
679 }
680
isInjectedClass(const NamedDecl & D)681 static bool isInjectedClass(const NamedDecl &D) {
682 if (auto *R = dyn_cast_or_null<RecordDecl>(&D))
683 if (R->isInjectedClassName())
684 return true;
685 return false;
686 }
687
688 // Some member calls are excluded because they're so rarely useful.
isExcludedMember(const NamedDecl & D)689 static bool isExcludedMember(const NamedDecl &D) {
690 // Destructor completion is rarely useful, and works inconsistently.
691 // (s.^ completes ~string, but s.~st^ is an error).
692 if (D.getKind() == Decl::CXXDestructor)
693 return true;
694 // Injected name may be useful for A::foo(), but who writes A::A::foo()?
695 if (isInjectedClass(D))
696 return true;
697 // Explicit calls to operators are also rare.
698 auto NameKind = D.getDeclName().getNameKind();
699 if (NameKind == DeclarationName::CXXOperatorName ||
700 NameKind == DeclarationName::CXXLiteralOperatorName ||
701 NameKind == DeclarationName::CXXConversionFunctionName)
702 return true;
703 return false;
704 }
705
706 // The CompletionRecorder captures Sema code-complete output, including context.
707 // It filters out ignored results (but doesn't apply fuzzy-filtering yet).
708 // It doesn't do scoring or conversion to CompletionItem yet, as we want to
709 // merge with index results first.
710 // Generally the fields and methods of this object should only be used from
711 // within the callback.
712 struct CompletionRecorder : public CodeCompleteConsumer {
CompletionRecorderclang::clangd::__anon4117fc060111::CompletionRecorder713 CompletionRecorder(const CodeCompleteOptions &Opts,
714 llvm::unique_function<void()> ResultsCallback)
715 : CodeCompleteConsumer(Opts.getClangCompleteOpts()),
716 CCContext(CodeCompletionContext::CCC_Other), Opts(Opts),
717 CCAllocator(std::make_shared<GlobalCodeCompletionAllocator>()),
718 CCTUInfo(CCAllocator), ResultsCallback(std::move(ResultsCallback)) {
719 assert(this->ResultsCallback);
720 }
721
722 std::vector<CodeCompletionResult> Results;
723 CodeCompletionContext CCContext;
724 Sema *CCSema = nullptr; // Sema that created the results.
725 // FIXME: Sema is scary. Can we store ASTContext and Preprocessor, instead?
726
ProcessCodeCompleteResultsclang::clangd::__anon4117fc060111::CompletionRecorder727 void ProcessCodeCompleteResults(class Sema &S, CodeCompletionContext Context,
728 CodeCompletionResult *InResults,
729 unsigned NumResults) override final {
730 // Results from recovery mode are generally useless, and the callback after
731 // recovery (if any) is usually more interesting. To make sure we handle the
732 // future callback from sema, we just ignore all callbacks in recovery mode,
733 // as taking only results from recovery mode results in poor completion
734 // results.
735 // FIXME: in case there is no future sema completion callback after the
736 // recovery mode, we might still want to provide some results (e.g. trivial
737 // identifier-based completion).
738 if (Context.getKind() == CodeCompletionContext::CCC_Recovery) {
739 log("Code complete: Ignoring sema code complete callback with Recovery "
740 "context.");
741 return;
742 }
743 // If a callback is called without any sema result and the context does not
744 // support index-based completion, we simply skip it to give way to
745 // potential future callbacks with results.
746 if (NumResults == 0 && !contextAllowsIndex(Context.getKind()))
747 return;
748 if (CCSema) {
749 log("Multiple code complete callbacks (parser backtracked?). "
750 "Dropping results from context {0}, keeping results from {1}.",
751 getCompletionKindString(Context.getKind()),
752 getCompletionKindString(this->CCContext.getKind()));
753 return;
754 }
755 // Record the completion context.
756 CCSema = &S;
757 CCContext = Context;
758
759 // Retain the results we might want.
760 for (unsigned I = 0; I < NumResults; ++I) {
761 auto &Result = InResults[I];
762 // Class members that are shadowed by subclasses are usually noise.
763 if (Result.Hidden && Result.Declaration &&
764 Result.Declaration->isCXXClassMember())
765 continue;
766 if (!Opts.IncludeIneligibleResults &&
767 (Result.Availability == CXAvailability_NotAvailable ||
768 Result.Availability == CXAvailability_NotAccessible))
769 continue;
770 if (Result.Declaration &&
771 !Context.getBaseType().isNull() // is this a member-access context?
772 && isExcludedMember(*Result.Declaration))
773 continue;
774 // Skip injected class name when no class scope is not explicitly set.
775 // E.g. show injected A::A in `using A::A^` but not in "A^".
776 if (Result.Declaration && !Context.getCXXScopeSpecifier().hasValue() &&
777 isInjectedClass(*Result.Declaration))
778 continue;
779 // We choose to never append '::' to completion results in clangd.
780 Result.StartsNestedNameSpecifier = false;
781 Results.push_back(Result);
782 }
783 ResultsCallback();
784 }
785
getAllocatorclang::clangd::__anon4117fc060111::CompletionRecorder786 CodeCompletionAllocator &getAllocator() override { return *CCAllocator; }
getCodeCompletionTUInfoclang::clangd::__anon4117fc060111::CompletionRecorder787 CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
788
789 // Returns the filtering/sorting name for Result, which must be from Results.
790 // Returned string is owned by this recorder (or the AST).
getNameclang::clangd::__anon4117fc060111::CompletionRecorder791 llvm::StringRef getName(const CodeCompletionResult &Result) {
792 switch (Result.Kind) {
793 case CodeCompletionResult::RK_Declaration:
794 if (auto *ID = Result.Declaration->getIdentifier())
795 return ID->getName();
796 break;
797 case CodeCompletionResult::RK_Keyword:
798 return Result.Keyword;
799 case CodeCompletionResult::RK_Macro:
800 return Result.Macro->getName();
801 case CodeCompletionResult::RK_Pattern:
802 return Result.Pattern->getTypedText();
803 }
804 auto *CCS = codeCompletionString(Result);
805 return CCS->getTypedText();
806 }
807
808 // Build a CodeCompletion string for R, which must be from Results.
809 // The CCS will be owned by this recorder.
codeCompletionStringclang::clangd::__anon4117fc060111::CompletionRecorder810 CodeCompletionString *codeCompletionString(const CodeCompletionResult &R) {
811 // CodeCompletionResult doesn't seem to be const-correct. We own it, anyway.
812 return const_cast<CodeCompletionResult &>(R).CreateCodeCompletionString(
813 *CCSema, CCContext, *CCAllocator, CCTUInfo,
814 /*IncludeBriefComments=*/false);
815 }
816
817 private:
818 CodeCompleteOptions Opts;
819 std::shared_ptr<GlobalCodeCompletionAllocator> CCAllocator;
820 CodeCompletionTUInfo CCTUInfo;
821 llvm::unique_function<void()> ResultsCallback;
822 };
823
// One signature-help candidate together with the signals used to rank it.
struct ScoredSignature {
  // When set, requires documentation to be requested from the index with this
  // ID.
  llvm::Optional<SymbolID> IDForDoc;
  // The signature (label, parameters, documentation) to return to the client.
  SignatureInformation Signature;
  // Ranking signals (parameter counts, candidate kind) for this signature.
  SignatureQualitySignals Quality;
};
831
832 class SignatureHelpCollector final : public CodeCompleteConsumer {
833 public:
SignatureHelpCollector(const clang::CodeCompleteOptions & CodeCompleteOpts,const SymbolIndex * Index,SignatureHelp & SigHelp)834 SignatureHelpCollector(const clang::CodeCompleteOptions &CodeCompleteOpts,
835 const SymbolIndex *Index, SignatureHelp &SigHelp)
836 : CodeCompleteConsumer(CodeCompleteOpts), SigHelp(SigHelp),
837 Allocator(std::make_shared<clang::GlobalCodeCompletionAllocator>()),
838 CCTUInfo(Allocator), Index(Index) {}
839
ProcessOverloadCandidates(Sema & S,unsigned CurrentArg,OverloadCandidate * Candidates,unsigned NumCandidates,SourceLocation OpenParLoc)840 void ProcessOverloadCandidates(Sema &S, unsigned CurrentArg,
841 OverloadCandidate *Candidates,
842 unsigned NumCandidates,
843 SourceLocation OpenParLoc) override {
844 assert(!OpenParLoc.isInvalid());
845 SourceManager &SrcMgr = S.getSourceManager();
846 OpenParLoc = SrcMgr.getFileLoc(OpenParLoc);
847 if (SrcMgr.isInMainFile(OpenParLoc))
848 SigHelp.argListStart = sourceLocToPosition(SrcMgr, OpenParLoc);
849 else
850 elog("Location oustide main file in signature help: {0}",
851 OpenParLoc.printToString(SrcMgr));
852
853 std::vector<ScoredSignature> ScoredSignatures;
854 SigHelp.signatures.reserve(NumCandidates);
855 ScoredSignatures.reserve(NumCandidates);
856 // FIXME(rwols): How can we determine the "active overload candidate"?
857 // Right now the overloaded candidates seem to be provided in a "best fit"
858 // order, so I'm not too worried about this.
859 SigHelp.activeSignature = 0;
860 assert(CurrentArg <= (unsigned)std::numeric_limits<int>::max() &&
861 "too many arguments");
862 SigHelp.activeParameter = static_cast<int>(CurrentArg);
863 for (unsigned I = 0; I < NumCandidates; ++I) {
864 OverloadCandidate Candidate = Candidates[I];
865 // We want to avoid showing instantiated signatures, because they may be
866 // long in some cases (e.g. when 'T' is substituted with 'std::string', we
867 // would get 'std::basic_string<char>').
868 if (auto *Func = Candidate.getFunction()) {
869 if (auto *Pattern = Func->getTemplateInstantiationPattern())
870 Candidate = OverloadCandidate(Pattern);
871 }
872
873 const auto *CCS = Candidate.CreateSignatureString(
874 CurrentArg, S, *Allocator, CCTUInfo, true);
875 assert(CCS && "Expected the CodeCompletionString to be non-null");
876 ScoredSignatures.push_back(processOverloadCandidate(
877 Candidate, *CCS,
878 Candidate.getFunction()
879 ? getDeclComment(S.getASTContext(), *Candidate.getFunction())
880 : ""));
881 }
882
883 // Sema does not load the docs from the preamble, so we need to fetch extra
884 // docs from the index instead.
885 llvm::DenseMap<SymbolID, std::string> FetchedDocs;
886 if (Index) {
887 LookupRequest IndexRequest;
888 for (const auto &S : ScoredSignatures) {
889 if (!S.IDForDoc)
890 continue;
891 IndexRequest.IDs.insert(*S.IDForDoc);
892 }
893 Index->lookup(IndexRequest, [&](const Symbol &S) {
894 if (!S.Documentation.empty())
895 FetchedDocs[S.ID] = std::string(S.Documentation);
896 });
897 log("SigHelp: requested docs for {0} symbols from the index, got {1} "
898 "symbols with non-empty docs in the response",
899 IndexRequest.IDs.size(), FetchedDocs.size());
900 }
901
902 llvm::sort(ScoredSignatures, [](const ScoredSignature &L,
903 const ScoredSignature &R) {
904 // Ordering follows:
905 // - Less number of parameters is better.
906 // - Function is better than FunctionType which is better than
907 // Function Template.
908 // - High score is better.
909 // - Shorter signature is better.
910 // - Alphabetically smaller is better.
911 if (L.Quality.NumberOfParameters != R.Quality.NumberOfParameters)
912 return L.Quality.NumberOfParameters < R.Quality.NumberOfParameters;
913 if (L.Quality.NumberOfOptionalParameters !=
914 R.Quality.NumberOfOptionalParameters)
915 return L.Quality.NumberOfOptionalParameters <
916 R.Quality.NumberOfOptionalParameters;
917 if (L.Quality.Kind != R.Quality.Kind) {
918 using OC = CodeCompleteConsumer::OverloadCandidate;
919 switch (L.Quality.Kind) {
920 case OC::CK_Function:
921 return true;
922 case OC::CK_FunctionType:
923 return R.Quality.Kind != OC::CK_Function;
924 case OC::CK_FunctionTemplate:
925 return false;
926 }
927 llvm_unreachable("Unknown overload candidate type.");
928 }
929 if (L.Signature.label.size() != R.Signature.label.size())
930 return L.Signature.label.size() < R.Signature.label.size();
931 return L.Signature.label < R.Signature.label;
932 });
933
934 for (auto &SS : ScoredSignatures) {
935 auto IndexDocIt =
936 SS.IDForDoc ? FetchedDocs.find(*SS.IDForDoc) : FetchedDocs.end();
937 if (IndexDocIt != FetchedDocs.end())
938 SS.Signature.documentation = IndexDocIt->second;
939
940 SigHelp.signatures.push_back(std::move(SS.Signature));
941 }
942 }
943
getAllocator()944 GlobalCodeCompletionAllocator &getAllocator() override { return *Allocator; }
945
getCodeCompletionTUInfo()946 CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
947
948 private:
processParameterChunk(llvm::StringRef ChunkText,SignatureInformation & Signature) const949 void processParameterChunk(llvm::StringRef ChunkText,
950 SignatureInformation &Signature) const {
951 // (!) this is O(n), should still be fast compared to building ASTs.
952 unsigned ParamStartOffset = lspLength(Signature.label);
953 unsigned ParamEndOffset = ParamStartOffset + lspLength(ChunkText);
954 // A piece of text that describes the parameter that corresponds to
955 // the code-completion location within a function call, message send,
956 // macro invocation, etc.
957 Signature.label += ChunkText;
958 ParameterInformation Info;
959 Info.labelOffsets.emplace(ParamStartOffset, ParamEndOffset);
960 // FIXME: only set 'labelOffsets' when all clients migrate out of it.
961 Info.labelString = std::string(ChunkText);
962
963 Signature.parameters.push_back(std::move(Info));
964 }
965
processOptionalChunk(const CodeCompletionString & CCS,SignatureInformation & Signature,SignatureQualitySignals & Signal) const966 void processOptionalChunk(const CodeCompletionString &CCS,
967 SignatureInformation &Signature,
968 SignatureQualitySignals &Signal) const {
969 for (const auto &Chunk : CCS) {
970 switch (Chunk.Kind) {
971 case CodeCompletionString::CK_Optional:
972 assert(Chunk.Optional &&
973 "Expected the optional code completion string to be non-null.");
974 processOptionalChunk(*Chunk.Optional, Signature, Signal);
975 break;
976 case CodeCompletionString::CK_VerticalSpace:
977 break;
978 case CodeCompletionString::CK_CurrentParameter:
979 case CodeCompletionString::CK_Placeholder:
980 processParameterChunk(Chunk.Text, Signature);
981 Signal.NumberOfOptionalParameters++;
982 break;
983 default:
984 Signature.label += Chunk.Text;
985 break;
986 }
987 }
988 }
989
990 // FIXME(ioeric): consider moving CodeCompletionString logic here to
991 // CompletionString.h.
processOverloadCandidate(const OverloadCandidate & Candidate,const CodeCompletionString & CCS,llvm::StringRef DocComment) const992 ScoredSignature processOverloadCandidate(const OverloadCandidate &Candidate,
993 const CodeCompletionString &CCS,
994 llvm::StringRef DocComment) const {
995 SignatureInformation Signature;
996 SignatureQualitySignals Signal;
997 const char *ReturnType = nullptr;
998
999 Signature.documentation = formatDocumentation(CCS, DocComment);
1000 Signal.Kind = Candidate.getKind();
1001
1002 for (const auto &Chunk : CCS) {
1003 switch (Chunk.Kind) {
1004 case CodeCompletionString::CK_ResultType:
1005 // A piece of text that describes the type of an entity or,
1006 // for functions and methods, the return type.
1007 assert(!ReturnType && "Unexpected CK_ResultType");
1008 ReturnType = Chunk.Text;
1009 break;
1010 case CodeCompletionString::CK_CurrentParameter:
1011 case CodeCompletionString::CK_Placeholder:
1012 processParameterChunk(Chunk.Text, Signature);
1013 Signal.NumberOfParameters++;
1014 break;
1015 case CodeCompletionString::CK_Optional: {
1016 // The rest of the parameters are defaulted/optional.
1017 assert(Chunk.Optional &&
1018 "Expected the optional code completion string to be non-null.");
1019 processOptionalChunk(*Chunk.Optional, Signature, Signal);
1020 break;
1021 }
1022 case CodeCompletionString::CK_VerticalSpace:
1023 break;
1024 default:
1025 Signature.label += Chunk.Text;
1026 break;
1027 }
1028 }
1029 if (ReturnType) {
1030 Signature.label += " -> ";
1031 Signature.label += ReturnType;
1032 }
1033 dlog("Signal for {0}: {1}", Signature, Signal);
1034 ScoredSignature Result;
1035 Result.Signature = std::move(Signature);
1036 Result.Quality = Signal;
1037 const FunctionDecl *Func = Candidate.getFunction();
1038 if (Func && Result.Signature.documentation.empty()) {
1039 // Computing USR caches linkage, which may change after code completion.
1040 if (!hasUnstableLinkage(Func))
1041 Result.IDForDoc = clangd::getSymbolID(Func);
1042 }
1043 return Result;
1044 }
1045
1046 SignatureHelp &SigHelp;
1047 std::shared_ptr<clang::GlobalCodeCompletionAllocator> Allocator;
1048 CodeCompletionTUInfo CCTUInfo;
1049 const SymbolIndex *Index;
1050 }; // SignatureHelpCollector
1051
// Bundles the inputs semaCodeComplete() needs to run Sema at a given point.
struct SemaCompleteInput {
  // Path of the file being completed; used as the completion location's file
  // name and as the name of the contents buffer.
  PathRef FileName;
  // Offset of the completion point within ParseInput.Contents.
  size_t Offset;
  // Preamble to reuse (unless completing inside the preamble region).
  const PreambleData &Preamble;
  // If set, applied to the compiler invocation before the compiler instance
  // is prepared.
  const llvm::Optional<PreamblePatch> Patch;
  // Compile command, file contents and filesystem for the parse.
  const ParseInputs &ParseInput;
};
1059
loadMainFilePreambleMacros(const Preprocessor & PP,const PreambleData & Preamble)1060 void loadMainFilePreambleMacros(const Preprocessor &PP,
1061 const PreambleData &Preamble) {
1062 // The ExternalPreprocessorSource has our macros, if we know where to look.
1063 // We can read all the macros using PreambleMacros->ReadDefinedMacros(),
1064 // but this includes transitively included files, so may deserialize a lot.
1065 ExternalPreprocessorSource *PreambleMacros = PP.getExternalSource();
1066 // As we have the names of the macros, we can look up their IdentifierInfo
1067 // and then use this to load just the macros we want.
1068 IdentifierInfoLookup *PreambleIdentifiers =
1069 PP.getIdentifierTable().getExternalIdentifierLookup();
1070 if (!PreambleIdentifiers || !PreambleMacros)
1071 return;
1072 for (const auto &MacroName : Preamble.Macros.Names)
1073 if (auto *II = PreambleIdentifiers->get(MacroName.getKey()))
1074 if (II->isOutOfDate())
1075 PreambleMacros->updateOutOfDateIdentifier(*II);
1076 }
1077
1078 // Invokes Sema code completion on a file.
1079 // If \p Includes is set, it will be updated based on the compiler invocation.
semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,const clang::CodeCompleteOptions & Options,const SemaCompleteInput & Input,IncludeStructure * Includes=nullptr)1080 bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
1081 const clang::CodeCompleteOptions &Options,
1082 const SemaCompleteInput &Input,
1083 IncludeStructure *Includes = nullptr) {
1084 trace::Span Tracer("Sema completion");
1085
1086 IgnoreDiagnostics IgnoreDiags;
1087 auto CI = buildCompilerInvocation(Input.ParseInput, IgnoreDiags);
1088 if (!CI) {
1089 elog("Couldn't create CompilerInvocation");
1090 return false;
1091 }
1092 auto &FrontendOpts = CI->getFrontendOpts();
1093 FrontendOpts.SkipFunctionBodies = true;
1094 // Disable typo correction in Sema.
1095 CI->getLangOpts()->SpellChecking = false;
1096 // Code completion won't trigger in delayed template bodies.
1097 // This is on-by-default in windows to allow parsing SDK headers; we're only
1098 // disabling it for the main-file (not preamble).
1099 CI->getLangOpts()->DelayedTemplateParsing = false;
1100 // Setup code completion.
1101 FrontendOpts.CodeCompleteOpts = Options;
1102 FrontendOpts.CodeCompletionAt.FileName = std::string(Input.FileName);
1103 std::tie(FrontendOpts.CodeCompletionAt.Line,
1104 FrontendOpts.CodeCompletionAt.Column) =
1105 offsetToClangLineColumn(Input.ParseInput.Contents, Input.Offset);
1106
1107 std::unique_ptr<llvm::MemoryBuffer> ContentsBuffer =
1108 llvm::MemoryBuffer::getMemBufferCopy(Input.ParseInput.Contents,
1109 Input.FileName);
1110 // The diagnostic options must be set before creating a CompilerInstance.
1111 CI->getDiagnosticOpts().IgnoreWarnings = true;
1112 // We reuse the preamble whether it's valid or not. This is a
1113 // correctness/performance tradeoff: building without a preamble is slow, and
1114 // completion is latency-sensitive.
1115 // However, if we're completing *inside* the preamble section of the draft,
1116 // overriding the preamble will break sema completion. Fortunately we can just
1117 // skip all includes in this case; these completions are really simple.
1118 PreambleBounds PreambleRegion =
1119 ComputePreambleBounds(*CI->getLangOpts(), *ContentsBuffer, 0);
1120 bool CompletingInPreamble = PreambleRegion.Size > Input.Offset;
1121 if (Input.Patch)
1122 Input.Patch->apply(*CI);
1123 // NOTE: we must call BeginSourceFile after prepareCompilerInstance. Otherwise
1124 // the remapped buffers do not get freed.
1125 llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS =
1126 Input.ParseInput.TFS->view(Input.ParseInput.CompileCommand.Directory);
1127 if (Input.Preamble.StatCache)
1128 VFS = Input.Preamble.StatCache->getConsumingFS(std::move(VFS));
1129 auto Clang = prepareCompilerInstance(
1130 std::move(CI), !CompletingInPreamble ? &Input.Preamble.Preamble : nullptr,
1131 std::move(ContentsBuffer), std::move(VFS), IgnoreDiags);
1132 Clang->getPreprocessorOpts().SingleFileParseMode = CompletingInPreamble;
1133 Clang->setCodeCompletionConsumer(Consumer.release());
1134
1135 SyntaxOnlyAction Action;
1136 if (!Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0])) {
1137 log("BeginSourceFile() failed when running codeComplete for {0}",
1138 Input.FileName);
1139 return false;
1140 }
1141 // Macros can be defined within the preamble region of the main file.
1142 // They don't fall nicely into our index/Sema dichotomy:
1143 // - they're not indexed for completion (they're not available across files)
1144 // - but Sema code complete won't see them: as part of the preamble, they're
1145 // deserialized only when mentioned.
1146 // Force them to be deserialized so SemaCodeComplete sees them.
1147 loadMainFilePreambleMacros(Clang->getPreprocessor(), Input.Preamble);
1148 if (Includes)
1149 Clang->getPreprocessor().addPPCallbacks(
1150 collectIncludeStructureCallback(Clang->getSourceManager(), Includes));
1151 if (llvm::Error Err = Action.Execute()) {
1152 log("Execute() failed when running codeComplete for {0}: {1}",
1153 Input.FileName, toString(std::move(Err)));
1154 return false;
1155 }
1156 Action.EndSourceFile();
1157
1158 return true;
1159 }
1160
1161 // Should we allow index completions in the specified context?
allowIndex(CodeCompletionContext & CC)1162 bool allowIndex(CodeCompletionContext &CC) {
1163 if (!contextAllowsIndex(CC.getKind()))
1164 return false;
1165 // We also avoid ClassName::bar (but allow namespace::bar).
1166 auto Scope = CC.getCXXScopeSpecifier();
1167 if (!Scope)
1168 return true;
1169 NestedNameSpecifier *NameSpec = (*Scope)->getScopeRep();
1170 if (!NameSpec)
1171 return true;
1172 // We only query the index when qualifier is a namespace.
1173 // If it's a class, we rely solely on sema completions.
1174 switch (NameSpec->getKind()) {
1175 case NestedNameSpecifier::Global:
1176 case NestedNameSpecifier::Namespace:
1177 case NestedNameSpecifier::NamespaceAlias:
1178 return true;
1179 case NestedNameSpecifier::Super:
1180 case NestedNameSpecifier::TypeSpec:
1181 case NestedNameSpecifier::TypeSpecWithTemplate:
1182 // Unresolved inside a template.
1183 case NestedNameSpecifier::Identifier:
1184 return false;
1185 }
1186 llvm_unreachable("invalid NestedNameSpecifier kind");
1187 }
1188
startAsyncFuzzyFind(const SymbolIndex & Index,const FuzzyFindRequest & Req)1189 std::future<SymbolSlab> startAsyncFuzzyFind(const SymbolIndex &Index,
1190 const FuzzyFindRequest &Req) {
1191 return runAsync<SymbolSlab>([&Index, Req]() {
1192 trace::Span Tracer("Async fuzzyFind");
1193 SymbolSlab::Builder Syms;
1194 Index.fuzzyFind(Req, [&Syms](const Symbol &Sym) { Syms.insert(Sym); });
1195 return std::move(Syms).build();
1196 });
1197 }
1198
1199 // Creates a `FuzzyFindRequest` based on the cached index request from the
1200 // last completion, if any, and the speculated completion filter text in the
1201 // source code.
speculativeFuzzyFindRequestForCompletion(FuzzyFindRequest CachedReq,const CompletionPrefix & HeuristicPrefix)1202 FuzzyFindRequest speculativeFuzzyFindRequestForCompletion(
1203 FuzzyFindRequest CachedReq, const CompletionPrefix &HeuristicPrefix) {
1204 CachedReq.Query = std::string(HeuristicPrefix.Name);
1205 return CachedReq;
1206 }
1207
1208 // Runs Sema-based (AST) and Index-based completion, returns merged results.
1209 //
1210 // There are a few tricky considerations:
1211 // - the AST provides information needed for the index query (e.g. which
1212 // namespaces to search in). So Sema must start first.
1213 // - we only want to return the top results (Opts.Limit).
1214 // Building CompletionItems for everything else is wasteful, so we want to
1215 // preserve the "native" format until we're done with scoring.
1216 // - the data underlying Sema completion items is owned by the AST and various
1217 // other arenas, which must stay alive for us to build CompletionItems.
1218 // - we may get duplicate results from Sema and the Index, we need to merge.
1219 //
1220 // So we start Sema completion first, and do all our work in its callback.
1221 // We use the Sema context information to query the index.
1222 // Then we merge the two result sets, producing items that are Sema/Index/Both.
1223 // These items are scored, and the top N are synthesized into the LSP response.
1224 // Finally, we can clean up the data structures created by Sema completion.
1225 //
1226 // Main collaborators are:
1227 // - semaCodeComplete sets up the compiler machinery to run code completion.
1228 // - CompletionRecorder captures Sema completion results, including context.
1229 // - SymbolIndex (Opts.Index) provides index completion results as Symbols
1230 // - CompletionCandidates are the result of merging Sema and Index results.
1231 // Each candidate points to an underlying CodeCompletionResult (Sema), a
1232 // Symbol (Index), or both. It computes the result quality score.
1233 // CompletionCandidate also does conversion to CompletionItem (at the end).
1234 // - FuzzyMatcher scores how the candidate matches the partial identifier.
1235 // This score is combined with the result quality score for the final score.
1236 // - TopN determines the results with the best score.
1237 class CodeCompleteFlow {
1238 PathRef FileName;
1239 IncludeStructure Includes; // Complete once the compiler runs.
1240 SpeculativeFuzzyFind *SpecFuzzyFind; // Can be nullptr.
1241 const CodeCompleteOptions &Opts;
1242
1243 // Sema takes ownership of Recorder. Recorder is valid until Sema cleanup.
1244 CompletionRecorder *Recorder = nullptr;
1245 CodeCompletionContext::Kind CCContextKind = CodeCompletionContext::CCC_Other;
1246 bool IsUsingDeclaration = false;
1247 // The snippets will not be generated if the token following completion
1248 // location is an opening parenthesis (tok::l_paren) because this would add
1249 // extra parenthesis.
1250 tok::TokenKind NextTokenKind = tok::eof;
1251 // Counters for logging.
1252 int NSema = 0, NIndex = 0, NSemaAndIndex = 0, NIdent = 0;
1253 bool Incomplete = false; // Would more be available with a higher limit?
1254 CompletionPrefix HeuristicPrefix;
1255 llvm::Optional<FuzzyMatcher> Filter; // Initialized once Sema runs.
1256 Range ReplacedRange;
1257 std::vector<std::string> QueryScopes; // Initialized once Sema runs.
1258 // Initialized once QueryScopes is initialized, if there are scopes.
1259 llvm::Optional<ScopeDistance> ScopeProximity;
1260 llvm::Optional<OpaqueType> PreferredType; // Initialized once Sema runs.
1261 // Whether to query symbols from any scope. Initialized once Sema runs.
1262 bool AllScopes = false;
1263 llvm::StringSet<> ContextWords;
1264 // Include-insertion and proximity scoring rely on the include structure.
1265 // This is available after Sema has run.
1266 llvm::Optional<IncludeInserter> Inserter; // Available during runWithSema.
1267 llvm::Optional<URIDistance> FileProximity; // Initialized once Sema runs.
1268 /// Speculative request based on the cached request and the filter text before
1269 /// the cursor.
1270 /// Initialized right before sema run. This is only set if `SpecFuzzyFind` is
1271 /// set and contains a cached request.
1272 llvm::Optional<FuzzyFindRequest> SpecReq;
1273
1274 public:
// A CodeCompleteFlow object is only useful for calling run() exactly once.
// Includes is copied into the flow; Opts is kept by reference and
// SpecFuzzyFind by pointer (it may be null).
CodeCompleteFlow(PathRef FileName, const IncludeStructure &Includes,
                 SpeculativeFuzzyFind *SpecFuzzyFind,
                 const CodeCompleteOptions &Opts)
    : FileName(FileName), Includes(Includes), SpecFuzzyFind(SpecFuzzyFind),
      Opts(Opts) {}
1281
run(const SemaCompleteInput & SemaCCInput)1282 CodeCompleteResult run(const SemaCompleteInput &SemaCCInput) && {
1283 trace::Span Tracer("CodeCompleteFlow");
1284 HeuristicPrefix = guessCompletionPrefix(SemaCCInput.ParseInput.Contents,
1285 SemaCCInput.Offset);
1286 populateContextWords(SemaCCInput.ParseInput.Contents);
1287 if (Opts.Index && SpecFuzzyFind && SpecFuzzyFind->CachedReq.hasValue()) {
1288 assert(!SpecFuzzyFind->Result.valid());
1289 SpecReq = speculativeFuzzyFindRequestForCompletion(
1290 *SpecFuzzyFind->CachedReq, HeuristicPrefix);
1291 SpecFuzzyFind->Result = startAsyncFuzzyFind(*Opts.Index, *SpecReq);
1292 }
1293
1294 // We run Sema code completion first. It builds an AST and calculates:
1295 // - completion results based on the AST.
1296 // - partial identifier and context. We need these for the index query.
1297 CodeCompleteResult Output;
1298 auto RecorderOwner = std::make_unique<CompletionRecorder>(Opts, [&]() {
1299 assert(Recorder && "Recorder is not set");
1300 CCContextKind = Recorder->CCContext.getKind();
1301 IsUsingDeclaration = Recorder->CCContext.isUsingDeclaration();
1302 auto Style = getFormatStyleForFile(SemaCCInput.FileName,
1303 SemaCCInput.ParseInput.Contents,
1304 *SemaCCInput.ParseInput.TFS);
1305 const auto NextToken = Lexer::findNextToken(
1306 Recorder->CCSema->getPreprocessor().getCodeCompletionLoc(),
1307 Recorder->CCSema->getSourceManager(), Recorder->CCSema->LangOpts);
1308 if (NextToken)
1309 NextTokenKind = NextToken->getKind();
1310 // If preprocessor was run, inclusions from preprocessor callback should
1311 // already be added to Includes.
1312 Inserter.emplace(
1313 SemaCCInput.FileName, SemaCCInput.ParseInput.Contents, Style,
1314 SemaCCInput.ParseInput.CompileCommand.Directory,
1315 &Recorder->CCSema->getPreprocessor().getHeaderSearchInfo());
1316 for (const auto &Inc : Includes.MainFileIncludes)
1317 Inserter->addExisting(Inc);
1318
1319 // Most of the cost of file proximity is in initializing the FileDistance
1320 // structures based on the observed includes, once per query. Conceptually
1321 // that happens here (though the per-URI-scheme initialization is lazy).
1322 // The per-result proximity scoring is (amortized) very cheap.
1323 FileDistanceOptions ProxOpts{}; // Use defaults.
1324 const auto &SM = Recorder->CCSema->getSourceManager();
1325 llvm::StringMap<SourceParams> ProxSources;
1326 for (auto &Entry : Includes.includeDepth(
1327 SM.getFileEntryForID(SM.getMainFileID())->getName())) {
1328 auto &Source = ProxSources[Entry.getKey()];
1329 Source.Cost = Entry.getValue() * ProxOpts.IncludeCost;
1330 // Symbols near our transitive includes are good, but only consider
1331 // things in the same directory or below it. Otherwise there can be
1332 // many false positives.
1333 if (Entry.getValue() > 0)
1334 Source.MaxUpTraversals = 1;
1335 }
1336 FileProximity.emplace(ProxSources, ProxOpts);
1337
1338 Output = runWithSema();
1339 Inserter.reset(); // Make sure this doesn't out-live Clang.
1340 SPAN_ATTACH(Tracer, "sema_completion_kind",
1341 getCompletionKindString(CCContextKind));
1342 log("Code complete: sema context {0}, query scopes [{1}] (AnyScope={2}), "
1343 "expected type {3}{4}",
1344 getCompletionKindString(CCContextKind),
1345 llvm::join(QueryScopes.begin(), QueryScopes.end(), ","), AllScopes,
1346 PreferredType ? Recorder->CCContext.getPreferredType().getAsString()
1347 : "<none>",
1348 IsUsingDeclaration ? ", inside using declaration" : "");
1349 });
1350
1351 Recorder = RecorderOwner.get();
1352
1353 semaCodeComplete(std::move(RecorderOwner), Opts.getClangCompleteOpts(),
1354 SemaCCInput, &Includes);
1355 logResults(Output, Tracer);
1356 return Output;
1357 }
1358
logResults(const CodeCompleteResult & Output,const trace::Span & Tracer)1359 void logResults(const CodeCompleteResult &Output, const trace::Span &Tracer) {
1360 SPAN_ATTACH(Tracer, "sema_results", NSema);
1361 SPAN_ATTACH(Tracer, "index_results", NIndex);
1362 SPAN_ATTACH(Tracer, "merged_results", NSemaAndIndex);
1363 SPAN_ATTACH(Tracer, "identifier_results", NIdent);
1364 SPAN_ATTACH(Tracer, "returned_results", int64_t(Output.Completions.size()));
1365 SPAN_ATTACH(Tracer, "incomplete", Output.HasMore);
1366 log("Code complete: {0} results from Sema, {1} from Index, "
1367 "{2} matched, {3} from identifiers, {4} returned{5}.",
1368 NSema, NIndex, NSemaAndIndex, NIdent, Output.Completions.size(),
1369 Output.HasMore ? " (incomplete)" : "");
1370 assert(!Opts.Limit || Output.Completions.size() <= Opts.Limit);
1371 // We don't assert that isIncomplete means we hit a limit.
1372 // Indexes may choose to impose their own limits even if we don't have one.
1373 }
1374
runWithoutSema(llvm::StringRef Content,size_t Offset,const ThreadsafeFS & TFS)1375 CodeCompleteResult runWithoutSema(llvm::StringRef Content, size_t Offset,
1376 const ThreadsafeFS &TFS) && {
1377 trace::Span Tracer("CodeCompleteWithoutSema");
1378 // Fill in fields normally set by runWithSema()
1379 HeuristicPrefix = guessCompletionPrefix(Content, Offset);
1380 populateContextWords(Content);
1381 CCContextKind = CodeCompletionContext::CCC_Recovery;
1382 IsUsingDeclaration = false;
1383 Filter = FuzzyMatcher(HeuristicPrefix.Name);
1384 auto Pos = offsetToPosition(Content, Offset);
1385 ReplacedRange.start = ReplacedRange.end = Pos;
1386 ReplacedRange.start.character -= HeuristicPrefix.Name.size();
1387
1388 llvm::StringMap<SourceParams> ProxSources;
1389 ProxSources[FileName].Cost = 0;
1390 FileProximity.emplace(ProxSources);
1391
1392 auto Style = getFormatStyleForFile(FileName, Content, TFS);
1393 // This will only insert verbatim headers.
1394 Inserter.emplace(FileName, Content, Style,
1395 /*BuildDir=*/"", /*HeaderSearchInfo=*/nullptr);
1396
1397 auto Identifiers = collectIdentifiers(Content, Style);
1398 std::vector<RawIdentifier> IdentifierResults;
1399 for (const auto &IDAndCount : Identifiers) {
1400 RawIdentifier ID;
1401 ID.Name = IDAndCount.first();
1402 ID.References = IDAndCount.second;
1403 // Avoid treating typed filter as an identifier.
1404 if (ID.Name == HeuristicPrefix.Name)
1405 --ID.References;
1406 if (ID.References > 0)
1407 IdentifierResults.push_back(std::move(ID));
1408 }
1409
1410 // Simplified version of getQueryScopes():
1411 // - accessible scopes are determined heuristically.
1412 // - all-scopes query if no qualifier was typed (and it's allowed).
1413 SpecifiedScope Scopes;
1414 Scopes.AccessibleScopes = visibleNamespaces(
1415 Content.take_front(Offset), format::getFormattingLangOpts(Style));
1416 for (std::string &S : Scopes.AccessibleScopes)
1417 if (!S.empty())
1418 S.append("::"); // visibleNamespaces doesn't include trailing ::.
1419 if (HeuristicPrefix.Qualifier.empty())
1420 AllScopes = Opts.AllScopes;
1421 else if (HeuristicPrefix.Qualifier.startswith("::")) {
1422 Scopes.AccessibleScopes = {""};
1423 Scopes.UnresolvedQualifier =
1424 std::string(HeuristicPrefix.Qualifier.drop_front(2));
1425 } else
1426 Scopes.UnresolvedQualifier = std::string(HeuristicPrefix.Qualifier);
1427 // First scope is the (modified) enclosing scope.
1428 QueryScopes = Scopes.scopesForIndexQuery();
1429 ScopeProximity.emplace(QueryScopes);
1430
1431 SymbolSlab IndexResults = Opts.Index ? queryIndex() : SymbolSlab();
1432
1433 CodeCompleteResult Output = toCodeCompleteResult(mergeResults(
1434 /*SemaResults=*/{}, IndexResults, IdentifierResults));
1435 Output.RanParser = false;
1436 logResults(Output, Tracer);
1437 return Output;
1438 }
1439
1440 private:
populateContextWords(llvm::StringRef Content)1441 void populateContextWords(llvm::StringRef Content) {
1442 // Take last 3 lines before the completion point.
1443 unsigned RangeEnd = HeuristicPrefix.Qualifier.begin() - Content.data(),
1444 RangeBegin = RangeEnd;
1445 for (size_t I = 0; I < 3 && RangeBegin > 0; ++I) {
1446 auto PrevNL = Content.rfind('\n', RangeBegin);
1447 if (PrevNL == StringRef::npos) {
1448 RangeBegin = 0;
1449 break;
1450 }
1451 RangeBegin = PrevNL;
1452 }
1453
1454 ContextWords = collectWords(Content.slice(RangeBegin, RangeEnd));
1455 dlog("Completion context words: {0}",
1456 llvm::join(ContextWords.keys(), ", "));
1457 }
1458
1459 // This is called by run() once Sema code completion is done, but before the
1460 // Sema data structures are torn down. It does all the real work.
runWithSema()1461 CodeCompleteResult runWithSema() {
1462 const auto &CodeCompletionRange = CharSourceRange::getCharRange(
1463 Recorder->CCSema->getPreprocessor().getCodeCompletionTokenRange());
1464 // When we are getting completions with an empty identifier, for example
1465 // std::vector<int> asdf;
1466 // asdf.^;
1467 // Then the range will be invalid and we will be doing insertion, use
1468 // current cursor position in such cases as range.
1469 if (CodeCompletionRange.isValid()) {
1470 ReplacedRange = halfOpenToRange(Recorder->CCSema->getSourceManager(),
1471 CodeCompletionRange);
1472 } else {
1473 const auto &Pos = sourceLocToPosition(
1474 Recorder->CCSema->getSourceManager(),
1475 Recorder->CCSema->getPreprocessor().getCodeCompletionLoc());
1476 ReplacedRange.start = ReplacedRange.end = Pos;
1477 }
1478 Filter = FuzzyMatcher(
1479 Recorder->CCSema->getPreprocessor().getCodeCompletionFilter());
1480 std::tie(QueryScopes, AllScopes) = getQueryScopes(
1481 Recorder->CCContext, *Recorder->CCSema, HeuristicPrefix, Opts);
1482 if (!QueryScopes.empty())
1483 ScopeProximity.emplace(QueryScopes);
1484 PreferredType =
1485 OpaqueType::fromType(Recorder->CCSema->getASTContext(),
1486 Recorder->CCContext.getPreferredType());
1487 // Sema provides the needed context to query the index.
1488 // FIXME: in addition to querying for extra/overlapping symbols, we should
1489 // explicitly request symbols corresponding to Sema results.
1490 // We can use their signals even if the index can't suggest them.
1491 // We must copy index results to preserve them, but there are at most Limit.
1492 auto IndexResults = (Opts.Index && allowIndex(Recorder->CCContext))
1493 ? queryIndex()
1494 : SymbolSlab();
1495 trace::Span Tracer("Populate CodeCompleteResult");
1496 // Merge Sema and Index results, score them, and pick the winners.
1497 auto Top =
1498 mergeResults(Recorder->Results, IndexResults, /*Identifiers*/ {});
1499 return toCodeCompleteResult(Top);
1500 }
1501
1502 CodeCompleteResult
toCodeCompleteResult(const std::vector<ScoredBundle> & Scored)1503 toCodeCompleteResult(const std::vector<ScoredBundle> &Scored) {
1504 CodeCompleteResult Output;
1505
1506 // Convert the results to final form, assembling the expensive strings.
1507 for (auto &C : Scored) {
1508 Output.Completions.push_back(toCodeCompletion(C.first));
1509 Output.Completions.back().Score = C.second;
1510 Output.Completions.back().CompletionTokenRange = ReplacedRange;
1511 }
1512 Output.HasMore = Incomplete;
1513 Output.Context = CCContextKind;
1514 Output.CompletionRange = ReplacedRange;
1515 return Output;
1516 }
1517
queryIndex()1518 SymbolSlab queryIndex() {
1519 trace::Span Tracer("Query index");
1520 SPAN_ATTACH(Tracer, "limit", int64_t(Opts.Limit));
1521
1522 // Build the query.
1523 FuzzyFindRequest Req;
1524 if (Opts.Limit)
1525 Req.Limit = Opts.Limit;
1526 Req.Query = std::string(Filter->pattern());
1527 Req.RestrictForCodeCompletion = true;
1528 Req.Scopes = QueryScopes;
1529 Req.AnyScope = AllScopes;
1530 // FIXME: we should send multiple weighted paths here.
1531 Req.ProximityPaths.push_back(std::string(FileName));
1532 if (PreferredType)
1533 Req.PreferredTypes.push_back(std::string(PreferredType->raw()));
1534 vlog("Code complete: fuzzyFind({0:2})", toJSON(Req));
1535
1536 if (SpecFuzzyFind)
1537 SpecFuzzyFind->NewReq = Req;
1538 if (SpecFuzzyFind && SpecFuzzyFind->Result.valid() && (*SpecReq == Req)) {
1539 vlog("Code complete: speculative fuzzy request matches the actual index "
1540 "request. Waiting for the speculative index results.");
1541 SPAN_ATTACH(Tracer, "Speculative results", true);
1542
1543 trace::Span WaitSpec("Wait speculative results");
1544 return SpecFuzzyFind->Result.get();
1545 }
1546
1547 SPAN_ATTACH(Tracer, "Speculative results", false);
1548
1549 // Run the query against the index.
1550 SymbolSlab::Builder ResultsBuilder;
1551 if (Opts.Index->fuzzyFind(
1552 Req, [&](const Symbol &Sym) { ResultsBuilder.insert(Sym); }))
1553 Incomplete = true;
1554 return std::move(ResultsBuilder).build();
1555 }
1556
1557 // Merges Sema and Index results where possible, to form CompletionCandidates.
1558 // \p Identifiers is raw identifiers that can also be completion candidates.
1559 // Identifiers are not merged with results from index or sema.
1560 // Groups overloads if desired, to form CompletionCandidate::Bundles. The
1561 // bundles are scored and top results are returned, best to worst.
1562 std::vector<ScoredBundle>
mergeResults(const std::vector<CodeCompletionResult> & SemaResults,const SymbolSlab & IndexResults,const std::vector<RawIdentifier> & IdentifierResults)1563 mergeResults(const std::vector<CodeCompletionResult> &SemaResults,
1564 const SymbolSlab &IndexResults,
1565 const std::vector<RawIdentifier> &IdentifierResults) {
1566 trace::Span Tracer("Merge and score results");
1567 std::vector<CompletionCandidate::Bundle> Bundles;
1568 llvm::DenseMap<size_t, size_t> BundleLookup;
1569 auto AddToBundles = [&](const CodeCompletionResult *SemaResult,
1570 const Symbol *IndexResult,
1571 const RawIdentifier *IdentifierResult) {
1572 CompletionCandidate C;
1573 C.SemaResult = SemaResult;
1574 C.IndexResult = IndexResult;
1575 C.IdentifierResult = IdentifierResult;
1576 if (C.IndexResult) {
1577 C.Name = IndexResult->Name;
1578 C.RankedIncludeHeaders = getRankedIncludes(*C.IndexResult);
1579 } else if (C.SemaResult) {
1580 C.Name = Recorder->getName(*SemaResult);
1581 } else {
1582 assert(IdentifierResult);
1583 C.Name = IdentifierResult->Name;
1584 }
1585 if (auto OverloadSet = C.overloadSet(
1586 Opts, FileName, Inserter ? Inserter.getPointer() : nullptr)) {
1587 auto Ret = BundleLookup.try_emplace(OverloadSet, Bundles.size());
1588 if (Ret.second)
1589 Bundles.emplace_back();
1590 Bundles[Ret.first->second].push_back(std::move(C));
1591 } else {
1592 Bundles.emplace_back();
1593 Bundles.back().push_back(std::move(C));
1594 }
1595 };
1596 llvm::DenseSet<const Symbol *> UsedIndexResults;
1597 auto CorrespondingIndexResult =
1598 [&](const CodeCompletionResult &SemaResult) -> const Symbol * {
1599 if (auto SymID =
1600 getSymbolID(SemaResult, Recorder->CCSema->getSourceManager())) {
1601 auto I = IndexResults.find(SymID);
1602 if (I != IndexResults.end()) {
1603 UsedIndexResults.insert(&*I);
1604 return &*I;
1605 }
1606 }
1607 return nullptr;
1608 };
1609 // Emit all Sema results, merging them with Index results if possible.
1610 for (auto &SemaResult : SemaResults)
1611 AddToBundles(&SemaResult, CorrespondingIndexResult(SemaResult), nullptr);
1612 // Now emit any Index-only results.
1613 for (const auto &IndexResult : IndexResults) {
1614 if (UsedIndexResults.count(&IndexResult))
1615 continue;
1616 AddToBundles(/*SemaResult=*/nullptr, &IndexResult, nullptr);
1617 }
1618 // Emit identifier results.
1619 for (const auto &Ident : IdentifierResults)
1620 AddToBundles(/*SemaResult=*/nullptr, /*IndexResult=*/nullptr, &Ident);
1621 // We only keep the best N results at any time, in "native" format.
1622 TopN<ScoredBundle, ScoredBundleGreater> Top(
1623 Opts.Limit == 0 ? std::numeric_limits<size_t>::max() : Opts.Limit);
1624 for (auto &Bundle : Bundles)
1625 addCandidate(Top, std::move(Bundle));
1626 return std::move(Top).items();
1627 }
1628
fuzzyScore(const CompletionCandidate & C)1629 llvm::Optional<float> fuzzyScore(const CompletionCandidate &C) {
1630 // Macros can be very spammy, so we only support prefix completion.
1631 if (((C.SemaResult &&
1632 C.SemaResult->Kind == CodeCompletionResult::RK_Macro) ||
1633 (C.IndexResult &&
1634 C.IndexResult->SymInfo.Kind == index::SymbolKind::Macro)) &&
1635 !C.Name.startswith_lower(Filter->pattern()))
1636 return None;
1637 return Filter->match(C.Name);
1638 }
1639
1640 CodeCompletion::Scores
evaluateCompletion(const SymbolQualitySignals & Quality,const SymbolRelevanceSignals & Relevance)1641 evaluateCompletion(const SymbolQualitySignals &Quality,
1642 const SymbolRelevanceSignals &Relevance) {
1643 using RM = CodeCompleteOptions::CodeCompletionRankingModel;
1644 CodeCompletion::Scores Scores;
1645 switch (Opts.RankingModel) {
1646 case RM::Heuristics:
1647 Scores.Quality = Quality.evaluateHeuristics();
1648 Scores.Relevance = Relevance.evaluateHeuristics();
1649 Scores.Total =
1650 evaluateSymbolAndRelevance(Scores.Quality, Scores.Relevance);
1651 // NameMatch is in fact a multiplier on total score, so rescoring is
1652 // sound.
1653 Scores.ExcludingName = Relevance.NameMatch
1654 ? Scores.Total / Relevance.NameMatch
1655 : Scores.Quality;
1656 return Scores;
1657
1658 case RM::DecisionForest:
1659 DecisionForestScores DFScores = Opts.DecisionForestScorer(
1660 Quality, Relevance, Opts.DecisionForestBase);
1661 Scores.ExcludingName = DFScores.ExcludingName;
1662 Scores.Total = DFScores.Total;
1663 return Scores;
1664 }
1665 llvm_unreachable("Unhandled CodeCompletion ranking model.");
1666 }
1667
1668 // Scores a candidate and adds it to the TopN structure.
addCandidate(TopN<ScoredBundle,ScoredBundleGreater> & Candidates,CompletionCandidate::Bundle Bundle)1669 void addCandidate(TopN<ScoredBundle, ScoredBundleGreater> &Candidates,
1670 CompletionCandidate::Bundle Bundle) {
1671 SymbolQualitySignals Quality;
1672 SymbolRelevanceSignals Relevance;
1673 Relevance.Context = CCContextKind;
1674 Relevance.Name = Bundle.front().Name;
1675 Relevance.FilterLength = HeuristicPrefix.Name.size();
1676 Relevance.Query = SymbolRelevanceSignals::CodeComplete;
1677 Relevance.FileProximityMatch = FileProximity.getPointer();
1678 if (ScopeProximity)
1679 Relevance.ScopeProximityMatch = ScopeProximity.getPointer();
1680 if (PreferredType)
1681 Relevance.HadContextType = true;
1682 Relevance.ContextWords = &ContextWords;
1683
1684 auto &First = Bundle.front();
1685 if (auto FuzzyScore = fuzzyScore(First))
1686 Relevance.NameMatch = *FuzzyScore;
1687 else
1688 return;
1689 SymbolOrigin Origin = SymbolOrigin::Unknown;
1690 bool FromIndex = false;
1691 for (const auto &Candidate : Bundle) {
1692 if (Candidate.IndexResult) {
1693 Quality.merge(*Candidate.IndexResult);
1694 Relevance.merge(*Candidate.IndexResult);
1695 Origin |= Candidate.IndexResult->Origin;
1696 FromIndex = true;
1697 if (!Candidate.IndexResult->Type.empty())
1698 Relevance.HadSymbolType |= true;
1699 if (PreferredType &&
1700 PreferredType->raw() == Candidate.IndexResult->Type) {
1701 Relevance.TypeMatchesPreferred = true;
1702 }
1703 }
1704 if (Candidate.SemaResult) {
1705 Quality.merge(*Candidate.SemaResult);
1706 Relevance.merge(*Candidate.SemaResult);
1707 if (PreferredType) {
1708 if (auto CompletionType = OpaqueType::fromCompletionResult(
1709 Recorder->CCSema->getASTContext(), *Candidate.SemaResult)) {
1710 Relevance.HadSymbolType |= true;
1711 if (PreferredType == CompletionType)
1712 Relevance.TypeMatchesPreferred = true;
1713 }
1714 }
1715 Origin |= SymbolOrigin::AST;
1716 }
1717 if (Candidate.IdentifierResult) {
1718 Quality.References = Candidate.IdentifierResult->References;
1719 Relevance.Scope = SymbolRelevanceSignals::FileScope;
1720 Origin |= SymbolOrigin::Identifier;
1721 }
1722 }
1723
1724 CodeCompletion::Scores Scores = evaluateCompletion(Quality, Relevance);
1725 if (Opts.RecordCCResult)
1726 Opts.RecordCCResult(toCodeCompletion(Bundle), Quality, Relevance,
1727 Scores.Total);
1728
1729 dlog("CodeComplete: {0} ({1}) = {2}\n{3}{4}\n", First.Name,
1730 llvm::to_string(Origin), Scores.Total, llvm::to_string(Quality),
1731 llvm::to_string(Relevance));
1732
1733 NSema += bool(Origin & SymbolOrigin::AST);
1734 NIndex += FromIndex;
1735 NSemaAndIndex += bool(Origin & SymbolOrigin::AST) && FromIndex;
1736 NIdent += bool(Origin & SymbolOrigin::Identifier);
1737 if (Candidates.push({std::move(Bundle), Scores}))
1738 Incomplete = true;
1739 }
1740
toCodeCompletion(const CompletionCandidate::Bundle & Bundle)1741 CodeCompletion toCodeCompletion(const CompletionCandidate::Bundle &Bundle) {
1742 llvm::Optional<CodeCompletionBuilder> Builder;
1743 for (const auto &Item : Bundle) {
1744 CodeCompletionString *SemaCCS =
1745 Item.SemaResult ? Recorder->codeCompletionString(*Item.SemaResult)
1746 : nullptr;
1747 if (!Builder)
1748 Builder.emplace(Recorder ? &Recorder->CCSema->getASTContext() : nullptr,
1749 Item, SemaCCS, QueryScopes, *Inserter, FileName,
1750 CCContextKind, Opts, IsUsingDeclaration, NextTokenKind);
1751 else
1752 Builder->add(Item, SemaCCS);
1753 }
1754 return Builder->build();
1755 }
1756 };
1757
1758 } // namespace
1759
getClangCompleteOpts() const1760 clang::CodeCompleteOptions CodeCompleteOptions::getClangCompleteOpts() const {
1761 clang::CodeCompleteOptions Result;
1762 Result.IncludeCodePatterns = EnableSnippets && IncludeCodePatterns;
1763 Result.IncludeMacros = IncludeMacros;
1764 Result.IncludeGlobals = true;
1765 // We choose to include full comments and not do doxygen parsing in
1766 // completion.
1767 // FIXME: ideally, we should support doxygen in some form, e.g. do markdown
1768 // formatting of the comments.
1769 Result.IncludeBriefComments = false;
1770
1771 // When an is used, Sema is responsible for completing the main file,
1772 // the index can provide results from the preamble.
1773 // Tell Sema not to deserialize the preamble to look for results.
1774 Result.LoadExternal = !Index;
1775 Result.IncludeFixIts = IncludeFixIts;
1776
1777 return Result;
1778 }
1779
guessCompletionPrefix(llvm::StringRef Content,unsigned Offset)1780 CompletionPrefix guessCompletionPrefix(llvm::StringRef Content,
1781 unsigned Offset) {
1782 assert(Offset <= Content.size());
1783 StringRef Rest = Content.take_front(Offset);
1784 CompletionPrefix Result;
1785
1786 // Consume the unqualified name. We only handle ASCII characters.
1787 // isIdentifierBody will let us match "0invalid", but we don't mind.
1788 while (!Rest.empty() && isIdentifierBody(Rest.back()))
1789 Rest = Rest.drop_back();
1790 Result.Name = Content.slice(Rest.size(), Offset);
1791
1792 // Consume qualifiers.
1793 while (Rest.consume_back("::") && !Rest.endswith(":")) // reject ::::
1794 while (!Rest.empty() && isIdentifierBody(Rest.back()))
1795 Rest = Rest.drop_back();
1796 Result.Qualifier =
1797 Content.slice(Rest.size(), Result.Name.begin() - Content.begin());
1798
1799 return Result;
1800 }
1801
codeComplete(PathRef FileName,Position Pos,const PreambleData * Preamble,const ParseInputs & ParseInput,CodeCompleteOptions Opts,SpeculativeFuzzyFind * SpecFuzzyFind)1802 CodeCompleteResult codeComplete(PathRef FileName, Position Pos,
1803 const PreambleData *Preamble,
1804 const ParseInputs &ParseInput,
1805 CodeCompleteOptions Opts,
1806 SpeculativeFuzzyFind *SpecFuzzyFind) {
1807 auto Offset = positionToOffset(ParseInput.Contents, Pos);
1808 if (!Offset) {
1809 elog("Code completion position was invalid {0}", Offset.takeError());
1810 return CodeCompleteResult();
1811 }
1812 auto Flow = CodeCompleteFlow(
1813 FileName, Preamble ? Preamble->Includes : IncludeStructure(),
1814 SpecFuzzyFind, Opts);
1815 return (!Preamble || Opts.RunParser == CodeCompleteOptions::NeverParse)
1816 ? std::move(Flow).runWithoutSema(ParseInput.Contents, *Offset,
1817 *ParseInput.TFS)
1818 : std::move(Flow).run({FileName, *Offset, *Preamble,
1819 // We want to serve code completions with
1820 // low latency, so don't bother patching.
1821 /*PreamblePatch=*/llvm::None, ParseInput});
1822 }
1823
signatureHelp(PathRef FileName,Position Pos,const PreambleData & Preamble,const ParseInputs & ParseInput)1824 SignatureHelp signatureHelp(PathRef FileName, Position Pos,
1825 const PreambleData &Preamble,
1826 const ParseInputs &ParseInput) {
1827 auto Offset = positionToOffset(ParseInput.Contents, Pos);
1828 if (!Offset) {
1829 elog("Signature help position was invalid {0}", Offset.takeError());
1830 return SignatureHelp();
1831 }
1832 SignatureHelp Result;
1833 clang::CodeCompleteOptions Options;
1834 Options.IncludeGlobals = false;
1835 Options.IncludeMacros = false;
1836 Options.IncludeCodePatterns = false;
1837 Options.IncludeBriefComments = false;
1838 semaCodeComplete(
1839 std::make_unique<SignatureHelpCollector>(Options, ParseInput.Index,
1840 Result),
1841 Options,
1842 {FileName, *Offset, Preamble,
1843 PreamblePatch::create(FileName, ParseInput, Preamble), ParseInput});
1844 return Result;
1845 }
1846
isIndexedForCodeCompletion(const NamedDecl & ND,ASTContext & ASTCtx)1847 bool isIndexedForCodeCompletion(const NamedDecl &ND, ASTContext &ASTCtx) {
1848 auto InTopLevelScope = [](const NamedDecl &ND) {
1849 switch (ND.getDeclContext()->getDeclKind()) {
1850 case Decl::TranslationUnit:
1851 case Decl::Namespace:
1852 case Decl::LinkageSpec:
1853 return true;
1854 default:
1855 break;
1856 };
1857 return false;
1858 };
1859 // We only complete symbol's name, which is the same as the name of the
1860 // *primary* template in case of template specializations.
1861 if (isExplicitTemplateSpecialization(&ND))
1862 return false;
1863
1864 if (InTopLevelScope(ND))
1865 return true;
1866
1867 if (const auto *EnumDecl = dyn_cast<clang::EnumDecl>(ND.getDeclContext()))
1868 return InTopLevelScope(*EnumDecl) && !EnumDecl->isScoped();
1869
1870 return false;
1871 }
1872
1873 // FIXME: find a home for this (that can depend on both markup and Protocol).
renderDoc(const markup::Document & Doc,MarkupKind Kind)1874 static MarkupContent renderDoc(const markup::Document &Doc, MarkupKind Kind) {
1875 MarkupContent Result;
1876 Result.kind = Kind;
1877 switch (Kind) {
1878 case MarkupKind::PlainText:
1879 Result.value.append(Doc.asPlainText());
1880 break;
1881 case MarkupKind::Markdown:
1882 Result.value.append(Doc.asMarkdown());
1883 break;
1884 }
1885 return Result;
1886 }
1887
render(const CodeCompleteOptions & Opts) const1888 CompletionItem CodeCompletion::render(const CodeCompleteOptions &Opts) const {
1889 CompletionItem LSP;
1890 const auto *InsertInclude = Includes.empty() ? nullptr : &Includes[0];
1891 LSP.label = ((InsertInclude && InsertInclude->Insertion)
1892 ? Opts.IncludeIndicator.Insert
1893 : Opts.IncludeIndicator.NoInsert) +
1894 (Opts.ShowOrigins ? "[" + llvm::to_string(Origin) + "]" : "") +
1895 RequiredQualifier + Name + Signature;
1896
1897 LSP.kind = Kind;
1898 LSP.detail = BundleSize > 1
1899 ? std::string(llvm::formatv("[{0} overloads]", BundleSize))
1900 : ReturnType;
1901 LSP.deprecated = Deprecated;
1902 // Combine header information and documentation in LSP `documentation` field.
1903 // This is not quite right semantically, but tends to display well in editors.
1904 if (InsertInclude || Documentation) {
1905 markup::Document Doc;
1906 if (InsertInclude)
1907 Doc.addParagraph().appendText("From ").appendCode(InsertInclude->Header);
1908 if (Documentation)
1909 Doc.append(*Documentation);
1910 LSP.documentation = renderDoc(Doc, Opts.DocumentationFormat);
1911 }
1912 LSP.sortText = sortText(Score.Total, Name);
1913 LSP.filterText = Name;
1914 LSP.textEdit = {CompletionTokenRange, RequiredQualifier + Name};
1915 // Merge continuous additionalTextEdits into main edit. The main motivation
1916 // behind this is to help LSP clients, it seems most of them are confused when
1917 // they are provided with additionalTextEdits that are consecutive to main
1918 // edit.
1919 // Note that we store additional text edits from back to front in a line. That
1920 // is mainly to help LSP clients again, so that changes do not effect each
1921 // other.
1922 for (const auto &FixIt : FixIts) {
1923 if (FixIt.range.end == LSP.textEdit->range.start) {
1924 LSP.textEdit->newText = FixIt.newText + LSP.textEdit->newText;
1925 LSP.textEdit->range.start = FixIt.range.start;
1926 } else {
1927 LSP.additionalTextEdits.push_back(FixIt);
1928 }
1929 }
1930 if (Opts.EnableSnippets)
1931 LSP.textEdit->newText += SnippetSuffix;
1932
1933 // FIXME(kadircet): Do not even fill insertText after making sure textEdit is
1934 // compatible with most of the editors.
1935 LSP.insertText = LSP.textEdit->newText;
1936 LSP.insertTextFormat = Opts.EnableSnippets ? InsertTextFormat::Snippet
1937 : InsertTextFormat::PlainText;
1938 if (InsertInclude && InsertInclude->Insertion)
1939 LSP.additionalTextEdits.push_back(*InsertInclude->Insertion);
1940
1941 LSP.score = Score.ExcludingName;
1942
1943 return LSP;
1944 }
1945
operator <<(llvm::raw_ostream & OS,const CodeCompletion & C)1946 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const CodeCompletion &C) {
1947 // For now just lean on CompletionItem.
1948 return OS << C.render(CodeCompleteOptions());
1949 }
1950
operator <<(llvm::raw_ostream & OS,const CodeCompleteResult & R)1951 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
1952 const CodeCompleteResult &R) {
1953 OS << "CodeCompleteResult: " << R.Completions.size() << (R.HasMore ? "+" : "")
1954 << " (" << getCompletionKindString(R.Context) << ")"
1955 << " items:\n";
1956 for (const auto &C : R.Completions)
1957 OS << C << "\n";
1958 return OS;
1959 }
1960
1961 // Heuristically detect whether the `Line` is an unterminated include filename.
isIncludeFile(llvm::StringRef Line)1962 bool isIncludeFile(llvm::StringRef Line) {
1963 Line = Line.ltrim();
1964 if (!Line.consume_front("#"))
1965 return false;
1966 Line = Line.ltrim();
1967 if (!(Line.consume_front("include_next") || Line.consume_front("include") ||
1968 Line.consume_front("import")))
1969 return false;
1970 Line = Line.ltrim();
1971 if (Line.consume_front("<"))
1972 return Line.count('>') == 0;
1973 if (Line.consume_front("\""))
1974 return Line.count('"') == 0;
1975 return false;
1976 }
1977
allowImplicitCompletion(llvm::StringRef Content,unsigned Offset)1978 bool allowImplicitCompletion(llvm::StringRef Content, unsigned Offset) {
1979 // Look at last line before completion point only.
1980 Content = Content.take_front(Offset);
1981 auto Pos = Content.rfind('\n');
1982 if (Pos != llvm::StringRef::npos)
1983 Content = Content.substr(Pos + 1);
1984
1985 // Complete after scope operators.
1986 if (Content.endswith(".") || Content.endswith("->") || Content.endswith("::"))
1987 return true;
1988 // Complete after `#include <` and #include `<foo/`.
1989 if ((Content.endswith("<") || Content.endswith("\"") ||
1990 Content.endswith("/")) &&
1991 isIncludeFile(Content))
1992 return true;
1993
1994 // Complete words. Give non-ascii characters the benefit of the doubt.
1995 return !Content.empty() &&
1996 (isIdentifierBody(Content.back()) || !llvm::isASCII(Content.back()));
1997 }
1998
1999 } // namespace clangd
2000 } // namespace clang
2001