1 //===--- SymbolCollector.cpp -------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "SymbolCollector.h"
10 #include "AST.h"
11 #include "CanonicalIncludes.h"
12 #include "CodeComplete.h"
13 #include "CodeCompletionStrings.h"
14 #include "ExpectedTypes.h"
15 #include "SourceCode.h"
16 #include "SymbolLocation.h"
17 #include "URI.h"
18 #include "index/Relation.h"
19 #include "index/SymbolID.h"
20 #include "support/Logger.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/DeclBase.h"
23 #include "clang/AST/DeclCXX.h"
24 #include "clang/AST/DeclObjC.h"
25 #include "clang/AST/DeclTemplate.h"
26 #include "clang/Basic/SourceLocation.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/Basic/Specifiers.h"
29 #include "clang/Index/IndexSymbol.h"
30 #include "clang/Index/IndexingAction.h"
31 #include "clang/Index/USRGeneration.h"
32 #include "clang/Lex/Preprocessor.h"
33 #include "clang/Tooling/Syntax/Tokens.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/MemoryBuffer.h"
37 #include "llvm/Support/Path.h"
38
39 namespace clang {
40 namespace clangd {
41 namespace {
42
43 /// If \p ND is a template specialization, returns the described template.
44 /// Otherwise, returns \p ND.
getTemplateOrThis(const NamedDecl & ND)45 const NamedDecl &getTemplateOrThis(const NamedDecl &ND) {
46 if (auto T = ND.getDescribedTemplate())
47 return *T;
48 return ND;
49 }
50
51 // Returns a URI of \p Path. Firstly, this makes the \p Path absolute using the
52 // current working directory of the given SourceManager if the Path is not an
53 // absolute path. If failed, this resolves relative paths against \p FallbackDir
54 // to get an absolute path. Then, this tries creating an URI for the absolute
55 // path with schemes specified in \p Opts. This returns an URI with the first
56 // working scheme, if there is any; otherwise, this returns None.
57 //
58 // The Path can be a path relative to the build directory, or retrieved from
59 // the SourceManager.
toURI(const SourceManager & SM,llvm::StringRef Path,const SymbolCollector::Options & Opts)60 std::string toURI(const SourceManager &SM, llvm::StringRef Path,
61 const SymbolCollector::Options &Opts) {
62 llvm::SmallString<128> AbsolutePath(Path);
63 if (auto File = SM.getFileManager().getFile(Path)) {
64 if (auto CanonPath = getCanonicalPath(*File, SM)) {
65 AbsolutePath = *CanonPath;
66 }
67 }
68 // We don't perform is_absolute check in an else branch because makeAbsolute
69 // might return a relative path on some InMemoryFileSystems.
70 if (!llvm::sys::path::is_absolute(AbsolutePath) && !Opts.FallbackDir.empty())
71 llvm::sys::fs::make_absolute(Opts.FallbackDir, AbsolutePath);
72 llvm::sys::path::remove_dots(AbsolutePath, /*remove_dot_dot=*/true);
73 return URI::create(AbsolutePath).toString();
74 }
75
76 // Checks whether the decl is a private symbol in a header generated by
77 // protobuf compiler.
78 // FIXME: make filtering extensible when there are more use cases for symbol
79 // filters.
isPrivateProtoDecl(const NamedDecl & ND)80 bool isPrivateProtoDecl(const NamedDecl &ND) {
81 const auto &SM = ND.getASTContext().getSourceManager();
82 if (!isProtoFile(nameLocation(ND, SM), SM))
83 return false;
84
85 // ND without identifier can be operators.
86 if (ND.getIdentifier() == nullptr)
87 return false;
88 auto Name = ND.getIdentifier()->getName();
89 if (!Name.contains('_'))
90 return false;
91 // Nested proto entities (e.g. Message::Nested) have top-level decls
92 // that shouldn't be used (Message_Nested). Ignore them completely.
93 // The nested entities are dangling type aliases, we may want to reconsider
94 // including them in the future.
95 // For enum constants, SOME_ENUM_CONSTANT is not private and should be
96 // indexed. Outer_INNER is private. This heuristic relies on naming style, it
97 // will include OUTER_INNER and exclude some_enum_constant.
98 // FIXME: the heuristic relies on naming style (i.e. no underscore in
99 // user-defined names) and can be improved.
100 return (ND.getKind() != Decl::EnumConstant) || llvm::any_of(Name, islower);
101 }
102
103 // We only collect #include paths for symbols that are suitable for global code
104 // completion, except for namespaces since #include path for a namespace is hard
105 // to define.
shouldCollectIncludePath(index::SymbolKind Kind)106 bool shouldCollectIncludePath(index::SymbolKind Kind) {
107 using SK = index::SymbolKind;
108 switch (Kind) {
109 case SK::Macro:
110 case SK::Enum:
111 case SK::Struct:
112 case SK::Class:
113 case SK::Union:
114 case SK::TypeAlias:
115 case SK::Using:
116 case SK::Function:
117 case SK::Variable:
118 case SK::EnumConstant:
119 return true;
120 default:
121 return false;
122 }
123 }
124
125 // Return the symbol range of the token at \p TokLoc.
126 std::pair<SymbolLocation::Position, SymbolLocation::Position>
getTokenRange(SourceLocation TokLoc,const SourceManager & SM,const LangOptions & LangOpts)127 getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
128 const LangOptions &LangOpts) {
129 auto CreatePosition = [&SM](SourceLocation Loc) {
130 auto LSPLoc = sourceLocToPosition(SM, Loc);
131 SymbolLocation::Position Pos;
132 Pos.setLine(LSPLoc.line);
133 Pos.setColumn(LSPLoc.character);
134 return Pos;
135 };
136
137 auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts);
138 return {CreatePosition(TokLoc),
139 CreatePosition(TokLoc.getLocWithOffset(TokenLength))};
140 }
141
142 // Return the symbol location of the token at \p TokLoc.
143 llvm::Optional<SymbolLocation>
getTokenLocation(SourceLocation TokLoc,const SourceManager & SM,const SymbolCollector::Options & Opts,const clang::LangOptions & LangOpts,std::string & FileURIStorage)144 getTokenLocation(SourceLocation TokLoc, const SourceManager &SM,
145 const SymbolCollector::Options &Opts,
146 const clang::LangOptions &LangOpts,
147 std::string &FileURIStorage) {
148 auto Path = SM.getFilename(TokLoc);
149 if (Path.empty())
150 return None;
151 FileURIStorage = toURI(SM, Path, Opts);
152 SymbolLocation Result;
153 Result.FileURI = FileURIStorage.c_str();
154 auto Range = getTokenRange(TokLoc, SM, LangOpts);
155 Result.Start = Range.first;
156 Result.End = Range.second;
157
158 return Result;
159 }
160
161 // Checks whether \p ND is a good candidate to be the *canonical* declaration of
162 // its symbol (e.g. a go-to-declaration target). This overrides the default of
163 // using Clang's canonical declaration, which is the first in the TU.
164 //
165 // Example: preferring a class declaration over its forward declaration.
isPreferredDeclaration(const NamedDecl & ND,index::SymbolRoleSet Roles)166 bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) {
167 const auto &SM = ND.getASTContext().getSourceManager();
168 if (isa<TagDecl>(ND))
169 return (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) &&
170 !isInsideMainFile(ND.getLocation(), SM);
171 if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(&ND))
172 return ID->isThisDeclarationADefinition();
173 if (const auto *PD = dyn_cast<ObjCProtocolDecl>(&ND))
174 return PD->isThisDeclarationADefinition();
175 return false;
176 }
177
toRefKind(index::SymbolRoleSet Roles,bool Spelled=false)178 RefKind toRefKind(index::SymbolRoleSet Roles, bool Spelled = false) {
179 RefKind Result = RefKind::Unknown;
180 if (Roles & static_cast<unsigned>(index::SymbolRole::Declaration))
181 Result |= RefKind::Declaration;
182 if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
183 Result |= RefKind::Definition;
184 if (Roles & static_cast<unsigned>(index::SymbolRole::Reference))
185 Result |= RefKind::Reference;
186 if (Spelled)
187 Result |= RefKind::Spelled;
188 return Result;
189 }
190
indexableRelation(const index::SymbolRelation & R)191 llvm::Optional<RelationKind> indexableRelation(const index::SymbolRelation &R) {
192 if (R.Roles & static_cast<unsigned>(index::SymbolRole::RelationBaseOf))
193 return RelationKind::BaseOf;
194 if (R.Roles & static_cast<unsigned>(index::SymbolRole::RelationOverrideOf))
195 return RelationKind::OverriddenBy;
196 return None;
197 }
198
199 } // namespace
200
SymbolCollector(Options Opts)201 SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
202
initialize(ASTContext & Ctx)203 void SymbolCollector::initialize(ASTContext &Ctx) {
204 ASTCtx = &Ctx;
205 CompletionAllocator = std::make_shared<GlobalCodeCompletionAllocator>();
206 CompletionTUInfo =
207 std::make_unique<CodeCompletionTUInfo>(CompletionAllocator);
208 }
209
shouldCollectSymbol(const NamedDecl & ND,const ASTContext & ASTCtx,const Options & Opts,bool IsMainFileOnly)210 bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND,
211 const ASTContext &ASTCtx,
212 const Options &Opts,
213 bool IsMainFileOnly) {
214 // Skip anonymous declarations, e.g (anonymous enum/class/struct).
215 if (ND.getDeclName().isEmpty())
216 return false;
217
218 // Skip main-file symbols if we are not collecting them.
219 if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
220 return false;
221
222 // Skip symbols in anonymous namespaces in header files.
223 if (!IsMainFileOnly && ND.isInAnonymousNamespace())
224 return false;
225
226 // We want most things but not "local" symbols such as symbols inside
227 // FunctionDecl, BlockDecl, ObjCMethodDecl and OMPDeclareReductionDecl.
228 // FIXME: Need a matcher for ExportDecl in order to include symbols declared
229 // within an export.
230 const auto *DeclCtx = ND.getDeclContext();
231 switch (DeclCtx->getDeclKind()) {
232 case Decl::TranslationUnit:
233 case Decl::Namespace:
234 case Decl::LinkageSpec:
235 case Decl::Enum:
236 case Decl::ObjCProtocol:
237 case Decl::ObjCInterface:
238 case Decl::ObjCCategory:
239 case Decl::ObjCCategoryImpl:
240 case Decl::ObjCImplementation:
241 break;
242 default:
243 // Record has a few derivations (e.g. CXXRecord, Class specialization), it's
244 // easier to cast.
245 if (!isa<RecordDecl>(DeclCtx))
246 return false;
247 }
248
249 // Avoid indexing internal symbols in protobuf generated headers.
250 if (isPrivateProtoDecl(ND))
251 return false;
252 return true;
253 }
254
255 // Always return true to continue indexing.
handleDeclOccurrence(const Decl * D,index::SymbolRoleSet Roles,llvm::ArrayRef<index::SymbolRelation> Relations,SourceLocation Loc,index::IndexDataConsumer::ASTNodeInfo ASTNode)256 bool SymbolCollector::handleDeclOccurrence(
257 const Decl *D, index::SymbolRoleSet Roles,
258 llvm::ArrayRef<index::SymbolRelation> Relations, SourceLocation Loc,
259 index::IndexDataConsumer::ASTNodeInfo ASTNode) {
260 assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
261 assert(CompletionAllocator && CompletionTUInfo);
262 assert(ASTNode.OrigD);
263 // Indexing API puts canonical decl into D, which might not have a valid
264 // source location for implicit/built-in decls. Fallback to original decl in
265 // such cases.
266 if (D->getLocation().isInvalid())
267 D = ASTNode.OrigD;
268 // If OrigD is an declaration associated with a friend declaration and it's
269 // not a definition, skip it. Note that OrigD is the occurrence that the
270 // collector is currently visiting.
271 if ((ASTNode.OrigD->getFriendObjectKind() !=
272 Decl::FriendObjectKind::FOK_None) &&
273 !(Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
274 return true;
275 // A declaration created for a friend declaration should not be used as the
276 // canonical declaration in the index. Use OrigD instead, unless we've already
277 // picked a replacement for D
278 if (D->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None)
279 D = CanonicalDecls.try_emplace(D, ASTNode.OrigD).first->second;
280 // Flag to mark that D should be considered canonical meaning its declaration
281 // will override any previous declaration for the Symbol.
282 bool DeclIsCanonical = false;
283 // Avoid treating ObjCImplementationDecl as a canonical declaration if it has
284 // a corresponding non-implicit and non-forward declared ObjcInterfaceDecl.
285 if (const auto *IID = dyn_cast<ObjCImplementationDecl>(D)) {
286 DeclIsCanonical = true;
287 if (const auto *CID = IID->getClassInterface())
288 if (const auto *DD = CID->getDefinition())
289 if (!DD->isImplicitInterfaceDecl())
290 D = DD;
291 }
292 // Avoid treating ObjCCategoryImplDecl as a canonical declaration in favor of
293 // its ObjCCategoryDecl if it has one.
294 if (const auto *CID = dyn_cast<ObjCCategoryImplDecl>(D)) {
295 DeclIsCanonical = true;
296 if (const auto *CD = CID->getCategoryDecl())
297 D = CD;
298 }
299 const NamedDecl *ND = dyn_cast<NamedDecl>(D);
300 if (!ND)
301 return true;
302
303 // Mark D as referenced if this is a reference coming from the main file.
304 // D may not be an interesting symbol, but it's cheaper to check at the end.
305 auto &SM = ASTCtx->getSourceManager();
306 if (Opts.CountReferences &&
307 (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
308 SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID())
309 ReferencedDecls.insert(ND);
310
311 auto ID = getSymbolID(ND);
312 if (!ID)
313 return true;
314
315 // ND is the canonical (i.e. first) declaration. If it's in the main file
316 // (which is not a header), then no public declaration was visible, so assume
317 // it's main-file only.
318 bool IsMainFileOnly =
319 SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) &&
320 !isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())->getName(),
321 ASTCtx->getLangOpts());
322 // In C, printf is a redecl of an implicit builtin! So check OrigD instead.
323 if (ASTNode.OrigD->isImplicit() ||
324 !shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly))
325 return true;
326
327 // Note: we need to process relations for all decl occurrences, including
328 // refs, because the indexing code only populates relations for specific
329 // occurrences. For example, RelationBaseOf is only populated for the
330 // occurrence inside the base-specifier.
331 processRelations(*ND, ID, Relations);
332
333 bool CollectRef = static_cast<bool>(Opts.RefFilter & toRefKind(Roles));
334 bool IsOnlyRef =
335 !(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) |
336 static_cast<unsigned>(index::SymbolRole::Definition)));
337
338 if (IsOnlyRef && !CollectRef)
339 return true;
340
341 // Unlike other fields, e.g. Symbols (which use spelling locations), we use
342 // file locations for references (as it aligns the behavior of clangd's
343 // AST-based xref).
344 // FIXME: we should try to use the file locations for other fields.
345 if (CollectRef &&
346 (!IsMainFileOnly || Opts.CollectMainFileRefs ||
347 ND->isExternallyVisible()) &&
348 !isa<NamespaceDecl>(ND) &&
349 (Opts.RefsInHeaders ||
350 SM.getFileID(SM.getFileLoc(Loc)) == SM.getMainFileID()))
351 DeclRefs[ND].push_back(
352 SymbolRef{SM.getFileLoc(Loc), Roles, ASTNode.Parent});
353 // Don't continue indexing if this is a mere reference.
354 if (IsOnlyRef)
355 return true;
356
357 // FIXME: ObjCPropertyDecl are not properly indexed here:
358 // - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is
359 // not a NamedDecl.
360 auto *OriginalDecl = dyn_cast<NamedDecl>(ASTNode.OrigD);
361 if (!OriginalDecl)
362 return true;
363
364 const Symbol *BasicSymbol = Symbols.find(ID);
365 if (isPreferredDeclaration(*OriginalDecl, Roles))
366 // If OriginalDecl is preferred, replace/create the existing canonical
367 // declaration (e.g. a class forward declaration). There should be at most
368 // one duplicate as we expect to see only one preferred declaration per
369 // TU, because in practice they are definitions.
370 BasicSymbol = addDeclaration(*OriginalDecl, std::move(ID), IsMainFileOnly);
371 else if (!BasicSymbol || DeclIsCanonical)
372 BasicSymbol = addDeclaration(*ND, std::move(ID), IsMainFileOnly);
373
374 if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
375 addDefinition(*OriginalDecl, *BasicSymbol);
376
377 return true;
378 }
379
handleMacros(const MainFileMacros & MacroRefsToIndex)380 void SymbolCollector::handleMacros(const MainFileMacros &MacroRefsToIndex) {
381 assert(PP.get());
382 const auto &SM = PP->getSourceManager();
383 const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
384 assert(MainFileEntry);
385
386 const auto MainFileURI = toURI(SM, MainFileEntry->getName(), Opts);
387 // Add macro references.
388 for (const auto &IDToRefs : MacroRefsToIndex.MacroRefs) {
389 for (const auto &Range : IDToRefs.second) {
390 Ref R;
391 R.Location.Start.setLine(Range.start.line);
392 R.Location.Start.setColumn(Range.start.character);
393 R.Location.End.setLine(Range.end.line);
394 R.Location.End.setColumn(Range.end.character);
395 R.Location.FileURI = MainFileURI.c_str();
396 // FIXME: Add correct RefKind information to MainFileMacros.
397 R.Kind = RefKind::Reference;
398 Refs.insert(IDToRefs.first, R);
399 }
400 }
401 }
402
handleMacroOccurrence(const IdentifierInfo * Name,const MacroInfo * MI,index::SymbolRoleSet Roles,SourceLocation Loc)403 bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name,
404 const MacroInfo *MI,
405 index::SymbolRoleSet Roles,
406 SourceLocation Loc) {
407 assert(PP.get());
408 // Builtin macros don't have useful locations and aren't needed in completion.
409 if (MI->isBuiltinMacro())
410 return true;
411
412 const auto &SM = PP->getSourceManager();
413 auto DefLoc = MI->getDefinitionLoc();
414 // Also avoid storing predefined macros like __DBL_MIN__.
415 if (SM.isWrittenInBuiltinFile(DefLoc))
416 return true;
417
418 auto ID = getSymbolID(Name->getName(), MI, SM);
419 if (!ID)
420 return true;
421
422 auto SpellingLoc = SM.getSpellingLoc(Loc);
423 bool IsMainFileOnly =
424 SM.isInMainFile(SM.getExpansionLoc(DefLoc)) &&
425 !isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())->getName(),
426 ASTCtx->getLangOpts());
427 // Do not store references to main-file macros.
428 if ((static_cast<unsigned>(Opts.RefFilter) & Roles) && !IsMainFileOnly &&
429 (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID()))
430 // FIXME: Populate container information for macro references.
431 MacroRefs[ID].push_back({Loc, Roles, /*Container=*/nullptr});
432
433 // Collect symbols.
434 if (!Opts.CollectMacro)
435 return true;
436
437 // Skip main-file macros if we are not collecting them.
438 if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
439 return false;
440
441 // Mark the macro as referenced if this is a reference coming from the main
442 // file. The macro may not be an interesting symbol, but it's cheaper to check
443 // at the end.
444 if (Opts.CountReferences &&
445 (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
446 SM.getFileID(SpellingLoc) == SM.getMainFileID())
447 ReferencedMacros.insert(Name);
448
449 // Don't continue indexing if this is a mere reference.
450 // FIXME: remove macro with ID if it is undefined.
451 if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) ||
452 Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
453 return true;
454
455 // Only collect one instance in case there are multiple.
456 if (Symbols.find(ID) != nullptr)
457 return true;
458
459 Symbol S;
460 S.ID = std::move(ID);
461 S.Name = Name->getName();
462 if (!IsMainFileOnly) {
463 S.Flags |= Symbol::IndexedForCodeCompletion;
464 S.Flags |= Symbol::VisibleOutsideFile;
465 }
466 S.SymInfo = index::getSymbolInfoForMacro(*MI);
467 S.Origin = Opts.Origin;
468 std::string FileURI;
469 // FIXME: use the result to filter out symbols.
470 shouldIndexFile(SM.getFileID(Loc));
471 if (auto DeclLoc =
472 getTokenLocation(DefLoc, SM, Opts, PP->getLangOpts(), FileURI))
473 S.CanonicalDeclaration = *DeclLoc;
474
475 CodeCompletionResult SymbolCompletion(Name);
476 const auto *CCS = SymbolCompletion.CreateCodeCompletionStringForMacro(
477 *PP, *CompletionAllocator, *CompletionTUInfo);
478 std::string Signature;
479 std::string SnippetSuffix;
480 getSignature(*CCS, &Signature, &SnippetSuffix);
481 S.Signature = Signature;
482 S.CompletionSnippetSuffix = SnippetSuffix;
483
484 IndexedMacros.insert(Name);
485 setIncludeLocation(S, DefLoc);
486 Symbols.insert(S);
487 return true;
488 }
489
processRelations(const NamedDecl & ND,const SymbolID & ID,ArrayRef<index::SymbolRelation> Relations)490 void SymbolCollector::processRelations(
491 const NamedDecl &ND, const SymbolID &ID,
492 ArrayRef<index::SymbolRelation> Relations) {
493 for (const auto &R : Relations) {
494 auto RKind = indexableRelation(R);
495 if (!RKind)
496 continue;
497 const Decl *Object = R.RelatedSymbol;
498
499 auto ObjectID = getSymbolID(Object);
500 if (!ObjectID)
501 continue;
502
503 // Record the relation.
504 // TODO: There may be cases where the object decl is not indexed for some
505 // reason. Those cases should probably be removed in due course, but for
506 // now there are two possible ways to handle it:
507 // (A) Avoid storing the relation in such cases.
508 // (B) Store it anyways. Clients will likely lookup() the SymbolID
509 // in the index and find nothing, but that's a situation they
510 // probably need to handle for other reasons anyways.
511 // We currently do (B) because it's simpler.
512 if (*RKind == RelationKind::BaseOf)
513 this->Relations.insert({ID, *RKind, ObjectID});
514 else if (*RKind == RelationKind::OverriddenBy)
515 this->Relations.insert({ObjectID, *RKind, ID});
516 }
517 }
518
setIncludeLocation(const Symbol & S,SourceLocation Loc)519 void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation Loc) {
520 if (Opts.CollectIncludePath)
521 if (shouldCollectIncludePath(S.SymInfo.Kind))
522 // Use the expansion location to get the #include header since this is
523 // where the symbol is exposed.
524 IncludeFiles[S.ID] =
525 PP->getSourceManager().getDecomposedExpansionLoc(Loc).first;
526 }
527
finish()528 void SymbolCollector::finish() {
529 // At the end of the TU, add 1 to the refcount of all referenced symbols.
530 auto IncRef = [this](const SymbolID &ID) {
531 if (const auto *S = Symbols.find(ID)) {
532 Symbol Inc = *S;
533 ++Inc.References;
534 Symbols.insert(Inc);
535 }
536 };
537 for (const NamedDecl *ND : ReferencedDecls) {
538 if (auto ID = getSymbolID(ND)) {
539 IncRef(ID);
540 }
541 }
542 if (Opts.CollectMacro) {
543 assert(PP);
544 // First, drop header guards. We can't identify these until EOF.
545 for (const IdentifierInfo *II : IndexedMacros) {
546 if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
547 if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
548 if (MI->isUsedForHeaderGuard())
549 Symbols.erase(ID);
550 }
551 // Now increment refcounts.
552 for (const IdentifierInfo *II : ReferencedMacros) {
553 if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
554 if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
555 IncRef(ID);
556 }
557 }
558 // Fill in IncludeHeaders.
559 // We delay this until end of TU so header guards are all resolved.
560 // Symbols in slabs aren't mutable, so insert() has to walk all the strings
561 // :-(
562 for (const auto &Entry : IncludeFiles)
563 if (const Symbol *S = Symbols.find(Entry.first)) {
564 if (auto Header = getIncludeHeader(*S, Entry.second)) {
565 Symbol NewSym = *S;
566 NewSym.IncludeHeaders.push_back({std::move(*Header), 1});
567 Symbols.insert(NewSym);
568 }
569 }
570
571 const auto &SM = ASTCtx->getSourceManager();
572 llvm::DenseMap<FileID, std::string> URICache;
573 auto GetURI = [&](FileID FID) -> llvm::Optional<std::string> {
574 auto Found = URICache.find(FID);
575 if (Found == URICache.end()) {
576 if (auto *FileEntry = SM.getFileEntryForID(FID)) {
577 auto FileURI = toURI(SM, FileEntry->getName(), Opts);
578 Found = URICache.insert({FID, FileURI}).first;
579 } else {
580 // Ignore cases where we can not find a corresponding file entry for
581 // given location, e.g. symbols formed via macro concatenation.
582 return None;
583 }
584 }
585 return Found->second;
586 };
587 auto CollectRef = [&](SymbolID ID, const SymbolRef &LocAndRole,
588 bool Spelled = false) {
589 auto FileID = SM.getFileID(LocAndRole.Loc);
590 // FIXME: use the result to filter out references.
591 shouldIndexFile(FileID);
592 if (auto FileURI = GetURI(FileID)) {
593 auto Range = getTokenRange(LocAndRole.Loc, SM, ASTCtx->getLangOpts());
594 Ref R;
595 R.Location.Start = Range.first;
596 R.Location.End = Range.second;
597 R.Location.FileURI = FileURI->c_str();
598 R.Kind = toRefKind(LocAndRole.Roles, Spelled);
599 R.Container = getSymbolID(LocAndRole.Container);
600 Refs.insert(ID, R);
601 }
602 };
603 // Populate Refs slab from MacroRefs.
604 // FIXME: All MacroRefs are marked as Spelled now, but this should be checked.
605 for (const auto &IDAndRefs : MacroRefs)
606 for (const auto &LocAndRole : IDAndRefs.second)
607 CollectRef(IDAndRefs.first, LocAndRole, /*Spelled=*/true);
608 // Populate Refs slab from DeclRefs.
609 llvm::DenseMap<FileID, std::vector<syntax::Token>> FilesToTokensCache;
610 for (auto &DeclAndRef : DeclRefs) {
611 if (auto ID = getSymbolID(DeclAndRef.first)) {
612 for (auto &LocAndRole : DeclAndRef.second) {
613 const auto FileID = SM.getFileID(LocAndRole.Loc);
614 // FIXME: It's better to use TokenBuffer by passing spelled tokens from
615 // the caller of SymbolCollector.
616 if (!FilesToTokensCache.count(FileID))
617 FilesToTokensCache[FileID] =
618 syntax::tokenize(FileID, SM, ASTCtx->getLangOpts());
619 llvm::ArrayRef<syntax::Token> Tokens = FilesToTokensCache[FileID];
620 // Check if the referenced symbol is spelled exactly the same way the
621 // corresponding NamedDecl is. If it is, mark this reference as spelled.
622 const auto *IdentifierToken =
623 spelledIdentifierTouching(LocAndRole.Loc, Tokens);
624 DeclarationName Name = DeclAndRef.first->getDeclName();
625 const auto NameKind = Name.getNameKind();
626 bool IsTargetKind = NameKind == DeclarationName::Identifier ||
627 NameKind == DeclarationName::CXXConstructorName;
628 bool Spelled = IdentifierToken && IsTargetKind &&
629 Name.getAsString() == IdentifierToken->text(SM);
630 CollectRef(ID, LocAndRole, Spelled);
631 }
632 }
633 }
634
635 ReferencedDecls.clear();
636 ReferencedMacros.clear();
637 DeclRefs.clear();
638 FilesToIndexCache.clear();
639 HeaderIsSelfContainedCache.clear();
640 IncludeFiles.clear();
641 }
642
addDeclaration(const NamedDecl & ND,SymbolID ID,bool IsMainFileOnly)643 const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,
644 bool IsMainFileOnly) {
645 auto &Ctx = ND.getASTContext();
646 auto &SM = Ctx.getSourceManager();
647
648 Symbol S;
649 S.ID = std::move(ID);
650 std::string QName = printQualifiedName(ND);
651 // FIXME: this returns foo:bar: for objective-C methods, we prefer only foo:
652 // for consistency with CodeCompletionString and a clean name/signature split.
653 std::tie(S.Scope, S.Name) = splitQualifiedName(QName);
654 std::string TemplateSpecializationArgs = printTemplateSpecializationArgs(ND);
655 S.TemplateSpecializationArgs = TemplateSpecializationArgs;
656
657 // We collect main-file symbols, but do not use them for code completion.
658 if (!IsMainFileOnly && isIndexedForCodeCompletion(ND, Ctx))
659 S.Flags |= Symbol::IndexedForCodeCompletion;
660 if (isImplementationDetail(&ND))
661 S.Flags |= Symbol::ImplementationDetail;
662 if (!IsMainFileOnly)
663 S.Flags |= Symbol::VisibleOutsideFile;
664 S.SymInfo = index::getSymbolInfo(&ND);
665 std::string FileURI;
666 auto Loc = nameLocation(ND, SM);
667 assert(Loc.isValid() && "Invalid source location for NamedDecl");
668 // FIXME: use the result to filter out symbols.
669 shouldIndexFile(SM.getFileID(Loc));
670 if (auto DeclLoc =
671 getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
672 S.CanonicalDeclaration = *DeclLoc;
673
674 S.Origin = Opts.Origin;
675 if (ND.getAvailability() == AR_Deprecated)
676 S.Flags |= Symbol::Deprecated;
677
678 // Add completion info.
679 // FIXME: we may want to choose a different redecl, or combine from several.
680 assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
681 // We use the primary template, as clang does during code completion.
682 CodeCompletionResult SymbolCompletion(&getTemplateOrThis(ND), 0);
683 const auto *CCS = SymbolCompletion.CreateCodeCompletionString(
684 *ASTCtx, *PP, CodeCompletionContext::CCC_Symbol, *CompletionAllocator,
685 *CompletionTUInfo,
686 /*IncludeBriefComments*/ false);
687 std::string Documentation =
688 formatDocumentation(*CCS, getDocComment(Ctx, SymbolCompletion,
689 /*CommentsFromHeaders=*/true));
690 if (!(S.Flags & Symbol::IndexedForCodeCompletion)) {
691 if (Opts.StoreAllDocumentation)
692 S.Documentation = Documentation;
693 Symbols.insert(S);
694 return Symbols.find(S.ID);
695 }
696 S.Documentation = Documentation;
697 std::string Signature;
698 std::string SnippetSuffix;
699 getSignature(*CCS, &Signature, &SnippetSuffix);
700 S.Signature = Signature;
701 S.CompletionSnippetSuffix = SnippetSuffix;
702 std::string ReturnType = getReturnType(*CCS);
703 S.ReturnType = ReturnType;
704
705 llvm::Optional<OpaqueType> TypeStorage;
706 if (S.Flags & Symbol::IndexedForCodeCompletion) {
707 TypeStorage = OpaqueType::fromCompletionResult(*ASTCtx, SymbolCompletion);
708 if (TypeStorage)
709 S.Type = TypeStorage->raw();
710 }
711
712 Symbols.insert(S);
713 setIncludeLocation(S, ND.getLocation());
714 return Symbols.find(S.ID);
715 }
716
addDefinition(const NamedDecl & ND,const Symbol & DeclSym)717 void SymbolCollector::addDefinition(const NamedDecl &ND,
718 const Symbol &DeclSym) {
719 if (DeclSym.Definition)
720 return;
721 // If we saw some forward declaration, we end up copying the symbol.
722 // This is not ideal, but avoids duplicating the "is this a definition" check
723 // in clang::index. We should only see one definition.
724 Symbol S = DeclSym;
725 std::string FileURI;
726 const auto &SM = ND.getASTContext().getSourceManager();
727 auto Loc = nameLocation(ND, SM);
728 // FIXME: use the result to filter out symbols.
729 shouldIndexFile(SM.getFileID(Loc));
730 if (auto DefLoc =
731 getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
732 S.Definition = *DefLoc;
733 Symbols.insert(S);
734 }
735
736 /// Gets a canonical include (URI of the header or <header> or "header") for
737 /// header of \p FID (which should usually be the *expansion* file).
738 /// Returns None if includes should not be inserted for this file.
getIncludeHeader(const Symbol & S,FileID FID)739 llvm::Optional<std::string> SymbolCollector::getIncludeHeader(const Symbol &S,
740 FileID FID) {
741 const SourceManager &SM = ASTCtx->getSourceManager();
742 const FileEntry *FE = SM.getFileEntryForID(FID);
743 if (!FE || FE->getName().empty())
744 return llvm::None;
745 llvm::StringRef Filename = FE->getName();
746 // If a file is mapped by canonical headers, use that mapping, regardless
747 // of whether it's an otherwise-good header (header guards etc).
748 if (Opts.Includes) {
749 llvm::SmallString<256> QName = S.Scope;
750 QName.append(S.Name);
751 llvm::StringRef Canonical = Opts.Includes->mapHeader(Filename, QName);
752 // If we had a mapping, always use it.
753 if (Canonical.startswith("<") || Canonical.startswith("\"")) {
754 // Hack: there are two std::move() overloads from different headers.
755 // CanonicalIncludes returns the common one-arg one from <utility>.
756 if (Canonical == "<utility>" && S.Name == "move" &&
757 S.Signature.contains(','))
758 Canonical = "<algorithm>";
759 return Canonical.str();
760 }
761 if (Canonical != Filename)
762 return toURI(SM, Canonical, Opts);
763 }
764 if (!isSelfContainedHeader(FID)) {
765 // A .inc or .def file is often included into a real header to define
766 // symbols (e.g. LLVM tablegen files).
767 if (Filename.endswith(".inc") || Filename.endswith(".def"))
768 return getIncludeHeader(S, SM.getFileID(SM.getIncludeLoc(FID)));
769 // Conservatively refuse to insert #includes to files without guards.
770 return llvm::None;
771 }
772 // Standard case: just insert the file itself.
773 return toURI(SM, Filename, Opts);
774 }
775
isSelfContainedHeader(FileID FID)776 bool SymbolCollector::isSelfContainedHeader(FileID FID) {
777 // The real computation (which will be memoized).
778 auto Compute = [&] {
779 const SourceManager &SM = ASTCtx->getSourceManager();
780 const FileEntry *FE = SM.getFileEntryForID(FID);
781 if (!FE)
782 return false;
783 // FIXME: Should files that have been #import'd be considered
784 // self-contained? That's really a property of the includer,
785 // not of the file.
786 if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) &&
787 !PP->getHeaderSearchInfo().hasFileBeenImported(FE))
788 return false;
789 // This pattern indicates that a header can't be used without
790 // particular preprocessor state, usually set up by another header.
791 if (isDontIncludeMeHeader(SM.getBufferData(FID)))
792 return false;
793 return true;
794 };
795
796 auto R = HeaderIsSelfContainedCache.try_emplace(FID, false);
797 if (R.second)
798 R.first->second = Compute();
799 return R.first->second;
800 }
801
802 // Is Line an #if or #ifdef directive?
isIf(llvm::StringRef Line)803 static bool isIf(llvm::StringRef Line) {
804 Line = Line.ltrim();
805 if (!Line.consume_front("#"))
806 return false;
807 Line = Line.ltrim();
808 return Line.startswith("if");
809 }
810 // Is Line an #error directive mentioning includes?
isErrorAboutInclude(llvm::StringRef Line)811 static bool isErrorAboutInclude(llvm::StringRef Line) {
812 Line = Line.ltrim();
813 if (!Line.consume_front("#"))
814 return false;
815 Line = Line.ltrim();
816 if (!Line.startswith("error"))
817 return false;
818 return Line.contains_lower("includ"); // Matches "include" or "including".
819 }
820
isDontIncludeMeHeader(llvm::StringRef Content)821 bool SymbolCollector::isDontIncludeMeHeader(llvm::StringRef Content) {
822 llvm::StringRef Line;
823 // Only sniff up to 100 lines or 10KB.
824 Content = Content.take_front(100 * 100);
825 for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
826 std::tie(Line, Content) = Content.split('\n');
827 if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
828 return true;
829 }
830 return false;
831 }
832
shouldIndexFile(FileID FID)833 bool SymbolCollector::shouldIndexFile(FileID FID) {
834 if (!Opts.FileFilter)
835 return true;
836 auto I = FilesToIndexCache.try_emplace(FID);
837 if (I.second)
838 I.first->second = Opts.FileFilter(ASTCtx->getSourceManager(), FID);
839 return I.first->second;
840 }
841
842 } // namespace clangd
843 } // namespace clang
844