1 //===--- FileDistance.cpp - File contents container -------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The FileDistance structure allows calculating the minimum distance to paths
10 // in a single tree.
11 // We simply walk up the path's ancestors until we find a node whose cost is
12 // known, and add the cost of walking back down. Initialization ensures this
13 // gives the correct path to the roots.
14 // We cache the results, so that the runtime is O(|A|), where A is the set of
15 // all distinct ancestors of visited paths.
16 //
17 // Example after initialization with /=2, /bar=0, DownCost = 1:
18 // / = 2
19 // /bar = 0
20 //
21 // After querying /foo/bar and /bar/foo:
22 // / = 2
23 // /bar = 0
24 // /bar/foo = 1
25 // /foo = 3
26 // /foo/bar = 4
27 //
28 // URIDistance creates FileDistance lazily for each URI scheme encountered. In
29 // practice this is a small constant factor.
30 //
31 //===-------------------------------------------------------------------------//
32
33 #include "FileDistance.h"
34 #include "support/Logger.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include <queue>
37
38 namespace clang {
39 namespace clangd {
40
41 // Convert a path into the canonical form.
42 // Canonical form is either "/", or "/segment" * N:
43 // C:\foo\bar --> /c:/foo/bar
44 // /foo/ --> /foo
45 // a/b/c --> /a/b/c
canonicalize(llvm::StringRef Path)46 static llvm::SmallString<128> canonicalize(llvm::StringRef Path) {
47 llvm::SmallString<128> Result = Path.rtrim('/');
48 native(Result, llvm::sys::path::Style::posix);
49 if (Result.empty() || Result.front() != '/')
50 Result.insert(Result.begin(), '/');
51 return Result;
52 }
53
54 constexpr const unsigned FileDistance::Unreachable;
55 const llvm::hash_code FileDistance::RootHash =
56 llvm::hash_value(llvm::StringRef("/"));
57
FileDistance(llvm::StringMap<SourceParams> Sources,const FileDistanceOptions & Opts)58 FileDistance::FileDistance(llvm::StringMap<SourceParams> Sources,
59 const FileDistanceOptions &Opts)
60 : Opts(Opts) {
61 llvm::DenseMap<llvm::hash_code, llvm::SmallVector<llvm::hash_code, 4>>
62 DownEdges;
63 // Compute the best distance following only up edges.
64 // Keep track of down edges, in case we can use them to improve on this.
65 for (const auto &S : Sources) {
66 auto Canonical = canonicalize(S.getKey());
67 dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost,
68 S.second.MaxUpTraversals);
69 // Walk up to ancestors of this source, assigning cost.
70 llvm::StringRef Rest = Canonical;
71 llvm::hash_code Hash = llvm::hash_value(Rest);
72 for (unsigned I = 0; !Rest.empty(); ++I) {
73 Rest = parent_path(Rest, llvm::sys::path::Style::posix);
74 auto NextHash = llvm::hash_value(Rest);
75 auto &Down = DownEdges[NextHash];
76 if (!llvm::is_contained(Down, Hash))
77 Down.push_back(Hash);
78 // We can't just break after MaxUpTraversals, must still set DownEdges.
79 if (I > S.getValue().MaxUpTraversals) {
80 if (Cache.find(Hash) != Cache.end())
81 break;
82 } else {
83 unsigned Cost = S.getValue().Cost + I * Opts.UpCost;
84 auto R = Cache.try_emplace(Hash, Cost);
85 if (!R.second) {
86 if (Cost < R.first->second) {
87 R.first->second = Cost;
88 } else {
89 // If we're not the best way to get to this path, stop assigning.
90 break;
91 }
92 }
93 }
94 Hash = NextHash;
95 }
96 }
97 // Now propagate scores parent -> child if that's an improvement.
98 // BFS ensures we propagate down chains (must visit parents before children).
99 std::queue<llvm::hash_code> Next;
100 for (auto Child : DownEdges.lookup(llvm::hash_value(llvm::StringRef(""))))
101 Next.push(Child);
102 while (!Next.empty()) {
103 auto Parent = Next.front();
104 Next.pop();
105 auto ParentCost = Cache.lookup(Parent);
106 for (auto Child : DownEdges.lookup(Parent)) {
107 if (Parent != RootHash || Opts.AllowDownTraversalFromRoot) {
108 auto &ChildCost =
109 Cache.try_emplace(Child, Unreachable).first->getSecond();
110 if (ParentCost + Opts.DownCost < ChildCost)
111 ChildCost = ParentCost + Opts.DownCost;
112 }
113 Next.push(Child);
114 }
115 }
116 }
117
distance(llvm::StringRef Path)118 unsigned FileDistance::distance(llvm::StringRef Path) {
119 auto Canonical = canonicalize(Path);
120 unsigned Cost = Unreachable;
121 llvm::SmallVector<llvm::hash_code, 16> Ancestors;
122 // Walk up ancestors until we find a path we know the distance for.
123 for (llvm::StringRef Rest = Canonical; !Rest.empty();
124 Rest = parent_path(Rest, llvm::sys::path::Style::posix)) {
125 auto Hash = llvm::hash_value(Rest);
126 if (Hash == RootHash && !Ancestors.empty() &&
127 !Opts.AllowDownTraversalFromRoot) {
128 Cost = Unreachable;
129 break;
130 }
131 auto It = Cache.find(Hash);
132 if (It != Cache.end()) {
133 Cost = It->second;
134 break;
135 }
136 Ancestors.push_back(Hash);
137 }
138 // Now we know the costs for (known node, queried node].
139 // Fill these in, walking down the directory tree.
140 for (llvm::hash_code Hash : llvm::reverse(Ancestors)) {
141 if (Cost != Unreachable)
142 Cost += Opts.DownCost;
143 Cache.try_emplace(Hash, Cost);
144 }
145 dlog("distance({0} = {1})", Path, Cost);
146 return Cost;
147 }
148
distance(llvm::StringRef URI)149 unsigned URIDistance::distance(llvm::StringRef URI) {
150 auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::Unreachable);
151 if (!R.second)
152 return R.first->getSecond();
153 if (auto U = clangd::URI::parse(URI)) {
154 dlog("distance({0} = {1})", URI, U->body());
155 R.first->second = forScheme(U->scheme()).distance(U->body());
156 } else {
157 log("URIDistance::distance() of unparseable {0}: {1}", URI, U.takeError());
158 }
159 return R.first->second;
160 }
161
forScheme(llvm::StringRef Scheme)162 FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) {
163 auto &Delegate = ByScheme[Scheme];
164 if (!Delegate) {
165 llvm::StringMap<SourceParams> SchemeSources;
166 for (const auto &Source : Sources) {
167 if (auto U = clangd::URI::create(Source.getKey(), Scheme))
168 SchemeSources.try_emplace(U->body(), Source.getValue());
169 else
170 llvm::consumeError(U.takeError());
171 }
172 dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme,
173 SchemeSources.size(), Sources.size());
174 Delegate.reset(new FileDistance(std::move(SchemeSources), Opts));
175 }
176 return *Delegate;
177 }
178
scopeToPath(llvm::StringRef Scope)179 static std::pair<std::string, int> scopeToPath(llvm::StringRef Scope) {
180 llvm::SmallVector<llvm::StringRef, 4> Split;
181 Scope.split(Split, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
182 return {"/" + llvm::join(Split, "/"), Split.size()};
183 }
184
185 static FileDistance
createScopeFileDistance(llvm::ArrayRef<std::string> QueryScopes)186 createScopeFileDistance(llvm::ArrayRef<std::string> QueryScopes) {
187 FileDistanceOptions Opts;
188 Opts.UpCost = 2;
189 Opts.DownCost = 4;
190 Opts.AllowDownTraversalFromRoot = false;
191
192 llvm::StringMap<SourceParams> Sources;
193 llvm::StringRef Preferred =
194 QueryScopes.empty() ? "" : QueryScopes.front().c_str();
195 for (llvm::StringRef S : QueryScopes) {
196 SourceParams Param;
197 // Penalize the global scope even it's preferred, as all projects can define
198 // symbols in it, and there is pattern where using-namespace is used in
199 // place of enclosing namespaces (e.g. in implementation files).
200 if (S == Preferred)
201 Param.Cost = S == "" ? 4 : 0;
202 else if (Preferred.startswith(S) && !S.empty())
203 continue; // just rely on up-traversals.
204 else
205 Param.Cost = S == "" ? 6 : 2;
206 auto Path = scopeToPath(S);
207 // The global namespace is not 'near' its children.
208 Param.MaxUpTraversals = std::max(Path.second - 1, 0);
209 Sources[Path.first] = std::move(Param);
210 }
211 return FileDistance(std::move(Sources), Opts);
212 }
213
ScopeDistance(llvm::ArrayRef<std::string> QueryScopes)214 ScopeDistance::ScopeDistance(llvm::ArrayRef<std::string> QueryScopes)
215 : Distance(createScopeFileDistance(QueryScopes)) {}
216
distance(llvm::StringRef SymbolScope)217 unsigned ScopeDistance::distance(llvm::StringRef SymbolScope) {
218 return Distance.distance(scopeToPath(SymbolScope).first);
219 }
220
221 } // namespace clangd
222 } // namespace clang
223