1 //===-- Background.cpp - Build an index in a background thread ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "index/Background.h"
10 #include "Compiler.h"
11 #include "Config.h"
12 #include "Headers.h"
13 #include "ParsedAST.h"
14 #include "SourceCode.h"
15 #include "Symbol.h"
16 #include "URI.h"
17 #include "index/BackgroundIndexLoader.h"
18 #include "index/FileIndex.h"
19 #include "index/Index.h"
20 #include "index/IndexAction.h"
21 #include "index/MemIndex.h"
22 #include "index/Ref.h"
23 #include "index/Relation.h"
24 #include "index/Serialization.h"
25 #include "index/SymbolCollector.h"
26 #include "support/Context.h"
27 #include "support/Logger.h"
28 #include "support/Path.h"
29 #include "support/Threading.h"
30 #include "support/ThreadsafeFS.h"
31 #include "support/Trace.h"
32 #include "clang/Basic/SourceLocation.h"
33 #include "clang/Basic/SourceManager.h"
34 #include "clang/Driver/Types.h"
35 #include "llvm/ADT/ArrayRef.h"
36 #include "llvm/ADT/DenseSet.h"
37 #include "llvm/ADT/Hashing.h"
38 #include "llvm/ADT/STLExtras.h"
39 #include "llvm/ADT/ScopeExit.h"
40 #include "llvm/ADT/StringMap.h"
41 #include "llvm/ADT/StringRef.h"
42 #include "llvm/ADT/StringSet.h"
43 #include "llvm/Support/Error.h"
44 #include "llvm/Support/Path.h"
45 #include "llvm/Support/Threading.h"
46
47 #include <algorithm>
48 #include <atomic>
49 #include <chrono>
50 #include <condition_variable>
51 #include <cstddef>
52 #include <memory>
53 #include <mutex>
54 #include <numeric>
55 #include <queue>
56 #include <random>
57 #include <string>
58 #include <thread>
59 #include <utility>
60 #include <vector>
61
62 namespace clang {
63 namespace clangd {
64 namespace {
65
66 // We cannot use vfs->makeAbsolute because Cmd.FileName is either absolute or
67 // relative to Cmd.Directory, which might not be the same as current working
68 // directory.
getAbsolutePath(const tooling::CompileCommand & Cmd)69 llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) {
70 llvm::SmallString<128> AbsolutePath;
71 if (llvm::sys::path::is_absolute(Cmd.Filename)) {
72 AbsolutePath = Cmd.Filename;
73 } else {
74 AbsolutePath = Cmd.Directory;
75 llvm::sys::path::append(AbsolutePath, Cmd.Filename);
76 llvm::sys::path::remove_dots(AbsolutePath, true);
77 }
78 return AbsolutePath;
79 }
80
shardIsStale(const LoadedShard & LS,llvm::vfs::FileSystem * FS)81 bool shardIsStale(const LoadedShard &LS, llvm::vfs::FileSystem *FS) {
82 auto Buf = FS->getBufferForFile(LS.AbsolutePath);
83 if (!Buf) {
84 elog("Background-index: Couldn't read {0} to validate stored index: {1}",
85 LS.AbsolutePath, Buf.getError().message());
86 // There is no point in indexing an unreadable file.
87 return false;
88 }
89 return digest(Buf->get()->getBuffer()) != LS.Digest;
90 }
91
92 } // namespace
93
BackgroundIndex(const ThreadsafeFS & TFS,const GlobalCompilationDatabase & CDB,BackgroundIndexStorage::Factory IndexStorageFactory,Options Opts)94 BackgroundIndex::BackgroundIndex(
95 const ThreadsafeFS &TFS, const GlobalCompilationDatabase &CDB,
96 BackgroundIndexStorage::Factory IndexStorageFactory, Options Opts)
97 : SwapIndex(std::make_unique<MemIndex>()), TFS(TFS), CDB(CDB),
98 ContextProvider(std::move(Opts.ContextProvider)),
99 CollectMainFileRefs(Opts.CollectMainFileRefs),
100 Rebuilder(this, &IndexedSymbols, Opts.ThreadPoolSize),
101 IndexStorageFactory(std::move(IndexStorageFactory)),
102 Queue(std::move(Opts.OnProgress)),
103 CommandsChanged(
104 CDB.watch([&](const std::vector<std::string> &ChangedFiles) {
105 enqueue(ChangedFiles);
106 })) {
107 assert(Opts.ThreadPoolSize > 0 && "Thread pool size can't be zero.");
108 assert(this->IndexStorageFactory && "Storage factory can not be null!");
109 for (unsigned I = 0; I < Opts.ThreadPoolSize; ++I) {
110 ThreadPool.runAsync("background-worker-" + llvm::Twine(I + 1),
__anonfb3054390302() 111 [this, Ctx(Context::current().clone())]() mutable {
112 WithContext BGContext(std::move(Ctx));
113 Queue.work([&] { Rebuilder.idle(); });
114 });
115 }
116 }
117
~BackgroundIndex()118 BackgroundIndex::~BackgroundIndex() {
119 stop();
120 ThreadPool.wait();
121 }
122
changedFilesTask(const std::vector<std::string> & ChangedFiles)123 BackgroundQueue::Task BackgroundIndex::changedFilesTask(
124 const std::vector<std::string> &ChangedFiles) {
125 BackgroundQueue::Task T([this, ChangedFiles] {
126 trace::Span Tracer("BackgroundIndexEnqueue");
127
128 llvm::Optional<WithContext> WithProvidedContext;
129 if (ContextProvider)
130 WithProvidedContext.emplace(ContextProvider(/*Path=*/""));
131
132 // We're doing this asynchronously, because we'll read shards here too.
133 log("Enqueueing {0} commands for indexing", ChangedFiles.size());
134 SPAN_ATTACH(Tracer, "files", int64_t(ChangedFiles.size()));
135
136 auto NeedsReIndexing = loadProject(std::move(ChangedFiles));
137 // Run indexing for files that need to be updated.
138 std::shuffle(NeedsReIndexing.begin(), NeedsReIndexing.end(),
139 std::mt19937(std::random_device{}()));
140 std::vector<BackgroundQueue::Task> Tasks;
141 Tasks.reserve(NeedsReIndexing.size());
142 for (auto &Cmd : NeedsReIndexing)
143 Tasks.push_back(indexFileTask(std::move(Cmd)));
144 Queue.append(std::move(Tasks));
145 });
146
147 T.QueuePri = LoadShards;
148 T.ThreadPri = llvm::ThreadPriority::Default;
149 return T;
150 }
151
filenameWithoutExtension(llvm::StringRef Path)152 static llvm::StringRef filenameWithoutExtension(llvm::StringRef Path) {
153 Path = llvm::sys::path::filename(Path);
154 return Path.drop_back(llvm::sys::path::extension(Path).size());
155 }
156
indexFileTask(std::string Path)157 BackgroundQueue::Task BackgroundIndex::indexFileTask(std::string Path) {
158 std::string Tag = filenameWithoutExtension(Path).str();
159 BackgroundQueue::Task T([this, Path(std::move(Path))] {
160 llvm::Optional<WithContext> WithProvidedContext;
161 if (ContextProvider)
162 WithProvidedContext.emplace(ContextProvider(Path));
163 auto Cmd = CDB.getCompileCommand(Path);
164 if (!Cmd)
165 return;
166 if (auto Error = index(std::move(*Cmd)))
167 elog("Indexing {0} failed: {1}", Path, std::move(Error));
168 });
169 T.QueuePri = IndexFile;
170 T.Tag = std::move(Tag);
171 return T;
172 }
173
boostRelated(llvm::StringRef Path)174 void BackgroundIndex::boostRelated(llvm::StringRef Path) {
175 if (isHeaderFile(Path))
176 Queue.boost(filenameWithoutExtension(Path), IndexBoostedFile);
177 }
178
179 /// Given index results from a TU, only update symbols coming from files that
180 /// are different or missing from than \p ShardVersionsSnapshot. Also stores new
181 /// index information on IndexStorage.
update(llvm::StringRef MainFile,IndexFileIn Index,const llvm::StringMap<ShardVersion> & ShardVersionsSnapshot,bool HadErrors)182 void BackgroundIndex::update(
183 llvm::StringRef MainFile, IndexFileIn Index,
184 const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot,
185 bool HadErrors) {
186 // Keys are URIs.
187 llvm::StringMap<std::pair<Path, FileDigest>> FilesToUpdate;
188 // Note that sources do not contain any information regarding missing headers,
189 // since we don't even know what absolute path they should fall in.
190 for (const auto &IndexIt : *Index.Sources) {
191 const auto &IGN = IndexIt.getValue();
192 auto AbsPath = URI::resolve(IGN.URI, MainFile);
193 if (!AbsPath) {
194 elog("Failed to resolve URI: {0}", AbsPath.takeError());
195 continue;
196 }
197 const auto DigestIt = ShardVersionsSnapshot.find(*AbsPath);
198 // File has different contents, or indexing was successful this time.
199 if (DigestIt == ShardVersionsSnapshot.end() ||
200 DigestIt->getValue().Digest != IGN.Digest ||
201 (DigestIt->getValue().HadErrors && !HadErrors))
202 FilesToUpdate[IGN.URI] = {std::move(*AbsPath), IGN.Digest};
203 }
204
205 // Shard slabs into files.
206 FileShardedIndex ShardedIndex(std::move(Index));
207
208 // Build and store new slabs for each updated file.
209 for (const auto &FileIt : FilesToUpdate) {
210 auto Uri = FileIt.first();
211 auto IF = ShardedIndex.getShard(Uri);
212 assert(IF && "no shard for file in Index.Sources?");
213 PathRef Path = FileIt.getValue().first;
214
215 // Only store command line hash for main files of the TU, since our
216 // current model keeps only one version of a header file.
217 if (Path != MainFile)
218 IF->Cmd.reset();
219
220 // We need to store shards before updating the index, since the latter
221 // consumes slabs.
222 // FIXME: Also skip serializing the shard if it is already up-to-date.
223 if (auto Error = IndexStorageFactory(Path)->storeShard(Path, *IF))
224 elog("Failed to write background-index shard for file {0}: {1}", Path,
225 std::move(Error));
226
227 {
228 std::lock_guard<std::mutex> Lock(ShardVersionsMu);
229 const auto &Hash = FileIt.getValue().second;
230 auto DigestIt = ShardVersions.try_emplace(Path);
231 ShardVersion &SV = DigestIt.first->second;
232 // Skip if file is already up to date, unless previous index was broken
233 // and this one is not.
234 if (!DigestIt.second && SV.Digest == Hash && SV.HadErrors && !HadErrors)
235 continue;
236 SV.Digest = Hash;
237 SV.HadErrors = HadErrors;
238
239 // This can override a newer version that is added in another thread, if
240 // this thread sees the older version but finishes later. This should be
241 // rare in practice.
242 IndexedSymbols.update(
243 Path, std::make_unique<SymbolSlab>(std::move(*IF->Symbols)),
244 std::make_unique<RefSlab>(std::move(*IF->Refs)),
245 std::make_unique<RelationSlab>(std::move(*IF->Relations)),
246 Path == MainFile);
247 }
248 }
249 }
250
index(tooling::CompileCommand Cmd)251 llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
252 trace::Span Tracer("BackgroundIndex");
253 SPAN_ATTACH(Tracer, "file", Cmd.Filename);
254 auto AbsolutePath = getAbsolutePath(Cmd);
255
256 auto FS = TFS.view(Cmd.Directory);
257 auto Buf = FS->getBufferForFile(AbsolutePath);
258 if (!Buf)
259 return llvm::errorCodeToError(Buf.getError());
260 auto Hash = digest(Buf->get()->getBuffer());
261
262 // Take a snapshot of the versions to avoid locking for each file in the TU.
263 llvm::StringMap<ShardVersion> ShardVersionsSnapshot;
264 {
265 std::lock_guard<std::mutex> Lock(ShardVersionsMu);
266 ShardVersionsSnapshot = ShardVersions;
267 }
268
269 vlog("Indexing {0} (digest:={1})", Cmd.Filename, llvm::toHex(Hash));
270 ParseInputs Inputs;
271 Inputs.TFS = &TFS;
272 Inputs.CompileCommand = std::move(Cmd);
273 IgnoreDiagnostics IgnoreDiags;
274 auto CI = buildCompilerInvocation(Inputs, IgnoreDiags);
275 if (!CI)
276 return error("Couldn't build compiler invocation");
277
278 auto Clang =
279 prepareCompilerInstance(std::move(CI), /*Preamble=*/nullptr,
280 std::move(*Buf), std::move(FS), IgnoreDiags);
281 if (!Clang)
282 return error("Couldn't build compiler instance");
283
284 SymbolCollector::Options IndexOpts;
285 // Creates a filter to not collect index results from files with unchanged
286 // digests.
287 IndexOpts.FileFilter = [&ShardVersionsSnapshot](const SourceManager &SM,
288 FileID FID) {
289 const auto *F = SM.getFileEntryForID(FID);
290 if (!F)
291 return false; // Skip invalid files.
292 auto AbsPath = getCanonicalPath(F, SM);
293 if (!AbsPath)
294 return false; // Skip files without absolute path.
295 auto Digest = digestFile(SM, FID);
296 if (!Digest)
297 return false;
298 auto D = ShardVersionsSnapshot.find(*AbsPath);
299 if (D != ShardVersionsSnapshot.end() && D->second.Digest == Digest &&
300 !D->second.HadErrors)
301 return false; // Skip files that haven't changed, without errors.
302 return true;
303 };
304 IndexOpts.CollectMainFileRefs = CollectMainFileRefs;
305
306 IndexFileIn Index;
307 auto Action = createStaticIndexingAction(
308 IndexOpts, [&](SymbolSlab S) { Index.Symbols = std::move(S); },
309 [&](RefSlab R) { Index.Refs = std::move(R); },
310 [&](RelationSlab R) { Index.Relations = std::move(R); },
311 [&](IncludeGraph IG) { Index.Sources = std::move(IG); });
312
313 // We're going to run clang here, and it could potentially crash.
314 // We could use CrashRecoveryContext to try to make indexing crashes nonfatal,
315 // but the leaky "recovery" is pretty scary too in a long-running process.
316 // If crashes are a real problem, maybe we should fork a child process.
317
318 const FrontendInputFile &Input = Clang->getFrontendOpts().Inputs.front();
319 if (!Action->BeginSourceFile(*Clang, Input))
320 return error("BeginSourceFile() failed");
321 if (llvm::Error Err = Action->Execute())
322 return Err;
323
324 Action->EndSourceFile();
325
326 Index.Cmd = Inputs.CompileCommand;
327 assert(Index.Symbols && Index.Refs && Index.Sources &&
328 "Symbols, Refs and Sources must be set.");
329
330 log("Indexed {0} ({1} symbols, {2} refs, {3} files)",
331 Inputs.CompileCommand.Filename, Index.Symbols->size(),
332 Index.Refs->numRefs(), Index.Sources->size());
333 SPAN_ATTACH(Tracer, "symbols", int(Index.Symbols->size()));
334 SPAN_ATTACH(Tracer, "refs", int(Index.Refs->numRefs()));
335 SPAN_ATTACH(Tracer, "sources", int(Index.Sources->size()));
336
337 bool HadErrors = Clang->hasDiagnostics() &&
338 Clang->getDiagnostics().hasUncompilableErrorOccurred();
339 if (HadErrors) {
340 log("Failed to compile {0}, index may be incomplete", AbsolutePath);
341 for (auto &It : *Index.Sources)
342 It.second.Flags |= IncludeGraphNode::SourceFlag::HadErrors;
343 }
344 update(AbsolutePath, std::move(Index), ShardVersionsSnapshot, HadErrors);
345
346 Rebuilder.indexedTU();
347 return llvm::Error::success();
348 }
349
350 // Restores shards for \p MainFiles from index storage. Then checks staleness of
351 // those shards and returns a list of TUs that needs to be indexed to update
352 // staleness.
353 std::vector<std::string>
loadProject(std::vector<std::string> MainFiles)354 BackgroundIndex::loadProject(std::vector<std::string> MainFiles) {
355 // Drop files where background indexing is disabled in config.
356 if (ContextProvider)
357 llvm::erase_if(MainFiles, [&](const std::string &TU) {
358 // Load the config for each TU, as indexing may be selectively enabled.
359 WithContext WithProvidedContext(ContextProvider(TU));
360 return Config::current().Index.Background ==
361 Config::BackgroundPolicy::Skip;
362 });
363 Rebuilder.startLoading();
364 // Load shards for all of the mainfiles.
365 const std::vector<LoadedShard> Result =
366 loadIndexShards(MainFiles, IndexStorageFactory, CDB);
367 size_t LoadedShards = 0;
368 {
369 // Update in-memory state.
370 std::lock_guard<std::mutex> Lock(ShardVersionsMu);
371 for (auto &LS : Result) {
372 if (!LS.Shard)
373 continue;
374 auto SS =
375 LS.Shard->Symbols
376 ? std::make_unique<SymbolSlab>(std::move(*LS.Shard->Symbols))
377 : nullptr;
378 auto RS = LS.Shard->Refs
379 ? std::make_unique<RefSlab>(std::move(*LS.Shard->Refs))
380 : nullptr;
381 auto RelS =
382 LS.Shard->Relations
383 ? std::make_unique<RelationSlab>(std::move(*LS.Shard->Relations))
384 : nullptr;
385 ShardVersion &SV = ShardVersions[LS.AbsolutePath];
386 SV.Digest = LS.Digest;
387 SV.HadErrors = LS.HadErrors;
388 ++LoadedShards;
389
390 IndexedSymbols.update(LS.AbsolutePath, std::move(SS), std::move(RS),
391 std::move(RelS), LS.CountReferences);
392 }
393 }
394 Rebuilder.loadedShard(LoadedShards);
395 Rebuilder.doneLoading();
396
397 auto FS = TFS.view(/*CWD=*/llvm::None);
398 llvm::DenseSet<PathRef> TUsToIndex;
399 // We'll accept data from stale shards, but ensure the files get reindexed
400 // soon.
401 for (auto &LS : Result) {
402 if (!shardIsStale(LS, FS.get()))
403 continue;
404 PathRef TUForFile = LS.DependentTU;
405 assert(!TUForFile.empty() && "File without a TU!");
406
407 // FIXME: Currently, we simply schedule indexing on a TU whenever any of
408 // its dependencies needs re-indexing. We might do it smarter by figuring
409 // out a minimal set of TUs that will cover all the stale dependencies.
410 // FIXME: Try looking at other TUs if no compile commands are available
411 // for this TU, i.e TU was deleted after we performed indexing.
412 TUsToIndex.insert(TUForFile);
413 }
414
415 return {TUsToIndex.begin(), TUsToIndex.end()};
416 }
417
profile(MemoryTree & MT) const418 void BackgroundIndex::profile(MemoryTree &MT) const {
419 IndexedSymbols.profile(MT.child("slabs"));
420 // We don't want to mix memory used by index and symbols, so call base class.
421 MT.child("index").addUsage(SwapIndex::estimateMemoryUsage());
422 }
423 } // namespace clangd
424 } // namespace clang
425