1 //===-- Serialization.cpp - Binary serialization of index data ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "Serialization.h"
10 #include "Headers.h"
11 #include "RIFF.h"
12 #include "SymbolLocation.h"
13 #include "SymbolOrigin.h"
14 #include "dex/Dex.h"
15 #include "support/Logger.h"
16 #include "support/Trace.h"
17 #include "clang/Tooling/CompilationDatabase.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Support/Compiler.h"
20 #include "llvm/Support/Compression.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include <cstdint>
25 #include <vector>
26
27 namespace clang {
28 namespace clangd {
29 namespace {
30
31 // IO PRIMITIVES
32 // We use little-endian 32 bit ints, sometimes with variable-length encoding.
33 //
34 // Variable-length int encoding (varint) uses the bottom 7 bits of each byte
35 // to encode the number, and the top bit to indicate whether more bytes follow.
36 // e.g. 9a 2f means [0x1a and keep reading, 0x2f and stop].
37 // This represents 0x1a | 0x2f<<7 = 6042.
38 // A 32-bit integer takes 1-5 bytes to encode; small numbers are more compact.
39
40 // Reads binary data from a StringRef, and keeps track of position.
41 class Reader {
42 const char *Begin, *End;
43 bool Err = false;
44
45 public:
Reader(llvm::StringRef Data)46 Reader(llvm::StringRef Data) : Begin(Data.begin()), End(Data.end()) {}
47 // The "error" bit is set by reading past EOF or reading invalid data.
48 // When in an error state, reads may return zero values: callers should check.
err() const49 bool err() const { return Err; }
50 // Did we read all the data, or encounter an error?
eof() const51 bool eof() const { return Begin == End || Err; }
52 // All the data we didn't read yet.
rest() const53 llvm::StringRef rest() const { return llvm::StringRef(Begin, End - Begin); }
54
consume8()55 uint8_t consume8() {
56 if (LLVM_UNLIKELY(Begin == End)) {
57 Err = true;
58 return 0;
59 }
60 return *Begin++;
61 }
62
consume32()63 uint32_t consume32() {
64 if (LLVM_UNLIKELY(Begin + 4 > End)) {
65 Err = true;
66 return 0;
67 }
68 auto Ret = llvm::support::endian::read32le(Begin);
69 Begin += 4;
70 return Ret;
71 }
72
consume(int N)73 llvm::StringRef consume(int N) {
74 if (LLVM_UNLIKELY(Begin + N > End)) {
75 Err = true;
76 return llvm::StringRef();
77 }
78 llvm::StringRef Ret(Begin, N);
79 Begin += N;
80 return Ret;
81 }
82
consumeVar()83 uint32_t consumeVar() {
84 constexpr static uint8_t More = 1 << 7;
85
86 // Use a 32 bit unsigned here to prevent promotion to signed int (unless int
87 // is wider than 32 bits).
88 uint32_t B = consume8();
89 if (LLVM_LIKELY(!(B & More)))
90 return B;
91 uint32_t Val = B & ~More;
92 for (int Shift = 7; B & More && Shift < 32; Shift += 7) {
93 B = consume8();
94 // 5th byte of a varint can only have lowest 4 bits set.
95 assert((Shift != 28 || B == (B & 0x0f)) && "Invalid varint encoding");
96 Val |= (B & ~More) << Shift;
97 }
98 return Val;
99 }
100
consumeString(llvm::ArrayRef<llvm::StringRef> Strings)101 llvm::StringRef consumeString(llvm::ArrayRef<llvm::StringRef> Strings) {
102 auto StringIndex = consumeVar();
103 if (LLVM_UNLIKELY(StringIndex >= Strings.size())) {
104 Err = true;
105 return llvm::StringRef();
106 }
107 return Strings[StringIndex];
108 }
109
consumeID()110 SymbolID consumeID() {
111 llvm::StringRef Raw = consume(SymbolID::RawSize); // short if truncated.
112 return LLVM_UNLIKELY(err()) ? SymbolID() : SymbolID::fromRaw(Raw);
113 }
114
115 // Read a varint (as consumeVar) and resize the container accordingly.
116 // If the size is invalid, return false and mark an error.
117 // (The caller should abort in this case).
consumeSize(T & Container)118 template <typename T> LLVM_NODISCARD bool consumeSize(T &Container) {
119 auto Size = consumeVar();
120 // Conservatively assume each element is at least one byte.
121 if (Size > (End - Begin)) {
122 Err = true;
123 return false;
124 }
125 Container.resize(Size);
126 return true;
127 }
128 };
129
write32(uint32_t I,llvm::raw_ostream & OS)130 void write32(uint32_t I, llvm::raw_ostream &OS) {
131 char Buf[4];
132 llvm::support::endian::write32le(Buf, I);
133 OS.write(Buf, sizeof(Buf));
134 }
135
writeVar(uint32_t I,llvm::raw_ostream & OS)136 void writeVar(uint32_t I, llvm::raw_ostream &OS) {
137 constexpr static uint8_t More = 1 << 7;
138 if (LLVM_LIKELY(I < 1 << 7)) {
139 OS.write(I);
140 return;
141 }
142 for (;;) {
143 OS.write(I | More);
144 I >>= 7;
145 if (I < 1 << 7) {
146 OS.write(I);
147 return;
148 }
149 }
150 }
151
152 // STRING TABLE ENCODING
153 // Index data has many string fields, and many strings are identical.
154 // We store each string once, and refer to them by index.
155 //
156 // The string table's format is:
157 // - UncompressedSize : uint32 (or 0 for no compression)
158 // - CompressedData : byte[CompressedSize]
159 //
160 // CompressedData is a zlib-compressed byte[UncompressedSize].
161 // It contains a sequence of null-terminated strings, e.g. "foo\0bar\0".
162 // These are sorted to improve compression.
163
164 // Maps each string to a canonical representation.
165 // Strings remain owned externally (e.g. by SymbolSlab).
166 class StringTableOut {
167 llvm::DenseSet<llvm::StringRef> Unique;
168 std::vector<llvm::StringRef> Sorted;
169 // Since strings are interned, look up can be by pointer.
170 llvm::DenseMap<std::pair<const char *, size_t>, unsigned> Index;
171
172 public:
StringTableOut()173 StringTableOut() {
174 // Ensure there's at least one string in the table.
175 // Table size zero is reserved to indicate no compression.
176 Unique.insert("");
177 }
178 // Add a string to the table. Overwrites S if an identical string exists.
intern(llvm::StringRef & S)179 void intern(llvm::StringRef &S) { S = *Unique.insert(S).first; };
180 // Finalize the table and write it to OS. No more strings may be added.
finalize(llvm::raw_ostream & OS)181 void finalize(llvm::raw_ostream &OS) {
182 Sorted = {Unique.begin(), Unique.end()};
183 llvm::sort(Sorted);
184 for (unsigned I = 0; I < Sorted.size(); ++I)
185 Index.try_emplace({Sorted[I].data(), Sorted[I].size()}, I);
186
187 std::string RawTable;
188 for (llvm::StringRef S : Sorted) {
189 RawTable.append(std::string(S));
190 RawTable.push_back(0);
191 }
192 if (llvm::zlib::isAvailable()) {
193 llvm::SmallString<1> Compressed;
194 llvm::cantFail(llvm::zlib::compress(RawTable, Compressed));
195 write32(RawTable.size(), OS);
196 OS << Compressed;
197 } else {
198 write32(0, OS); // No compression.
199 OS << RawTable;
200 }
201 }
202 // Get the ID of an string, which must be interned. Table must be finalized.
index(llvm::StringRef S) const203 unsigned index(llvm::StringRef S) const {
204 assert(!Sorted.empty() && "table not finalized");
205 assert(Index.count({S.data(), S.size()}) && "string not interned");
206 return Index.find({S.data(), S.size()})->second;
207 }
208 };
209
210 struct StringTableIn {
211 llvm::BumpPtrAllocator Arena;
212 std::vector<llvm::StringRef> Strings;
213 };
214
readStringTable(llvm::StringRef Data)215 llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) {
216 Reader R(Data);
217 size_t UncompressedSize = R.consume32();
218 if (R.err())
219 return error("Truncated string table");
220
221 llvm::StringRef Uncompressed;
222 llvm::SmallString<1> UncompressedStorage;
223 if (UncompressedSize == 0) // No compression
224 Uncompressed = R.rest();
225 else if (llvm::zlib::isAvailable()) {
226 // Don't allocate a massive buffer if UncompressedSize was corrupted
227 // This is effective for sharded index, but not big monolithic ones, as
228 // once compressed size reaches 4MB nothing can be ruled out.
229 // Theoretical max ratio from https://zlib.net/zlib_tech.html
230 constexpr int MaxCompressionRatio = 1032;
231 if (UncompressedSize / MaxCompressionRatio > R.rest().size())
232 return error("Bad stri table: uncompress {0} -> {1} bytes is implausible",
233 R.rest().size(), UncompressedSize);
234
235 if (llvm::Error E = llvm::zlib::uncompress(R.rest(), UncompressedStorage,
236 UncompressedSize))
237 return std::move(E);
238 Uncompressed = UncompressedStorage;
239 } else
240 return error("Compressed string table, but zlib is unavailable");
241
242 StringTableIn Table;
243 llvm::StringSaver Saver(Table.Arena);
244 R = Reader(Uncompressed);
245 for (Reader R(Uncompressed); !R.eof();) {
246 auto Len = R.rest().find(0);
247 if (Len == llvm::StringRef::npos)
248 return error("Bad string table: not null terminated");
249 Table.Strings.push_back(Saver.save(R.consume(Len)));
250 R.consume8();
251 }
252 if (R.err())
253 return error("Truncated string table");
254 return std::move(Table);
255 }
256
257 // SYMBOL ENCODING
258 // Each field of clangd::Symbol is encoded in turn (see implementation).
259 // - StringRef fields encode as varint (index into the string table)
260 // - enums encode as the underlying type
261 // - most numbers encode as varint
262
writeLocation(const SymbolLocation & Loc,const StringTableOut & Strings,llvm::raw_ostream & OS)263 void writeLocation(const SymbolLocation &Loc, const StringTableOut &Strings,
264 llvm::raw_ostream &OS) {
265 writeVar(Strings.index(Loc.FileURI), OS);
266 for (const auto &Endpoint : {Loc.Start, Loc.End}) {
267 writeVar(Endpoint.line(), OS);
268 writeVar(Endpoint.column(), OS);
269 }
270 }
271
readLocation(Reader & Data,llvm::ArrayRef<llvm::StringRef> Strings)272 SymbolLocation readLocation(Reader &Data,
273 llvm::ArrayRef<llvm::StringRef> Strings) {
274 SymbolLocation Loc;
275 Loc.FileURI = Data.consumeString(Strings).data();
276 for (auto *Endpoint : {&Loc.Start, &Loc.End}) {
277 Endpoint->setLine(Data.consumeVar());
278 Endpoint->setColumn(Data.consumeVar());
279 }
280 return Loc;
281 }
282
readIncludeGraphNode(Reader & Data,llvm::ArrayRef<llvm::StringRef> Strings)283 IncludeGraphNode readIncludeGraphNode(Reader &Data,
284 llvm::ArrayRef<llvm::StringRef> Strings) {
285 IncludeGraphNode IGN;
286 IGN.Flags = static_cast<IncludeGraphNode::SourceFlag>(Data.consume8());
287 IGN.URI = Data.consumeString(Strings);
288 llvm::StringRef Digest = Data.consume(IGN.Digest.size());
289 std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin());
290 if (!Data.consumeSize(IGN.DirectIncludes))
291 return IGN;
292 for (llvm::StringRef &Include : IGN.DirectIncludes)
293 Include = Data.consumeString(Strings);
294 return IGN;
295 }
296
writeIncludeGraphNode(const IncludeGraphNode & IGN,const StringTableOut & Strings,llvm::raw_ostream & OS)297 void writeIncludeGraphNode(const IncludeGraphNode &IGN,
298 const StringTableOut &Strings,
299 llvm::raw_ostream &OS) {
300 OS.write(static_cast<uint8_t>(IGN.Flags));
301 writeVar(Strings.index(IGN.URI), OS);
302 llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()),
303 IGN.Digest.size());
304 OS << Hash;
305 writeVar(IGN.DirectIncludes.size(), OS);
306 for (llvm::StringRef Include : IGN.DirectIncludes)
307 writeVar(Strings.index(Include), OS);
308 }
309
writeSymbol(const Symbol & Sym,const StringTableOut & Strings,llvm::raw_ostream & OS)310 void writeSymbol(const Symbol &Sym, const StringTableOut &Strings,
311 llvm::raw_ostream &OS) {
312 OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists,
313 // symbol IDs should probably be in a string table.
314 OS.write(static_cast<uint8_t>(Sym.SymInfo.Kind));
315 OS.write(static_cast<uint8_t>(Sym.SymInfo.Lang));
316 writeVar(Strings.index(Sym.Name), OS);
317 writeVar(Strings.index(Sym.Scope), OS);
318 writeVar(Strings.index(Sym.TemplateSpecializationArgs), OS);
319 writeLocation(Sym.Definition, Strings, OS);
320 writeLocation(Sym.CanonicalDeclaration, Strings, OS);
321 writeVar(Sym.References, OS);
322 OS.write(static_cast<uint8_t>(Sym.Flags));
323 OS.write(static_cast<uint8_t>(Sym.Origin));
324 writeVar(Strings.index(Sym.Signature), OS);
325 writeVar(Strings.index(Sym.CompletionSnippetSuffix), OS);
326 writeVar(Strings.index(Sym.Documentation), OS);
327 writeVar(Strings.index(Sym.ReturnType), OS);
328 writeVar(Strings.index(Sym.Type), OS);
329
330 auto WriteInclude = [&](const Symbol::IncludeHeaderWithReferences &Include) {
331 writeVar(Strings.index(Include.IncludeHeader), OS);
332 writeVar(Include.References, OS);
333 };
334 writeVar(Sym.IncludeHeaders.size(), OS);
335 for (const auto &Include : Sym.IncludeHeaders)
336 WriteInclude(Include);
337 }
338
readSymbol(Reader & Data,llvm::ArrayRef<llvm::StringRef> Strings)339 Symbol readSymbol(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
340 Symbol Sym;
341 Sym.ID = Data.consumeID();
342 Sym.SymInfo.Kind = static_cast<index::SymbolKind>(Data.consume8());
343 Sym.SymInfo.Lang = static_cast<index::SymbolLanguage>(Data.consume8());
344 Sym.Name = Data.consumeString(Strings);
345 Sym.Scope = Data.consumeString(Strings);
346 Sym.TemplateSpecializationArgs = Data.consumeString(Strings);
347 Sym.Definition = readLocation(Data, Strings);
348 Sym.CanonicalDeclaration = readLocation(Data, Strings);
349 Sym.References = Data.consumeVar();
350 Sym.Flags = static_cast<Symbol::SymbolFlag>(Data.consume8());
351 Sym.Origin = static_cast<SymbolOrigin>(Data.consume8());
352 Sym.Signature = Data.consumeString(Strings);
353 Sym.CompletionSnippetSuffix = Data.consumeString(Strings);
354 Sym.Documentation = Data.consumeString(Strings);
355 Sym.ReturnType = Data.consumeString(Strings);
356 Sym.Type = Data.consumeString(Strings);
357 if (!Data.consumeSize(Sym.IncludeHeaders))
358 return Sym;
359 for (auto &I : Sym.IncludeHeaders) {
360 I.IncludeHeader = Data.consumeString(Strings);
361 I.References = Data.consumeVar();
362 }
363 return Sym;
364 }
365
366 // REFS ENCODING
367 // A refs section has data grouped by Symbol. Each symbol has:
368 // - SymbolID: 8 bytes
369 // - NumRefs: varint
370 // - Ref[NumRefs]
371 // Fields of Ref are encoded in turn, see implementation.
372
writeRefs(const SymbolID & ID,llvm::ArrayRef<Ref> Refs,const StringTableOut & Strings,llvm::raw_ostream & OS)373 void writeRefs(const SymbolID &ID, llvm::ArrayRef<Ref> Refs,
374 const StringTableOut &Strings, llvm::raw_ostream &OS) {
375 OS << ID.raw();
376 writeVar(Refs.size(), OS);
377 for (const auto &Ref : Refs) {
378 OS.write(static_cast<unsigned char>(Ref.Kind));
379 writeLocation(Ref.Location, Strings, OS);
380 OS << Ref.Container.raw();
381 }
382 }
383
384 std::pair<SymbolID, std::vector<Ref>>
readRefs(Reader & Data,llvm::ArrayRef<llvm::StringRef> Strings)385 readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
386 std::pair<SymbolID, std::vector<Ref>> Result;
387 Result.first = Data.consumeID();
388 if (!Data.consumeSize(Result.second))
389 return Result;
390 for (auto &Ref : Result.second) {
391 Ref.Kind = static_cast<RefKind>(Data.consume8());
392 Ref.Location = readLocation(Data, Strings);
393 Ref.Container = Data.consumeID();
394 }
395 return Result;
396 }
397
398 // RELATIONS ENCODING
399 // A relations section is a flat list of relations. Each relation has:
400 // - SymbolID (subject): 8 bytes
401 // - relation kind (predicate): 1 byte
402 // - SymbolID (object): 8 bytes
403 // In the future, we might prefer a packed representation if the need arises.
404
writeRelation(const Relation & R,llvm::raw_ostream & OS)405 void writeRelation(const Relation &R, llvm::raw_ostream &OS) {
406 OS << R.Subject.raw();
407 OS.write(static_cast<uint8_t>(R.Predicate));
408 OS << R.Object.raw();
409 }
410
readRelation(Reader & Data)411 Relation readRelation(Reader &Data) {
412 SymbolID Subject = Data.consumeID();
413 RelationKind Predicate = static_cast<RelationKind>(Data.consume8());
414 SymbolID Object = Data.consumeID();
415 return {Subject, Predicate, Object};
416 }
417
418 struct InternedCompileCommand {
419 llvm::StringRef Directory;
420 std::vector<llvm::StringRef> CommandLine;
421 };
422
writeCompileCommand(const InternedCompileCommand & Cmd,const StringTableOut & Strings,llvm::raw_ostream & CmdOS)423 void writeCompileCommand(const InternedCompileCommand &Cmd,
424 const StringTableOut &Strings,
425 llvm::raw_ostream &CmdOS) {
426 writeVar(Strings.index(Cmd.Directory), CmdOS);
427 writeVar(Cmd.CommandLine.size(), CmdOS);
428 for (llvm::StringRef C : Cmd.CommandLine)
429 writeVar(Strings.index(C), CmdOS);
430 }
431
432 InternedCompileCommand
readCompileCommand(Reader CmdReader,llvm::ArrayRef<llvm::StringRef> Strings)433 readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef> Strings) {
434 InternedCompileCommand Cmd;
435 Cmd.Directory = CmdReader.consumeString(Strings);
436 if (!CmdReader.consumeSize(Cmd.CommandLine))
437 return Cmd;
438 for (llvm::StringRef &C : Cmd.CommandLine)
439 C = CmdReader.consumeString(Strings);
440 return Cmd;
441 }
442
443 // FILE ENCODING
444 // A file is a RIFF chunk with type 'CdIx'.
445 // It contains the sections:
446 // - meta: version number
447 // - srcs: information related to include graph
448 // - stri: string table
449 // - symb: symbols
450 // - refs: references to symbols
451
452 // The current versioning scheme is simple - non-current versions are rejected.
453 // If you make a breaking change, bump this version number to invalidate stored
454 // data. Later we may want to support some backward compatibility.
455 constexpr static uint32_t Version = 15;
456
readRIFF(llvm::StringRef Data)457 llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data) {
458 auto RIFF = riff::readFile(Data);
459 if (!RIFF)
460 return RIFF.takeError();
461 if (RIFF->Type != riff::fourCC("CdIx"))
462 return error("wrong RIFF filetype: {0}", riff::fourCCStr(RIFF->Type));
463 llvm::StringMap<llvm::StringRef> Chunks;
464 for (const auto &Chunk : RIFF->Chunks)
465 Chunks.try_emplace(llvm::StringRef(Chunk.ID.data(), Chunk.ID.size()),
466 Chunk.Data);
467
468 if (!Chunks.count("meta"))
469 return error("missing meta chunk");
470 Reader Meta(Chunks.lookup("meta"));
471 auto SeenVersion = Meta.consume32();
472 if (SeenVersion != Version)
473 return error("wrong version: want {0}, got {1}", Version, SeenVersion);
474
475 // meta chunk is checked above, as we prefer the "version mismatch" error.
476 for (llvm::StringRef RequiredChunk : {"stri"})
477 if (!Chunks.count(RequiredChunk))
478 return error("missing required chunk {0}", RequiredChunk);
479
480 auto Strings = readStringTable(Chunks.lookup("stri"));
481 if (!Strings)
482 return Strings.takeError();
483
484 IndexFileIn Result;
485 if (Chunks.count("srcs")) {
486 Reader SrcsReader(Chunks.lookup("srcs"));
487 Result.Sources.emplace();
488 while (!SrcsReader.eof()) {
489 auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings);
490 auto Entry = Result.Sources->try_emplace(IGN.URI).first;
491 Entry->getValue() = std::move(IGN);
492 // We change all the strings inside the structure to point at the keys in
493 // the map, since it is the only copy of the string that's going to live.
494 Entry->getValue().URI = Entry->getKey();
495 for (auto &Include : Entry->getValue().DirectIncludes)
496 Include = Result.Sources->try_emplace(Include).first->getKey();
497 }
498 if (SrcsReader.err())
499 return error("malformed or truncated include uri");
500 }
501
502 if (Chunks.count("symb")) {
503 Reader SymbolReader(Chunks.lookup("symb"));
504 SymbolSlab::Builder Symbols;
505 while (!SymbolReader.eof())
506 Symbols.insert(readSymbol(SymbolReader, Strings->Strings));
507 if (SymbolReader.err())
508 return error("malformed or truncated symbol");
509 Result.Symbols = std::move(Symbols).build();
510 }
511 if (Chunks.count("refs")) {
512 Reader RefsReader(Chunks.lookup("refs"));
513 RefSlab::Builder Refs;
514 while (!RefsReader.eof()) {
515 auto RefsBundle = readRefs(RefsReader, Strings->Strings);
516 for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert?
517 Refs.insert(RefsBundle.first, Ref);
518 }
519 if (RefsReader.err())
520 return error("malformed or truncated refs");
521 Result.Refs = std::move(Refs).build();
522 }
523 if (Chunks.count("rela")) {
524 Reader RelationsReader(Chunks.lookup("rela"));
525 RelationSlab::Builder Relations;
526 while (!RelationsReader.eof())
527 Relations.insert(readRelation(RelationsReader));
528 if (RelationsReader.err())
529 return error("malformed or truncated relations");
530 Result.Relations = std::move(Relations).build();
531 }
532 if (Chunks.count("cmdl")) {
533 Reader CmdReader(Chunks.lookup("cmdl"));
534 InternedCompileCommand Cmd =
535 readCompileCommand(CmdReader, Strings->Strings);
536 if (CmdReader.err())
537 return error("malformed or truncated commandline section");
538 Result.Cmd.emplace();
539 Result.Cmd->Directory = std::string(Cmd.Directory);
540 Result.Cmd->CommandLine.reserve(Cmd.CommandLine.size());
541 for (llvm::StringRef C : Cmd.CommandLine)
542 Result.Cmd->CommandLine.emplace_back(C);
543 }
544 return std::move(Result);
545 }
546
547 template <class Callback>
visitStrings(IncludeGraphNode & IGN,const Callback & CB)548 void visitStrings(IncludeGraphNode &IGN, const Callback &CB) {
549 CB(IGN.URI);
550 for (llvm::StringRef &Include : IGN.DirectIncludes)
551 CB(Include);
552 }
553
writeRIFF(const IndexFileOut & Data,llvm::raw_ostream & OS)554 void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) {
555 assert(Data.Symbols && "An index file without symbols makes no sense!");
556 riff::File RIFF;
557 RIFF.Type = riff::fourCC("CdIx");
558
559 llvm::SmallString<4> Meta;
560 {
561 llvm::raw_svector_ostream MetaOS(Meta);
562 write32(Version, MetaOS);
563 }
564 RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});
565
566 StringTableOut Strings;
567 std::vector<Symbol> Symbols;
568 for (const auto &Sym : *Data.Symbols) {
569 Symbols.emplace_back(Sym);
570 visitStrings(Symbols.back(),
571 [&](llvm::StringRef &S) { Strings.intern(S); });
572 }
573 std::vector<IncludeGraphNode> Sources;
574 if (Data.Sources)
575 for (const auto &Source : *Data.Sources) {
576 Sources.push_back(Source.getValue());
577 visitStrings(Sources.back(),
578 [&](llvm::StringRef &S) { Strings.intern(S); });
579 }
580
581 std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
582 if (Data.Refs) {
583 for (const auto &Sym : *Data.Refs) {
584 Refs.emplace_back(Sym);
585 for (auto &Ref : Refs.back().second) {
586 llvm::StringRef File = Ref.Location.FileURI;
587 Strings.intern(File);
588 Ref.Location.FileURI = File.data();
589 }
590 }
591 }
592
593 std::vector<Relation> Relations;
594 if (Data.Relations) {
595 for (const auto &Relation : *Data.Relations) {
596 Relations.emplace_back(Relation);
597 // No strings to be interned in relations.
598 }
599 }
600
601 InternedCompileCommand InternedCmd;
602 if (Data.Cmd) {
603 InternedCmd.CommandLine.reserve(Data.Cmd->CommandLine.size());
604 InternedCmd.Directory = Data.Cmd->Directory;
605 Strings.intern(InternedCmd.Directory);
606 for (llvm::StringRef C : Data.Cmd->CommandLine) {
607 InternedCmd.CommandLine.emplace_back(C);
608 Strings.intern(InternedCmd.CommandLine.back());
609 }
610 }
611
612 std::string StringSection;
613 {
614 llvm::raw_string_ostream StringOS(StringSection);
615 Strings.finalize(StringOS);
616 }
617 RIFF.Chunks.push_back({riff::fourCC("stri"), StringSection});
618
619 std::string SymbolSection;
620 {
621 llvm::raw_string_ostream SymbolOS(SymbolSection);
622 for (const auto &Sym : Symbols)
623 writeSymbol(Sym, Strings, SymbolOS);
624 }
625 RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection});
626
627 std::string RefsSection;
628 if (Data.Refs) {
629 {
630 llvm::raw_string_ostream RefsOS(RefsSection);
631 for (const auto &Sym : Refs)
632 writeRefs(Sym.first, Sym.second, Strings, RefsOS);
633 }
634 RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
635 }
636
637 std::string RelationSection;
638 if (Data.Relations) {
639 {
640 llvm::raw_string_ostream RelationOS{RelationSection};
641 for (const auto &Relation : Relations)
642 writeRelation(Relation, RelationOS);
643 }
644 RIFF.Chunks.push_back({riff::fourCC("rela"), RelationSection});
645 }
646
647 std::string SrcsSection;
648 {
649 {
650 llvm::raw_string_ostream SrcsOS(SrcsSection);
651 for (const auto &SF : Sources)
652 writeIncludeGraphNode(SF, Strings, SrcsOS);
653 }
654 RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection});
655 }
656
657 std::string CmdlSection;
658 if (Data.Cmd) {
659 {
660 llvm::raw_string_ostream CmdOS(CmdlSection);
661 writeCompileCommand(InternedCmd, Strings, CmdOS);
662 }
663 RIFF.Chunks.push_back({riff::fourCC("cmdl"), CmdlSection});
664 }
665
666 OS << RIFF;
667 }
668
669 } // namespace
670
671 // Defined in YAMLSerialization.cpp.
672 void writeYAML(const IndexFileOut &, llvm::raw_ostream &);
673 llvm::Expected<IndexFileIn> readYAML(llvm::StringRef);
674
operator <<(llvm::raw_ostream & OS,const IndexFileOut & O)675 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) {
676 switch (O.Format) {
677 case IndexFileFormat::RIFF:
678 writeRIFF(O, OS);
679 break;
680 case IndexFileFormat::YAML:
681 writeYAML(O, OS);
682 break;
683 }
684 return OS;
685 }
686
readIndexFile(llvm::StringRef Data)687 llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef Data) {
688 if (Data.startswith("RIFF")) {
689 return readRIFF(Data);
690 } else if (auto YAMLContents = readYAML(Data)) {
691 return std::move(*YAMLContents);
692 } else {
693 return error("Not a RIFF file and failed to parse as YAML: {0}",
694 YAMLContents.takeError());
695 }
696 }
697
loadIndex(llvm::StringRef SymbolFilename,bool UseDex)698 std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
699 bool UseDex) {
700 trace::Span OverallTracer("LoadIndex");
701 auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
702 if (!Buffer) {
703 elog("Can't open {0}: {1}", SymbolFilename, Buffer.getError().message());
704 return nullptr;
705 }
706
707 SymbolSlab Symbols;
708 RefSlab Refs;
709 RelationSlab Relations;
710 {
711 trace::Span Tracer("ParseIndex");
712 if (auto I = readIndexFile(Buffer->get()->getBuffer())) {
713 if (I->Symbols)
714 Symbols = std::move(*I->Symbols);
715 if (I->Refs)
716 Refs = std::move(*I->Refs);
717 if (I->Relations)
718 Relations = std::move(*I->Relations);
719 } else {
720 elog("Bad index file: {0}", I.takeError());
721 return nullptr;
722 }
723 }
724
725 size_t NumSym = Symbols.size();
726 size_t NumRefs = Refs.numRefs();
727 size_t NumRelations = Relations.size();
728
729 trace::Span Tracer("BuildIndex");
730 auto Index = UseDex ? dex::Dex::build(std::move(Symbols), std::move(Refs),
731 std::move(Relations))
732 : MemIndex::build(std::move(Symbols), std::move(Refs),
733 std::move(Relations));
734 vlog("Loaded {0} from {1} with estimated memory usage {2} bytes\n"
735 " - number of symbols: {3}\n"
736 " - number of refs: {4}\n"
737 " - number of relations: {5}",
738 UseDex ? "Dex" : "MemIndex", SymbolFilename,
739 Index->estimateMemoryUsage(), NumSym, NumRefs, NumRelations);
740 return Index;
741 }
742
743 } // namespace clangd
744 } // namespace clang
745