• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- SerializationTests.cpp - Binary and YAML serialization unit tests -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Headers.h"
10 #include "RIFF.h"
11 #include "index/Index.h"
12 #include "index/Serialization.h"
13 #include "support/Logger.h"
14 #include "clang/Tooling/CompilationDatabase.h"
15 #include "llvm/ADT/ScopeExit.h"
16 #include "llvm/ADT/StringExtras.h"
17 #include "llvm/Support/Compression.h"
18 #include "llvm/Support/Error.h"
19 #include "llvm/Support/ScopedPrinter.h"
20 #include "gmock/gmock.h"
21 #include "gtest/gtest.h"
22 #ifdef LLVM_ON_UNIX
23 #include <sys/resource.h>
24 #endif
25 
26 using ::testing::ElementsAre;
27 using ::testing::Pair;
28 using ::testing::UnorderedElementsAre;
29 using ::testing::UnorderedElementsAreArray;
30 
31 namespace clang {
32 namespace clangd {
33 namespace {
34 
// YAML fixture shared by the tests in this file. It contains two !Symbol
// documents, one !Refs document, one !Relations document, one !Cmd document
// and one !Source document. The literal must contain only YAML text: any
// stray characters at the start of a line change the parsed payload.
const char *YAML = R"(
---
!Symbol
ID: 057557CEBF6E6B2D
Name:   'Foo1'
Scope:   'clang::'
SymInfo:
  Kind:            Function
  Lang:            Cpp
CanonicalDeclaration:
  FileURI:        file:///path/foo.h
  Start:
    Line: 1
    Column: 0
  End:
    Line: 1
    Column: 1
Origin:    128
Flags:    129
Documentation:    'Foo doc'
ReturnType:    'int'
IncludeHeaders:
  - Header:    'include1'
    References:    7
  - Header:    'include2'
    References:    3
...
---
!Symbol
ID: 057557CEBF6E6B2E
Name:   'Foo2'
Scope:   'clang::'
SymInfo:
  Kind:            Function
  Lang:            Cpp
CanonicalDeclaration:
  FileURI:        file:///path/bar.h
  Start:
    Line: 1
    Column: 0
  End:
    Line: 1
    Column: 1
Flags:    2
Signature:    '-sig'
CompletionSnippetSuffix:    '-snippet'
...
!Refs
ID: 057557CEBF6E6B2D
References:
  - Kind: 4
    Location:
      FileURI:    file:///path/foo.cc
      Start:
        Line: 5
        Column: 3
      End:
        Line: 5
        Column: 8
...
--- !Relations
Subject:
  ID:              6481EE7AF2841756
Predicate:       0
Object:
  ID:              6512AEC512EA3A2D
...
--- !Cmd
Directory:       'testdir'
CommandLine:
  - 'cmd1'
  - 'cmd2'
...
--- !Source
URI:             'file:///path/source1.cpp'
Flags:           1
Digest:          EED8F5EAF25C453C
DirectIncludes:
  - 'file:///path/inc1.h'
  - 'file:///path/inc2.h'
...
)";
117 
118 MATCHER_P(ID, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); }
119 MATCHER_P(QName, Name, "") { return (arg.Scope + arg.Name).str() == Name; }
120 MATCHER_P2(IncludeHeaderWithRef, IncludeHeader, References, "") {
121   return (arg.IncludeHeader == IncludeHeader) && (arg.References == References);
122 }
123 
// Reading an empty buffer must succeed rather than crash.
TEST(SerializationTest, NoCrashOnEmptyYAML) {
  auto Parsed = readIndexFile("");
  // Stream the error on failure, as the other tests in this file do, so the
  // cause is reported and the error is taken out of the Expected.
  EXPECT_TRUE(bool(Parsed)) << Parsed.takeError();
}
127 
TEST(SerializationTest,YAMLConversions)128 TEST(SerializationTest, YAMLConversions) {
129   auto ParsedYAML = readIndexFile(YAML);
130   ASSERT_TRUE(bool(ParsedYAML)) << ParsedYAML.takeError();
131   ASSERT_TRUE(bool(ParsedYAML->Symbols));
132   EXPECT_THAT(
133       *ParsedYAML->Symbols,
134       UnorderedElementsAre(ID("057557CEBF6E6B2D"), ID("057557CEBF6E6B2E")));
135 
136   auto Sym1 = *ParsedYAML->Symbols->find(
137       cantFail(SymbolID::fromStr("057557CEBF6E6B2D")));
138   auto Sym2 = *ParsedYAML->Symbols->find(
139       cantFail(SymbolID::fromStr("057557CEBF6E6B2E")));
140 
141   EXPECT_THAT(Sym1, QName("clang::Foo1"));
142   EXPECT_EQ(Sym1.Signature, "");
143   EXPECT_EQ(Sym1.Documentation, "Foo doc");
144   EXPECT_EQ(Sym1.ReturnType, "int");
145   EXPECT_EQ(StringRef(Sym1.CanonicalDeclaration.FileURI), "file:///path/foo.h");
146   EXPECT_EQ(Sym1.Origin, static_cast<SymbolOrigin>(1 << 7));
147   EXPECT_EQ(static_cast<uint8_t>(Sym1.Flags), 129);
148   EXPECT_TRUE(Sym1.Flags & Symbol::IndexedForCodeCompletion);
149   EXPECT_FALSE(Sym1.Flags & Symbol::Deprecated);
150   EXPECT_THAT(Sym1.IncludeHeaders,
151               UnorderedElementsAre(IncludeHeaderWithRef("include1", 7u),
152                                    IncludeHeaderWithRef("include2", 3u)));
153 
154   EXPECT_THAT(Sym2, QName("clang::Foo2"));
155   EXPECT_EQ(Sym2.Signature, "-sig");
156   EXPECT_EQ(Sym2.ReturnType, "");
157   EXPECT_EQ(llvm::StringRef(Sym2.CanonicalDeclaration.FileURI),
158             "file:///path/bar.h");
159   EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion);
160   EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated);
161 
162   ASSERT_TRUE(bool(ParsedYAML->Refs));
163   EXPECT_THAT(
164       *ParsedYAML->Refs,
165       UnorderedElementsAre(Pair(cantFail(SymbolID::fromStr("057557CEBF6E6B2D")),
166                                 ::testing::SizeIs(1))));
167   auto Ref1 = ParsedYAML->Refs->begin()->second.front();
168   EXPECT_EQ(Ref1.Kind, RefKind::Reference);
169   EXPECT_EQ(StringRef(Ref1.Location.FileURI), "file:///path/foo.cc");
170 
171   SymbolID Base = cantFail(SymbolID::fromStr("6481EE7AF2841756"));
172   SymbolID Derived = cantFail(SymbolID::fromStr("6512AEC512EA3A2D"));
173   ASSERT_TRUE(bool(ParsedYAML->Relations));
174   EXPECT_THAT(
175       *ParsedYAML->Relations,
176       UnorderedElementsAre(Relation{Base, RelationKind::BaseOf, Derived}));
177 
178   ASSERT_TRUE(bool(ParsedYAML->Cmd));
179   auto &Cmd = *ParsedYAML->Cmd;
180   ASSERT_EQ(Cmd.Directory, "testdir");
181   EXPECT_THAT(Cmd.CommandLine, ElementsAre("cmd1", "cmd2"));
182 
183   ASSERT_TRUE(bool(ParsedYAML->Sources));
184   const auto *URI = "file:///path/source1.cpp";
185   ASSERT_TRUE(ParsedYAML->Sources->count(URI));
186   auto IGNDeserialized = ParsedYAML->Sources->lookup(URI);
187   EXPECT_EQ(llvm::toHex(IGNDeserialized.Digest), "EED8F5EAF25C453C");
188   EXPECT_THAT(IGNDeserialized.DirectIncludes,
189               ElementsAre("file:///path/inc1.h", "file:///path/inc2.h"));
190   EXPECT_EQ(IGNDeserialized.URI, URI);
191   EXPECT_EQ(IGNDeserialized.Flags, IncludeGraphNode::SourceFlag(1));
192 }
193 
YAMLFromSymbols(const SymbolSlab & Slab)194 std::vector<std::string> YAMLFromSymbols(const SymbolSlab &Slab) {
195   std::vector<std::string> Result;
196   for (const auto &Sym : Slab)
197     Result.push_back(toYAML(Sym));
198   return Result;
199 }
YAMLFromRefs(const RefSlab & Slab)200 std::vector<std::string> YAMLFromRefs(const RefSlab &Slab) {
201   std::vector<std::string> Result;
202   for (const auto &Refs : Slab)
203     Result.push_back(toYAML(Refs));
204   return Result;
205 }
206 
YAMLFromRelations(const RelationSlab & Slab)207 std::vector<std::string> YAMLFromRelations(const RelationSlab &Slab) {
208   std::vector<std::string> Result;
209   for (const auto &Rel : Slab)
210     Result.push_back(toYAML(Rel));
211   return Result;
212 }
213 
// Round-trips the YAML fixture through the RIFF binary format and checks that
// symbols, refs and relations survive unchanged.
TEST(SerializationTest, BinaryConversions) {
  auto In = readIndexFile(YAML);
  // Fatal: In and its slabs are dereferenced below.
  ASSERT_TRUE(bool(In)) << In.takeError();
  ASSERT_TRUE(bool(In->Symbols));
  ASSERT_TRUE(bool(In->Refs));
  ASSERT_TRUE(bool(In->Relations));

  // Write to binary format, and parse again.
  IndexFileOut Out(*In);
  Out.Format = IndexFileFormat::RIFF;
  std::string Serialized = llvm::to_string(Out);

  auto In2 = readIndexFile(Serialized);
  // Report the error of the object that actually failed (In2, not In).
  ASSERT_TRUE(bool(In2)) << In2.takeError();
  ASSERT_TRUE(In2->Symbols);
  ASSERT_TRUE(In2->Refs);
  ASSERT_TRUE(In2->Relations);

  // Assert the YAML serializations match, for nice comparisons and diffs.
  EXPECT_THAT(YAMLFromSymbols(*In2->Symbols),
              UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
  EXPECT_THAT(YAMLFromRefs(*In2->Refs),
              UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
  EXPECT_THAT(YAMLFromRelations(*In2->Relations),
              UnorderedElementsAreArray(YAMLFromRelations(*In->Relations)));
}
237 
// Attaches an include graph to the index, round-trips it through the RIFF
// binary format, and checks that symbols, refs and the include-graph node
// survive unchanged.
TEST(SerializationTest, SrcsTest) {
  auto In = readIndexFile(YAML);
  // Fatal: In and its slabs are dereferenced below.
  ASSERT_TRUE(bool(In)) << In.takeError();
  ASSERT_TRUE(bool(In->Symbols));
  ASSERT_TRUE(bool(In->Refs));

  std::string TestContent("TestContent");
  IncludeGraphNode IGN;
  IGN.Digest = digest(TestContent);
  IGN.DirectIncludes = {"inc1", "inc2"};
  IGN.URI = "URI";
  IGN.Flags |= IncludeGraphNode::SourceFlag::IsTU;
  IGN.Flags |= IncludeGraphNode::SourceFlag::HadErrors;
  IncludeGraph Sources;
  Sources[IGN.URI] = IGN;

  // Write to binary format, and parse again.
  IndexFileOut Out(*In);
  Out.Format = IndexFileFormat::RIFF;
  Out.Sources = &Sources;
  std::string Serialized = llvm::to_string(Out);

  // Use a distinct name for the re-read index. Shadowing `In` here would make
  // the comparisons below compare the deserialized data against itself.
  auto In2 = readIndexFile(Serialized);
  ASSERT_TRUE(bool(In2)) << In2.takeError();
  ASSERT_TRUE(In2->Symbols);
  ASSERT_TRUE(In2->Refs);
  ASSERT_TRUE(In2->Sources);
  ASSERT_TRUE(In2->Sources->count(IGN.URI));

  // Assert the YAML serializations match, for nice comparisons and diffs.
  EXPECT_THAT(YAMLFromSymbols(*In2->Symbols),
              UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
  EXPECT_THAT(YAMLFromRefs(*In2->Refs),
              UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));

  auto IGNDeserialized = In2->Sources->lookup(IGN.URI);
  EXPECT_EQ(IGNDeserialized.Digest, IGN.Digest);
  EXPECT_EQ(IGNDeserialized.DirectIncludes, IGN.DirectIncludes);
  EXPECT_EQ(IGNDeserialized.URI, IGN.URI);
  EXPECT_EQ(IGNDeserialized.Flags, IGN.Flags);
}
276 
// Attaches a compile command to the index, round-trips it through the RIFF
// binary format, and checks which fields come back: Directory and CommandLine
// must match, while Filename, Heuristic and Output are expected to differ.
TEST(SerializationTest, CmdlTest) {
  auto In = readIndexFile(YAML);
  // Fatal: In is dereferenced to build the output below.
  ASSERT_TRUE(bool(In)) << In.takeError();

  tooling::CompileCommand Cmd;
  Cmd.Directory = "testdir";
  Cmd.CommandLine.push_back("cmd1");
  Cmd.CommandLine.push_back("cmd2");
  Cmd.Filename = "ignored";
  Cmd.Heuristic = "ignored";
  Cmd.Output = "ignored";

  IndexFileOut Out(*In);
  Out.Format = IndexFileFormat::RIFF;
  Out.Cmd = &Cmd;
  std::string Serialized = llvm::to_string(Out);

  // Distinct name for the re-read index, so it does not shadow `In`.
  auto In2 = readIndexFile(Serialized);
  ASSERT_TRUE(bool(In2)) << In2.takeError();
  ASSERT_TRUE(In2->Cmd);

  const tooling::CompileCommand &SerializedCmd = In2->Cmd.getValue();
  EXPECT_EQ(SerializedCmd.CommandLine, Cmd.CommandLine);
  EXPECT_EQ(SerializedCmd.Directory, Cmd.Directory);
  EXPECT_NE(SerializedCmd.Filename, Cmd.Filename);
  EXPECT_NE(SerializedCmd.Heuristic, Cmd.Heuristic);
  EXPECT_NE(SerializedCmd.Output, Cmd.Output);
}
307 
308 // rlimit is part of POSIX.
309 // ASan uses a lot of address space, so we can't apply strict limits.
310 #if LLVM_ON_UNIX && !LLVM_ADDRESS_SANITIZER_BUILD
311 class ScopedMemoryLimit {
312   struct rlimit OriginalLimit;
313   bool Succeeded = false;
314 
315 public:
ScopedMemoryLimit(rlim_t Bytes)316   ScopedMemoryLimit(rlim_t Bytes) {
317     if (!getrlimit(RLIMIT_AS, &OriginalLimit)) {
318       struct rlimit NewLimit = OriginalLimit;
319       NewLimit.rlim_cur = Bytes;
320       Succeeded = !setrlimit(RLIMIT_AS, &NewLimit);
321     }
322     if (!Succeeded)
323       log("Failed to set rlimit");
324   }
325 
~ScopedMemoryLimit()326   ~ScopedMemoryLimit() {
327     if (Succeeded)
328       setrlimit(RLIMIT_AS, &OriginalLimit);
329   }
330 };
331 #else
332 class ScopedMemoryLimit {
333 public:
ScopedMemoryLimit(unsigned Bytes)334   ScopedMemoryLimit(unsigned Bytes) { log("rlimit unsupported"); }
335 };
336 #endif
337 
338 // Test that our deserialization detects invalid array sizes without allocating.
339 // If this detection fails, the test should allocate a huge array and crash.
TEST(SerializationTest,NoCrashOnBadArraySize)340 TEST(SerializationTest, NoCrashOnBadArraySize) {
341   // This test is tricky because we need to construct a subtly invalid file.
342   // First, create a valid serialized file.
343   auto In = readIndexFile(YAML);
344   ASSERT_FALSE(!In) << In.takeError();
345   IndexFileOut Out(*In);
346   Out.Format = IndexFileFormat::RIFF;
347   std::string Serialized = llvm::to_string(Out);
348 
349   // Low-level parse it again and find the `srcs` chunk we're going to corrupt.
350   auto Parsed = riff::readFile(Serialized);
351   ASSERT_FALSE(!Parsed) << Parsed.takeError();
352   auto Srcs = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) {
353     return C.ID == riff::fourCC("srcs");
354   });
355   ASSERT_NE(Srcs, Parsed->Chunks.end());
356 
357   // Srcs consists of a sequence of IncludeGraphNodes. In our case, just one.
358   // The node has:
359   //  - 1 byte: flags (1)
360   //  - varint(stringID): URI
361   //  - 8 byte: file digest
362   //  - varint: DirectIncludes.length
363   //  - repeated varint(stringID): DirectIncludes
364   // We want to set DirectIncludes.length to a huge number.
365   // The offset isn't trivial to find, so we use the file digest.
366   std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C");
367   unsigned Pos = Srcs->Data.find_first_of(FileDigest);
368   ASSERT_NE(Pos, StringRef::npos) << "Couldn't locate file digest";
369   Pos += FileDigest.size();
370 
371   // Varints are little-endian base-128 numbers, where the top-bit of each byte
372   // indicates whether there are more. ffffffff0f -> 0xffffffff.
373   std::string CorruptSrcs =
374       (Srcs->Data.take_front(Pos) + llvm::fromHex("ffffffff0f") +
375        "some_random_garbage")
376           .str();
377   Srcs->Data = CorruptSrcs;
378 
379   // Try to crash rather than hang on large allocation.
380   ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB
381 
382   std::string CorruptFile = llvm::to_string(*Parsed);
383   auto CorruptParsed = readIndexFile(CorruptFile);
384   ASSERT_TRUE(!CorruptParsed);
385   EXPECT_EQ(llvm::toString(CorruptParsed.takeError()),
386             "malformed or truncated include uri");
387 }
388 
389 // Check we detect invalid string table size size without allocating it first.
390 // If this detection fails, the test should allocate a huge array and crash.
TEST(SerializationTest,NoCrashOnBadStringTableSize)391 TEST(SerializationTest, NoCrashOnBadStringTableSize) {
392   if (!llvm::zlib::isAvailable()) {
393     log("skipping test, no zlib");
394     return;
395   }
396 
397   // First, create a valid serialized file.
398   auto In = readIndexFile(YAML);
399   ASSERT_FALSE(!In) << In.takeError();
400   IndexFileOut Out(*In);
401   Out.Format = IndexFileFormat::RIFF;
402   std::string Serialized = llvm::to_string(Out);
403 
404   // Low-level parse it again, we're going to replace the `stri` chunk.
405   auto Parsed = riff::readFile(Serialized);
406   ASSERT_FALSE(!Parsed) << Parsed.takeError();
407   auto Stri = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) {
408     return C.ID == riff::fourCC("stri");
409   });
410   ASSERT_NE(Stri, Parsed->Chunks.end());
411 
412   // stri consists of an 8 byte uncompressed-size, and then compressed data.
413   // We'll claim our small amount of data expands to 4GB
414   std::string CorruptStri =
415       (llvm::fromHex("ffffffff") + Stri->Data.drop_front(4)).str();
416   Stri->Data = CorruptStri;
417   std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C");
418 
419   // Try to crash rather than hang on large allocation.
420   ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB
421 
422   std::string CorruptFile = llvm::to_string(*Parsed);
423   auto CorruptParsed = readIndexFile(CorruptFile);
424   ASSERT_TRUE(!CorruptParsed);
425   EXPECT_THAT(llvm::toString(CorruptParsed.takeError()),
426               testing::HasSubstr("bytes is implausible"));
427 }
428 
429 } // namespace
430 } // namespace clangd
431 } // namespace clang
432