//===-- SerializationTests.cpp - Binary and YAML serialization unit tests -===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Headers.h" #include "RIFF.h" #include "index/Index.h" #include "index/Serialization.h" #include "support/Logger.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Error.h" #include "llvm/Support/ScopedPrinter.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #ifdef LLVM_ON_UNIX #include #endif using ::testing::ElementsAre; using ::testing::Pair; using ::testing::UnorderedElementsAre; using ::testing::UnorderedElementsAreArray; namespace clang { namespace clangd { namespace { const char *YAML = R"( --- !Symbol ID: 057557CEBF6E6B2D Name: 'Foo1' Scope: 'clang::' SymInfo: Kind: Function Lang: Cpp CanonicalDeclaration: FileURI: file:///path/foo.h Start: Line: 1 Column: 0 End: Line: 1 Column: 1 Origin: 128 Flags: 129 Documentation: 'Foo doc' ReturnType: 'int' IncludeHeaders: - Header: 'include1' References: 7 - Header: 'include2' References: 3 ... --- !Symbol ID: 057557CEBF6E6B2E Name: 'Foo2' Scope: 'clang::' SymInfo: Kind: Function Lang: Cpp CanonicalDeclaration: FileURI: file:///path/bar.h Start: Line: 1 Column: 0 End: Line: 1 Column: 1 Flags: 2 Signature: '-sig' CompletionSnippetSuffix: '-snippet' ... !Refs ID: 057557CEBF6E6B2D References: - Kind: 4 Location: FileURI: file:///path/foo.cc Start: Line: 5 Column: 3 End: Line: 5 Column: 8 ... --- !Relations Subject: ID: 6481EE7AF2841756 Predicate: 0 Object: ID: 6512AEC512EA3A2D ... --- !Cmd Directory: 'testdir' CommandLine: - 'cmd1' - 'cmd2' ... --- !Source URI: 'file:///path/source1.cpp' Flags: 1 Digest: EED8F5EAF25C453C DirectIncludes: - 'file:///path/inc1.h' - 'file:///path/inc2.h' ... )"; MATCHER_P(ID, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); } MATCHER_P(QName, Name, "") { return (arg.Scope + arg.Name).str() == Name; } MATCHER_P2(IncludeHeaderWithRef, IncludeHeader, References, "") { return (arg.IncludeHeader == IncludeHeader) && (arg.References == References); } TEST(SerializationTest, NoCrashOnEmptyYAML) { EXPECT_TRUE(bool(readIndexFile(""))); } TEST(SerializationTest, YAMLConversions) { auto ParsedYAML = readIndexFile(YAML); ASSERT_TRUE(bool(ParsedYAML)) << ParsedYAML.takeError(); ASSERT_TRUE(bool(ParsedYAML->Symbols)); EXPECT_THAT( *ParsedYAML->Symbols, UnorderedElementsAre(ID("057557CEBF6E6B2D"), ID("057557CEBF6E6B2E"))); auto Sym1 = *ParsedYAML->Symbols->find( cantFail(SymbolID::fromStr("057557CEBF6E6B2D"))); auto Sym2 = *ParsedYAML->Symbols->find( cantFail(SymbolID::fromStr("057557CEBF6E6B2E"))); EXPECT_THAT(Sym1, QName("clang::Foo1")); EXPECT_EQ(Sym1.Signature, ""); EXPECT_EQ(Sym1.Documentation, "Foo doc"); EXPECT_EQ(Sym1.ReturnType, "int"); EXPECT_EQ(StringRef(Sym1.CanonicalDeclaration.FileURI), "file:///path/foo.h"); EXPECT_EQ(Sym1.Origin, static_cast(1 << 7)); EXPECT_EQ(static_cast(Sym1.Flags), 129); EXPECT_TRUE(Sym1.Flags & Symbol::IndexedForCodeCompletion); EXPECT_FALSE(Sym1.Flags & Symbol::Deprecated); EXPECT_THAT(Sym1.IncludeHeaders, UnorderedElementsAre(IncludeHeaderWithRef("include1", 7u), IncludeHeaderWithRef("include2", 3u))); EXPECT_THAT(Sym2, QName("clang::Foo2")); EXPECT_EQ(Sym2.Signature, "-sig"); EXPECT_EQ(Sym2.ReturnType, ""); EXPECT_EQ(llvm::StringRef(Sym2.CanonicalDeclaration.FileURI), "file:///path/bar.h"); EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion); EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated); ASSERT_TRUE(bool(ParsedYAML->Refs)); EXPECT_THAT( *ParsedYAML->Refs, UnorderedElementsAre(Pair(cantFail(SymbolID::fromStr("057557CEBF6E6B2D")), ::testing::SizeIs(1)))); auto Ref1 = ParsedYAML->Refs->begin()->second.front(); EXPECT_EQ(Ref1.Kind, RefKind::Reference); EXPECT_EQ(StringRef(Ref1.Location.FileURI), "file:///path/foo.cc"); SymbolID Base = cantFail(SymbolID::fromStr("6481EE7AF2841756")); SymbolID Derived = cantFail(SymbolID::fromStr("6512AEC512EA3A2D")); ASSERT_TRUE(bool(ParsedYAML->Relations)); EXPECT_THAT( *ParsedYAML->Relations, UnorderedElementsAre(Relation{Base, RelationKind::BaseOf, Derived})); ASSERT_TRUE(bool(ParsedYAML->Cmd)); auto &Cmd = *ParsedYAML->Cmd; ASSERT_EQ(Cmd.Directory, "testdir"); EXPECT_THAT(Cmd.CommandLine, ElementsAre("cmd1", "cmd2")); ASSERT_TRUE(bool(ParsedYAML->Sources)); const auto *URI = "file:///path/source1.cpp"; ASSERT_TRUE(ParsedYAML->Sources->count(URI)); auto IGNDeserialized = ParsedYAML->Sources->lookup(URI); EXPECT_EQ(llvm::toHex(IGNDeserialized.Digest), "EED8F5EAF25C453C"); EXPECT_THAT(IGNDeserialized.DirectIncludes, ElementsAre("file:///path/inc1.h", "file:///path/inc2.h")); EXPECT_EQ(IGNDeserialized.URI, URI); EXPECT_EQ(IGNDeserialized.Flags, IncludeGraphNode::SourceFlag(1)); } std::vector YAMLFromSymbols(const SymbolSlab &Slab) { std::vector Result; for (const auto &Sym : Slab) Result.push_back(toYAML(Sym)); return Result; } std::vector YAMLFromRefs(const RefSlab &Slab) { std::vector Result; for (const auto &Refs : Slab) Result.push_back(toYAML(Refs)); return Result; } std::vector YAMLFromRelations(const RelationSlab &Slab) { std::vector Result; for (const auto &Rel : Slab) Result.push_back(toYAML(Rel)); return Result; } TEST(SerializationTest, BinaryConversions) { auto In = readIndexFile(YAML); EXPECT_TRUE(bool(In)) << In.takeError(); // Write to binary format, and parse again. IndexFileOut Out(*In); Out.Format = IndexFileFormat::RIFF; std::string Serialized = llvm::to_string(Out); auto In2 = readIndexFile(Serialized); ASSERT_TRUE(bool(In2)) << In.takeError(); ASSERT_TRUE(In2->Symbols); ASSERT_TRUE(In2->Refs); ASSERT_TRUE(In2->Relations); // Assert the YAML serializations match, for nice comparisons and diffs. EXPECT_THAT(YAMLFromSymbols(*In2->Symbols), UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols))); EXPECT_THAT(YAMLFromRefs(*In2->Refs), UnorderedElementsAreArray(YAMLFromRefs(*In->Refs))); EXPECT_THAT(YAMLFromRelations(*In2->Relations), UnorderedElementsAreArray(YAMLFromRelations(*In->Relations))); } TEST(SerializationTest, SrcsTest) { auto In = readIndexFile(YAML); EXPECT_TRUE(bool(In)) << In.takeError(); std::string TestContent("TestContent"); IncludeGraphNode IGN; IGN.Digest = digest(TestContent); IGN.DirectIncludes = {"inc1", "inc2"}; IGN.URI = "URI"; IGN.Flags |= IncludeGraphNode::SourceFlag::IsTU; IGN.Flags |= IncludeGraphNode::SourceFlag::HadErrors; IncludeGraph Sources; Sources[IGN.URI] = IGN; // Write to binary format, and parse again. IndexFileOut Out(*In); Out.Format = IndexFileFormat::RIFF; Out.Sources = &Sources; { std::string Serialized = llvm::to_string(Out); auto In = readIndexFile(Serialized); ASSERT_TRUE(bool(In)) << In.takeError(); ASSERT_TRUE(In->Symbols); ASSERT_TRUE(In->Refs); ASSERT_TRUE(In->Sources); ASSERT_TRUE(In->Sources->count(IGN.URI)); // Assert the YAML serializations match, for nice comparisons and diffs. EXPECT_THAT(YAMLFromSymbols(*In->Symbols), UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols))); EXPECT_THAT(YAMLFromRefs(*In->Refs), UnorderedElementsAreArray(YAMLFromRefs(*In->Refs))); auto IGNDeserialized = In->Sources->lookup(IGN.URI); EXPECT_EQ(IGNDeserialized.Digest, IGN.Digest); EXPECT_EQ(IGNDeserialized.DirectIncludes, IGN.DirectIncludes); EXPECT_EQ(IGNDeserialized.URI, IGN.URI); EXPECT_EQ(IGNDeserialized.Flags, IGN.Flags); } } TEST(SerializationTest, CmdlTest) { auto In = readIndexFile(YAML); EXPECT_TRUE(bool(In)) << In.takeError(); tooling::CompileCommand Cmd; Cmd.Directory = "testdir"; Cmd.CommandLine.push_back("cmd1"); Cmd.CommandLine.push_back("cmd2"); Cmd.Filename = "ignored"; Cmd.Heuristic = "ignored"; Cmd.Output = "ignored"; IndexFileOut Out(*In); Out.Format = IndexFileFormat::RIFF; Out.Cmd = &Cmd; { std::string Serialized = llvm::to_string(Out); auto In = readIndexFile(Serialized); ASSERT_TRUE(bool(In)) << In.takeError(); ASSERT_TRUE(In->Cmd); const tooling::CompileCommand &SerializedCmd = In->Cmd.getValue(); EXPECT_EQ(SerializedCmd.CommandLine, Cmd.CommandLine); EXPECT_EQ(SerializedCmd.Directory, Cmd.Directory); EXPECT_NE(SerializedCmd.Filename, Cmd.Filename); EXPECT_NE(SerializedCmd.Heuristic, Cmd.Heuristic); EXPECT_NE(SerializedCmd.Output, Cmd.Output); } } // rlimit is part of POSIX. // ASan uses a lot of address space, so we can't apply strict limits. #if LLVM_ON_UNIX && !LLVM_ADDRESS_SANITIZER_BUILD class ScopedMemoryLimit { struct rlimit OriginalLimit; bool Succeeded = false; public: ScopedMemoryLimit(rlim_t Bytes) { if (!getrlimit(RLIMIT_AS, &OriginalLimit)) { struct rlimit NewLimit = OriginalLimit; NewLimit.rlim_cur = Bytes; Succeeded = !setrlimit(RLIMIT_AS, &NewLimit); } if (!Succeeded) log("Failed to set rlimit"); } ~ScopedMemoryLimit() { if (Succeeded) setrlimit(RLIMIT_AS, &OriginalLimit); } }; #else class ScopedMemoryLimit { public: ScopedMemoryLimit(unsigned Bytes) { log("rlimit unsupported"); } }; #endif // Test that our deserialization detects invalid array sizes without allocating. // If this detection fails, the test should allocate a huge array and crash. TEST(SerializationTest, NoCrashOnBadArraySize) { // This test is tricky because we need to construct a subtly invalid file. // First, create a valid serialized file. auto In = readIndexFile(YAML); ASSERT_FALSE(!In) << In.takeError(); IndexFileOut Out(*In); Out.Format = IndexFileFormat::RIFF; std::string Serialized = llvm::to_string(Out); // Low-level parse it again and find the `srcs` chunk we're going to corrupt. auto Parsed = riff::readFile(Serialized); ASSERT_FALSE(!Parsed) << Parsed.takeError(); auto Srcs = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) { return C.ID == riff::fourCC("srcs"); }); ASSERT_NE(Srcs, Parsed->Chunks.end()); // Srcs consists of a sequence of IncludeGraphNodes. In our case, just one. // The node has: // - 1 byte: flags (1) // - varint(stringID): URI // - 8 byte: file digest // - varint: DirectIncludes.length // - repeated varint(stringID): DirectIncludes // We want to set DirectIncludes.length to a huge number. // The offset isn't trivial to find, so we use the file digest. std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C"); unsigned Pos = Srcs->Data.find_first_of(FileDigest); ASSERT_NE(Pos, StringRef::npos) << "Couldn't locate file digest"; Pos += FileDigest.size(); // Varints are little-endian base-128 numbers, where the top-bit of each byte // indicates whether there are more. ffffffff0f -> 0xffffffff. std::string CorruptSrcs = (Srcs->Data.take_front(Pos) + llvm::fromHex("ffffffff0f") + "some_random_garbage") .str(); Srcs->Data = CorruptSrcs; // Try to crash rather than hang on large allocation. ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB std::string CorruptFile = llvm::to_string(*Parsed); auto CorruptParsed = readIndexFile(CorruptFile); ASSERT_TRUE(!CorruptParsed); EXPECT_EQ(llvm::toString(CorruptParsed.takeError()), "malformed or truncated include uri"); } // Check we detect invalid string table size size without allocating it first. // If this detection fails, the test should allocate a huge array and crash. TEST(SerializationTest, NoCrashOnBadStringTableSize) { if (!llvm::zlib::isAvailable()) { log("skipping test, no zlib"); return; } // First, create a valid serialized file. auto In = readIndexFile(YAML); ASSERT_FALSE(!In) << In.takeError(); IndexFileOut Out(*In); Out.Format = IndexFileFormat::RIFF; std::string Serialized = llvm::to_string(Out); // Low-level parse it again, we're going to replace the `stri` chunk. auto Parsed = riff::readFile(Serialized); ASSERT_FALSE(!Parsed) << Parsed.takeError(); auto Stri = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) { return C.ID == riff::fourCC("stri"); }); ASSERT_NE(Stri, Parsed->Chunks.end()); // stri consists of an 8 byte uncompressed-size, and then compressed data. // We'll claim our small amount of data expands to 4GB std::string CorruptStri = (llvm::fromHex("ffffffff") + Stri->Data.drop_front(4)).str(); Stri->Data = CorruptStri; std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C"); // Try to crash rather than hang on large allocation. ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB std::string CorruptFile = llvm::to_string(*Parsed); auto CorruptParsed = readIndexFile(CorruptFile); ASSERT_TRUE(!CorruptParsed); EXPECT_THAT(llvm::toString(CorruptParsed.takeError()), testing::HasSubstr("bytes is implausible")); } } // namespace } // namespace clangd } // namespace clang