// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -*- mode: C++ -*- // // Copyright 2021-2024 Google LLC // // Licensed under the Apache License v2.0 with LLVM Exceptions (the // "License"); you may not use this file except in compliance with the // License. You may obtain a copy of the License at // // https://llvm.org/LICENSE.txt // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Author: Giuliano Procida // Author: Ignes Simeonova #include "abigail_reader.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // xmlFree moves to xmlmemory.h later #include #include #include #include "error.h" #include "file_descriptor.h" #include "graph.h" #include "runtime.h" #include "scope.h" #include "type_normalisation.h" namespace stg { namespace abixml { namespace { // Cast a libxml string to C string and present it as a string_view. std::string_view FromLibxml(const xmlChar* str) { return reinterpret_cast(str); } // Cast a C string to a libxml string. const xmlChar* ToLibxml(const char* str) { return reinterpret_cast(str); } // Get the name of an XML element. std::string_view GetName(xmlNodePtr element) { return FromLibxml(element->name); } void CheckName(const char* name, xmlNodePtr element) { const auto element_name = GetName(element); if (element_name != name) { Die() << "expected element '" << name << "' but got '" << element_name << "'"; } } xmlNodePtr Child(xmlNodePtr node) { return node->children; } xmlNodePtr Next(xmlNodePtr node) { return node->next; } xmlNodePtr GetOnlyChild(xmlNodePtr element) { const xmlNodePtr child = Child(element); if (child == nullptr || Next(child) != nullptr) { Die() << "element '" << GetName(element) << "' without exactly one child"; } return child; } // Get an optional attribute. std::optional GetAttribute(xmlNodePtr node, const char* name) { std::optional result; xmlChar* attribute = xmlGetProp(node, ToLibxml(name)); if (attribute) { result.emplace(FromLibxml(attribute)); xmlFree(attribute); } return result; } // Get an attribute. std::string GetAttributeOrDie(xmlNodePtr node, const char* name) { xmlChar* attribute = xmlGetProp(node, ToLibxml(name)); if (!attribute) { Die() << "element '" << GetName(node) << "' missing attribute '" << name << "'"; } const std::string result(FromLibxml(attribute)); xmlFree(attribute); return result; } // Set an attribute value. void SetAttribute(xmlNodePtr node, const char* name, const std::string& value) { xmlSetProp(node, ToLibxml(name), ToLibxml(value.c_str())); } // Unset an attribute value. void UnsetAttribute(xmlNodePtr node, const char* name) { xmlUnsetProp(node, ToLibxml(name)); } // Remove a node and free its storage. void RemoveNode(xmlNodePtr node) { xmlUnlinkNode(node); xmlFreeNode(node); } // Move a node to be the last child of another. void MoveNode(xmlNodePtr node, xmlNodePtr destination) { xmlUnlinkNode(node); xmlAddChild(destination, node); } template std::optional Parse(const std::string& value) { T result; std::istringstream is(value); is >> std::noskipws >> result; if (is && is.eof()) { return {result}; } return {}; } template <> std::optional Parse(const std::string& value) { if (value == "yes") { return {true}; } else if (value == "no") { return {false}; } return {}; } template <> std::optional Parse( const std::string& value) { if (value == "no-type") { return {ElfSymbol::SymbolType::NOTYPE}; } else if (value == "object-type") { return {ElfSymbol::SymbolType::OBJECT}; } else if (value == "func-type") { return {ElfSymbol::SymbolType::FUNCTION}; } else if (value == "common-type") { return {ElfSymbol::SymbolType::COMMON}; } else if (value == "tls-type") { return {ElfSymbol::SymbolType::TLS}; } else if (value == "gnu-ifunc-type") { return {ElfSymbol::SymbolType::GNU_IFUNC}; } return {}; } template <> std::optional Parse( const std::string& value) { if (value == "global-binding") { return {ElfSymbol::Binding::GLOBAL}; } else if (value == "local-binding") { return {ElfSymbol::Binding::LOCAL}; } else if (value == "weak-binding") { return {ElfSymbol::Binding::WEAK}; } else if (value == "gnu-unique-binding") { return {ElfSymbol::Binding::GNU_UNIQUE}; } return {}; } template <> std::optional Parse( const std::string& value) { if (value == "default-visibility") { return {ElfSymbol::Visibility::DEFAULT}; } else if (value == "protected-visibility") { return {ElfSymbol::Visibility::PROTECTED}; } else if (value == "hidden-visibility") { return {ElfSymbol::Visibility::HIDDEN}; } else if (value == "internal-visibility") { return {ElfSymbol::Visibility::INTERNAL}; } return {}; } template <> std::optional Parse(const std::string& value) { uint32_t number; std::istringstream is(value); is >> std::noskipws >> std::hex >> number; if (is && is.eof()) { return std::make_optional(number); } return std::nullopt; } template T GetParsedValueOrDie(xmlNodePtr element, const char* name, const std::string& value, const std::optional& parse) { if (parse) { return *parse; } Die() << "element '" << GetName(element) << "' has attribute '" << name << "' with bad value '" << value << "'"; } template T ReadAttributeOrDie(xmlNodePtr element, const char* name) { const auto value = GetAttributeOrDie(element, name); return GetParsedValueOrDie(element, name, value, Parse(value)); } template std::optional ReadAttribute(xmlNodePtr element, const char* name) { const auto value = GetAttribute(element, name); if (value) { return {GetParsedValueOrDie(element, name, *value, Parse(*value))}; } return {}; } template T ReadAttribute(xmlNodePtr element, const char* name, const T& default_value) { const auto value = GetAttribute(element, name); if (value) { return GetParsedValueOrDie(element, name, *value, Parse(*value)); } return default_value; } template T ReadAttribute(xmlNodePtr element, const char* name, std::function(const std::string&)> parse) { const auto value = GetAttributeOrDie(element, name); return GetParsedValueOrDie(element, name, value, parse(value)); } // Remove non-element nodes, recursively. // // This simplifies subsequent manipulation. This should only remove comment, // text and possibly CDATA nodes. void StripNonElements(xmlNodePtr node) { switch (node->type) { case XML_COMMENT_NODE: case XML_TEXT_NODE: case XML_CDATA_SECTION_NODE: RemoveNode(node); break; case XML_ELEMENT_NODE: { xmlNodePtr child = Child(node); while (child) { const xmlNodePtr next = Next(child); StripNonElements(child); child = next; } break; } default: Die() << "unexpected XML node type: " << node->type; } } // Determine whether one XML element is a subtree of another, and optionally, // actually equal to it. bool SubOrEqualTree(bool also_equal, xmlNodePtr left, xmlNodePtr right) { // Node names must match. const auto left_name = GetName(left); const auto right_name = GetName(right); if (left_name != right_name) { return false; } // Attributes may be missing on the left, but must match otherwise. size_t left_attributes = 0; for (auto* p = left->properties; p; p = p->next) { ++left_attributes; const auto attribute = FromLibxml(p->name); const char* attribute_name = attribute.data(); const auto left_value = GetAttributeOrDie(left, attribute_name); const auto right_value = GetAttribute(right, attribute_name); if (!right_value || left_value != right_value.value()) { return false; } } // To also be equal, we just need to check the counts are the same. if (also_equal) { size_t right_attributes = 0; for (auto* p = right->properties; p; p = p->next) { ++right_attributes; } if (left_attributes != right_attributes) { return false; } } // The left subelements must be a subsequence of the right ones and to also be // equal, we must not have skipped any right ones. xmlNodePtr left_child = Child(left); xmlNodePtr right_child = Child(right); while (left_child != nullptr && right_child != nullptr) { if (SubOrEqualTree(also_equal, left_child, right_child)) { left_child = Next(left_child); } else if (also_equal) { return false; } right_child = Next(right_child); } return left_child == nullptr && (right_child == nullptr || !also_equal); } } // namespace // Determine whether one XML element is a subtree of another. bool SubTree(xmlNodePtr left, xmlNodePtr right) { return SubOrEqualTree(false, left, right); } // Determine whether one XML element is the same as another. bool EqualTree(xmlNodePtr left, xmlNodePtr right) { return SubOrEqualTree(true, left, right); } // Find a maximal XML element if one exists. std::optional MaximalTree(const std::vector& nodes) { if (nodes.empty()) { return std::nullopt; } // Find a potentially maximal candidate by scanning through and retaining the // new node if it's a supertree of the current candidate. const auto count = nodes.size(); std::vector ok(count); size_t candidate = 0; ok[candidate] = true; for (size_t ix = 1; ix < count; ++ix) { if (SubTree(nodes[candidate], nodes[ix])) { candidate = ix; ok[candidate] = true; } } // Verify the candidate is indeed maximal by comparing it with the nodes not // already known to be subtrees of it. const auto& candidate_node = nodes[candidate]; for (size_t ix = 0; ix < count; ++ix) { const auto& node = nodes[ix]; if (!ok[ix] && !SubTree(node, candidate_node)) { return std::nullopt; } } return std::make_optional(candidate); } namespace { // Check if string_view is in an array. template bool Contains(const std::array& haystack, std::string_view needle) { return std::find(haystack.begin(), haystack.end(), needle) != haystack.end(); } // Remove source location attributes. // // This simplifies element comparison later. void StripLocationInfo(xmlNodePtr node) { static const std::array has_location_info = { "class-decl", "enum-decl", "function-decl", "parameter", "typedef-decl", "union-decl", "var-decl" }; if (Contains(has_location_info, GetName(node))) { UnsetAttribute(node, "filepath"); UnsetAttribute(node, "line"); UnsetAttribute(node, "column"); } for (auto* child = Child(node); child; child = Next(child)) { StripLocationInfo(child); } } // Remove access attribute. // // This simplifies element comparison later in a very specific way: libabigail // (possibly older versions) uses the access specifier for the type it's trying // to "emit in scope", even for its containing types, making deduplicating types // trickier. We don't care about access anyway, so just remove it everywhere. void StripAccess(xmlNodePtr node) { static const std::array has_access = { "base-class", "data-member", "member-function", "member-template", "member-type", }; if (Contains(has_access, GetName(node))) { UnsetAttribute(node, "access"); } for (auto* child = Child(node); child; child = Next(child)) { StripAccess(child); } } // Elements corresponding to named types that can be anonymous or marked as // unreachable by libabigail, so user-defined types, excepting typedefs. const std::array kNamedTypes = { "class-decl", "enum-decl", "union-decl", }; // Remove attributes emitted by abidw --load-all-types. // // With this invocation and if any user-defined types are deemed unreachable, // libabigail will output a tracking-non-reachable-types attribute on top-level // elements and an is-non-reachable attribute on each such type element. // // We have our own graph-theoretic notion of reachability and these attributes // have no ABI relevance and can interfere with element comparisons. void StripReachabilityAttributes(xmlNodePtr node) { const auto node_name = GetName(node); if (node_name == "abi-corpus-group" || node_name == "abi-corpus") { UnsetAttribute(node, "tracking-non-reachable-types"); } else if (Contains(kNamedTypes, node_name)) { UnsetAttribute(node, "is-non-reachable"); } for (auto* child = Child(node); child; child = Next(child)) { StripReachabilityAttributes(child); } } // Fix bad DWARF -> ELF links caused by size zero symbol confusion. // // libabigail used to be confused by these sorts of symbols, resulting in // declarations pointing at the wrong ELF symbols: // // 573623: ffffffc0122383c0 256 OBJECT GLOBAL DEFAULT 33 vm_node_stat // 573960: ffffffc0122383c0 0 OBJECT GLOBAL DEFAULT 33 vm_numa_stat void FixBadDwarfElfLinks(xmlNodePtr root) { std::unordered_map elf_links; // See which ELF symbol IDs have multiple declarations. const std::function count = [&](xmlNodePtr node) { if (GetName(node) == "var-decl") { const auto symbol_id = GetAttribute(node, "elf-symbol-id"); if (symbol_id) { ++elf_links[symbol_id.value()]; } } for (auto* child = Child(node); child; child = Next(child)) { count(child); } }; count(root); // Fix up likely bad links from DWARF declaration to ELF symbol. const std::function fix = [&](xmlNodePtr node) { if (GetName(node) == "var-decl") { const auto name = GetAttributeOrDie(node, "name"); const auto mangled_name = GetAttribute(node, "mangled-name"); const auto symbol_id = GetAttribute(node, "elf-symbol-id"); if (mangled_name && symbol_id && name != symbol_id.value() && elf_links[symbol_id.value()] > 1) { if (mangled_name.value() == name) { Warn() << "fixing up ELF symbol for '" << name << "' (was '" << symbol_id.value() << "')"; SetAttribute(node, "elf-symbol-id", name); } else if (mangled_name.value() == symbol_id.value()) { Warn() << "fixing up mangled name and ELF symbol for '" << name << "' (was '" << symbol_id.value() << "')"; SetAttribute(node, "mangled-name", name); SetAttribute(node, "elf-symbol-id", name); } } } for (auto* child = Child(node); child; child = Next(child)) { fix(child); } }; fix(root); } // Tidy anonymous types in various ways. // // 1. Normalise anonymous type names by dropping the name attribute. // // Anonymous type names take the form __anonymous_foo__N where foo is one of // enum, struct or union and N is an optional numerical suffix. We don't care // about these names but they may cause trouble when comparing elements. // // 2. Reanonymise anonymous types that have been given names. // // At some point abidw changed its behaviour given an anonymous type with a // naming typedef. In addition to linking the typedef and type in both // directions, the code now gives (some) anonymous types the same name as the // typedef. This misrepresents the original types. // // Such types should be anonymous. We set is-anonymous and drop the name. // // 3. Discard naming typedef backlinks. // // The attribute naming-typedef-id is a backwards link from an anonymous type to // the typedef that refers to it. // // We don't care about these attributes and they may cause comparison issues. void TidyAnonymousTypes(xmlNodePtr node) { if (Contains(kNamedTypes, GetName(node))) { const bool is_anon = ReadAttribute(node, "is-anonymous", false); const auto naming_attribute = GetAttribute(node, "naming-typedef-id"); if (is_anon) { UnsetAttribute(node, "name"); } else if (naming_attribute) { SetAttribute(node, "is-anonymous", "yes"); UnsetAttribute(node, "name"); } if (naming_attribute) { UnsetAttribute(node, "naming-typedef-id"); } } for (auto* child = Child(node); child; child = Next(child)) { TidyAnonymousTypes(child); } } // Remove duplicate members. void RemoveDuplicateMembers(xmlNodePtr root) { std::vector types; // find all structs and unions std::function dfs = [&](xmlNodePtr node) { const auto node_name = GetName(node); // preorder in case we delete a nested element for (auto* child = Child(node); child; child = Next(child)) { dfs(child); } if (node_name == "class-decl" || node_name == "union-decl") { types.push_back(node); } }; dfs(root); for (const auto& node : types) { // partition members by node name std::map> member_map; for (auto* child = Child(node); child; child = Next(child)) { member_map[GetName(child)].push_back(child); } // for each kind of member... for (auto& [name, members] : member_map) { // ... remove identical duplicate members - O(n^2) for (size_t i = 0; i < members.size(); ++i) { xmlNodePtr& i_node = members[i]; bool duplicate = false; for (size_t j = 0; j < i; ++j) { const xmlNodePtr& j_node = members[j]; if (j_node != nullptr && EqualTree(i_node, j_node)) { duplicate = true; break; } } if (duplicate) { RemoveNode(i_node); i_node = nullptr; } } } } } // Eliminate non-conflicting / report conflicting duplicate definitions. // // XML elements representing types are sometimes emitted multiple times, // identically. Also, member typedefs are sometimes emitted separately from // their types, resulting in duplicate XML fragments. // // Both these issues can be resolved by first detecting duplicate occurrences of // a given type id and then checking to see if there's an instance that subsumes // the others, which can then be eliminated. // // This function eliminates exact type duplicates and duplicates where there is // at least one maximal definition. It can report the remaining duplicate // definitions. // // If a type has duplicate definitions in multiple namespace scopes or // definitions with different effective names, these are considered to be // *conflicting* duplicate definitions. TODO: update text void HandleDuplicateTypes(xmlNodePtr root) { // Convenience typedef referring to a namespace scope. using namespace_scope = std::vector; // map of type-id to pair of set of namespace scopes and vector of // xmlNodes std::unordered_map< std::string, std::pair< std::set, std::vector>> types; namespace_scope namespaces; // find all type occurrences std::function dfs = [&](xmlNodePtr node) { const auto node_name = GetName(node); std::optional namespace_name; if (node_name == "namespace-decl") { namespace_name = GetAttribute(node, "name"); } if (namespace_name) { namespaces.push_back(namespace_name.value()); } if (node_name == "abi-corpus-group" || node_name == "abi-corpus" || node_name == "abi-instr" || namespace_name) { for (auto* child = Child(node); child; child = Next(child)) { dfs(child); } } else { const auto id = GetAttribute(node, "id"); if (id) { auto& info = types[id.value()]; info.first.insert(namespaces); info.second.push_back(node); } } if (namespace_name) { namespaces.pop_back(); } }; dfs(root); for (const auto& [id, scopes_and_definitions] : types) { const auto& [scopes, definitions] = scopes_and_definitions; if (scopes.size() > 1) { Warn() << "conflicting scopes found for type '" << id << '\''; continue; } const auto possible_maximal = MaximalTree(definitions); if (possible_maximal) { // Remove all but the maximal definition. const size_t maximal = possible_maximal.value(); for (size_t ix = 0; ix < definitions.size(); ++ix) { if (ix != maximal) { RemoveNode(definitions[ix]); } } continue; } // As a rare alternative, check for a stray anonymous member that has been // separated from the main definition. size_t strays = 0; std::optional stray; std::optional non_stray; for (size_t ix = 0; ix < definitions.size(); ++ix) { auto node = definitions[ix]; auto member = Child(node); if (member && !Next(member) && GetName(member) == "data-member") { auto decl = Child(member); if (decl && !Next(decl) && GetName(decl) == "var-decl") { auto name = GetAttribute(decl, "name"); if (name && name.value().empty()) { ++strays; stray = ix; continue; } } } non_stray = ix; } if (strays + 1 == definitions.size() && stray.has_value() && non_stray.has_value()) { const auto stray_index = stray.value(); const auto non_stray_index = non_stray.value(); bool good = true; for (size_t ix = 0; ix < definitions.size(); ++ix) { if (ix == stray_index || ix == non_stray_index) { continue; } if (EqualTree(definitions[stray_index], definitions[ix])) { // it doesn't hurt if we remove exact duplicates and then fail RemoveNode(definitions[ix]); } else { good = false; break; } } if (good) { MoveNode(Child(definitions[stray_index]), definitions[non_stray_index]); RemoveNode(definitions[stray_index]); continue; } } Warn() << "unresolvable duplicate definitions found for type '" << id << '\''; } } } // namespace // Remove XML nodes and attributes that are neither used or wanted. void Clean(xmlNodePtr root) { // Strip non-element nodes to simplify other operations. StripNonElements(root); // Strip location information. StripLocationInfo(root); // Strip access. StripAccess(root); // Strip reachability attributes. StripReachabilityAttributes(root); } namespace { // Transform XML elements to improve their semantics. void Tidy(xmlNodePtr root) { // Fix bad ELF symbol links FixBadDwarfElfLinks(root); // Normalise anonymous type names. // Reanonymise anonymous types. // Discard naming typedef backlinks. TidyAnonymousTypes(root); // Remove duplicate members. RemoveDuplicateMembers(root); // Eliminate complete duplicates and extra fragments of types. // Report conflicting duplicate defintions. // Record whether there are conflicting duplicate definitions. HandleDuplicateTypes(root); } std::optional ParseLength(const std::string& value) { if (value == "infinite" || value == "unknown") { return {0}; } return Parse(value); } std::optional ParseReferenceKind( const std::string& value) { if (value == "lvalue") { return {PointerReference::Kind::LVALUE_REFERENCE}; } else if (value == "rvalue") { return {PointerReference::Kind::RVALUE_REFERENCE}; } return {}; } // Parser for libabigail's ABI XML format, creating a Symbol-Type Graph. // // On construction Abigail consumes a libxml node tree and builds a graph. // // Note that the core parser sees a "clean and tidy" XML document due to // preprocessing that simplifies the XML and resolves several issues. One // notable exception is that duplicate nodes may still remain. // // The main producer of ABI XML is abidw. The format has no formal specification // and has very limited semantic versioning. This parser makes no attempt to // support or correct for deficiencies in older versions of the format. // // The parser detects and will abort on the presence of unexpected elements. // // The parser ignores attributes it doesn't care about, including member access // specifiers and (meaningless) type ids on array dimensions. // // The STG IR and libabigail ABI XML models diverge in some ways. The parser has // to do extra work for each of these, as follows. // // 0. XML uses type and symbol ids to link together elements. These become edges // in the graph between symbols and types and between types and types. Dangling // type references will cause an abort. libabigail is much more relaxed about // symbols without type information and these are modelled as such. // // 1. XML function declarations have in-line types. The parser creates // free-standing types on-the-fly. A useful space optimisation might be to // prevent duplicate creation of such types. // // 2. Variadic parameters are currently flagged with an XML attribute. A // variadic type node is created on demand and will be shared by all such // paramerters. // // 3. XML symbols and aliases have a rather poor repesentation with aliases // represented as comma-separated attribute values. Aliases are resolved in a // post-processing phase. // // 4. XML anonymous types may also have names, these are ignored. class Abigail { public: explicit Abigail(Graph& graph); Id ProcessRoot(xmlNodePtr root); private: struct SymbolInfo { std::string name; std::optional version_info; xmlNodePtr node; }; // Map from libabigail type ids to STG node ids; except for the type of // variadic parameters. Maker maker_; // The STG IR uses a distinct node type for the variadic parameter type; if // allocated, this is its STG node id. std::optional variadic_; // symbol id to symbol information std::unordered_map symbol_info_map_; // alias symbol id to main symbol id std::unordered_map alias_to_main_; // libabigail decorates certain declarations with symbol ids; this is the // mapping from symbol id to the corresponding type and full name. std::unordered_map> symbol_id_and_full_name_; // Full name of the current scope. Scope scope_; Id GetEdge(xmlNodePtr element); Id GetVariadic(); Function MakeFunctionType(xmlNodePtr function); void ProcessCorpusGroup(xmlNodePtr group); void ProcessCorpus(xmlNodePtr corpus); void ProcessSymbols(xmlNodePtr symbols); void ProcessSymbol(xmlNodePtr symbol); bool ProcessUserDefinedType(std::string_view name, const std::string& id, xmlNodePtr decl); void ProcessScope(xmlNodePtr scope); void ProcessInstr(xmlNodePtr instr); void ProcessNamespace(xmlNodePtr scope); Id ProcessDecl(bool is_variable, xmlNodePtr decl); void ProcessFunctionType(const std::string& id, xmlNodePtr function); void ProcessTypedef(const std::string& id, xmlNodePtr type_definition); void ProcessPointer(const std::string& id, bool is_pointer, xmlNodePtr pointer); void ProcessQualified(const std::string& id, xmlNodePtr qualified); void ProcessArray(const std::string& id, xmlNodePtr array); void ProcessTypeDecl(const std::string& id, xmlNodePtr type_decl); void ProcessStructUnion(const std::string& id, bool is_struct, xmlNodePtr struct_union); void ProcessEnum(const std::string& id, xmlNodePtr enumeration); Id ProcessBaseClass(xmlNodePtr base_class); std::optional ProcessDataMember(bool is_struct, xmlNodePtr data_member); void ProcessMemberFunction(std::vector& methods, xmlNodePtr method); void ProcessMemberType(xmlNodePtr member_type); Id BuildSymbol(const SymbolInfo& info, std::optional type_id, const std::optional& name); Id BuildSymbols(); }; Abigail::Abigail(Graph& graph) : maker_(graph) {} Id Abigail::GetEdge(xmlNodePtr element) { return maker_.Get(GetAttributeOrDie(element, "type-id")); } Id Abigail::GetVariadic() { if (!variadic_) { variadic_ = {maker_.Add(Special::Kind::VARIADIC)}; } return *variadic_; } Function Abigail::MakeFunctionType(xmlNodePtr function) { std::vector parameters; std::optional return_type; for (auto* child = Child(function); child; child = Next(child)) { const auto child_name = GetName(child); if (return_type) { Die() << "unexpected element after return-type"; } if (child_name == "parameter") { const auto is_variadic = ReadAttribute(child, "is-variadic", false); parameters.push_back(is_variadic ? GetVariadic() : GetEdge(child)); } else if (child_name == "return") { return_type = {GetEdge(child)}; } else { Die() << "unrecognised " << GetName(function) << " child element '" << child_name << "'"; } } if (!return_type) { Die() << "missing return-type"; } return {*return_type, parameters}; } Id Abigail::ProcessRoot(xmlNodePtr root) { Clean(root); Tidy(root); const auto name = GetName(root); if (name == "abi-corpus-group") { ProcessCorpusGroup(root); } else if (name == "abi-corpus") { ProcessCorpus(root); } else { Die() << "unrecognised root element '" << name << "'"; } return BuildSymbols(); } void Abigail::ProcessCorpusGroup(xmlNodePtr group) { for (auto* corpus = Child(group); corpus; corpus = Next(corpus)) { CheckName("abi-corpus", corpus); ProcessCorpus(corpus); } } void Abigail::ProcessCorpus(xmlNodePtr corpus) { for (auto* element = Child(corpus); element; element = Next(element)) { const auto name = GetName(element); if (name == "elf-function-symbols" || name == "elf-variable-symbols") { ProcessSymbols(element); } else if (name == "elf-needed") { // ignore this } else if (name == "abi-instr") { ProcessInstr(element); } else { Die() << "unrecognised abi-corpus child element '" << name << "'"; } } } void Abigail::ProcessSymbols(xmlNodePtr symbols) { for (auto* element = Child(symbols); element; element = Next(element)) { CheckName("elf-symbol", element); ProcessSymbol(element); } } void Abigail::ProcessSymbol(xmlNodePtr symbol) { // Symbol processing is done in two parts. In this first part, we parse just // enough XML attributes to generate a symbol id and determine any aliases. // Symbol ids in this format can be found in elf-symbol alias attributes and // in {var,function}-decl elf-symbol-id attributes. const auto name = GetAttributeOrDie(symbol, "name"); const auto version = ReadAttribute(symbol, "version", std::string()); const bool is_default_version = ReadAttribute(symbol, "is-default-version", false); const auto alias = GetAttribute(symbol, "alias"); std::string elf_symbol_id = name; std::optional version_info; if (!version.empty()) { version_info = ElfSymbol::VersionInfo{is_default_version, version}; elf_symbol_id += VersionInfoToString(*version_info); } Check(symbol_info_map_ .emplace(elf_symbol_id, SymbolInfo{name, version_info, symbol}) .second) << "multiple symbols with id " << elf_symbol_id; if (alias) { std::istringstream is(*alias); std::string item; while (std::getline(is, item, ',')) { Check(alias_to_main_.insert({item, elf_symbol_id}).second) << "multiple aliases with id " << elf_symbol_id; } } } bool Abigail::ProcessUserDefinedType( std::string_view name, const std::string& id, xmlNodePtr decl) { if (name == "typedef-decl") { ProcessTypedef(id, decl); } else if (name == "class-decl") { ProcessStructUnion(id, true, decl); } else if (name == "union-decl") { ProcessStructUnion(id, false, decl); } else if (name == "enum-decl") { ProcessEnum(id, decl); } else { return false; } return true; } void Abigail::ProcessScope(xmlNodePtr scope) { for (auto* element = Child(scope); element; element = Next(element)) { const auto name = GetName(element); const auto maybe_id = GetAttribute(element, "id"); // all type elements have "id", all non-types do not if (maybe_id) { const auto& id = *maybe_id; if (name == "function-type") { ProcessFunctionType(id, element); } else if (name == "pointer-type-def") { ProcessPointer(id, true, element); } else if (name == "reference-type-def") { ProcessPointer(id, false, element); } else if (name == "qualified-type-def") { ProcessQualified(id, element); } else if (name == "array-type-def") { ProcessArray(id, element); } else if (name == "type-decl") { ProcessTypeDecl(id, element); } else if (!ProcessUserDefinedType(name, id, element)) { Die() << "bad abi-instr type child element '" << name << "'"; } } else { if (name == "var-decl") { ProcessDecl(true, element); } else if (name == "function-decl") { ProcessDecl(false, element); } else if (name == "namespace-decl") { ProcessNamespace(element); } else { Die() << "bad abi-instr non-type child element '" << name << "'"; } } } } void Abigail::ProcessInstr(xmlNodePtr instr) { ProcessScope(instr); } void Abigail::ProcessNamespace(xmlNodePtr scope) { const auto name = GetAttributeOrDie(scope, "name"); const PushScopeName push_scope_name(scope_, "namespace", name); ProcessScope(scope); } Id Abigail::ProcessDecl(bool is_variable, xmlNodePtr decl) { const auto name = scope_.name + GetAttributeOrDie(decl, "name"); const auto symbol_id = GetAttribute(decl, "elf-symbol-id"); const auto type = is_variable ? GetEdge(decl) : maker_.Add(MakeFunctionType(decl)); if (symbol_id) { // There's a link to an ELF symbol. const auto [it, inserted] = symbol_id_and_full_name_.emplace( *symbol_id, std::make_pair(type, name)); if (!inserted) { Die() << "duplicate type for '" << *symbol_id << "'"; } } return type; } void Abigail::ProcessFunctionType(const std::string& id, xmlNodePtr function) { maker_.MaybeSet(id, MakeFunctionType(function)); } void Abigail::ProcessTypedef(const std::string& id, xmlNodePtr type_definition) { const auto name = scope_.name + GetAttributeOrDie(type_definition, "name"); const auto type = GetEdge(type_definition); maker_.MaybeSet(id, name, type); } void Abigail::ProcessPointer(const std::string& id, bool is_pointer, xmlNodePtr pointer) { const auto type = GetEdge(pointer); const auto kind = is_pointer ? PointerReference::Kind::POINTER : ReadAttribute( pointer, "kind", &ParseReferenceKind); maker_.MaybeSet(id, kind, type); } void Abigail::ProcessQualified(const std::string& id, xmlNodePtr qualified) { std::vector qualifiers; // Do these in reverse order so we get CVR ordering. if (ReadAttribute(qualified, "restrict", false)) { qualifiers.push_back(Qualifier::RESTRICT); } if (ReadAttribute(qualified, "volatile", false)) { qualifiers.push_back(Qualifier::VOLATILE); } if (ReadAttribute(qualified, "const", false)) { qualifiers.push_back(Qualifier::CONST); } Check(!qualifiers.empty()) << "qualified-type-def has no qualifiers"; // Handle multiple qualifiers by unconditionally adding as new nodes all but // the last qualifier which is set into place. auto type = GetEdge(qualified); auto count = qualifiers.size(); for (auto qualifier : qualifiers) { --count; const Qualified node(qualifier, type); if (count) { type = maker_.Add(node); } else { maker_.MaybeSet(id, node); } } } void Abigail::ProcessArray(const std::string& id, xmlNodePtr array) { std::vector dimensions; for (auto* child = Child(array); child; child = Next(child)) { CheckName("subrange", child); const auto length = ReadAttribute(child, "length", &ParseLength); dimensions.push_back(length); } Check(!dimensions.empty()) << "array-type-def element has no children"; // int[M][N] means array[M] of array[N] of int // // We need to chain a bunch of types together: // // id = array[n] of id = ... = array[n] of id // // where the first id is the new type in slot ix // and the last id is the old type in slot type // // Use the same approach as for qualifiers. auto type = GetEdge(array); auto count = dimensions.size(); for (auto it = dimensions.crbegin(); it != dimensions.crend(); ++it) { --count; const auto size = *it; const Array node(size, type); if (count) { type = maker_.Add(node); } else { maker_.MaybeSet(id, node); } } } void Abigail::ProcessTypeDecl(const std::string& id, xmlNodePtr type_decl) { const auto name = scope_.name + GetAttributeOrDie(type_decl, "name"); const auto bits = ReadAttribute(type_decl, "size-in-bits", 0); if (bits % 8) { Die() << "size-in-bits is not a multiple of 8"; } const auto bytes = bits / 8; if (name == "void") { maker_.MaybeSet(id, Special::Kind::VOID); } else { // libabigail doesn't model encoding at all and we don't want to parse names // (which will not always work) in an attempt to reconstruct it. maker_.MaybeSet(id, name, /* encoding= */ std::nullopt, bytes); } } void Abigail::ProcessStructUnion(const std::string& id, bool is_struct, xmlNodePtr struct_union) { // Libabigail sometimes reports is-declaration-only but still provides some // child elements. So we check both things. const bool forward = ReadAttribute(struct_union, "is-declaration-only", false) && Child(struct_union) == nullptr; const auto kind = is_struct ? StructUnion::Kind::STRUCT : StructUnion::Kind::UNION; const bool is_anonymous = ReadAttribute(struct_union, "is-anonymous", false); const auto name = is_anonymous ? std::string() : GetAttributeOrDie(struct_union, "name"); const auto full_name = is_anonymous ? std::string() : scope_.name + name; const PushScopeName push_scope_name(scope_, kind, name); if (forward) { maker_.MaybeSet(id, kind, full_name); return; } const auto bits = ReadAttribute(struct_union, "size-in-bits", 0); const auto bytes = (bits + 7) / 8; std::vector base_classes; std::vector methods; std::vector members; for (auto* child = Child(struct_union); child; child = Next(child)) { const auto child_name = GetName(child); if (child_name == "data-member") { if (const auto member = ProcessDataMember(is_struct, child)) { members.push_back(*member); } } else if (child_name == "member-type") { ProcessMemberType(child); } else if (child_name == "base-class") { base_classes.push_back(ProcessBaseClass(child)); } else if (child_name == "member-function") { ProcessMemberFunction(methods, child); } else { Die() << "unrecognised " << kind << "-decl child element '" << child_name << "'"; } } maker_.MaybeSet(id, kind, full_name, bytes, base_classes, methods, members); } void Abigail::ProcessEnum(const std::string& id, xmlNodePtr enumeration) { const bool forward = ReadAttribute(enumeration, "is-declaration-only", false); const auto name = ReadAttribute(enumeration, "is-anonymous", false) ? std::string() : scope_.name + GetAttributeOrDie(enumeration, "name"); if (forward) { maker_.MaybeSet(id, name); return; } const xmlNodePtr underlying = Child(enumeration); Check(underlying != nullptr) << "enum-decl has no child elements"; CheckName("underlying-type", underlying); const auto type = GetEdge(underlying); std::vector> enumerators; for (auto* enumerator = Next(underlying); enumerator; enumerator = Next(enumerator)) { CheckName("enumerator", enumerator); const auto enumerator_name = GetAttributeOrDie(enumerator, "name"); // libabigail currently supports anything that fits in an int64_t const auto enumerator_value = ReadAttributeOrDie(enumerator, "value"); enumerators.emplace_back(enumerator_name, enumerator_value); } maker_.MaybeSet(id, name, type, enumerators); } Id Abigail::ProcessBaseClass(xmlNodePtr base_class) { const auto& type = GetEdge(base_class); const auto offset = ReadAttributeOrDie(base_class, "layout-offset-in-bits"); const auto inheritance = ReadAttribute(base_class, "is-virtual", false) ? BaseClass::Inheritance::VIRTUAL : BaseClass::Inheritance::NON_VIRTUAL; return maker_.Add(type, offset, inheritance); } std::optional Abigail::ProcessDataMember(bool is_struct, xmlNodePtr data_member) { const xmlNodePtr decl = GetOnlyChild(data_member); CheckName("var-decl", decl); if (ReadAttribute(data_member, "static", false)) { ProcessDecl(true, decl); return {}; } const auto offset = is_struct ? ReadAttributeOrDie(data_member, "layout-offset-in-bits") : 0; const auto name = GetAttributeOrDie(decl, "name"); const auto type = GetEdge(decl); // Note: libabigail does not model member size, yet return {maker_.Add(name, type, offset, 0)}; } void Abigail::ProcessMemberFunction(std::vector& methods, xmlNodePtr method) { const xmlNodePtr decl = GetOnlyChild(method); CheckName("function-decl", decl); // ProcessDecl creates symbol references so must be called unconditionally. const auto type = ProcessDecl(false, decl); const auto vtable_offset = ReadAttribute(method, "vtable-offset"); if (vtable_offset) { static const std::string missing = "{missing}"; const auto mangled_name = ReadAttribute(decl, "mangled-name", missing); const auto name = GetAttributeOrDie(decl, "name"); methods.push_back( maker_.Add(mangled_name, name, vtable_offset.value(), type)); } } void Abigail::ProcessMemberType(xmlNodePtr member_type) { const xmlNodePtr decl = GetOnlyChild(member_type); const auto id = GetAttributeOrDie(decl, "id"); const auto name = GetName(decl); if (!ProcessUserDefinedType(name, id, decl)) { Die() << "unrecognised member-type child element '" << name << "'"; } } Id Abigail::BuildSymbol(const SymbolInfo& info, std::optional type_id, const std::optional& name) { const xmlNodePtr symbol = info.node; const bool is_defined = ReadAttributeOrDie(symbol, "is-defined"); const auto crc = ReadAttribute(symbol, "crc"); const auto ns = ReadAttribute(symbol, "namespace"); const auto type = ReadAttributeOrDie(symbol, "type"); const auto binding = ReadAttributeOrDie(symbol, "binding"); const auto visibility = ReadAttributeOrDie(symbol, "visibility"); return maker_.Add( info.name, info.version_info, is_defined, type, binding, visibility, crc, ns, type_id, name); } Id Abigail::BuildSymbols() { // Libabigail's model is (approximately): // // (alias)* -> main symbol <- some decl -> type // // which we turn into: // // symbol / alias -> type // for (const auto& [alias, main] : alias_to_main_) { Check(!alias_to_main_.contains(main)) << "found main symbol and alias with id " << main; } // Build final symbol table, tying symbols to their types. std::map symbols; for (const auto& [id, symbol_info] : symbol_info_map_) { const auto main = alias_to_main_.find(id); const auto lookup = main != alias_to_main_.end() ? main->second : id; const auto type_id_and_name_it = symbol_id_and_full_name_.find(lookup); std::optional type_id; std::optional name; if (type_id_and_name_it != symbol_id_and_full_name_.end()) { const auto& type_id_and_name = type_id_and_name_it->second; type_id = {type_id_and_name.first}; name = {type_id_and_name.second}; } symbols.insert({id, BuildSymbol(symbol_info, type_id, name)}); } return maker_.Add(symbols); } using Parser = xmlDocPtr(xmlParserCtxtPtr context, const char* url, const char* encoding, int options); Document Parse(Runtime& runtime, const std::function& parser) { const std::unique_ptr< std::remove_pointer_t, void(*)(xmlParserCtxtPtr)> context(xmlNewParserCtxt(), xmlFreeParserCtxt); Document document(nullptr, xmlFreeDoc); { const Time t(runtime, "abigail.libxml_parse"); document.reset(parser(context.get(), nullptr, nullptr, XML_PARSE_NONET)); } Check(document != nullptr) << "failed to parse input as XML"; return document; } } // namespace Id ProcessDocument(Graph& graph, xmlDocPtr document) { xmlNodePtr root = xmlDocGetRootElement(document); Check(root != nullptr) << "XML document has no root element"; const Id id = Abigail(graph).ProcessRoot(root); return RemoveUselessQualifiers(graph, id); } Document Read(Runtime& runtime, const std::string& path) { const FileDescriptor fd(path.c_str(), O_RDONLY); return Parse(runtime, [&](xmlParserCtxtPtr context, const char* url, const char* encoding, int options) { return xmlCtxtReadFd(context, fd.Value(), url, encoding, options); }); } Id Read(Runtime& runtime, Graph& graph, const std::string& path) { // Read the XML. const Document document = Read(runtime, path); // Process the XML. return ProcessDocument(graph, document.get()); } Id ReadFromString(Runtime& runtime, Graph& graph, const std::string_view xml) { // Read the XML. const Document document = Parse(runtime, [&](xmlParserCtxtPtr context, const char* url, const char* encoding, int options) { return xmlCtxtReadMemory(context, xml.data(), static_cast(xml.size()), url, encoding, options); }); // Process the XML. return ProcessDocument(graph, document.get()); } } // namespace abixml } // namespace stg