1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 2 // -*- mode: C++ -*- 3 // 4 // Copyright 2021-2023 Google LLC 5 // 6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the 7 // "License"); you may not use this file except in compliance with the 8 // License. You may obtain a copy of the License at 9 // 10 // https://llvm.org/LICENSE.txt 11 // 12 // Unless required by applicable law or agreed to in writing, software 13 // distributed under the License is distributed on an "AS IS" BASIS, 14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 // See the License for the specific language governing permissions and 16 // limitations under the License. 17 // 18 // Author: Giuliano Procida 19 // Author: Ignes Simeonova 20 21 #ifndef STG_ABIGAIL_READER_H_ 22 #define STG_ABIGAIL_READER_H_ 23 24 #include <memory> 25 #include <optional> 26 #include <string> 27 #include <string_view> 28 #include <type_traits> 29 #include <unordered_map> 30 #include <utility> 31 #include <vector> 32 33 #include <libxml/tree.h> 34 #include "graph.h" 35 #include "runtime.h" 36 #include "scope.h" 37 38 namespace stg { 39 namespace abixml { 40 41 // Parser for libabigail's ABI XML format, creating a Symbol-Type Graph. 42 // 43 // On construction Abigail consumes a libxml node tree and builds a graph. 44 // 45 // The parser supports C types only, with C++ types to be added later. 46 // 47 // The main producer of ABI XML is abidw. The format has no formal specification 48 // and has very limited semantic versioning. This parser makes no attempt to 49 // support or correct for deficiencies in older versions of the format. 50 // 51 // The parser detects unexpected elements and will abort on the presence of at 52 // least: namespace, base class and member function information. 53 // 54 // The parser ignores attributes it doesn't care about, including member access 55 // specifiers and (meaningless) type ids on array dimensions. 56 // 57 // The STG IR and libabigail ABI XML models diverge in some ways. The parser has 58 // to do extra work for each of these, as follows. 59 // 60 // 0. XML uses type and symbol ids to link together elements. These become edges 61 // in the graph between symbols and types and between types and types. Dangling 62 // type references will cause an abort. libabigail is much more relaxed about 63 // symbols without type information and these are modelled as such. 64 // 65 // 1. XML function declarations have in-line types. The parser creates 66 // free-standing types on-the-fly. A useful space optimisation might be to 67 // prevent duplicate creation of such types. 68 // 69 // 2. Variadic parameters are currently flagged with an XML attribute. A 70 // variadic type node is created on demand and will be shared by all such 71 // paramerters. 72 // 73 // 3. XML symbols and aliases have a rather poor repesentation with aliases 74 // represented as comma-separated attribute values. Aliases are resolved in a 75 // post-processing phase. 76 // 77 // 4. XML anonymous types also have unhelpful names, these are ignored. 78 class Abigail { 79 public: 80 explicit Abigail(Graph& graph); 81 Id ProcessRoot(xmlNodePtr root); 82 83 private: 84 struct SymbolInfo { 85 std::string name; 86 std::optional<ElfSymbol::VersionInfo> version_info; 87 xmlNodePtr node; 88 }; 89 90 Graph& graph_; 91 92 // The STG IR uses a distinct node type for the variadic parameter type; if 93 // allocated, this is its STG node id. 94 std::optional<Id> variadic_; 95 // Map from libabigail type ids to STG node ids; except for the type of 96 // variadic parameters. 97 std::unordered_map<std::string, Id> type_ids_; 98 99 // symbol id to symbol information 100 std::unordered_map<std::string, SymbolInfo> symbol_info_map_; 101 // alias symbol id to main symbol id 102 std::unordered_map<std::string, std::string> alias_to_main_; 103 // libabigail decorates certain declarations with symbol ids; this is the 104 // mapping from symbol id to the corresponding type and full name. 105 std::unordered_map<std::string, std::pair<Id, std::string>> 106 symbol_id_and_full_name_; 107 108 // Full name of the current scope. 109 Scope scope_name_; 110 111 Id GetNode(const std::string& type_id); 112 Id GetEdge(xmlNodePtr element); 113 Id GetVariadic(); 114 Function MakeFunctionType(xmlNodePtr function); 115 116 void ProcessCorpusGroup(xmlNodePtr group); 117 void ProcessCorpus(xmlNodePtr corpus); 118 void ProcessSymbols(xmlNodePtr symbols); 119 void ProcessSymbol(xmlNodePtr symbol); 120 121 bool ProcessUserDefinedType(std::string_view name, Id id, xmlNodePtr decl); 122 void ProcessScope(xmlNodePtr scope); 123 124 void ProcessInstr(xmlNodePtr instr); 125 void ProcessNamespace(xmlNodePtr scope); 126 127 Id ProcessDecl(bool is_variable, xmlNodePtr decl); 128 129 void ProcessFunctionType(Id id, xmlNodePtr function); 130 void ProcessTypedef(Id id, xmlNodePtr type_definition); 131 void ProcessPointer(Id id, bool is_pointer, xmlNodePtr pointer); 132 void ProcessQualified(Id id, xmlNodePtr qualified); 133 void ProcessArray(Id id, xmlNodePtr array); 134 void ProcessTypeDecl(Id id, xmlNodePtr type_decl); 135 void ProcessStructUnion(Id id, bool is_struct, xmlNodePtr struct_union); 136 void ProcessEnum(Id id, xmlNodePtr enumeration); 137 138 Id ProcessBaseClass(xmlNodePtr base_class); 139 std::optional<Id> ProcessDataMember(bool is_struct, xmlNodePtr data_member); 140 void ProcessMemberFunction(std::vector<Id>& methods, xmlNodePtr method); 141 void ProcessMemberType(xmlNodePtr member_type); 142 143 Id BuildSymbol(const SymbolInfo& info, 144 std::optional<Id> type_id, 145 const std::optional<std::string>& name); 146 Id BuildSymbols(); 147 }; 148 149 Id Read(Runtime& runtime, Graph& graph, const std::string& path); 150 151 // Exposed for testing. 152 void Clean(xmlNodePtr root); 153 bool EqualTree(xmlNodePtr left, xmlNodePtr right); 154 bool SubTree(xmlNodePtr left, xmlNodePtr right); 155 using Document = 156 std::unique_ptr<std::remove_pointer_t<xmlDocPtr>, void(*)(xmlDocPtr)>; 157 Document Read(Runtime& runtime, const std::string& path); 158 159 } // namespace abixml 160 } // namespace stg 161 162 #endif // STG_ABIGAIL_READER_H_ 163