• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- Mode: C++ -*-
3 //
4 // Copyright (C) 2021 Google, Inc.
5 //
6 // Author: Giuliano Procida
7 
8 /// @file
9 ///
10 /// This file contains ABI XML manipulation routines and a main driver.
11 ///
12 /// The libxml Tree API is used. The XPath API is not used as it proved
13 /// to be many times slower than direct traversal but only slightly more
14 /// convenient.
15 
16 #include <fcntl.h>
17 #include <unistd.h>
18 
19 #include <algorithm>
20 #include <array>
21 #include <cassert>
22 #include <cctype>
23 #include <cstring>
24 #include <fstream>
25 #include <functional>
26 #include <iomanip>
27 #include <ios>
28 #include <iostream>
29 #include <map>
30 #include <optional>
31 #include <set>
32 #include <sstream>
33 #include <string>
34 #include <unordered_map>
35 #include <unordered_set>
36 #include <vector>
37 
38 #include <libxml/globals.h>
39 #include <libxml/parser.h>
40 #include <libxml/tree.h>
41 
42 /// Convenience typedef referring to a namespace scope.
43 using namespace_scope = std::vector<std::string>;
44 
45 /// Convenience typedef referring to a set of symbols.
46 using symbol_set = std::unordered_set<std::string>;
47 
48 /// Level of location information to preserve.
49 enum struct LocationInfo { COLUMN, LINE, FILE, NONE };
50 
51 static const std::map<std::string, LocationInfo> LOCATION_INFO_NAME = {
52   {"column", LocationInfo::COLUMN},
53   {"line", LocationInfo::LINE},
54   {"file", LocationInfo::FILE},
55   {"none", LocationInfo::NONE},
56 };
57 
58 static const std::map<std::string, std::string, std::less<>> NAMED_TYPES = {
59   {"enum-decl", "__anonymous_enum__"},
60   {"class-decl", "__anonymous_struct__"},
61   {"union-decl", "__anonymous_union__"},
62 };
63 
64 /// Compare optional strings.
65 ///
66 /// TODO: Obsoleted by C++20 std::optional::operator<=>.
67 ///
68 /// @param a first operand of comparison
69 ///
70 /// @param b second operand of comparison
71 ///
72 /// @return an integral result
73 int
compare_optional(const std::optional<std::string> & a,const std::optional<std::string> & b)74 compare_optional(const std::optional<std::string>& a,
75                  const std::optional<std::string>& b)
76 {
77   int result = b.has_value() - a.has_value();
78   if (result)
79     return result;
80   return a ? a.value().compare(b.value()) : 0;
81 }
82 
83 /// Cast a C string to a libxml string.
84 ///
85 /// @param str the C string (pointer)
86 ///
87 /// @return the same thing, as a type compatible with the libxml API
88 static const xmlChar*
to_libxml(const char * str)89 to_libxml(const char* str)
90 {
91   return reinterpret_cast<const xmlChar*>(str);
92 }
93 
94 /// Cast a libxml string to C string.
95 ///
96 /// @param str the libxml string (pointer)
97 ///
98 /// @return the same thing, as a type compatible with the C library API
99 static const char*
from_libxml(const xmlChar * str)100 from_libxml(const xmlChar* str)
101 {
102   return reinterpret_cast<const char*>(str);
103 }
104 
105 /// Get comment node corresponding to a given node if it exists.
106 ///
107 /// Returns nullptr if previous node does not exist or is not a comment,
108 /// otherwise returns the previous node.
109 ///
110 /// @param node the node for which comment has to be returned
111 ///
112 /// @return pointer to the comment node
113 static xmlNodePtr
get_comment_node(xmlNodePtr node)114 get_comment_node(xmlNodePtr node)
115 {
116   xmlNodePtr previous_node = node->prev;
117   return previous_node && previous_node->type == XML_COMMENT_NODE
118       ? previous_node : nullptr;
119 }
120 
121 /// Remove a node from its document and free its storage.
122 ///
123 /// @param node the node to remove
124 static void
remove_node(xmlNodePtr node)125 remove_node(xmlNodePtr node)
126 {
127   xmlUnlinkNode(node);
128   xmlFreeNode(node);
129 }
130 
131 /// Remove an XML element and any immediately preceding comment.
132 ///
133 /// @param node the element to remove
134 static void
remove_element(xmlNodePtr node)135 remove_element(xmlNodePtr node)
136 {
137   if (auto comment_node = get_comment_node(node))
138     remove_node(comment_node);
139   remove_node(node);
140 }
141 
142 /// Move a node to an element.
143 ///
144 /// @param node the node to move
145 ///
146 /// @param destination the destination element
147 static void
move_node(xmlNodePtr node,xmlNodePtr destination)148 move_node(xmlNodePtr node, xmlNodePtr destination)
149 {
150   xmlUnlinkNode(node);
151   xmlAddChild(destination, node);
152 }
153 
154 /// Move an XML element and any immediately preceding comment to another
155 /// element.
156 ///
157 /// @param node the element to remove
158 ///
159 /// @param destination the destination element
160 static void
move_element(xmlNodePtr node,xmlNodePtr destination)161 move_element(xmlNodePtr node, xmlNodePtr destination)
162 {
163   if (auto comment_node = get_comment_node(node))
164     move_node(comment_node, destination);
165   move_node(node, destination);
166 }
167 
168 /// Get child nodes of given node.
169 ///
170 /// @param node the node whose children to fetch
171 ///
172 /// @return a vector of child nodes
173 static std::vector<xmlNodePtr>
get_children(xmlNodePtr node)174 get_children(xmlNodePtr node)
175 {
176   std::vector<xmlNodePtr> result;
177   for (xmlNodePtr child = node->children; child; child = child->next)
178     result.push_back(child);
179   return result;
180 }
181 
182 /// Fetch an attribute from a node.
183 ///
184 /// @param node the node
185 ///
186 /// @param name the attribute name
187 ///
188 /// @return the attribute value, if present
189 static std::optional<std::string>
get_attribute(xmlNodePtr node,const char * name)190 get_attribute(xmlNodePtr node, const char* name)
191 {
192   std::optional<std::string> result;
193   xmlChar* attribute = xmlGetProp(node, to_libxml(name));
194   if (attribute)
195     {
196       result = from_libxml(attribute);
197       xmlFree(attribute);
198     }
199   return result;
200 }
201 
202 /// Set an attribute value.
203 ///
204 /// @param node the node
205 ///
206 /// @param name the attribute name
207 ///
208 /// @param value the attribute value
209 static void
set_attribute(xmlNodePtr node,const char * name,const std::string & value)210 set_attribute(xmlNodePtr node, const char* name,
211               const std::string& value)
212 {
213   xmlSetProp(node, to_libxml(name), to_libxml(value.c_str()));
214 }
215 
216 /// Unset an attribute value.
217 ///
218 /// @param node the node
219 ///
220 /// @param name the attribute name
221 static void
unset_attribute(xmlNodePtr node,const char * name)222 unset_attribute(xmlNodePtr node, const char* name)
223 {
224   xmlUnsetProp(node, to_libxml(name));
225 }
226 
227 /// Remove text nodes, recursively.
228 ///
229 /// This simplifies subsequent analysis and manipulation. Removing and
230 /// moving elements will destroy formatting anyway. The only remaining
231 /// node types should be elements and comments.
232 ///
233 /// @param node the node to process
234 static void
strip_text(xmlNodePtr node)235 strip_text(xmlNodePtr node)
236 {
237   if (node->type == XML_TEXT_NODE)
238     remove_node(node);
239   else if (node->type == XML_ELEMENT_NODE)
240     for (xmlNodePtr child : get_children(node))
241       strip_text(child);
242 }
243 
244 /// Add text before / after a node.
245 ///
246 /// @param node the node
247 ///
248 /// @param after whether the next should go after
249 ///
250 /// @param text the text
251 static void
add_text(xmlNodePtr node,bool after,const std::string & text)252 add_text(xmlNodePtr node, bool after, const std::string& text)
253 {
254   xmlNodePtr text_node = xmlNewTextLen(to_libxml(text.data()), text.size());
255   if (after)
256     xmlAddNextSibling(node, text_node);
257   else
258     xmlAddPrevSibling(node, text_node);
259 }
260 
261 /// Format an XML element by adding internal indentation and newlines.
262 ///
263 /// This makes the XML readable.
264 ///
265 /// @param indentation what to add to the line indentation prefix
266 ///
267 /// @param prefix the current line indentation prefix
268 ///
269 /// @param node the node to format
270 static void
format_xml(const std::string & indentation,std::string prefix,xmlNodePtr node)271 format_xml(const std::string& indentation, std::string prefix, xmlNodePtr node)
272 {
273   std::vector<xmlNodePtr> children = get_children(node);
274   if (children.empty())
275     return;
276 
277   // The ordering of operations here is incidental. The outcomes we want
278   // are: 1. an extra newline after the opening tag and indentation of
279   // the closing tag to match, and 2. indentation and newline for each
280   // child.
281   add_text(children[0], false, "\n");
282   add_text(children[children.size() - 1], true, prefix);
283   prefix += indentation;
284   for (xmlNodePtr child : children)
285     {
286       add_text(child, false, prefix);
287       format_xml(indentation, prefix, child);
288       add_text(child, true, "\n");
289     }
290 }
291 
292 /// Rewrite attributes using single quotes.
293 ///
294 /// libxml uses double quotes but libabigail uses single quotes.
295 ///
296 /// Note that libabigail does not emit attributes *containing* single
297 /// quotes and if it did it would escape them as &quot; which libxml
298 /// would in turn preserve. However, the code here will handle all forms
299 /// of quotes, conservatively.
300 ///
301 /// Annotation comments can contain single quote characters so just
302 /// checking for any single quotes at all is insufficiently precise.
303 ///
304 /// @param start a pointer to the start of the XML text
305 ///
306 /// @param limit a pointer to just past the end of the XML text
307 static void
adjust_quotes(xmlChar * start,xmlChar * limit)308 adjust_quotes(xmlChar* start, xmlChar* limit)
309 {
310   const std::string open{"<!--"};
311   const std::string close{"-->"};
312   while (start < limit)
313     {
314       // Look for a '<'
315       start = std::find(start, limit, '<');
316       if (start == limit)
317         break;
318       if (start + open.size() < limit
319           && std::equal(open.begin(), open.end(), start))
320         {
321           // Have a comment, skip to the end.
322           start += open.size();
323           xmlChar* end = std::search(start, limit, close.begin(), close.end());
324           if (end == limit)
325             break;
326           start = end + close.size();
327         }
328       else
329         {
330           // Have some tag, search for the end.
331           start += 1;
332           xmlChar* end = std::find(start, limit, '>');
333           if (end == limit)
334             break;
335           // In general, inside a tag we could find either ' or " being
336           // used to quote attributes and the other quote character
337           // being used as part of the attribute data. However, libxml's
338           // xmlDocDump* functions use " to quote attributes and it's
339           // safe to substitute this quote character with ' so long as '
340           // does not appear within the attribute data.
341           if (std::find(start, end, '\'') == end)
342             for (xmlChar* c = start; c < end; ++c)
343               if (*c == '"')
344                 *c = '\'';
345           start = end + 1;
346         }
347     }
348 }
349 
350 /// Compare given attribute of 2 XML nodes.
351 ///
352 /// @param attribute the attribute to compare
353 ///
354 /// @param a first XML node to compare
355 ///
356 /// @param b second XML node to compare
357 ///
358 /// @return an integral result
359 static int
compare_attributes(const char * attribute,const xmlNodePtr & a,const xmlNodePtr & b)360 compare_attributes(
361     const char* attribute, const xmlNodePtr& a, const xmlNodePtr& b)
362 {
363   return compare_optional(get_attribute(a, attribute),
364                           get_attribute(b, attribute));
365 }
366 
367 static const std::set<std::string> DROP_IF_EMPTY = {
368   "elf-variable-symbols",
369   "elf-function-symbols",
370   "namespace-decl",
371   "abi-instr",
372   "abi-corpus",
373   "abi-corpus-group",
374 };
375 
376 /// Drop empty elements, if safe to do so, recursively.
377 ///
378 /// @param node the element to process
379 static void
drop_empty(xmlNodePtr node)380 drop_empty(xmlNodePtr node)
381 {
382   if (node->type != XML_ELEMENT_NODE)
383     return;
384   for (xmlNodePtr child : get_children(node))
385     drop_empty(child);
386   // Do not drop the root element, even if empty.
387   if (node->parent->type == XML_DOCUMENT_NODE)
388     return;
389   if (!node->children && DROP_IF_EMPTY.count(from_libxml(node->name)))
390     remove_element(node);
391 }
392 
393 /// Get ELF symbol id.
394 ///
395 /// This is not an explicit attribute. It takes one of these forms:
396 ///
397 /// * name (if symbol is not versioned)
398 /// * name@version (if symbol is versioned but not the default version)
399 /// * name@@version (if symbol is versioned and the default version)
400 ///
401 /// @param node the elf-symbol element
402 ///
403 /// @return the ELF symbol id
404 static std::string
get_elf_symbol_id(xmlNodePtr node)405 get_elf_symbol_id(xmlNodePtr node)
406 {
407   const auto name = get_attribute(node, "name");
408   assert(name);
409   std::string result = name.value();
410   const auto version = get_attribute(node, "version");
411   if (version)
412     {
413       result += '@';
414       const auto is_default = get_attribute(node, "is-default-version");
415       if (is_default && is_default.value() == "yes")
416         result += '@';
417       result += version.value();
418     }
419   return result;
420 }
421 
422 static const std::set<std::string> HAS_LOCATION = {
423   "class-decl",
424   "enum-decl",
425   "function-decl",
426   "parameter",
427   "typedef-decl",
428   "union-decl",
429   "var-decl"
430 };
431 
432 /// Limit location information.
433 ///
434 /// @param location_info the level of location information to retain
435 ///
436 /// @param node the element to process
437 static void
limit_locations(LocationInfo location_info,xmlNodePtr node)438 limit_locations(LocationInfo location_info, xmlNodePtr node)
439 {
440   if (node->type != XML_ELEMENT_NODE)
441     return;
442   if (HAS_LOCATION.count(from_libxml(node->name)))
443     {
444       if (location_info > LocationInfo::COLUMN)
445         {
446           unset_attribute(node, "column");
447           if (location_info > LocationInfo::LINE)
448             {
449               unset_attribute(node, "line");
450               if (location_info > LocationInfo::FILE)
451                 unset_attribute(node, "filepath");
452             }
453         }
454     }
455   for (xmlNodePtr child : get_children(node))
456     limit_locations(location_info, child);
457 }
458 
459 /// Handle unreachable elements.
460 ///
461 /// Reachability is defined to be union of contains, containing and
462 /// refers-to relationships for types, declarations and symbols. The
463 /// roots for reachability are the ELF elements in the ABI.
464 ///
465 /// The subrange element requires special treatment. It has a useless
466 /// type id, but it is not a type and its type id aliases with that of
467 /// all other subranges of the same length. So don't treat it as a type.
468 ///
469 /// @param prune whether to prune unreachable elements
470 ///
471 /// @param report whether to report untyped symbols
472 ///
473 /// @param alias_map mapping from alias to main elf-symbol-id
474 ///
475 /// @param root the XML root element
476 ///
477 /// @return the number of untyped symbols
478 static size_t
handle_unreachable(bool prune,bool report,const std::unordered_map<std::string,std::string> & alias_map,xmlNodePtr root)479 handle_unreachable(
480     bool prune, bool report,
481     const std::unordered_map<std::string, std::string>& alias_map,
482     xmlNodePtr root)
483 {
484   // ELF symbol ids.
485   std::set<std::string> elf_symbol_ids;
486 
487   // Simple way of allowing two kinds of nodes: false=>type,
488   // true=>symbol.
489   using vertex_t = std::pair<bool, std::string>;
490 
491   // Graph vertices.
492   std::set<vertex_t> vertices;
493   // Graph edges.
494   std::map<vertex_t, std::set<vertex_t>> edges;
495 
496   // Keep track of type / symbol nesting so we can identify contains,
497   // containing and refers-to relationships.
498   std::vector<vertex_t> stack;
499 
500   // Process an XML node, adding a vertex and possibly some edges.
501   std::function<void(xmlNodePtr)> process_node = [&](xmlNodePtr node) {
502     // We only care about elements and not comments, at this stage.
503     if (node->type != XML_ELEMENT_NODE)
504       return;
505 
506     const char* node_name = from_libxml(node->name);
507 
508     // Is this an ELF symbol?
509     if (strcmp(node_name, "elf-symbol") == 0)
510       {
511         elf_symbol_ids.insert(get_elf_symbol_id(node));
512         // Early return is safe, but not necessary.
513         return;
514       }
515 
516     // Is this a type? Note that the same id may appear multiple times.
517     const auto id = strcmp(node_name, "subrange") != 0
518                     ? get_attribute(node, "id")
519                     : std::optional<std::string>();
520     if (id)
521       {
522         vertex_t type_vertex{false, id.value()};
523         vertices.insert(type_vertex);
524         const auto naming_typedef_id = get_attribute(node, "naming-typedef-id");
525         if (naming_typedef_id)
526           {
527             // This is an odd one, there can be a backwards link from an
528             // anonymous type to a typedef that refers to it. The -t
529             // option will drop these, but if they are still present, we
530             // should model the link to avoid the risk of dangling
531             // references.
532             vertex_t naming_typedef_vertex{false, naming_typedef_id.value()};
533             edges[type_vertex].insert(naming_typedef_vertex);
534           }
535         if (!stack.empty())
536           {
537             // Parent<->child dependencies; record dependencies both
538             // ways to avoid holes in XML types and declarations.
539             const auto& parent = stack.back();
540             edges[parent].insert(type_vertex);
541             edges[type_vertex].insert(parent);
542           }
543         // Record the type.
544         stack.push_back(type_vertex);
545       }
546 
547     // Is this a (declaration expected to be linked to a) symbol?
548     const auto symbol = get_attribute(node, "elf-symbol-id");
549     if (symbol)
550       {
551         vertex_t symbol_vertex{true, symbol.value()};
552         vertices.insert(symbol_vertex);
553         if (!stack.empty())
554           {
555             // Parent<->child dependencies; record dependencies both
556             // ways to avoid making holes in XML types and declarations.
557             //
558             // Symbols exist outside of the type hierarchy, so choosing
559             // to make them depend on a containing type scope and vice
560             // versa is conservative and probably not necessary.
561             const auto& parent = stack.back();
562             edges[parent].insert(symbol_vertex);
563             edges[symbol_vertex].insert(parent);
564           }
565         // Record the symbol.
566         stack.push_back(symbol_vertex);
567         // In practice there will be at most one symbol on the stack; we could
568         // verify this here, but it wouldn't achieve anything.
569       }
570 
571     // Being both would make the stack ordering ambiguous.
572     if (id && symbol)
573       {
574         std::cerr << "cannot handle element which is both type and symbol\n";
575         exit(1);
576       }
577 
578     // Is there a reference to another type?
579     const auto type_id = get_attribute(node, "type-id");
580     if (type_id && !stack.empty())
581       {
582         // The enclosing type or symbol refers to another type.
583         const auto& parent = stack.back();
584         vertex_t type_id_vertex{false, type_id.value()};
585         edges[parent].insert(type_id_vertex);
586       }
587 
588     // Process recursively.
589     for (auto child : get_children(node))
590       process_node(child);
591 
592     // Restore the stack.
593     if (symbol)
594       stack.pop_back();
595     if (id)
596       stack.pop_back();
597   };
598 
599   // Traverse the whole root element and build a graph.
600   process_node(root);
601 
602   // Simple DFS.
603   std::set<vertex_t> seen;
604   std::function<void(vertex_t)> dfs = [&](vertex_t vertex) {
605     if (!seen.insert(vertex).second)
606       return;
607     auto it = edges.find(vertex);
608     if (it != edges.end())
609       for (auto to : it->second)
610         dfs(to);
611   };
612 
613   // Count of how many symbols are untyped.
614   size_t untyped = 0;
615 
616   // Traverse the graph, starting from the ELF symbols.
617   for (const auto& symbol_id : elf_symbol_ids)
618     {
619       const auto it = alias_map.find(symbol_id);
620       const auto& mapped_symbol_id =
621           it != alias_map.end() ? it->second : symbol_id;
622       vertex_t symbol_vertex{true, mapped_symbol_id};
623       if (vertices.count(symbol_vertex))
624         {
625           dfs(symbol_vertex);
626         }
627       else
628         {
629           if (report)
630             std::cerr << "no declaration found for ELF symbol with id "
631                       << symbol_id << '\n';
632           ++untyped;
633         }
634     }
635 
636   // This is a DFS with early stopping.
637   std::function<void(xmlNodePtr)> remove_unseen = [&](xmlNodePtr node) {
638     if (node->type != XML_ELEMENT_NODE)
639       return;
640 
641     const char* node_name = from_libxml(node->name);
642 
643     // Return if we know that this is a type to keep or drop in its
644     // entirety.
645     const auto id = strcmp(node_name, "subrange") != 0
646                     ? get_attribute(node, "id")
647                     : std::optional<std::string>();
648     if (id)
649       {
650         if (!seen.count(vertex_t{false, id.value()}))
651           remove_element(node);
652         return;
653       }
654 
655     // Return if we know that this is a declaration to keep or drop in
656     // its entirety. Note that var-decl and function-decl are the only
657     // elements that can have an elf-symbol-id attribute.
658     if (strcmp(node_name, "var-decl") == 0
659         || strcmp(node_name, "function-decl") == 0)
660       {
661         const auto symbol = get_attribute(node, "elf-symbol-id");
662         if (!(symbol && seen.count(vertex_t{true, symbol.value()})))
663           remove_element(node);
664         return;
665       }
666 
667     // Otherwise, this is not a type, declaration or part thereof, so
668     // process child elements.
669     for (auto child : get_children(node))
670       remove_unseen(child);
671   };
672 
673   if (prune)
674     // Traverse the XML, removing unseen elements.
675     remove_unseen(root);
676 
677   return untyped;
678 }
679 
680 /// Tidy anonymous types in various ways.
681 ///
682 /// 1. Normalise anonymous type names by removing the numerical suffix.
683 ///
684 /// Anonymous type names take the form __anonymous_foo__N where foo is
685 /// one of enum, struct or union and N is an optional numerical suffix.
686 /// The suffices are senstive to processing order and do not convey
687 /// useful ABI information. They can cause spurious harmless diffs and
688 /// make XML diffing and rebasing harder.
689 ///
690 /// It's best to remove the suffix.
691 ///
692 /// 2. Reanonymise anonymous types that have been given names.
693 ///
694 /// A recent change to abidw changed its behaviour for any anonymous
695 /// type that has a naming typedef. In addition to linking the typedef
696 /// and type in both directions, the code now gives (some) anonymous
697 /// types the same name as the typedef. This misrepresents the original
698 /// types.
699 ///
700 /// Such types should be anonymous.
701 ///
702 /// 3. Discard naming typedef backlinks.
703 ///
704 /// The attribute naming-typedef-id is a backwards link from an
705 /// anonymous type to the typedef that refers to it. It is ignored by
706 /// abidiff.
707 ///
708 /// Unfortunately, libabigail sometimes conflates multiple anonymous
709 /// types that have naming typedefs and only one of the typedefs can
710 /// "win". ABI XML is thus sensitive to processing order and can also
711 /// end up containing definitions of an anonymous type with differing
712 /// naming-typedef-id attributes.
713 ///
714 /// It's best to just drop the attribute.
715 ///
716 /// @param node the XML node to process
717 static void
handle_anonymous_types(bool normalise,bool reanonymise,bool discard_naming,xmlNodePtr node)718 handle_anonymous_types(bool normalise, bool reanonymise, bool discard_naming,
719                        xmlNodePtr node)
720 {
721   if (node->type != XML_ELEMENT_NODE)
722     return;
723 
724   const auto it = NAMED_TYPES.find(from_libxml(node->name));
725   if (it != NAMED_TYPES.end())
726     {
727       const auto& anon = it->second;
728       const auto name_attribute = get_attribute(node, "name");
729       const auto& name =
730           name_attribute ? name_attribute.value() : std::string();
731       const auto anon_attr = get_attribute(node, "is-anonymous");
732       const bool is_anon = anon_attr && anon_attr.value() == "yes";
733       const auto naming_attribute = get_attribute(node, "naming-typedef-id");
734       if (normalise && is_anon && name != anon) {
735         // __anonymous_foo__123 -> __anonymous_foo__
736         set_attribute(node, "name", anon);
737       }
738       if (reanonymise && !is_anon && naming_attribute) {
739         // bar with naming typedef -> __anonymous_foo__
740         set_attribute(node, "is-anonymous", "yes");
741         set_attribute(node, "name", anon);
742       }
743       if (discard_naming && naming_attribute)
744         unset_attribute(node, "naming-typedef-id");
745     }
746 
747   for (auto child : get_children(node))
748     handle_anonymous_types(normalise, reanonymise, discard_naming, child);
749 }
750 
751 /// Builds a mapping from qualified types to the underlying type ids.
752 ///
753 /// Recursively constructs a mapping from qualified types to the underlying
754 /// type ids found in the XML tree rooted at the given node.
755 ///
756 /// @param node node of the XML tree to process
757 ///
758 /// @param qualifier_id_to_type_id map from qualified types to underlying type
759 /// ids being constructed
760 static void
build_qualifier_id_to_type_id_map(const xmlNodePtr node,std::unordered_map<std::string,std::string> & qualifier_id_to_type_id)761 build_qualifier_id_to_type_id_map(
762     const xmlNodePtr node,
763     std::unordered_map<std::string, std::string>& qualifier_id_to_type_id)
764 {
765   if (node->type != XML_ELEMENT_NODE)
766     return;
767 
768   if (strcmp(from_libxml(node->name), "qualified-type-def") == 0)
769     {
770       const auto id = get_attribute(node, "id");
771       const auto type_id = get_attribute(node, "type-id");
772       if (!id || !type_id)
773         {
774           std::cerr << "found qualified type definition with missing id and/or "
775                     << "type id\nid: " << id.value_or("(missing)")
776                     << "\ntype id: " << type_id.value_or("(missing)") << '\n';
777           exit(1);
778         }
779       const auto& id_value = id.value();
780       const auto& type_id_value = type_id.value();
781       auto [it, inserted] =
782           qualifier_id_to_type_id.insert({id_value, type_id_value});
783       if (!inserted && it->second != type_id_value)
784         {
785           std::cerr << "conflicting type ids ('" << it->second << "' & '"
786                     << type_id_value << "') found for qualified type with "
787                     << "id: " << id_value << '\n';
788           exit(1);
789         }
790     }
791   else
792     {
793       for (auto child : get_children(node))
794         build_qualifier_id_to_type_id_map(child, qualifier_id_to_type_id);
795     }
796 }
797 
798 /// Determine mapping from qualified type to underlying unqualified type.
799 ///
800 /// This resolves chains of qualifiers on qualified types. Note that this does
801 /// not attempt to look through typedefs.
802 ///
803 /// @param qualifier_id_to_type_id map from qualified types to underlying type
804 /// ids
805 static void
resolve_qualifier_chains(std::unordered_map<std::string,std::string> & qualifier_id_to_type_id)806 resolve_qualifier_chains(
807     std::unordered_map<std::string, std::string>& qualifier_id_to_type_id)
808 {
809   for (auto& [id, type_id] : qualifier_id_to_type_id)
810     {
811       std::unordered_set<std::string> seen;
812       while (true)
813         {
814           if (!seen.insert(type_id).second)
815             {
816               std::cerr << "dequalification of type with id '" << id
817                         << "' ran into a self referencing loop\n";
818               exit(1);
819             }
820           auto it = qualifier_id_to_type_id.find(type_id);
821           if (it == qualifier_id_to_type_id.end())
822             break;
823           type_id = it->second;
824         }
825     }
826 }
827 
828 /// Removes top-level qualifiers from function parameter and return types.
829 ///
830 /// Recursively removes top-level qualifiers from parameter and return types of
831 /// all function declarations and function types found in the XML tree rooted
832 /// at the given node.
833 ///
834 /// This requires also requires a map of qualified types to the underlying type
835 /// ids, which enables the unqualification of qualified types.
836 ///
837 /// @param node node of the XML tree to process
838 ///
839 /// @param qualifier_id_to_type_id map from qualified types to underlying type
840 /// ids
841 static void
remove_function_parameter_type_qualifiers(const xmlNodePtr node,const std::unordered_map<std::string,std::string> & qualifier_id_to_type_id)842 remove_function_parameter_type_qualifiers(
843     const xmlNodePtr node,
844     const std::unordered_map<std::string, std::string>& qualifier_id_to_type_id)
845 {
846   if (node->type != XML_ELEMENT_NODE)
847     return;
848 
849   if (strcmp(from_libxml(node->name), "function-decl") == 0 ||
850       strcmp(from_libxml(node->name), "function-type") == 0)
851     {
852       bool type_changed = false;
853       for (auto child : get_children(node))
854         if (const auto type_id = get_attribute(child, "type-id"))
855           {
856             const auto& type_id_value = type_id.value();
857             auto it = qualifier_id_to_type_id.find(type_id_value);
858             if (it != qualifier_id_to_type_id.end())
859               {
860                 type_changed = true;
861                 set_attribute(child, "type-id", it->second);
862 
863                 // Parameter or return type has been modified, making a comment
864                 // describing the type for this node inconsistent. Thus the
865                 // comment must be removed if it exists.
866                 if (auto comment_node = get_comment_node(child))
867                   remove_node(comment_node);
868               }
869           }
870 
871       if (type_changed)
872         {
873           // Parameter or return type has been modified, making a comment
874           // describing the type for this node inconsistent. Thus the comment
875           // must be removed if it exists.
876           if (auto comment_node = get_comment_node(node))
877             remove_node(comment_node);
878         }
879     }
880   else
881     {
882       for (auto child : get_children(node))
883         remove_function_parameter_type_qualifiers(child, qualifier_id_to_type_id);
884     }
885 }
886 
887 /// Remove attributes emitted by abidw --load-all-types.
888 ///
889 /// With this invocation and if any user-defined types are deemed
890 /// unreachable, libabigail will output a tracking-non-reachable-types
891 /// attribute on top-level elements and a is-non-reachable attribute on
892 /// each such type element.
893 ///
894 /// abitidy has its own graph-theoretic notion of reachability and these
895 /// attributes have no ABI relevance.
896 ///
897 /// It's best to just drop them.
898 ///
899 /// @param node the XML node to process
900 void
clear_non_reachable(xmlNodePtr node)901 clear_non_reachable(xmlNodePtr node)
902 {
903   if (node->type != XML_ELEMENT_NODE)
904     return;
905 
906   const char* node_name = from_libxml(node->name);
907 
908   if (strcmp(node_name, "abi-corpus-group") == 0
909       || strcmp(node_name, "abi-corpus") == 0)
910     unset_attribute(node, "tracking-non-reachable-types");
911   else if (NAMED_TYPES.find(node_name) != NAMED_TYPES.end())
912     unset_attribute(node, "is-non-reachable");
913 
914   for (auto child : get_children(node))
915     clear_non_reachable(child);
916 }
917 
918 /// Determine the effective name of a given node.
919 ///
920 /// The effective name is same as the value of the 'name' attribute for all
921 /// nodes except nodes which represent anonymous types. For anonymous types, the
922 /// function returns std::nullopt.
923 ///
924 /// @param node the node for which effective name has to be determined
925 ///
926 /// @return an optional name string
927 std::optional<std::string>
get_effective_name(xmlNodePtr node)928 get_effective_name(xmlNodePtr node)
929 {
930   return get_attribute(node, "is-anonymous")
931       ? std::nullopt : get_attribute(node, "name");
932 }
933 
934 /// Record type ids for anonymous types that have to be renumbered.
935 ///
936 /// This constructs a map from the ids that need to be renumbered to the XML
937 /// node where the id is defined/declared. Also records hexadecimal hashes used
938 /// by non-anonymous types.
939 ///
940 /// @param node the node being processed
941 ///
942 /// @param to_renumber map from ids to be renumbered to corresponding XML node
943 ///
944 /// @param used_hashes set of hashes used by non-anonymous type ids
945 static void
record_ids_to_renumber(xmlNodePtr node,std::unordered_map<std::string,xmlNodePtr> & to_renumber,std::unordered_set<size_t> & used_hashes)946 record_ids_to_renumber(
947     xmlNodePtr node,
948     std::unordered_map<std::string, xmlNodePtr>& to_renumber,
949     std::unordered_set<size_t>& used_hashes)
950 {
951   if (node->type != XML_ELEMENT_NODE)
952     return;
953 
954   for (auto child : get_children(node))
955     record_ids_to_renumber(child, to_renumber, used_hashes);
956 
957   const auto& id_attr = get_attribute(node, "id");
958   if (!id_attr)
959     return;
960 
961   const auto& id = id_attr.value();
962   const std::string_view node_name(from_libxml(node->name));
963   const bool is_anonymous_type_candidate = NAMED_TYPES.count(node_name);
964   if (!is_anonymous_type_candidate || get_effective_name(node))
965     {
966       const bool is_hexadecimal = std::all_of(
967           id.begin(), id.end(), [](unsigned char c){ return std::isxdigit(c); });
968       if (id.size() == 8 && is_hexadecimal)
969         {
970           // Do not check for successful insertion since there can be multiple
971           // declarations/definitions for a type.
972           size_t hash = std::stoul(id, nullptr, 16);
973           used_hashes.insert(hash);
974         }
975     }
976   else
977     {
978       // Check for successful insertion since anonymous types are not prone to
979       // having multiple definitions/declarations.
980       if (!to_renumber.insert({id, node}).second)
981         {
982           std::cerr << "Found multiple definitions/declarations of anonmyous "
983                     << "type with id: " << id << '\n';
984           exit(1);
985         }
986     }
987 }
988 
989 /// Compute a stable string hash.
990 ///
991 /// This is the 32-bit FNV-1a algorithm. The algorithm, reference code
992 /// and constants are all unencumbered. It is fast and has reasonable
993 /// distribution properties.
994 ///
995 /// std::hash has no portability or stability guarantees so is
996 /// unsuitable where reproducibility is a requirement such as in XML
997 /// output.
998 ///
999 /// https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function
1000 ///
1001 /// @param str the string to hash
1002 ///
1003 /// @return an unsigned 32 bit hash value
1004 static uint32_t
fnv_hash(const std::string & str)1005 fnv_hash(const std::string& str)
1006 {
1007   const uint32_t prime = 0x01000193;
1008   const uint32_t offset_basis = 0x811c9dc5;
1009   uint32_t hash = offset_basis;
1010   for (const char& c : str)
1011     {
1012       uint8_t byte = c;
1013       hash = hash ^ byte;
1014       hash = hash * prime;
1015     }
1016   return hash;
1017 }
1018 
1019 /// Generate a new 32 bit type id and return its hexadecimal representation.
1020 ///
1021 /// Generates hash of the given hash content. Uses linear probing to resolve
1022 /// hash collisions. Also, records the newly generated hash in a set of used
1023 /// hashes.
1024 ///
1025 /// @param hash_content the string which is used to generate a hash
1026 ///
1027 /// @param used_hashes the set of hashes which have already been used
1028 ///
1029 /// @return the hexadecimal representation of the newly generated hash
1030 static std::string
generate_new_id(const std::string & hash_content,std::unordered_set<size_t> & used_hashes)1031 generate_new_id(const std::string& hash_content,
1032                 std::unordered_set<size_t>& used_hashes)
1033 {
1034   auto hash = fnv_hash(hash_content);
1035   while (!used_hashes.insert(hash).second)
1036     ++hash;
1037   std::ostringstream os;
1038   os << std::hex << std::setfill('0') << std::setw(8) << hash;
1039   return os.str();
1040 }
1041 
1042 /// Find the first member for a user defined type.
1043 ///
1044 /// The first member for enums is the first enumerator while for structs and
1045 /// unions it is the variable declaration of the first data member.
1046 ///
1047 /// @param node the node being processed
1048 ///
1049 /// @return the node which represents the first member
1050 static xmlNodePtr
find_first_member(xmlNodePtr node)1051 find_first_member(xmlNodePtr node)
1052 {
1053   auto first_child_by_xml_node_name =
1054       [](const xmlNodePtr node, const std::string_view name) -> xmlNodePtr {
1055     for (auto child : get_children(node))
1056       if (child->type == XML_ELEMENT_NODE && from_libxml(child->name) == name)
1057         return child;
1058     return nullptr;
1059   };
1060 
1061   if (strcmp(from_libxml(node->name), "enum-decl") == 0)
1062       return first_child_by_xml_node_name(node, "enumerator");
1063   if (auto data_member = first_child_by_xml_node_name(node, "data-member"))
1064       return first_child_by_xml_node_name(data_member, "var-decl");
1065   return nullptr;
1066 }
1067 
1068 /// Calculate new type id for a given old type id.
1069 ///
1070 /// This resolves the old type ids for anonymous types to new ones, while ids
1071 /// which do not belong to anonymous types are returned as they are.
1072 ///
1073 /// @param type_id old type id
1074 ///
1075 /// @param to_renumber map from ids to be renumbered to corresponding XML node
1076 ///
1077 /// @param used_hashes set of hashes used by other type ids
1078 ///
1079 /// @param type_id_map mapping from old type ids to new ones
1080 ///
1081 /// @return resolved type id
1082 static std::string
resolve_ids_to_renumber(const std::string & type_id,const std::unordered_map<std::string,xmlNodePtr> & to_renumber,std::unordered_set<size_t> & used_hashes,std::unordered_map<std::string,std::string> & type_id_map)1083 resolve_ids_to_renumber(
1084     const std::string& type_id,
1085     const std::unordered_map<std::string, xmlNodePtr>& to_renumber,
1086     std::unordered_set<size_t>& used_hashes,
1087     std::unordered_map<std::string, std::string>& type_id_map)
1088 {
1089   // Check whether the given type_id needs to be renumbered. If not, the type_id
1090   // can be returned since it does not represent an anonymous type.
1091   const auto to_renumber_it = to_renumber.find(type_id);
1092   if (to_renumber_it == to_renumber.end())
1093     return type_id;
1094 
1095   // Insert an empty string placeholder to prevent infinite loops.
1096   const auto& [type_mapping, inserted] = type_id_map.insert({type_id, {}});
1097   if (!inserted)
1098     {
1099       if (!type_mapping->second.empty())
1100         return type_mapping->second;
1101       std::cerr << "new type id depends on itself for type with id: "
1102                 << type_id << '\n';
1103       exit(1);
1104     }
1105 
1106   const auto& node = to_renumber_it->second;
1107   std::ostringstream hash_content;
1108   hash_content << from_libxml(node->name);
1109   if (auto first_member = find_first_member(node))
1110     {
1111       // Create hash content by combining the name & resolved type id of the
1112       // first member and the kind of anonymous type.
1113       if (auto name = get_effective_name(first_member))
1114         hash_content << '-' << name.value();
1115       if (auto type_id = get_attribute(first_member, "type-id"))
1116         hash_content << '-' << resolve_ids_to_renumber(
1117             type_id.value(), to_renumber, used_hashes, type_id_map);
1118     }
1119   else
1120     {
1121       // No member information available. Possibly type is empty.
1122       hash_content << "__empty";
1123     }
1124 
1125   return type_mapping->second =
1126       generate_new_id(hash_content.str(), used_hashes);
1127 }
1128 
1129 /// Replace old type ids by new ones.
1130 ///
1131 /// @param node the node which is being processed
1132 ///
1133 /// @param type_id_map map from old type ids to replace to new ones
1134 static void
renumber_type_ids(xmlNodePtr node,const std::unordered_map<std::string,std::string> & type_id_map)1135 renumber_type_ids(
1136     xmlNodePtr node,
1137     const std::unordered_map<std::string, std::string>& type_id_map)
1138 {
1139   if (node->type != XML_ELEMENT_NODE)
1140     return;
1141 
1142   auto maybe_replace = [&](const char* attribute_name) {
1143     const auto& attribute = get_attribute(node, attribute_name);
1144     if (attribute)
1145       {
1146         const auto it = type_id_map.find(attribute.value());
1147         if (it != type_id_map.end())
1148           set_attribute(node, attribute_name, it->second);
1149       }
1150   };
1151 
1152   maybe_replace("id");
1153   maybe_replace("type-id");
1154   maybe_replace("naming-typedef-id");
1155 
1156   for (auto child : get_children(node))
1157     renumber_type_ids(child, type_id_map);
1158 }
1159 
1160 /// Determine whether one XML element is the same as another.
1161 ///
1162 /// XML elements representing members are sometimes emitted multiple
1163 /// times, identically.
1164 ///
1165 /// @param left the first element to compare
1166 ///
1167 /// @param right the second element to compare
1168 ///
1169 /// @return whether the first element is the same as the second
1170 bool
equal_tree(xmlNodePtr left,xmlNodePtr right)1171 equal_tree(xmlNodePtr left, xmlNodePtr right)
1172 {
1173   // Node names must match.
1174   const char* left_name = from_libxml(left->name);
1175   const char* right_name = from_libxml(right->name);
1176   if (strcmp(left_name, right_name) != 0)
1177     return false;
1178 
1179   // Attributes must match.
1180   std::unordered_set<std::string> attributes;
1181   for (auto properties : {left->properties, right->properties})
1182     for (auto property = properties; property; property = property->next)
1183       attributes.insert(from_libxml(property->name));
1184   for (const auto& attribute : attributes)
1185     {
1186       const char* name = attribute.c_str();
1187       const auto left_value = get_attribute(left, name);
1188       const auto right_value = get_attribute(right, name);
1189       if (left_value != right_value)
1190         return false;
1191     }
1192 
1193   // The left subelements must be the same as the right ones.
1194   xmlNodePtr left_child = xmlFirstElementChild(left);
1195   xmlNodePtr right_child = xmlFirstElementChild(right);
1196   while (left_child && right_child)
1197     {
1198       if (!equal_tree(left_child, right_child))
1199         return false;
1200       left_child = xmlNextElementSibling(left_child);
1201       right_child = xmlNextElementSibling(right_child);
1202     }
1203   if (left_child || right_child)
1204     return false;
1205   return true;
1206 }
1207 
1208 /// Determine whether one XML element is a subtree of another.
1209 ///
1210 /// XML elements representing types are sometimes emitted multiple
1211 /// times, identically. Also, member typedefs are sometimes emitted
1212 /// separately from their types, resulting in duplicate XML fragments.
1213 ///
1214 /// Both these issues can be resolved by first detecting duplicate
1215 /// occurrences of a given type id and then checking to see if there's
1216 /// an instance that subsumes the others, which can then be eliminated.
1217 ///
1218 /// @param left the first element to compare
1219 ///
1220 /// @param right the second element to compare
1221 ///
1222 /// @return whether the first element is a subtree of the second
1223 bool
sub_tree(xmlNodePtr left,xmlNodePtr right)1224 sub_tree(xmlNodePtr left, xmlNodePtr right)
1225 {
1226   // The set of attributes that should be excluded from consideration when
1227   // comparing XML elements. These attributes are either irrelevant for ABI
1228   // monitoring or already handled by another check.
1229   static const std::unordered_set<std::string> IRRELEVANT_ATTRIBUTES = {
1230     // Source location information. This can vary between duplicate type
1231     // definitions.
1232     "filepath",
1233     "line",
1234     "column",
1235     // Anonymous type to typedef backlinks.
1236     "naming-typedef-id",
1237     // Annotation that can appear with --load-all-types.
1238     "is-non-reachable",
1239     // Handled while checking for effective name equivalence.
1240     "name",
1241     "is-anonymous",
1242   };
1243 
1244   // Node names must match.
1245   const char* left_name = from_libxml(left->name);
1246   const char* right_name = from_libxml(right->name);
1247   if (strcmp(left_name, right_name) != 0)
1248     return false;
1249 
1250   // Effective names must match.
1251   if (get_effective_name(left) != get_effective_name(right))
1252     return false;
1253 
1254   // Attributes may be missing on the left, but must match otherwise.
1255   for (auto p = left->properties; p; p = p->next)
1256   {
1257     const char* attribute_name = from_libxml(p->name);
1258     if (IRRELEVANT_ATTRIBUTES.count(attribute_name))
1259       continue;
1260     // EXCEPTION: libabigail emits the access specifier for the type
1261     // it's trying to "emit in scope" rather than for what may be a
1262     // containing type; so allow member-type attribute access to differ.
1263     if (strcmp(left_name, "member-type") == 0
1264         && strcmp(attribute_name, "access") == 0)
1265       continue;
1266     const auto left_value = get_attribute(left, attribute_name);
1267     assert(left_value);
1268     const auto right_value = get_attribute(right, attribute_name);
1269     if (!right_value || left_value.value() != right_value.value())
1270       return false;
1271   }
1272 
1273   // The left subelements must be a subsequence of the right ones.
1274   xmlNodePtr left_child = xmlFirstElementChild(left);
1275   xmlNodePtr right_child = xmlFirstElementChild(right);
1276   while (left_child && right_child)
1277     {
1278       if (sub_tree(left_child, right_child))
1279         left_child = xmlNextElementSibling(left_child);
1280       right_child = xmlNextElementSibling(right_child);
1281     }
1282   return !left_child;
1283 }
1284 
1285 
1286 /// Handle excess data members.
1287 ///
1288 /// @param eliminate whether to eliminate
1289 ///
1290 /// @param root the root XML element
1291 ///
1292 /// @return the number of excess members
handle_excess_members(bool eliminate,xmlNodePtr root)1293 size_t handle_excess_members(bool eliminate, xmlNodePtr root)
1294 {
1295   std::vector<xmlNodePtr> types;
1296 
1297   // find all structs and unions
1298   std::function<void(xmlNodePtr)> dfs = [&](xmlNodePtr node) {
1299     if (node->type != XML_ELEMENT_NODE)
1300       return;
1301     const char* node_name = from_libxml(node->name);
1302     // preorder in case we delete a nested type
1303     for (auto child : get_children(node))
1304       dfs(child);
1305     if (strcmp(node_name, "class-decl") || strcmp(node_name, "union-decl"))
1306       types.push_back(node);
1307   };
1308   dfs(root);
1309 
1310   size_t count = 0;
1311   for (const auto& node : types)
1312     {
1313       // filter data members (skipping things like comments)
1314       std::vector<xmlNodePtr> data_members;
1315       for (auto child : get_children(node))
1316         if (strcmp(from_libxml(child->name), "data-member") == 0)
1317           data_members.push_back(child);
1318       // look for identical duplicate data members - O(n^2)
1319       for (size_t i = 0; i < data_members.size(); ++i)
1320         {
1321           const xmlNodePtr i_node = data_members[i];
1322           bool duplicate = false;
1323           for (size_t j = 0; j < i; ++j)
1324             {
1325               const xmlNodePtr j_node = data_members[j];
1326               if (j_node != nullptr && equal_tree(i_node, j_node))
1327                 {
1328                   duplicate = true;
1329                   break;
1330                 }
1331             }
1332           if (duplicate)
1333             {
1334               std::cerr << "found duplicate data-member\n";
1335               if (eliminate) {
1336                 remove_element(i_node);
1337                 data_members[i] = nullptr;
1338               }
1339               ++count;
1340             }
1341         }
1342     }
1343   return count;
1344 }
1345 
1346 /// Eliminate non-conflicting / report conflicting duplicate definitions.
1347 ///
1348 /// This function can eliminate exact type duplicates and duplicates
1349 /// where there is at least one maximal definition. It can report the
1350 /// remaining, conflicting duplicate definitions.
1351 ///
1352 /// If a type has duplicate definitions in multiple namespace scopes or
1353 /// definitions with different effective names, these are considered as
1354 /// conflicting duplicate definitions and should not be reordered. This function
1355 /// reports how many such types it finds.
1356 ///
1357 /// @param eliminate whether to eliminate non-conflicting duplicates
1358 ///
1359 /// @param report whether to report conflicting duplicate definitions
1360 ///
1361 /// @param root the root XML element
1362 ///
1363 /// @return the number of conflicting duplicate definitions
handle_duplicate_types(bool eliminate,bool report,xmlNodePtr root)1364 size_t handle_duplicate_types(bool eliminate, bool report, xmlNodePtr root)
1365 {
1366   // map of type-id to pair of set of namespace scopes and vector of
1367   // xmlNodes
1368   std::unordered_map<
1369       std::string,
1370       std::pair<
1371           std::set<namespace_scope>,
1372           std::vector<xmlNodePtr>>> types;
1373   namespace_scope namespaces;
1374 
1375   // find all type occurrences
1376   std::function<void(xmlNodePtr)> dfs = [&](xmlNodePtr node) {
1377     if (node->type != XML_ELEMENT_NODE)
1378       return;
1379     const char* node_name = from_libxml(node->name);
1380     std::optional<std::string> namespace_name;
1381     if (strcmp(node_name, "namespace-decl") == 0)
1382       namespace_name = get_attribute(node, "name");
1383     if (namespace_name)
1384       namespaces.push_back(namespace_name.value());
1385     if (strcmp(node_name, "abi-corpus-group") == 0
1386         || strcmp(node_name, "abi-corpus") == 0
1387         || strcmp(node_name, "abi-instr") == 0
1388         || namespace_name)
1389       {
1390         for (auto child : get_children(node))
1391           dfs(child);
1392       }
1393     else
1394       {
1395         const auto id = get_attribute(node, "id");
1396         if (id)
1397           {
1398             auto& info = types[id.value()];
1399             info.first.insert(namespaces);
1400             info.second.push_back(node);
1401           }
1402       }
1403     if (namespace_name)
1404       namespaces.pop_back();
1405   };
1406   dfs(root);
1407 
1408   size_t conflicting_types = 0;
1409   for (const auto& [id, scopes_and_definitions] : types)
1410     {
1411       const auto& [scopes, definitions] = scopes_and_definitions;
1412 
1413       if (scopes.size() > 1)
1414         {
1415           if (report)
1416             std::cerr << "conflicting scopes found for type '" << id << "'\n";
1417           ++conflicting_types;
1418           continue;
1419         }
1420 
1421       const size_t count = definitions.size();
1422       if (count <= 1)
1423         continue;
1424 
1425       // Find a potentially maximal candidate by scanning through and
1426       // retaining the new definition if it's a supertree of the current
1427       // candidate.
1428       std::vector<bool> ok(count);
1429       size_t candidate = 0;
1430       ok[candidate] = true;
1431       for (size_t ix = 1; ix < count; ++ix)
1432         if (sub_tree(definitions[candidate], definitions[ix]))
1433           {
1434             candidate = ix;
1435             ok[candidate] = true;
1436           }
1437 
1438       // Verify the candidate is indeed maximal by scanning the
1439       // definitions not already known to be subtrees of it.
1440       bool bad = false;
1441       const auto& candidate_definition = definitions[candidate];
1442       const char* candidate_node_name = from_libxml(candidate_definition->name);
1443       const auto& candidate_effective_name =
1444           get_effective_name(candidate_definition);
1445       for (size_t ix = 0; ix < count; ++ix)
1446         {
1447           const auto& definition = definitions[ix];
1448           if (!ok[ix] && !sub_tree(definition, candidate_definition))
1449             {
1450               if (strcmp(from_libxml(definition->name), candidate_node_name) != 0
1451                   || get_effective_name(definition) != candidate_effective_name)
1452                 ++conflicting_types;
1453               bad = true;
1454               break;
1455             }
1456         }
1457 
1458       if (bad)
1459         {
1460           if (report)
1461             std::cerr << "unresolvable duplicate definitions found for type '"
1462                       << id << "'\n";
1463           continue;
1464         }
1465 
1466       if (eliminate)
1467         // Remove all but the maximal definition.
1468         for (size_t ix = 0; ix < count; ++ix)
1469           if (ix != candidate)
1470             remove_element(definitions[ix]);
1471     }
1472 
1473   return conflicting_types;
1474 }
1475 
1476 static const std::set<std::string> INSTR_VARIABLE_ATTRIBUTES = {
1477   "path",
1478   "comp-dir-path",
1479   "language",
1480 };
1481 
1482 /// Collect elements of abi-instr elements by namespace.
1483 ///
1484 /// Namespaces are not returned but are recursively traversed with the
1485 /// namespace stack being maintained. Other elements are associated with
1486 /// the current namespace.
1487 ///
1488 /// @param nodes the nodes to traverse
1489 ///
1490 /// @return child elements grouped by namespace scope
1491 static std::map<namespace_scope, std::vector<xmlNodePtr>>
get_children_by_namespace(const std::vector<xmlNodePtr> & nodes)1492 get_children_by_namespace(const std::vector<xmlNodePtr>& nodes)
1493 {
1494   std::map<namespace_scope, std::vector<xmlNodePtr>> result;
1495   namespace_scope scope;
1496 
1497   std::function<void(xmlNodePtr)> process = [&](xmlNodePtr node) {
1498     if (node->type != XML_ELEMENT_NODE)
1499       return;
1500     std::optional<std::string> namespace_name;
1501     const char* node_name = from_libxml(node->name);
1502     if (strcmp(node_name, "namespace-decl") == 0)
1503       namespace_name = get_attribute(node, "name");
1504     if (namespace_name)
1505       {
1506         scope.push_back(namespace_name.value());
1507         for (auto child : get_children(node))
1508           process(child);
1509         scope.pop_back();
1510       }
1511     else
1512       result[scope].push_back(node);
1513   };
1514 
1515   for (auto node : nodes)
1516     for (auto child : get_children(node))
1517       process(child);
1518   return result;
1519 }
1520 
1521 /// Sort namespaces, types and declarations.
1522 ///
1523 /// This loses annotations (XML comments) on namespace-decl elements.
1524 /// It would have been a fair amount of extra work to preserve them.
1525 ///
1526 /// @param root the XML root element
1527 static void
sort_namespaces_types_and_declarations(xmlNodePtr root)1528 sort_namespaces_types_and_declarations(xmlNodePtr root)
1529 {
1530   // There are (currently) 2 ABI formats we handle here.
1531   //
1532   // 1. An abi-corpus containing one or more abi-instr. In this case, we
1533   // move all namespaces, types and declarations to a replacement
1534   // abi-instr at the end of the abi-corpus. The existing abi-instr will
1535   // then be confirmed as empty and removed.
1536   //
1537   // 2. An abi-corpus-group containing one or more abi-corpus each
1538   // containing zero or more abi-instr (with at least one abi-instr
1539   // altogether). In this case the replacement abi-instr is created
1540   // within the first abi-corpus of the group.
1541   //
1542   // Anything else is left alone. For example, single abi-instr elements
1543   // are present in some libabigail test suite files.
1544 
1545   // We first need to identify where to place the new abi-instr and
1546   // collect all the abi-instr to process.
1547   xmlNodePtr where = nullptr;
1548   std::vector<xmlNodePtr> instrs;
1549 
1550   auto process_corpus = [&](xmlNodePtr corpus) {
1551     if (!where)
1552       where = corpus;
1553     for (auto instr : get_children(corpus))
1554       if (strcmp(from_libxml(instr->name), "abi-instr") == 0)
1555         instrs.push_back(instr);
1556   };
1557 
1558   const char* root_name = from_libxml(root->name);
1559   if (strcmp(root_name, "abi-corpus-group") == 0)
1560     {
1561       // Process all corpora in a corpus group together.
1562       for (auto corpus : get_children(root))
1563         if (strcmp(from_libxml(corpus->name), "abi-corpus") == 0)
1564           process_corpus(corpus);
1565     }
1566   else if (strcmp(root_name, "abi-corpus") == 0)
1567     {
1568       // We have a corpus to sort, just get its instrs.
1569       process_corpus(root);
1570     }
1571 
1572   if (instrs.empty())
1573     return;
1574 
1575   // Collect the attributes of all the instrs.
1576   std::map<std::string, std::set<std::string>> attributes;
1577   for (auto instr : instrs)
1578     for (auto p = instr->properties; p; p = p->next)
1579       {
1580         // This is horrible. There should be a better way of iterating.
1581         const char* attribute_name = from_libxml(p->name);
1582         const auto attribute_value = get_attribute(instr, attribute_name);
1583         assert(attribute_value);
1584         attributes[attribute_name].insert(attribute_value.value());
1585       }
1586 
1587   // Create and attach a replacement instr and populate its attributes.
1588   xmlNodePtr replacement =
1589       xmlAddChild(where, xmlNewNode(nullptr, to_libxml("abi-instr")));
1590   for (const auto& attribute : attributes)
1591     {
1592       const char* attribute_name = attribute.first.c_str();
1593       const auto& attribute_values = attribute.second;
1594       if (attribute_values.size() == 1)
1595         set_attribute(replacement, attribute_name, *attribute_values.begin());
1596       else if (INSTR_VARIABLE_ATTRIBUTES.count(attribute_name))
1597         set_attribute(replacement, attribute_name, "various");
1598       else
1599         {
1600           std::cerr << "unexpectedly variable abi-instr attribute '"
1601                     << attribute_name << "'\n";
1602           remove_node(replacement);
1603           return;
1604         }
1605     }
1606 
1607   // Order XML nodes by XML element names, effective names, mangled names and
1608   // type ids.
1609   struct Compare {
1610     int
1611     cmp(xmlNodePtr a, xmlNodePtr b) const
1612     {
1613       int result;
1614 
1615       // Compare XML element names.
1616       result = strcmp(from_libxml(a->name), from_libxml(b->name));
1617       if (result)
1618           return result;
1619 
1620       // Compare effective names.
1621       const auto a_effective_name = get_effective_name(a);
1622       const auto b_effective_name = get_effective_name(b);
1623 
1624       result = compare_optional(a_effective_name, b_effective_name);
1625       if (result)
1626         return result;
1627 
1628       // Compare declarations using mangled names.
1629       result = compare_attributes("mangled-name", a, b);
1630       if (result)
1631         return result;
1632 
1633       // Compare types using ids.
1634       return compare_attributes("id", a, b);
1635     }
1636 
1637     bool
1638     operator()(xmlNodePtr a, xmlNodePtr b) const
1639     {
1640       return cmp(a, b) < 0;
1641     }
1642   };
1643 
1644   // Collect the child elements of all the instrs, by namespace scope.
1645   auto scoped_children = get_children_by_namespace(instrs);
1646   for (auto& [scope, children] : scoped_children)
1647     // Sort the children, preserving order of duplicates.
1648     std::stable_sort(children.begin(), children.end(), Compare());
1649 
1650   // Create namespace elements on demand. The global namespace, with
1651   // empty scope, is just the replacement instr itself.
1652   std::map<namespace_scope, xmlNodePtr> namespace_elements{{{}, replacement}};
1653   std::function<xmlNodePtr(const namespace_scope&)> get_namespace_element =
1654       [&](const namespace_scope& scope) {
1655         auto insertion = namespace_elements.insert({scope, nullptr});
1656     if (insertion.second)
1657       {
1658         // Insertion succeeded, so the scope cannot be empty.
1659         namespace_scope truncated = scope;
1660         truncated.pop_back();
1661         xmlNodePtr parent = get_namespace_element(truncated);
1662         // We can now create an XML element in the right place.
1663         xmlNodePtr child = xmlNewNode(nullptr, to_libxml("namespace-decl"));
1664         set_attribute(child, "name", scope.back());
1665         xmlAddChild(parent, child);
1666         insertion.first->second = child;
1667       }
1668     return insertion.first->second;
1669   };
1670 
1671   // Move the children to the replacement instr or its subelements.
1672   for (const auto& [scope, elements] : scoped_children)
1673     {
1674       xmlNodePtr namespace_element = get_namespace_element(scope);
1675       for (auto element : elements)
1676         move_element(element, namespace_element);
1677     }
1678 
1679   // Check the original instrs are now all empty and remove them.
1680   for (auto instr : instrs)
1681     if (get_children_by_namespace({instr}).empty())
1682       remove_node(instr);
1683     else
1684       std::cerr << "original abi-instr has residual child elements\n";
1685 }
1686 
1687 static constexpr std::array<std::string_view, 2> SYMBOL_SECTION_SUFFICES = {
1688   "symbol_list",
1689   "whitelist",
1690 };
1691 
1692 /// Read symbols from a file.
1693 ///
1694 /// This aims to be compatible with the .ini format used by libabigail
1695 /// for suppression specifications and symbol lists. All symbol list
1696 /// sections in the given file are combined into a single set of
1697 /// symbols.
1698 ///
1699 /// @param filename the name of the file from which to read
1700 ///
1701 /// @return a set of symbols
1702 symbol_set
read_symbols(const char * filename)1703 read_symbols(const char* filename)
1704 {
1705   symbol_set symbols;
1706   std::ifstream file(filename);
1707   if (!file)
1708     {
1709       std::cerr << "error opening symbol file '" << filename << "'\n";
1710       exit(1);
1711     }
1712 
1713   bool in_symbol_section = false;
1714   std::string line;
1715   while (std::getline(file, line))
1716     {
1717       size_t start = 0;
1718       size_t limit = line.size();
1719       // Strip comments and leading / trailing whitespace.
1720       while (start < limit)
1721         {
1722           if (std::isspace(line[start]))
1723             ++start;
1724           else if (line[start] == '#')
1725             start = limit;
1726           else
1727             break;
1728         }
1729       while (start < limit)
1730         {
1731           if (std::isspace(line[limit - 1]))
1732             --limit;
1733           else
1734             break;
1735         }
1736       // Skip empty lines.
1737       if (start == limit)
1738         continue;
1739       // See if we are entering a symbol list section.
1740       if (line[start] == '[' && line[limit - 1] == ']')
1741         {
1742           std::string_view section(&line[start + 1], limit - start - 2);
1743           bool found = false;
1744           for (const auto& suffix : SYMBOL_SECTION_SUFFICES)
1745             if (section.size() >= suffix.size()
1746                 && section.substr(section.size() - suffix.size()) == suffix)
1747               {
1748                 found = true;
1749                 break;
1750               }
1751           in_symbol_section = found;
1752           continue;
1753         }
1754       // Add symbol.
1755       if (in_symbol_section)
1756         symbols.insert(std::string(&line[start], limit - start));
1757     }
1758   if (!file.eof())
1759     {
1760       std::cerr << "error reading symbol file '" << filename << "'\n";
1761       exit(1);
1762     }
1763   return symbols;
1764 }
1765 
1766 /// Get aliases from XML node.
1767 ///
1768 /// @param node the XML node to process
1769 ///
1770 /// @return an ordered set of aliases
1771 std::set<std::string>
get_aliases(xmlNodePtr node)1772 get_aliases(xmlNodePtr node)
1773 {
1774   std::set<std::string> aliases;
1775   const auto alias = get_attribute(node, "alias");
1776   if (alias)
1777     {
1778       std::istringstream is(alias.value());
1779       std::string item;
1780       while (std::getline(is, item, ','))
1781         aliases.insert(item);
1782     }
1783   return aliases;
1784 }
1785 
1786 /// Set aliases in XML node.
1787 ///
1788 /// @param node the XML node to process
1789 ///
1790 /// @param aliases an ordered set of aliases
1791 void
set_aliases(xmlNodePtr node,const std::set<std::string> & aliases)1792 set_aliases(xmlNodePtr node, const std::set<std::string>& aliases)
1793 {
1794   if (aliases.empty())
1795     {
1796       unset_attribute(node, "alias");
1797     }
1798   else
1799     {
1800       std::ostringstream os;
1801       bool first = true;
1802       for (const auto& alias : aliases)
1803         {
1804           if (first)
1805             first = false;
1806           else
1807             os << ',';
1808           os << alias;
1809         }
1810       set_attribute(node, "alias", os.str());
1811     }
1812 }
1813 
1814 /// Gather information about symbols and record alias <-> main mappings.
1815 ///
1816 /// @param symbol_map a map from elf-symbol-id to XML node
1817 ///
1818 /// @param alias_map a map from alias elf-symbol-id to main
1819 ///
1820 /// @param main_map a map from main elf-symbol-id to aliases
1821 ///
1822 /// @param node the XML node to process
1823 void
process_symbols(std::unordered_map<std::string,xmlNodePtr> & symbol_map,std::unordered_map<std::string,std::string> & alias_map,std::unordered_map<std::string,std::set<std::string>> & main_map,xmlNodePtr node)1824 process_symbols(
1825     std::unordered_map<std::string, xmlNodePtr>& symbol_map,
1826     std::unordered_map<std::string, std::string>& alias_map,
1827     std::unordered_map<std::string, std::set<std::string>>& main_map,
1828     xmlNodePtr node)
1829 {
1830   if (node->type != XML_ELEMENT_NODE)
1831     return;
1832   const char* node_name = from_libxml(node->name);
1833   if (strcmp(node_name, "abi-corpus-group") == 0
1834       || strcmp(node_name, "abi-corpus") == 0
1835       || strcmp(node_name, "elf-variable-symbols") == 0
1836       || strcmp(node_name, "elf-function-symbols") == 0)
1837     {
1838       // Process children.
1839       for (auto child : get_children(node))
1840         process_symbols(symbol_map, alias_map, main_map, child);
1841     }
1842   else if (strcmp(node_name, "elf-symbol") == 0)
1843     {
1844       const auto id = get_elf_symbol_id(node);
1845       if (!symbol_map.insert({id, node}).second)
1846         {
1847           std::cerr << "multiple symbols with id " << id << "\n";
1848           exit(1);
1849         }
1850       const auto aliases = get_aliases(node);
1851       for (const auto& alias : aliases)
1852         if (!alias_map.insert({alias, id}).second)
1853           {
1854             std::cerr << "multiple aliases with id " << alias << "\n";
1855             exit(1);
1856           }
1857       if (!aliases.empty())
1858         main_map.insert({id, aliases});
1859     }
1860 }
1861 
1862 /// Rewrite elf-symbol-id attributes following ELF symbol removal.
1863 ///
1864 /// @param mapping map from old to new elf-symbol-id, if any
1865 void
rewrite_symbols_in_declarations(const std::unordered_map<std::string,std::optional<std::string>> & mapping,xmlNodePtr node)1866 rewrite_symbols_in_declarations(
1867     const std::unordered_map<std::string, std::optional<std::string>>& mapping,
1868     xmlNodePtr node)
1869 {
1870   if (node->type != XML_ELEMENT_NODE)
1871     return;
1872 
1873   const char* node_name = from_libxml(node->name);
1874   if (strcmp(node_name, "var-decl") == 0
1875       || strcmp(node_name, "function-decl") == 0)
1876     {
1877       auto symbol = get_attribute(node, "elf-symbol-id");
1878       bool changed = false;
1879       while (symbol)
1880         {
1881           const auto it = mapping.find(symbol.value());
1882           if (it == mapping.end())
1883             break;
1884           symbol = it->second;
1885           changed = true;
1886         }
1887       if (changed)
1888         {
1889           if (symbol)
1890             set_attribute(node, "elf-symbol-id", symbol.value());
1891           else
1892             unset_attribute(node, "elf-symbol-id");
1893         }
1894     }
1895 
1896   for (xmlNodePtr child : get_children(node))
1897     rewrite_symbols_in_declarations(mapping, child);
1898 }
1899 
1900 /// Remove unlisted ELF symbols.
1901 ///
1902 /// @param symbols the set of symbols
1903 ///
1904 /// @param root the XML root element
1905 ///
1906 /// @return mapping from alias to main elf-symbol-id
1907 std::unordered_map<std::string, std::string>
filter_symbols(const std::optional<symbol_set> & symbols,xmlNodePtr root)1908     filter_symbols(const std::optional<symbol_set>& symbols, xmlNodePtr root)
1909 {
1910   // find symbols and record alias <-> main mappings
1911   std::unordered_map<std::string, xmlNodePtr> symbol_map;
1912   std::unordered_map<std::string, std::string> alias_map;
1913   std::unordered_map<std::string, std::set<std::string>> main_map;
1914   process_symbols(symbol_map, alias_map, main_map, root);
1915   // check that aliases and main symbols are disjoint
1916   for (const auto& [alias, main] : alias_map)
1917     if (alias_map.count(main))
1918       {
1919         std::cerr << "found main symbol and alias with id " << main << '\n';
1920         exit(1);
1921       }
1922 
1923   if (!symbols)
1924     return alias_map;
1925 
1926   // Track when an alias is promoted to a main symbol or a symbol is deleted as
1927   // these are the cases when we need update references to symbols in
1928   // declarations.
1929   std::unordered_map<std::string, std::optional<std::string>> mapping;
1930 
1931   // filter the symbols, preserving those listed
1932   for (const auto& [id, node] : symbol_map)
1933     {
1934       const auto name = get_attribute(node, "name");
1935       assert(name);
1936       if (symbols->count(name.value()))
1937         continue;
1938       remove_element(node);
1939 
1940       // The symbol has been removed, so remove its id from the alias <-> main
1941       // mappings, promoting another alias to main symbol if needed, and
1942       // updating XML alias attributes.
1943       //
1944       // There are 3 cases:
1945       //   a main symbol - with one or more aliases
1946       //   an alias - with a main symbol
1947       //   an unaliased symbol
1948       if (const auto main_it = main_map.find(id);
1949           main_it != main_map.end())
1950         {
1951           // A main symbol with one or more aliases.
1952           std::set<std::string> aliases;
1953           std::swap(aliases, main_it->second);
1954           main_map.erase(main_it);
1955           // the first alias will be the new main symbol
1956           const auto first_it = aliases.begin();
1957           assert(first_it != aliases.end());
1958           const auto first = *first_it;
1959           // remove first from the list of aliases and its link to id
1960           aliases.erase(first_it);
1961           alias_map.erase(first);
1962           if (!aliases.empty())
1963             {
1964               // update the XML attribute
1965               set_aliases(symbol_map[first], aliases);
1966               // update the maps
1967               for (const auto& alias : aliases)
1968                 alias_map[alias] = first;
1969               std::swap(aliases, main_map[first]);
1970             }
1971           // declarations referring to id must be repointed at first
1972           mapping[id] = {first};
1973         }
1974       else if (const auto alias_it = alias_map.find(id);
1975                alias_it != alias_map.end())
1976         {
1977           // An alias with a main symbol.
1978           const auto main = alias_it->second;
1979           auto& aliases = main_map[main];
1980           // remove id from the maps
1981           alias_map.erase(alias_it);
1982           aliases.erase(id);
1983           // update the XML attribute
1984           set_aliases(symbol_map[main], aliases);
1985           if (aliases.empty())
1986             // main hasn't changed but is no longer aliased
1987             main_map.erase(main);
1988         }
1989       else
1990         {
1991           // An unaliased symbol.
1992           //
1993           // declaration references to id must be removed
1994           mapping[id] = {};
1995         }
1996     }
1997 
1998   rewrite_symbols_in_declarations(mapping, root);
1999 
2000   return alias_map;
2001 }
2002 
2003 /// Main program.
2004 ///
2005 /// Read and write ABI XML, with optional processing passes.
2006 ///
2007 /// @param argc argument count
2008 ///
2009 /// @param argv argument vector
2010 ///
2011 /// @return exit status
2012 int
main(int argc,char * argv[])2013 main(int argc, char* argv[])
2014 {
2015   // Defaults.
2016   const char* opt_input = nullptr;
2017   const char* opt_output = nullptr;
2018   std::optional<symbol_set> opt_symbols;
2019   LocationInfo opt_locations = LocationInfo::COLUMN;
2020   int opt_indentation = 2;
2021   bool opt_normalise_anonymous = false;
2022   bool opt_reanonymise_anonymous = false;
2023   bool opt_discard_naming_typedefs = false;
2024   bool opt_remove_function_parameter_type_qualifiers = false;
2025   bool opt_prune_unreachable = false;
2026   bool opt_report_untyped = false;
2027   bool opt_abort_on_untyped = false;
2028   bool opt_clear_non_reachable = false;
2029   bool opt_eliminate_excess_members = false;
2030   bool opt_eliminate_duplicates = false;
2031   bool opt_report_conflicts = false;
2032   bool opt_sort = false;
2033   bool opt_drop_empty = false;
2034 
2035   // Experimental flags. These are not part of --all.
2036   //
2037   // TODO: Move out of experimental status when stable.
2038   bool opt_renumber_anonymous_types = false;
2039 
2040   // Process command line.
2041   auto usage = [&]() -> int {
2042     std::cerr << "usage: " << argv[0] << '\n'
2043               << "  [-i|--input file]\n"
2044               << "  [-o|--output file]\n"
2045               << "  [-S|--symbols file]\n"
2046               << "  [-L|--locations {column|line|file|none}]\n"
2047               << "  [-I|--indentation n]\n"
2048               << "  [-a|--all] (implies -n -r -t -f -p -u -b -x -e -c -s -d)\n"
2049               << "  [-n|--[no-]normalise-anonymous]\n"
2050               << "  [-r|--[no-]reanonymise-anonymous]\n"
2051               << "  [-t|--[no-]discard-naming-typedefs]\n"
2052               << "  [-f|--[no-]remove-function-parameter-type-qualifiers]\n"
2053               << "  [-p|--[no-]prune-unreachable]\n"
2054               << "  [-u|--[no-]report-untyped]\n"
2055               << "  [-U|--abort-on-untyped-symbols]\n"
2056               << "  [-b|--[no-]clear-non-reachable]\n"
2057               << "  [-x|--[no-]eliminate-excess-members\n"
2058               << "  [-e|--[no-]eliminate-duplicates]\n"
2059               << "  [-c|--[no-]report-conflicts]\n"
2060               << "  [-s|--[no-]sort]\n"
2061               << "  [-d|--[no-]drop-empty]\n"
2062               << "\nExperimental flags, not part of --all\n"
2063               << "  [-M|--[no-]renumber-anonymous-types]\n";
2064     return 1;
2065   };
2066   int opt_index = 1;
2067   auto get_arg = [&]() {
2068     if (opt_index < argc)
2069       return argv[opt_index++];
2070     exit(usage());
2071   };
2072   while (opt_index < argc)
2073     {
2074       const std::string arg = get_arg();
2075       if (arg == "-i" || arg == "--input")
2076         opt_input = get_arg();
2077       else if (arg == "-o" || arg == "--output")
2078         opt_output = get_arg();
2079       else if (arg == "-S" || arg == "--symbols")
2080         opt_symbols = read_symbols(get_arg());
2081       else if (arg == "-L" || arg == "--locations")
2082         {
2083           auto it = LOCATION_INFO_NAME.find(get_arg());
2084           if (it == LOCATION_INFO_NAME.end())
2085             exit(usage());
2086           opt_locations = it->second;
2087         }
2088       else if (arg == "-I" || arg == "--indentation")
2089         {
2090           std::istringstream is(get_arg());
2091           is >> std::noskipws >> opt_indentation;
2092           if (!is || !is.eof() || opt_indentation < 0)
2093             exit(usage());
2094         }
2095       else if (arg == "-a" || arg == "--all")
2096         opt_normalise_anonymous = opt_reanonymise_anonymous
2097                                 = opt_discard_naming_typedefs
2098                                 = opt_remove_function_parameter_type_qualifiers
2099                                 = opt_prune_unreachable
2100                                 = opt_report_untyped
2101                                 = opt_clear_non_reachable
2102                                 = opt_eliminate_excess_members
2103                                 = opt_eliminate_duplicates
2104                                 = opt_report_conflicts
2105                                 = opt_sort
2106                                 = opt_drop_empty
2107                                 = true;
2108       else if (arg == "-n" || arg == "--normalise-anonymous")
2109         opt_normalise_anonymous = true;
2110       else if (arg == "--no-normalise-anonymous")
2111         opt_normalise_anonymous = false;
2112       else if (arg == "-r" || arg == "--reanonymise-anonymous")
2113         opt_reanonymise_anonymous = true;
2114       else if (arg == "--no-reanonymise-anonymous")
2115         opt_reanonymise_anonymous = false;
2116       else if (arg == "-t" || arg == "--discard-naming-typedefs")
2117         opt_discard_naming_typedefs = true;
2118       else if (arg == "--no-discard-naming-typedefs")
2119         opt_discard_naming_typedefs = false;
2120       else if (arg == "-f" ||
2121                arg == "--remove-function-parameter-type-qualifiers")
2122         opt_remove_function_parameter_type_qualifiers = true;
2123       else if (arg == "--no-remove-function-parameter-type-qualifiers")
2124         opt_remove_function_parameter_type_qualifiers = false;
2125       else if (arg == "-p" || arg == "--prune-unreachable")
2126         opt_prune_unreachable = true;
2127       else if (arg == "--no-prune-unreachable")
2128         opt_prune_unreachable = false;
2129       else if (arg == "-u" || arg == "--report-untyped")
2130         opt_report_untyped = true;
2131       else if (arg == "--no-report-untyped")
2132         opt_report_untyped = false;
2133       else if (arg == "-U" || arg == "--abort-on-untyped-symbols")
2134         opt_abort_on_untyped = true;
2135       else if (arg == "-b" || arg == "--clear-non-reachable")
2136         opt_clear_non_reachable = true;
2137       else if (arg == "--no-clear-non-reachable")
2138         opt_clear_non_reachable = false;
2139       else if (arg == "-x" || arg == "--eliminate-excess-members")
2140         opt_eliminate_excess_members = true;
2141       else if (arg == "--no-eliminate-excess-members")
2142         opt_eliminate_excess_members = false;
2143       else if (arg == "-e" || arg == "--eliminate-duplicates")
2144         opt_eliminate_duplicates = true;
2145       else if (arg == "--no-eliminate-duplicates")
2146         opt_eliminate_duplicates = false;
2147       else if (arg == "-c" || arg == "--report-conflicts")
2148         opt_report_conflicts = true;
2149       else if (arg == "--no-report-conflicts")
2150         opt_report_conflicts = false;
2151       else if (arg == "-s" || arg == "--sort")
2152         opt_sort = true;
2153       else if (arg == "--no-sort")
2154         opt_sort = false;
2155       else if (arg == "-d" || arg == "--drop-empty")
2156         opt_drop_empty = true;
2157       else if (arg == "--no-drop-empty")
2158         opt_drop_empty = false;
2159       else if (arg == "-M" || arg == "--renumber-anonymous-types")
2160         opt_renumber_anonymous_types = true;
2161       else if (arg == "--no-renumber-anonymous-types")
2162         opt_renumber_anonymous_types = false;
2163       else
2164         exit(usage());
2165     }
2166 
2167   // Open input for reading.
2168   int in_fd = STDIN_FILENO;
2169   if (opt_input)
2170     {
2171       in_fd = open(opt_input, O_RDONLY);
2172       if (in_fd < 0)
2173         {
2174           std::cerr << "could not open '" << opt_input << "' for reading: "
2175                     << strerror(errno) << '\n';
2176           exit(1);
2177         }
2178     }
2179 
2180   // Read the XML.
2181   xmlParserCtxtPtr parser_context = xmlNewParserCtxt();
2182   xmlDocPtr document
2183       = xmlCtxtReadFd(parser_context, in_fd, nullptr, nullptr, 0);
2184   if (!document)
2185     {
2186       std::cerr << "failed to parse input as XML\n";
2187       exit(1);
2188     }
2189   xmlFreeParserCtxt(parser_context);
2190   close(in_fd);
2191 
2192   // Get the root element.
2193   xmlNodePtr root = xmlDocGetRootElement(document);
2194   if (!root)
2195     {
2196       std::cerr << "XML document has no root element\n";
2197       exit(1);
2198     }
2199 
2200   // Strip text nodes to simplify other operations.
2201   strip_text(root);
2202 
2203   // Get alias -> main mapping and remove unlisted symbols.
2204   const auto alias_map = filter_symbols(opt_symbols, root);
2205 
2206   // Record type ids which correspond to anonymous types.
2207   // Renumber recorded type ids using information about the type.
2208   // Replace recorded type ids by renumbered ones.
2209   if (opt_renumber_anonymous_types)
2210     {
2211       std::unordered_map<std::string, xmlNodePtr> to_renumber;
2212       std::unordered_set<size_t> used_hashes;
2213       record_ids_to_renumber(root, to_renumber, used_hashes);
2214 
2215       std::unordered_map<std::string, std::string> type_id_map;
2216       for (const auto& [type_id, node] : to_renumber)
2217         resolve_ids_to_renumber(type_id, to_renumber, used_hashes, type_id_map);
2218 
2219       renumber_type_ids(root, type_id_map);
2220     }
2221 
2222   // Normalise anonymous type names.
2223   // Reanonymise anonymous types.
2224   // Discard naming typedef backlinks.
2225   if (opt_normalise_anonymous || opt_reanonymise_anonymous
2226       || opt_discard_naming_typedefs)
2227     handle_anonymous_types(opt_normalise_anonymous, opt_reanonymise_anonymous,
2228                            opt_discard_naming_typedefs, root);
2229 
2230   // Remove useless top-level qualifiers on function parameter and return
2231   // types.
2232   if (opt_remove_function_parameter_type_qualifiers)
2233     {
2234       std::unordered_map<std::string, std::string> qualifier_id_to_type_id;
2235       build_qualifier_id_to_type_id_map(root, qualifier_id_to_type_id);
2236       resolve_qualifier_chains(qualifier_id_to_type_id);
2237       remove_function_parameter_type_qualifiers(root, qualifier_id_to_type_id);
2238     }
2239 
2240   // Prune unreachable elements and/or report untyped symbols.
2241   size_t untyped_symbols = 0;
2242   if (opt_prune_unreachable || opt_report_untyped || opt_abort_on_untyped)
2243     untyped_symbols += handle_unreachable(
2244         opt_prune_unreachable, opt_report_untyped, alias_map, root);
2245   if (opt_abort_on_untyped && untyped_symbols)
2246     {
2247       std::cerr << "found " << untyped_symbols << " untyped symbols\n";
2248       exit(1);
2249     }
2250 
2251   // Limit location information.
2252   if (opt_locations > LocationInfo::COLUMN)
2253     limit_locations(opt_locations, root);
2254 
2255   // Clear unwanted non-reachable attributes.
2256   if (opt_clear_non_reachable)
2257     clear_non_reachable(root);
2258 
2259   // Handle excess data members.
2260   handle_excess_members(opt_eliminate_excess_members, root);
2261 
2262   // Eliminate complete duplicates and extra fragments of types.
2263   // Report conflicting duplicate defintions.
2264   // Record whether there are conflicting duplicate definitions.
2265   size_t conflicting_types = 0;
2266   if (opt_eliminate_duplicates || opt_report_conflicts || opt_sort)
2267     conflicting_types += handle_duplicate_types(
2268         opt_eliminate_duplicates, opt_report_conflicts, root);
2269 
2270   // Sort namespaces, types and declarations.
2271   if (opt_sort)
2272     {
2273       if (conflicting_types)
2274         std::cerr << "found type definition conflicts, skipping sort\n";
2275       else
2276         sort_namespaces_types_and_declarations(root);
2277     }
2278 
2279   // Drop empty subelements.
2280   if (opt_drop_empty)
2281     drop_empty(root);
2282 
2283   // Reformat root element for human consumption.
2284   format_xml(std::string(opt_indentation, ' '), std::string(), root);
2285 
2286   // Open output for writing.
2287   int out_fd = STDOUT_FILENO;
2288   if (opt_output)
2289     {
2290       out_fd = open(opt_output, O_CREAT | O_TRUNC | O_WRONLY,
2291                     S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
2292       if (out_fd < 0)
2293         {
2294           std::cerr << "could not open '" << opt_output << "' for writing: "
2295                     << strerror(errno) << '\n';
2296           exit(1);
2297         }
2298     }
2299 
2300   // Write the XML.
2301   //
2302   // First to memory, as we need to do a little post-processing.
2303   xmlChar* out_data;
2304   int out_size;
2305   xmlDocDumpMemory(document, &out_data, &out_size);
2306   // Remove the XML declaration as it currently upsets abidiff.
2307   xmlChar* out_limit = out_data + out_size;
2308   while (out_data < out_limit && *out_data != '\n')
2309     ++out_data;
2310   if (out_data < out_limit)
2311     ++out_data;
2312   // Adjust quotes to match abidw.
2313   adjust_quotes(out_data, out_limit);
2314   // And now to a file.
2315   size_t count = out_limit - out_data;
2316   if (write(out_fd, out_data, count) != count)
2317     {
2318       std::cerr << "could not write output: " << strerror(errno) << '\n';
2319       exit(1);
2320     }
2321   if (close(out_fd) < 0)
2322     {
2323       std::cerr << "could not close output: " << strerror(errno) << '\n';
2324       exit(1);
2325     }
2326 
2327   // Free libxml document.
2328   xmlFreeDoc(document);
2329   return 0;
2330 }
2331