1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- Mode: C++ -*-
3 //
4 // Copyright (C) 2021 Google, Inc.
5 //
6 // Author: Giuliano Procida
7
8 /// @file
9 ///
10 /// This file contains ABI XML manipulation routines and a main driver.
11 ///
12 /// The libxml Tree API is used. The XPath API is not used as it proved
13 /// to be many times slower than direct traversal but only slightly more
14 /// convenient.
15
16 #include <fcntl.h>
17 #include <unistd.h>
18
19 #include <algorithm>
20 #include <array>
21 #include <cassert>
22 #include <cctype>
23 #include <cstring>
24 #include <fstream>
25 #include <functional>
26 #include <ios>
27 #include <iostream>
28 #include <map>
29 #include <optional>
30 #include <set>
31 #include <sstream>
32 #include <string>
33 #include <unordered_map>
34 #include <unordered_set>
35 #include <vector>
36
37 #include <libxml/globals.h>
38 #include <libxml/parser.h>
39 #include <libxml/tree.h>
40
/// A namespace scope, expressed as a sequence of namespace names.
using namespace_scope = std::vector<std::string>;

/// A set of symbol names.
using symbol_set = std::unordered_set<std::string>;

/// How much source location information to keep.
///
/// The enumerators are ordered from the most detailed level to the
/// least detailed one; code comparing levels relies on this ordering.
enum struct LocationInfo { COLUMN, LINE, FILE, NONE };

/// Lookup table from the textual name of a location information level
/// to the corresponding LocationInfo value.
static const std::map<std::string, LocationInfo> LOCATION_INFO_NAME{
  {"none", LocationInfo::NONE},
  {"file", LocationInfo::FILE},
  {"line", LocationInfo::LINE},
  {"column", LocationInfo::COLUMN},
};

/// Lookup table from the XML element name of a type that can be
/// anonymous to the canonical name used for anonymous types of that
/// kind.
static const std::map<std::string, std::string> NAMED_TYPES{
  {"class-decl", "__anonymous_struct__"},
  {"enum-decl", "__anonymous_enum__"},
  {"union-decl", "__anonymous_union__"},
};
62
63
64 /// Cast a C string to a libxml string.
65 ///
66 /// @param str the C string (pointer)
67 ///
68 /// @return the same thing, as a type compatible with the libxml API
69 static const xmlChar*
to_libxml(const char * str)70 to_libxml(const char* str)
71 {
72 return reinterpret_cast<const xmlChar*>(str);
73 }
74
75 /// Cast a libxml string to C string.
76 ///
77 /// @param str the libxml string (pointer)
78 ///
79 /// @return the same thing, as a type compatible with the C library API
80 static const char*
from_libxml(const xmlChar * str)81 from_libxml(const xmlChar* str)
82 {
83 return reinterpret_cast<const char*>(str);
84 }
85
86 /// Remove a node from its document and free its storage.
87 ///
88 /// @param node the node to remove
89 static void
remove_node(xmlNodePtr node)90 remove_node(xmlNodePtr node)
91 {
92 xmlUnlinkNode(node);
93 xmlFreeNode(node);
94 }
95
96 /// Remove an XML element and any immediately preceding comment.
97 ///
98 /// @param node the element to remove
99 static void
remove_element(xmlNodePtr node)100 remove_element(xmlNodePtr node)
101 {
102 xmlNodePtr previous_node = node->prev;
103 if (previous_node && previous_node->type == XML_COMMENT_NODE)
104 remove_node(previous_node);
105 remove_node(node);
106 }
107
108 /// Move a node to an element.
109 ///
110 /// @param node the node to move
111 ///
112 /// @param destination the destination element
113 static void
move_node(xmlNodePtr node,xmlNodePtr destination)114 move_node(xmlNodePtr node, xmlNodePtr destination)
115 {
116 xmlUnlinkNode(node);
117 xmlAddChild(destination, node);
118 }
119
120 /// Move an XML element and any immediately preceding comment to another
121 /// element.
122 ///
123 /// @param node the element to remove
124 ///
125 /// @param destination the destination element
126 static void
move_element(xmlNodePtr node,xmlNodePtr destination)127 move_element(xmlNodePtr node, xmlNodePtr destination)
128 {
129 xmlNodePtr previous_node = node->prev;
130 if (previous_node && previous_node->type == XML_COMMENT_NODE)
131 move_node(previous_node, destination);
132 move_node(node, destination);
133 }
134
135 /// Get child nodes of given node.
136 ///
137 /// @param node the node whose children to fetch
138 ///
139 /// @return a vector of child nodes
140 static std::vector<xmlNodePtr>
get_children(xmlNodePtr node)141 get_children(xmlNodePtr node)
142 {
143 std::vector<xmlNodePtr> result;
144 for (xmlNodePtr child = node->children; child; child = child->next)
145 result.push_back(child);
146 return result;
147 }
148
149 /// Fetch an attribute from a node.
150 ///
151 /// @param node the node
152 ///
153 /// @param name the attribute name
154 ///
155 /// @return the attribute value, if present
156 static std::optional<std::string>
get_attribute(xmlNodePtr node,const char * name)157 get_attribute(xmlNodePtr node, const char* name)
158 {
159 std::optional<std::string> result;
160 xmlChar* attribute = xmlGetProp(node, to_libxml(name));
161 if (attribute)
162 {
163 result = from_libxml(attribute);
164 xmlFree(attribute);
165 }
166 return result;
167 }
168
169 /// Set an attribute value.
170 ///
171 /// @param node the node
172 ///
173 /// @param name the attribute name
174 ///
175 /// @param value the attribute value
176 static void
set_attribute(xmlNodePtr node,const char * name,const std::string & value)177 set_attribute(xmlNodePtr node, const char* name,
178 const std::string& value)
179 {
180 xmlSetProp(node, to_libxml(name), to_libxml(value.c_str()));
181 }
182
183 /// Unset an attribute value.
184 ///
185 /// @param node the node
186 ///
187 /// @param name the attribute name
188 static void
unset_attribute(xmlNodePtr node,const char * name)189 unset_attribute(xmlNodePtr node, const char* name)
190 {
191 xmlUnsetProp(node, to_libxml(name));
192 }
193
194 /// Remove text nodes, recursively.
195 ///
196 /// This simplifies subsequent analysis and manipulation. Removing and
197 /// moving elements will destroy formatting anyway. The only remaining
198 /// node types should be elements and comments.
199 ///
200 /// @param node the node to process
201 static void
strip_text(xmlNodePtr node)202 strip_text(xmlNodePtr node)
203 {
204 if (node->type == XML_TEXT_NODE)
205 remove_node(node);
206 else if (node->type == XML_ELEMENT_NODE)
207 for (xmlNodePtr child : get_children(node))
208 strip_text(child);
209 }
210
211 /// Add text before / after a node.
212 ///
213 /// @param node the node
214 ///
215 /// @param after whether the next should go after
216 ///
217 /// @param text the text
218 static void
add_text(xmlNodePtr node,bool after,const std::string & text)219 add_text(xmlNodePtr node, bool after, const std::string& text)
220 {
221 xmlNodePtr text_node = xmlNewTextLen(to_libxml(text.data()), text.size());
222 if (after)
223 xmlAddNextSibling(node, text_node);
224 else
225 xmlAddPrevSibling(node, text_node);
226 }
227
228 /// Format an XML element by adding internal indentation and newlines.
229 ///
230 /// This makes the XML readable.
231 ///
232 /// @param indentation what to add to the line indentation prefix
233 ///
234 /// @param prefix the current line indentation prefix
235 ///
236 /// @param node the node to format
237 static void
format_xml(const std::string & indentation,std::string prefix,xmlNodePtr node)238 format_xml(const std::string& indentation, std::string prefix, xmlNodePtr node)
239 {
240 std::vector<xmlNodePtr> children = get_children(node);
241 if (children.empty())
242 return;
243
244 // The ordering of operations here is incidental. The outcomes we want
245 // are: 1. an extra newline after the opening tag and indentation of
246 // the closing tag to match, and 2. indentation and newline for each
247 // child.
248 add_text(children[0], false, "\n");
249 add_text(children[children.size() - 1], true, prefix);
250 prefix += indentation;
251 for (xmlNodePtr child : children)
252 {
253 add_text(child, false, prefix);
254 format_xml(indentation, prefix, child);
255 add_text(child, true, "\n");
256 }
257 }
258
259 /// Rewrite attributes using single quotes.
260 ///
261 /// libxml uses double quotes but libabigail uses single quotes.
262 ///
263 /// Note that libabigail does not emit attributes *containing* single
264 /// quotes and if it did it would escape them as " which libxml
265 /// would in turn preserve. However, the code here will handle all forms
266 /// of quotes, conservatively.
267 ///
268 /// Annotation comments can contain single quote characters so just
269 /// checking for any single quotes at all is insufficiently precise.
270 ///
271 /// @param start a pointer to the start of the XML text
272 ///
273 /// @param limit a pointer to just past the end of the XML text
274 static void
adjust_quotes(xmlChar * start,xmlChar * limit)275 adjust_quotes(xmlChar* start, xmlChar* limit)
276 {
277 const std::string open{"<!--"};
278 const std::string close{"-->"};
279 while (start < limit)
280 {
281 // Look for a '<'
282 start = std::find(start, limit, '<');
283 if (start == limit)
284 break;
285 if (start + open.size() < limit
286 && std::equal(open.begin(), open.end(), start))
287 {
288 // Have a comment, skip to the end.
289 start += open.size();
290 xmlChar* end = std::search(start, limit, close.begin(), close.end());
291 if (end == limit)
292 break;
293 start = end + close.size();
294 }
295 else
296 {
297 // Have some tag, search for the end.
298 start += 1;
299 xmlChar* end = std::find(start, limit, '>');
300 if (end == limit)
301 break;
302 // In general, inside a tag we could find either ' or " being
303 // used to quote attributes and the other quote character
304 // being used as part of the attribute data. However, libxml's
305 // xmlDocDump* functions use " to quote attributes and it's
306 // safe to substitute this quote character with ' so long as '
307 // does not appear within the attribute data.
308 if (std::find(start, end, '\'') == end)
309 for (xmlChar* c = start; c < end; ++c)
310 if (*c == '"')
311 *c = '\'';
312 start = end + 1;
313 }
314 }
315 }
316
/// Names of the XML elements that drop_empty removes when they have no
/// child nodes.
static const std::set<std::string> DROP_IF_EMPTY{
  "abi-corpus",
  "abi-corpus-group",
  "abi-instr",
  "elf-function-symbols",
  "elf-variable-symbols",
  "namespace-decl",
};
325
326 /// Drop empty elements, if safe to do so, recursively.
327 ///
328 /// @param node the element to process
329 static void
drop_empty(xmlNodePtr node)330 drop_empty(xmlNodePtr node)
331 {
332 if (node->type != XML_ELEMENT_NODE)
333 return;
334 for (xmlNodePtr child : get_children(node))
335 drop_empty(child);
336 // Do not drop the root element, even if empty.
337 if (node->parent->type == XML_DOCUMENT_NODE)
338 return;
339 if (!node->children && DROP_IF_EMPTY.count(from_libxml(node->name)))
340 remove_element(node);
341 }
342
343 /// Get ELF symbol id.
344 ///
345 /// This is not an explicit attribute. It takes one of these forms:
346 ///
347 /// * name (if symbol is not versioned)
348 /// * name@version (if symbol is versioned but not the default version)
349 /// * name@@version (if symbol is versioned and the default version)
350 ///
351 /// @param node the elf-symbol element
352 ///
353 /// @return the ELF symbol id
354 static std::string
get_elf_symbol_id(xmlNodePtr node)355 get_elf_symbol_id(xmlNodePtr node)
356 {
357 const auto name = get_attribute(node, "name");
358 assert(name);
359 std::string result = name.value();
360 const auto version = get_attribute(node, "version");
361 if (version)
362 {
363 result += '@';
364 const auto is_default = get_attribute(node, "is-default-version");
365 if (is_default && is_default.value() == "yes")
366 result += '@';
367 result += version.value();
368 }
369 return result;
370 }
371
/// Names of the XML elements whose location attributes are subject to
/// trimming by limit_locations.
static const std::set<std::string> HAS_LOCATION{
  "var-decl",
  "union-decl",
  "typedef-decl",
  "parameter",
  "function-decl",
  "enum-decl",
  "class-decl",
};
381
382 /// Limit location information.
383 ///
384 /// @param location_info the level of location information to retain
385 ///
386 /// @param node the element to process
387 static void
limit_locations(LocationInfo location_info,xmlNodePtr node)388 limit_locations(LocationInfo location_info, xmlNodePtr node)
389 {
390 if (node->type != XML_ELEMENT_NODE)
391 return;
392 if (HAS_LOCATION.count(from_libxml(node->name)))
393 {
394 if (location_info > LocationInfo::COLUMN)
395 {
396 unset_attribute(node, "column");
397 if (location_info > LocationInfo::LINE)
398 {
399 unset_attribute(node, "line");
400 if (location_info > LocationInfo::FILE)
401 unset_attribute(node, "filepath");
402 }
403 }
404 }
405 for (xmlNodePtr child : get_children(node))
406 limit_locations(location_info, child);
407 }
408
409 /// Handle unreachable elements.
410 ///
411 /// Reachability is defined to be union of contains, containing and
412 /// refers-to relationships for types, declarations and symbols. The
413 /// roots for reachability are the ELF elements in the ABI.
414 ///
415 /// The subrange element requires special treatment. It has a useless
416 /// type id, but it is not a type and its type id aliases with that of
417 /// all other subranges of the same length. So don't treat it as a type.
418 ///
419 /// @param prune whether to prune unreachable elements
420 ///
421 /// @param report whether to report untyped symbols
422 ///
423 /// @param root the XML root element
424 ///
425 /// @return the number of untyped symbols
426 static size_t
handle_unreachable(bool prune,bool report,xmlNodePtr root)427 handle_unreachable(bool prune, bool report, xmlNodePtr root)
428 {
429 // ELF symbol ids.
430 std::set<std::string> elf_symbol_ids;
431
432 // Simple way of allowing two kinds of nodes: false=>type,
433 // true=>symbol.
434 using vertex_t = std::pair<bool, std::string>;
435
436 // Graph vertices.
437 std::set<vertex_t> vertices;
438 // Graph edges.
439 std::map<vertex_t, std::set<vertex_t>> edges;
440
441 // Keep track of type / symbol nesting so we can identify contains,
442 // containing and refers-to relationships.
443 std::vector<vertex_t> stack;
444
445 // Process an XML node, adding a vertex and possibly some edges.
446 std::function<void(xmlNodePtr)> process_node = [&](xmlNodePtr node) {
447 // We only care about elements and not comments, at this stage.
448 if (node->type != XML_ELEMENT_NODE)
449 return;
450
451 const char* node_name = from_libxml(node->name);
452
453 // Is this an ELF symbol?
454 if (strcmp(node_name, "elf-symbol") == 0)
455 {
456 elf_symbol_ids.insert(get_elf_symbol_id(node));
457 // Early return is safe, but not necessary.
458 return;
459 }
460
461 // Is this a type? Note that the same id may appear multiple times.
462 const auto id = strcmp(node_name, "subrange") != 0
463 ? get_attribute(node, "id")
464 : std::optional<std::string>();
465 if (id)
466 {
467 vertex_t type_vertex{false, id.value()};
468 vertices.insert(type_vertex);
469 const auto naming_typedef_id = get_attribute(node, "naming-typedef-id");
470 if (naming_typedef_id)
471 {
472 // This is an odd one, there can be a backwards link from an
473 // anonymous type to a typedef that refers to it. The -t
474 // option will drop these, but if they are still present, we
475 // should model the link to avoid the risk of dangling
476 // references.
477 vertex_t naming_typedef_vertex{false, naming_typedef_id.value()};
478 edges[type_vertex].insert(naming_typedef_vertex);
479 }
480 if (!stack.empty())
481 {
482 // Parent<->child dependencies; record dependencies both
483 // ways to avoid holes in XML types and declarations.
484 const auto& parent = stack.back();
485 edges[parent].insert(type_vertex);
486 edges[type_vertex].insert(parent);
487 }
488 // Record the type.
489 stack.push_back(type_vertex);
490 }
491
492 // Is this a (declaration expected to be linked to a) symbol?
493 const auto symbol = get_attribute(node, "elf-symbol-id");
494 if (symbol)
495 {
496 vertex_t symbol_vertex{true, symbol.value()};
497 vertices.insert(symbol_vertex);
498 if (!stack.empty())
499 {
500 // Parent<->child dependencies; record dependencies both
501 // ways to avoid making holes in XML types and declarations.
502 //
503 // Symbols exist outside of the type hierarchy, so choosing
504 // to make them depend on a containing type scope and vice
505 // versa is conservative and probably not necessary.
506 const auto& parent = stack.back();
507 edges[parent].insert(symbol_vertex);
508 edges[symbol_vertex].insert(parent);
509 }
510 // Record the symbol.
511 stack.push_back(symbol_vertex);
512 // In practice there will be at most one symbol on the stack; we could
513 // verify this here, but it wouldn't achieve anything.
514 }
515
516 // Being both would make the stack ordering ambiguous.
517 if (id && symbol)
518 {
519 std::cerr << "cannot handle element which is both type and symbol\n";
520 exit(1);
521 }
522
523 // Is there a reference to another type?
524 const auto type_id = get_attribute(node, "type-id");
525 if (type_id && !stack.empty())
526 {
527 // The enclosing type or symbol refers to another type.
528 const auto& parent = stack.back();
529 vertex_t type_id_vertex{false, type_id.value()};
530 edges[parent].insert(type_id_vertex);
531 }
532
533 // Process recursively.
534 for (auto child : get_children(node))
535 process_node(child);
536
537 // Restore the stack.
538 if (symbol)
539 stack.pop_back();
540 if (id)
541 stack.pop_back();
542 };
543
544 // Traverse the whole root element and build a graph.
545 process_node(root);
546
547 // Simple DFS.
548 std::set<vertex_t> seen;
549 std::function<void(vertex_t)> dfs = [&](vertex_t vertex) {
550 if (!seen.insert(vertex).second)
551 return;
552 auto it = edges.find(vertex);
553 if (it != edges.end())
554 for (auto to : it->second)
555 dfs(to);
556 };
557
558 // Count of how many symbols are untyped.
559 size_t untyped = 0;
560
561 // Traverse the graph, starting from the ELF symbols.
562 for (const auto& symbol_id : elf_symbol_ids)
563 {
564 vertex_t symbol_vertex{true, symbol_id};
565 if (vertices.count(symbol_vertex))
566 {
567 dfs(symbol_vertex);
568 }
569 else
570 {
571 if (report)
572 std::cerr << "no declaration found for ELF symbol with id "
573 << symbol_id << '\n';
574 ++untyped;
575 }
576 }
577
578 // This is a DFS with early stopping.
579 std::function<void(xmlNodePtr)> remove_unseen = [&](xmlNodePtr node) {
580 if (node->type != XML_ELEMENT_NODE)
581 return;
582
583 const char* node_name = from_libxml(node->name);
584
585 // Return if we know that this is a type to keep or drop in its
586 // entirety.
587 const auto id = strcmp(node_name, "subrange") != 0
588 ? get_attribute(node, "id")
589 : std::optional<std::string>();
590 if (id)
591 {
592 if (!seen.count(vertex_t{false, id.value()}))
593 remove_element(node);
594 return;
595 }
596
597 // Return if we know that this is a declaration to keep or drop in
598 // its entirety. Note that var-decl and function-decl are the only
599 // elements that can have an elf-symbol-id attribute.
600 if (strcmp(node_name, "var-decl") == 0
601 || strcmp(node_name, "function-decl") == 0)
602 {
603 const auto symbol = get_attribute(node, "elf-symbol-id");
604 if (!(symbol && seen.count(vertex_t{true, symbol.value()})))
605 remove_element(node);
606 return;
607 }
608
609 // Otherwise, this is not a type, declaration or part thereof, so
610 // process child elements.
611 for (auto child : get_children(node))
612 remove_unseen(child);
613 };
614
615 if (prune)
616 // Traverse the XML, removing unseen elements.
617 remove_unseen(root);
618
619 return untyped;
620 }
621
622 /// Tidy anonymous types in various ways.
623 ///
624 /// 1. Normalise anonymous type names by removing the numerical suffix.
625 ///
626 /// Anonymous type names take the form __anonymous_foo__N where foo is
627 /// one of enum, struct or union and N is an optional numerical suffix.
628 /// The suffices are senstive to processing order and do not convey
629 /// useful ABI information. They can cause spurious harmless diffs and
630 /// make XML diffing and rebasing harder.
631 ///
632 /// It's best to remove the suffix.
633 ///
634 /// 2. Reanonymise anonymous types that have been given names.
635 ///
636 /// A recent change to abidw changed its behaviour for any anonymous
637 /// type that has a naming typedef. In addition to linking the typedef
638 /// and type in both directions, the code now gives (some) anonymous
639 /// types the same name as the typedef. This misrepresents the original
640 /// types.
641 ///
642 /// Such types should be anonymous.
643 ///
644 /// 3. Discard naming typedef backlinks.
645 ///
646 /// The attribute naming-typedef-id is a backwards link from an
647 /// anonymous type to the typedef that refers to it. It is ignored by
648 /// abidiff.
649 ///
650 /// Unfortunately, libabigail sometimes conflates multiple anonymous
651 /// types that have naming typedefs and only one of the typedefs can
652 /// "win". ABI XML is thus sensitive to processing order and can also
653 /// end up containing definitions of an anonymous type with differing
654 /// naming-typedef-id attributes.
655 ///
656 /// It's best to just drop the attribute.
657 ///
658 /// @param node the XML node to process
659 static void
handle_anonymous_types(bool normalise,bool reanonymise,bool discard_naming,xmlNodePtr node)660 handle_anonymous_types(bool normalise, bool reanonymise, bool discard_naming,
661 xmlNodePtr node)
662 {
663 if (node->type != XML_ELEMENT_NODE)
664 return;
665
666 const auto it = NAMED_TYPES.find(from_libxml(node->name));
667 if (it != NAMED_TYPES.end())
668 {
669 const auto& anon = it->second;
670 const auto name_attribute = get_attribute(node, "name");
671 const auto& name =
672 name_attribute ? name_attribute.value() : std::string();
673 const auto anon_attr = get_attribute(node, "is-anonymous");
674 const bool is_anon = anon_attr && anon_attr.value() == "yes";
675 const auto naming_attribute = get_attribute(node, "naming-typedef-id");
676 if (normalise && is_anon && name != anon) {
677 // __anonymous_foo__123 -> __anonymous_foo__
678 set_attribute(node, "name", anon);
679 }
680 if (reanonymise && !is_anon && naming_attribute) {
681 // bar with naming typedef -> __anonymous_foo__
682 set_attribute(node, "is-anonymous", "yes");
683 set_attribute(node, "name", anon);
684 }
685 if (discard_naming && naming_attribute)
686 unset_attribute(node, "naming-typedef-id");
687 }
688
689 for (auto child : get_children(node))
690 handle_anonymous_types(normalise, reanonymise, discard_naming, child);
691 }
692
693 /// Remove attributes emitted by abidw --load-all-types.
694 ///
695 /// With this invocation and if any user-defined types are deemed
696 /// unreachable, libabigail will output a tracking-non-reachable-types
697 /// attribute on top-level elements and a is-non-reachable attribute on
698 /// each such type element.
699 ///
700 /// abitidy has its own graph-theoretic notion of reachability and these
701 /// attributes have no ABI relevance.
702 ///
703 /// It's best to just drop them.
704 ///
705 /// @param node the XML node to process
706 void
clear_non_reachable(xmlNodePtr node)707 clear_non_reachable(xmlNodePtr node)
708 {
709 if (node->type != XML_ELEMENT_NODE)
710 return;
711
712 const char* node_name = from_libxml(node->name);
713
714 if (strcmp(node_name, "abi-corpus-group") == 0
715 || strcmp(node_name, "abi-corpus") == 0)
716 unset_attribute(node, "tracking-non-reachable-types");
717 else if (NAMED_TYPES.find(node_name) != NAMED_TYPES.end())
718 unset_attribute(node, "is-non-reachable");
719
720 for (auto child : get_children(node))
721 clear_non_reachable(child);
722 }
723
/// Attributes that are ignored when comparing XML elements.
///
/// Source location attributes (filepath, line, column) are omitted with
/// --no-show-locs without changing the meaning of the ABI. They can
/// also sometimes vary between duplicate type definitions.
///
/// The naming-typedef-id and is-non-reachable attributes, if not
/// already removed by another pass, are irrelevant to ABI semantics.
static const std::unordered_set<std::string> IRRELEVANT_ATTRIBUTES{
  "filepath",
  "line",
  "column",
  "naming-typedef-id",
  "is-non-reachable",
};
743
744 /// Determine whether one XML element is a subtree of another.
745 ///
746 /// XML elements representing types are sometimes emitted multiple
747 /// times, identically. Also, member typedefs are sometimes emitted
748 /// separately from their types, resulting in duplicate XML fragments.
749 ///
750 /// Both these issues can be resolved by first detecting duplicate
751 /// occurrences of a given type id and then checking to see if there's
752 /// an instance that subsumes the others, which can then be eliminated.
753 ///
754 /// @param left the first element to compare
755 ///
756 /// @param right the second element to compare
757 ///
758 /// @return whether the first element is a subtree of the second
759 bool
sub_tree(xmlNodePtr left,xmlNodePtr right)760 sub_tree(xmlNodePtr left, xmlNodePtr right)
761 {
762 // Node names must match.
763 const char* left_name = from_libxml(left->name);
764 const char* right_name = from_libxml(right->name);
765 if (strcmp(left_name, right_name) != 0)
766 return false;
767 // Attributes may be missing on the left, but must match otherwise.
768 for (auto p = left->properties; p; p = p->next)
769 {
770 const char* attribute_name = from_libxml(p->name);
771 if (IRRELEVANT_ATTRIBUTES.count(attribute_name))
772 continue;
773 // EXCEPTION: libabigail emits the access specifier for the type
774 // it's trying to "emit in scope" rather than for what may be a
775 // containing type; so allow member-type attribute access to differ.
776 if (strcmp(left_name, "member-type") == 0
777 && strcmp(attribute_name, "access") == 0)
778 continue;
779 const auto left_value = get_attribute(left, attribute_name);
780 assert(left_value);
781 const auto right_value = get_attribute(right, attribute_name);
782 if (!right_value || left_value.value() != right_value.value())
783 return false;
784 }
785 // The left subelements must be a subsequence of the right ones.
786 xmlNodePtr left_child = xmlFirstElementChild(left);
787 xmlNodePtr right_child = xmlFirstElementChild(right);
788 while (left_child && right_child)
789 {
790 if (sub_tree(left_child, right_child))
791 left_child = xmlNextElementSibling(left_child);
792 right_child = xmlNextElementSibling(right_child);
793 }
794 return !left_child;
795 }
796
797 /// Elminate non-conflicting / report conflicting type definitions.
798 ///
799 /// This function can eliminate exact type duplicates and duplicates
800 /// where there is at least one maximal definition. It can report the
801 /// remaining, conflicting duplicate definitions.
802 ///
803 /// If a type has duplicate definitions in multiple namespace scopes,
804 /// these should not be reordered. This function reports how many such
805 /// types it finds.
806 ///
807 /// @param eliminate whether to eliminate non-conflicting duplicates
808 ///
809 /// @param report whether to report conflicting duplicate definitions
810 ///
811 /// @param root the root XML element
812 ///
813 /// @return the number of types defined in multiple namespace scopes
handle_duplicate_types(bool eliminate,bool report,xmlNodePtr root)814 size_t handle_duplicate_types(bool eliminate, bool report, xmlNodePtr root)
815 {
816 // map of type-id to pair of set of namespace scopes and vector of
817 // xmlNodes
818 std::unordered_map<
819 std::string,
820 std::pair<
821 std::set<namespace_scope>,
822 std::vector<xmlNodePtr>>> types;
823 namespace_scope namespaces;
824
825 // find all type occurrences
826 std::function<void(xmlNodePtr)> dfs = [&](xmlNodePtr node) {
827 if (node->type != XML_ELEMENT_NODE)
828 return;
829 const char* node_name = from_libxml(node->name);
830 std::optional<std::string> namespace_name;
831 if (strcmp(node_name, "namespace-decl") == 0)
832 namespace_name = get_attribute(node, "name");
833 if (namespace_name)
834 namespaces.push_back(namespace_name.value());
835 if (strcmp(node_name, "abi-corpus-group") == 0
836 || strcmp(node_name, "abi-corpus") == 0
837 || strcmp(node_name, "abi-instr") == 0
838 || namespace_name)
839 {
840 for (auto child : get_children(node))
841 dfs(child);
842 }
843 else
844 {
845 const auto id = get_attribute(node, "id");
846 if (id)
847 {
848 auto& info = types[id.value()];
849 info.first.insert(namespaces);
850 info.second.push_back(node);
851 }
852 }
853 if (namespace_name)
854 namespaces.pop_back();
855 };
856 dfs(root);
857
858 size_t scope_conflicts = 0;
859 for (const auto& [id, scopes_and_definitions] : types)
860 {
861 const auto& [scopes, definitions] = scopes_and_definitions;
862
863 if (scopes.size() > 1)
864 {
865 if (report)
866 std::cerr << "conflicting scopes found for type '" << id << "'\n";
867 ++scope_conflicts;
868 continue;
869 }
870
871 const size_t count = definitions.size();
872 if (count <= 1)
873 continue;
874
875 // Find a potentially maximal candidate by scanning through and
876 // retaining the new definition if it's a supertree of the current
877 // candidate.
878 std::vector<bool> ok(count);
879 size_t candidate = 0;
880 ok[candidate] = true;
881 for (size_t ix = 1; ix < count; ++ix)
882 if (sub_tree(definitions[candidate], definitions[ix]))
883 {
884 candidate = ix;
885 ok[candidate] = true;
886 }
887
888 // Verify the candidate is indeed maximal by scanning the
889 // definitions not already known to be subtrees of it.
890 bool bad = false;
891 for (size_t ix = 0; ix < count; ++ix)
892 if (!ok[ix] && !sub_tree(definitions[ix], definitions[candidate]))
893 {
894 bad = true;
895 break;
896 }
897 if (bad)
898 {
899 if (report)
900 std::cerr << "conflicting definitions found for type '" << id
901 << "'\n";
902 continue;
903 }
904
905 if (eliminate)
906 // Remove all but the maximal definition.
907 for (size_t ix = 0; ix < count; ++ix)
908 if (ix != candidate)
909 remove_element(definitions[ix]);
910 }
911
912 return scope_conflicts;
913 }
914
/// A set of attribute names: path, comp-dir-path and language.
///
/// NOTE(review): presumably these are the abi-instr attributes whose
/// values are allowed to vary between abi-instr elements -- confirm
/// against the code that uses this set.
static const std::set<std::string> INSTR_VARIABLE_ATTRIBUTES{
  "comp-dir-path",
  "language",
  "path",
};
920
921 /// Collect elements of abi-instr elements by namespace.
922 ///
923 /// Namespaces are not returned but are recursively traversed with the
924 /// namespace stack being maintained. Other elements are associated with
925 /// the current namespace.
926 ///
927 /// @param nodes the nodes to traverse
928 ///
929 /// @return child elements grouped by namespace scope
930 static std::map<namespace_scope, std::vector<xmlNodePtr>>
get_children_by_namespace(const std::vector<xmlNodePtr> & nodes)931 get_children_by_namespace(const std::vector<xmlNodePtr>& nodes)
932 {
933 std::map<namespace_scope, std::vector<xmlNodePtr>> result;
934 namespace_scope scope;
935
936 std::function<void(xmlNodePtr)> process = [&](xmlNodePtr node) {
937 if (node->type != XML_ELEMENT_NODE)
938 return;
939 std::optional<std::string> namespace_name;
940 const char* node_name = from_libxml(node->name);
941 if (strcmp(node_name, "namespace-decl") == 0)
942 namespace_name = get_attribute(node, "name");
943 if (namespace_name)
944 {
945 scope.push_back(namespace_name.value());
946 for (auto child : get_children(node))
947 process(child);
948 scope.pop_back();
949 }
950 else
951 result[scope].push_back(node);
952 };
953
954 for (auto node : nodes)
955 for (auto child : get_children(node))
956 process(child);
957 return result;
958 }
959
960 /// Sort namespaces, types and declarations.
961 ///
962 /// This loses annotations (XML comments) on namespace-decl elements.
963 /// It would have been a fair amount of extra work to preserve them.
964 ///
965 /// @param root the XML root element
966 static void
sort_namespaces_types_and_declarations(xmlNodePtr root)967 sort_namespaces_types_and_declarations(xmlNodePtr root)
968 {
969 // There are (currently) 2 ABI formats we handle here.
970 //
971 // 1. An abi-corpus containing one or more abi-instr. In this case, we
972 // move all namespaces, types and declarations to a replacement
973 // abi-instr at the end of the abi-corpus. The existing abi-instr will
974 // then be confirmed as empty and removed.
975 //
976 // 2. An abi-corpus-group containing one or more abi-corpus each
977 // containing zero or more abi-instr (with at least one abi-instr
978 // altogether). In this case the replacement abi-instr is created
979 // within the first abi-corpus of the group.
980 //
981 // Anything else is left alone. For example, single abi-instr elements
982 // are present in some libabigail test suite files.
983
984 // We first need to identify where to place the new abi-instr and
985 // collect all the abi-instr to process.
986 xmlNodePtr where = nullptr;
987 std::vector<xmlNodePtr> instrs;
988
989 auto process_corpus = [&](xmlNodePtr corpus) {
990 if (!where)
991 where = corpus;
992 for (auto instr : get_children(corpus))
993 if (strcmp(from_libxml(instr->name), "abi-instr") == 0)
994 instrs.push_back(instr);
995 };
996
997 const char* root_name = from_libxml(root->name);
998 if (strcmp(root_name, "abi-corpus-group") == 0)
999 {
1000 // Process all corpora in a corpus group together.
1001 for (auto corpus : get_children(root))
1002 if (strcmp(from_libxml(corpus->name), "abi-corpus") == 0)
1003 process_corpus(corpus);
1004 }
1005 else if (strcmp(root_name, "abi-corpus") == 0)
1006 {
1007 // We have a corpus to sort, just get its instrs.
1008 process_corpus(root);
1009 }
1010
1011 if (instrs.empty())
1012 return;
1013
1014 // Collect the attributes of all the instrs.
1015 std::map<std::string, std::set<std::string>> attributes;
1016 for (auto instr : instrs)
1017 for (auto p = instr->properties; p; p = p->next)
1018 {
1019 // This is horrible. There should be a better way of iterating.
1020 const char* attribute_name = from_libxml(p->name);
1021 const auto attribute_value = get_attribute(instr, attribute_name);
1022 assert(attribute_value);
1023 attributes[attribute_name].insert(attribute_value.value());
1024 }
1025
1026 // Create and attach a replacement instr and populate its attributes.
1027 xmlNodePtr replacement =
1028 xmlAddChild(where, xmlNewNode(nullptr, to_libxml("abi-instr")));
1029 for (const auto& attribute : attributes)
1030 {
1031 const char* attribute_name = attribute.first.c_str();
1032 const auto& attribute_values = attribute.second;
1033 if (attribute_values.size() == 1)
1034 set_attribute(replacement, attribute_name, *attribute_values.begin());
1035 else if (INSTR_VARIABLE_ATTRIBUTES.count(attribute_name))
1036 set_attribute(replacement, attribute_name, "various");
1037 else
1038 {
1039 std::cerr << "unexpectedly variable abi-instr attribute '"
1040 << attribute_name << "'\n";
1041 remove_node(replacement);
1042 return;
1043 }
1044 }
1045
1046 // Order types before declarations, types by id, declarations by name
1047 // (and by mangled-name, if present).
1048 struct Compare {
1049 int
1050 cmp(xmlNodePtr a, xmlNodePtr b) const
1051 {
1052 // NOTE: This must not reorder type definitions with the same id.
1053 // In particular, we cannot do anything nice and easy like order
1054 // by element tag first.
1055 //
1056 // TODO: Replace compare and subtraction with <=>.
1057 int result;
1058
1059 auto a_id = get_attribute(a, "id");
1060 auto b_id = get_attribute(b, "id");
1061 // types before non-types
1062 result = b_id.has_value() - a_id.has_value();
1063 if (result)
1064 return result;
1065 if (a_id)
1066 // sort types by id
1067 return a_id.value().compare(b_id.value());
1068
1069 auto a_name = get_attribute(a, "name");
1070 auto b_name = get_attribute(b, "name");
1071 // declarations before non-declarations
1072 result = b_name.has_value() - a_name.has_value();
1073 if (result)
1074 return result;
1075 if (a_name)
1076 {
1077 // sort declarations by name
1078 result = a_name.value().compare(b_name.value());
1079 if (result)
1080 return result;
1081 auto a_mangled = get_attribute(a, "mangled-name");
1082 auto b_mangled = get_attribute(b, "mangled-name");
1083 // without mangled-name first
1084 result = a_mangled.has_value() - b_mangled.has_value();
1085 if (result)
1086 return result;
1087 // and by mangled-name if present
1088 return !a_mangled ? 0 : a_mangled.value().compare(b_mangled.value());
1089 }
1090
1091 // a and b are not types or declarations; should not be reached
1092 return 0;
1093 }
1094
1095 bool
1096 operator()(xmlNodePtr a, xmlNodePtr b) const
1097 {
1098 return cmp(a, b) < 0;
1099 }
1100 };
1101
1102 // Collect the child elements of all the instrs, by namespace scope.
1103 auto scoped_children = get_children_by_namespace(instrs);
1104 for (auto& [scope, children] : scoped_children)
1105 // Sort the children, preserving order of duplicates.
1106 std::stable_sort(children.begin(), children.end(), Compare());
1107
1108 // Create namespace elements on demand. The global namespace, with
1109 // empty scope, is just the replacement instr itself.
1110 std::map<namespace_scope, xmlNodePtr> namespace_elements{{{}, replacement}};
1111 std::function<xmlNodePtr(const namespace_scope&)> get_namespace_element =
1112 [&](const namespace_scope& scope) {
1113 auto insertion = namespace_elements.insert({scope, nullptr});
1114 if (insertion.second)
1115 {
1116 // Insertion succeeded, so the scope cannot be empty.
1117 namespace_scope truncated = scope;
1118 truncated.pop_back();
1119 xmlNodePtr parent = get_namespace_element(truncated);
1120 // We can now create an XML element in the right place.
1121 xmlNodePtr child = xmlNewNode(nullptr, to_libxml("namespace-decl"));
1122 set_attribute(child, "name", scope.back());
1123 xmlAddChild(parent, child);
1124 insertion.first->second = child;
1125 }
1126 return insertion.first->second;
1127 };
1128
1129 // Move the children to the replacement instr or its subelements.
1130 for (const auto& [scope, elements] : scoped_children)
1131 {
1132 xmlNodePtr namespace_element = get_namespace_element(scope);
1133 for (auto element : elements)
1134 move_element(element, namespace_element);
1135 }
1136
1137 // Check the original instrs are now all empty and remove them.
1138 for (auto instr : instrs)
1139 if (get_children_by_namespace({instr}).empty())
1140 remove_node(instr);
1141 else
1142 std::cerr << "original abi-instr has residual child elements\n";
1143 }
1144
/// Section name suffixes that identify a symbol list section when
/// reading a symbols file.
static constexpr std::array<std::string_view, 2> SYMBOL_SECTION_SUFFICES{{
  "symbol_list",
  "whitelist",
}};
1149
1150 /// Read symbols from a file.
1151 ///
1152 /// This aims to be compatible with the .ini format used by libabigail
1153 /// for suppression specifications and symbol lists. All symbol list
1154 /// sections in the given file are combined into a single set of
1155 /// symbols.
1156 ///
1157 /// @param filename the name of the file from which to read
1158 ///
1159 /// @return a set of symbols
1160 symbol_set
read_symbols(const char * filename)1161 read_symbols(const char* filename)
1162 {
1163 symbol_set symbols;
1164 std::ifstream file(filename);
1165 if (!file)
1166 {
1167 std::cerr << "error opening symbol file '" << filename << "'\n";
1168 exit(1);
1169 }
1170
1171 bool in_symbol_section = false;
1172 std::string line;
1173 while (std::getline(file, line))
1174 {
1175 size_t start = 0;
1176 size_t limit = line.size();
1177 // Strip comments and leading / trailing whitespace.
1178 while (start < limit)
1179 {
1180 if (std::isspace(line[start]))
1181 ++start;
1182 else if (line[start] == '#')
1183 start = limit;
1184 else
1185 break;
1186 }
1187 while (start < limit)
1188 {
1189 if (std::isspace(line[limit - 1]))
1190 --limit;
1191 else
1192 break;
1193 }
1194 // Skip empty lines.
1195 if (start == limit)
1196 continue;
1197 // See if we are entering a symbol list section.
1198 if (line[start] == '[' && line[limit - 1] == ']')
1199 {
1200 std::string_view section(&line[start + 1], limit - start - 2);
1201 bool found = false;
1202 for (const auto& suffix : SYMBOL_SECTION_SUFFICES)
1203 if (section.size() >= suffix.size()
1204 && section.substr(section.size() - suffix.size()) == suffix)
1205 {
1206 found = true;
1207 break;
1208 }
1209 in_symbol_section = found;
1210 continue;
1211 }
1212 // Add symbol.
1213 if (in_symbol_section)
1214 symbols.insert(std::string(&line[start], limit - start));
1215 }
1216 if (!file.eof())
1217 {
1218 std::cerr << "error reading symbol file '" << filename << "'\n";
1219 exit(1);
1220 }
1221 return symbols;
1222 }
1223
1224 /// Remove unlisted ELF symbols.
1225 ///
1226 /// @param symbols the set of symbols
1227 ///
1228 /// @param node the XML node to process
1229 void
filter_symbols(const symbol_set & symbols,xmlNodePtr node)1230 filter_symbols(const symbol_set& symbols, xmlNodePtr node)
1231 {
1232 if (node->type != XML_ELEMENT_NODE)
1233 return;
1234 const char* node_name = from_libxml(node->name);
1235 if (strcmp(node_name, "abi-corpus-group") == 0
1236 || strcmp(node_name, "abi-corpus") == 0
1237 || strcmp(node_name, "elf-variable-symbols") == 0
1238 || strcmp(node_name, "elf-function-symbols") == 0)
1239 {
1240 // Process children.
1241 for (auto child : get_children(node))
1242 filter_symbols(symbols, child);
1243 }
1244 else if (strcmp(node_name, "elf-symbol") == 0)
1245 {
1246 const auto name = get_attribute(node, "name");
1247 if (name && !symbols.count(name.value()))
1248 remove_element(node);
1249 }
1250 }
1251
1252 /// Main program.
1253 ///
1254 /// Read and write ABI XML, with optional processing passes.
1255 ///
1256 /// @param argc argument count
1257 ///
1258 /// @param argv argument vector
1259 ///
1260 /// @return exit status
1261 int
main(int argc,char * argv[])1262 main(int argc, char* argv[])
1263 {
1264 // Defaults.
1265 const char* opt_input = nullptr;
1266 const char* opt_output = nullptr;
1267 std::optional<symbol_set> opt_symbols;
1268 LocationInfo opt_locations = LocationInfo::COLUMN;
1269 int opt_indentation = 2;
1270 bool opt_normalise_anonymous = false;
1271 bool opt_reanonymise_anonymous = false;
1272 bool opt_discard_naming_typedefs = false;
1273 bool opt_prune_unreachable = false;
1274 bool opt_report_untyped = false;
1275 bool opt_abort_on_untyped = false;
1276 bool opt_clear_non_reachable = false;
1277 bool opt_eliminate_duplicates = false;
1278 bool opt_report_conflicts = false;
1279 bool opt_sort = false;
1280 bool opt_drop_empty = false;
1281
1282 // Process command line.
1283 auto usage = [&]() -> int {
1284 std::cerr << "usage: " << argv[0] << '\n'
1285 << " [-i|--input file]\n"
1286 << " [-o|--output file]\n"
1287 << " [-S|--symbols file]\n"
1288 << " [-L|--locations {column|line|file|none}]\n"
1289 << " [-I|--indentation n]\n"
1290 << " [-a|--all] (implies -n -r -t -p -u -b -e -c -s -d)\n"
1291 << " [-n|--[no-]normalise-anonymous]\n"
1292 << " [-r|--[no-]reanonymise-anonymous]\n"
1293 << " [-t|--[no-]discard-naming-typedefs]\n"
1294 << " [-p|--[no-]prune-unreachable]\n"
1295 << " [-u|--[no-]report-untyped]\n"
1296 << " [-U|--abort-on-untyped-symbols]\n"
1297 << " [-b|--[no-]clear-non-reachable]\n"
1298 << " [-e|--[no-]eliminate-duplicates]\n"
1299 << " [-c|--[no-]report-conflicts]\n"
1300 << " [-s|--[no-]sort]\n"
1301 << " [-d|--[no-]drop-empty]\n";
1302 return 1;
1303 };
1304 int opt_index = 1;
1305 auto get_arg = [&]() {
1306 if (opt_index < argc)
1307 return argv[opt_index++];
1308 exit(usage());
1309 };
1310 while (opt_index < argc)
1311 {
1312 const std::string arg = get_arg();
1313 if (arg == "-i" || arg == "--input")
1314 opt_input = get_arg();
1315 else if (arg == "-o" || arg == "--output")
1316 opt_output = get_arg();
1317 else if (arg == "-S" || arg == "--symbols")
1318 opt_symbols = read_symbols(get_arg());
1319 else if (arg == "-L" || arg == "--locations")
1320 {
1321 auto it = LOCATION_INFO_NAME.find(get_arg());
1322 if (it == LOCATION_INFO_NAME.end())
1323 exit(usage());
1324 opt_locations = it->second;
1325 }
1326 else if (arg == "-I" || arg == "--indentation")
1327 {
1328 std::istringstream is(get_arg());
1329 is >> std::noskipws >> opt_indentation;
1330 if (!is || !is.eof() || opt_indentation < 0)
1331 exit(usage());
1332 }
1333 else if (arg == "-a" || arg == "--all")
1334 opt_normalise_anonymous = opt_reanonymise_anonymous
1335 = opt_discard_naming_typedefs
1336 = opt_prune_unreachable
1337 = opt_report_untyped
1338 = opt_clear_non_reachable
1339 = opt_eliminate_duplicates
1340 = opt_report_conflicts
1341 = opt_sort
1342 = opt_drop_empty
1343 = true;
1344 else if (arg == "-n" || arg == "--normalise-anonymous")
1345 opt_normalise_anonymous = true;
1346 else if (arg == "--no-normalise-anonymous")
1347 opt_normalise_anonymous = false;
1348 else if (arg == "-r" || arg == "--reanonymise-anonymous")
1349 opt_reanonymise_anonymous = true;
1350 else if (arg == "--no-reanonymise-anonymous")
1351 opt_reanonymise_anonymous = false;
1352 else if (arg == "-t" || arg == "--discard-naming-typedefs")
1353 opt_discard_naming_typedefs = true;
1354 else if (arg == "--no-discard-naming-typedefs")
1355 opt_discard_naming_typedefs = false;
1356 else if (arg == "-p" || arg == "--prune-unreachable")
1357 opt_prune_unreachable = true;
1358 else if (arg == "--no-prune-unreachable")
1359 opt_prune_unreachable = false;
1360 else if (arg == "-u" || arg == "--report-untyped")
1361 opt_report_untyped = true;
1362 else if (arg == "--no-report-untyped")
1363 opt_report_untyped = false;
1364 else if (arg == "-U" || arg == "--abort-on-untyped-symbols")
1365 opt_abort_on_untyped = true;
1366 else if (arg == "-b" || arg == "--clear-non-reachable")
1367 opt_clear_non_reachable = true;
1368 else if (arg == "--no-clear-non-reachable")
1369 opt_clear_non_reachable = false;
1370 else if (arg == "-e" || arg == "--eliminate-duplicates")
1371 opt_eliminate_duplicates = true;
1372 else if (arg == "--no-eliminate-duplicates")
1373 opt_eliminate_duplicates = false;
1374 else if (arg == "-c" || arg == "--report-conflicts")
1375 opt_report_conflicts = true;
1376 else if (arg == "--no-report-conflicts")
1377 opt_report_conflicts = false;
1378 else if (arg == "-s" || arg == "--sort")
1379 opt_sort = true;
1380 else if (arg == "--no-sort")
1381 opt_sort = false;
1382 else if (arg == "-d" || arg == "--drop-empty")
1383 opt_drop_empty = true;
1384 else if (arg == "--no-drop-empty")
1385 opt_drop_empty = false;
1386 else
1387 exit(usage());
1388 }
1389
1390 // Open input for reading.
1391 int in_fd = STDIN_FILENO;
1392 if (opt_input)
1393 {
1394 in_fd = open(opt_input, O_RDONLY);
1395 if (in_fd < 0)
1396 {
1397 std::cerr << "could not open '" << opt_input << "' for reading: "
1398 << strerror(errno) << '\n';
1399 exit(1);
1400 }
1401 }
1402
1403 // Read the XML.
1404 xmlParserCtxtPtr parser_context = xmlNewParserCtxt();
1405 xmlDocPtr document
1406 = xmlCtxtReadFd(parser_context, in_fd, nullptr, nullptr, 0);
1407 if (!document)
1408 {
1409 std::cerr << "failed to parse input as XML\n";
1410 exit(1);
1411 }
1412 xmlFreeParserCtxt(parser_context);
1413 close(in_fd);
1414
1415 // Get the root element.
1416 xmlNodePtr root = xmlDocGetRootElement(document);
1417 if (!root)
1418 {
1419 std::cerr << "XML document has no root element\n";
1420 exit(1);
1421 }
1422
1423 // Strip text nodes to simplify other operations.
1424 strip_text(root);
1425
1426 // Remove unlisted symbols.
1427 if (opt_symbols)
1428 filter_symbols(opt_symbols.value(), root);
1429
1430 // Normalise anonymous type names.
1431 // Reanonymise anonymous types.
1432 // Discard naming typedef backlinks.
1433 if (opt_normalise_anonymous || opt_reanonymise_anonymous
1434 || opt_discard_naming_typedefs)
1435 handle_anonymous_types(opt_normalise_anonymous, opt_reanonymise_anonymous,
1436 opt_discard_naming_typedefs, root);
1437
1438 // Prune unreachable elements and/or report untyped symbols.
1439 size_t untyped_symbols = 0;
1440 if (opt_prune_unreachable || opt_report_untyped || opt_abort_on_untyped)
1441 untyped_symbols += handle_unreachable(
1442 opt_prune_unreachable, opt_report_untyped, root);
1443 if (opt_abort_on_untyped && untyped_symbols)
1444 {
1445 std::cerr << "found " << untyped_symbols << " untyped symbols\n";
1446 exit(1);
1447 }
1448
1449 // Limit location information.
1450 if (opt_locations > LocationInfo::COLUMN)
1451 limit_locations(opt_locations, root);
1452
1453 // Clear unwanted non-reachable attributes.
1454 if (opt_clear_non_reachable)
1455 clear_non_reachable(root);
1456
1457 // Eliminate complete duplicates and extra fragments of types.
1458 // Report conflicting type defintions.
1459 // Record whether there are namespace scope conflicts.
1460 size_t scope_conflicts = 0;
1461 if (opt_eliminate_duplicates || opt_report_conflicts || opt_sort)
1462 scope_conflicts += handle_duplicate_types(
1463 opt_eliminate_duplicates, opt_report_conflicts, root);
1464
1465 // Sort namespaces, types and declarations.
1466 if (opt_sort)
1467 {
1468 if (scope_conflicts)
1469 std::cerr << "found type definition scope conflicts, skipping sort\n";
1470 else
1471 sort_namespaces_types_and_declarations(root);
1472 }
1473
1474 // Drop empty subelements.
1475 if (opt_drop_empty)
1476 drop_empty(root);
1477
1478 // Reformat root element for human consumption.
1479 format_xml(std::string(opt_indentation, ' '), std::string(), root);
1480
1481 // Open output for writing.
1482 int out_fd = STDOUT_FILENO;
1483 if (opt_output)
1484 {
1485 out_fd = open(opt_output, O_CREAT | O_TRUNC | O_WRONLY,
1486 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
1487 if (out_fd < 0)
1488 {
1489 std::cerr << "could not open '" << opt_output << "' for writing: "
1490 << strerror(errno) << '\n';
1491 exit(1);
1492 }
1493 }
1494
1495 // Write the XML.
1496 //
1497 // First to memory, as we need to do a little post-processing.
1498 xmlChar* out_data;
1499 int out_size;
1500 xmlDocDumpMemory(document, &out_data, &out_size);
1501 // Remove the XML declaration as it currently upsets abidiff.
1502 xmlChar* out_limit = out_data + out_size;
1503 while (out_data < out_limit && *out_data != '\n')
1504 ++out_data;
1505 if (out_data < out_limit)
1506 ++out_data;
1507 // Adjust quotes to match abidw.
1508 adjust_quotes(out_data, out_limit);
1509 // And now to a file.
1510 size_t count = out_limit - out_data;
1511 if (write(out_fd, out_data, count) != count)
1512 {
1513 std::cerr << "could not write output: " << strerror(errno) << '\n';
1514 exit(1);
1515 }
1516 if (close(out_fd) < 0)
1517 {
1518 std::cerr << "could not close output: " << strerror(errno) << '\n';
1519 exit(1);
1520 }
1521
1522 // Free libxml document.
1523 xmlFreeDoc(document);
1524 return 0;
1525 }
1526