• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2014 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
"""Utility file for pretty-printing XML files.
6
The method XmlStyle.PrettyPrintNode is used for formatting both
histograms.xml and actions.xml.
9"""
10
11import logging
12import textwrap
13import xml.dom.minidom
14
# Column at which pretty-printed text and attribute lists are wrapped.
WRAP_COLUMN = 80
16
17
class Error(Exception):
  """Raised when the pretty-printer meets XML it does not recognize."""
20
21
def LastLineLength(s):
  """Measures the final line of a (possibly multi-line) string.

  Args:
    s: The string to inspect; its lines are separated by newline characters.

  Returns:
    The number of characters that follow the last newline in s, or the
    length of the whole string when s contains no newline.
  """
  # When there is no newline, rpartition hands back the entire string as the
  # tail, so both cases reduce to measuring the tail.
  _, _, tail = s.rpartition('\n')
  return len(tail)
33
34
def XmlEscape(s):
  """Returns s with the XML magic characters (&<>") replaced by entities."""
  # '&' must be handled first; otherwise the ampersands introduced by the
  # other entities would themselves be escaped.
  replacements = (
      ("&", "&amp;"),
      ("<", "&lt;"),
      ("\"", "&quot;"),
      (">", "&gt;"),
  )
  for magic_char, entity in replacements:
    s = s.replace(magic_char, entity)
  return s
41
42
class XmlStyle(object):
  """Stores the style specification for an output XML file and applies it.

  Attributes:
    attribute_order: Mapping from tag name to the ordered sequence of
        attribute names allowed on that tag. Attributes are emitted in this
        order; an attribute that is not listed for its tag is an error.
    tags_that_have_extra_newline: Mapping from tag name to a 3-tuple
        (newlines_after_open, newlines_before_close, newlines_after_close).
        Tags that are absent use (1, 1, 0).
    tags_that_dont_indent: Collection of tag names whose children are
        printed at the same indent level as the tag itself.
    tags_that_allow_single_line: Collection of tag names that may be
        printed on one line when they have a single child that fits.
  """

  def __init__(self, attribute_order, tags_that_have_extra_newline,
               tags_that_dont_indent, tags_that_allow_single_line):
    self.attribute_order = attribute_order
    self.tags_that_have_extra_newline = tags_that_have_extra_newline
    self.tags_that_dont_indent = tags_that_dont_indent
    self.tags_that_allow_single_line = tags_that_allow_single_line

  def PrettyPrintNode(self, node, indent=0):
    """Pretty-prints the given XML node at the given indent level.

    Document nodes print each child at indent 0 and join them with newlines.
    Comment nodes are emitted verbatim, without indentation, followed by a
    newline.

    Args:
      node: The minidom node to pretty-print.
      indent: The current indent level.

    Returns:
      The pretty-printed string (including embedded newlines).

    Raises:
      Error if the XML has unrecognized attributes or an unsupported node
      type (for example a processing instruction or a CDATA section).
    """
    node_type = node.nodeType

    if node_type == xml.dom.minidom.Node.DOCUMENT_NODE:
      return '\n'.join([self.PrettyPrintNode(n) for n in node.childNodes])

    if node_type == xml.dom.minidom.Node.TEXT_NODE:
      return self._PrettyPrintText(node, indent)

    if node_type == xml.dom.minidom.Node.ELEMENT_NODE:
      return self._PrettyPrintElement(node, indent)

    if node_type == xml.dom.minidom.Node.COMMENT_NODE:
      return '<!--%s-->\n' % node.data

    # Anything else could be a processing instruction (<? ... ?>) or a CDATA
    # section (<![CDATA[...]]>); neither is supported.
    logging.error('Ignoring unrecognized node data: %s', node.toxml())
    raise Error('Unrecognized node type: %s' % node_type)

  def _PrettyPrintText(self, node, indent):
    """Escapes a text node and wraps its paragraphs to WRAP_COLUMN."""
    padding = ' ' * indent
    wrapper = textwrap.TextWrapper(
        width=WRAP_COLUMN,
        initial_indent=padding,
        subsequent_indent=padding,
        break_on_hyphens=False,
        break_long_words=False)

    # Remove any common indent.
    text = textwrap.dedent(XmlEscape(node.data).strip('\n'))

    # Split the text into paragraphs at blank line boundaries. Blank lines
    # only ever act as separators: storing one inside a paragraph would make
    # textwrap emit a stray leading space (it keeps whitespace at the start
    # of a paragraph) or, for a blank-only paragraph, extra trailing newlines.
    paragraphs = [[]]
    for line in text.split('\n'):
      if line.strip():
        paragraphs[-1].append(line)
      elif paragraphs[-1]:
        paragraphs.append([])
    # Remove trailing empty paragraph if present.
    if not paragraphs[-1]:
      paragraphs.pop()

    # Wrap each paragraph and separate with two newlines.
    return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs])

  def _OrderedAttributes(self, node):
    """Returns node's attribute names in the order given by attribute_order.

    Raises:
      Error if the node has an attribute that is not listed for its tag.
    """
    attributes = list(node.attributes.keys())
    if not attributes:
      return attributes

    if node.tagName in self.attribute_order:
      known_order = self.attribute_order[node.tagName]
      unrecognized = [a for a in attributes if a not in known_order]
      attributes = [a for a in known_order if a in attributes]
    else:
      # No attributes are configured for this tag, so none are allowed.
      unrecognized = attributes

    for a in unrecognized:
      logging.error('Unrecognized attribute "%s" in tag "%s"', a, node.tagName)
    if unrecognized:
      raise Error('Unrecognized attribute(s) in tag "%s": %s' %
                  (node.tagName, ', '.join(unrecognized)))
    return attributes

  def _PrettyPrintElement(self, node, indent):
    """Pretty-prints an element node, its attributes and its children."""
    newlines_after_open, newlines_before_close, newlines_after_close = (
        self.tags_that_have_extra_newline.get(node.tagName, (1, 1, 0)))
    # Open the tag.
    s = ' ' * indent + '<' + node.tagName

    # Space to reserve for the '>' or '/>' that ends the opening tag.
    closing_chars = 1 if node.childNodes else 2

    # Pretty-print the attributes.
    for a in self._OrderedAttributes(node):
      value = XmlEscape(node.attributes[a].value)
      # Replace sequences of whitespace with single spaces.
      words = value.split()
      a_str = ' %s="%s"' % (a, ' '.join(words))
      # Start a new line if the attribute will make this line too long.
      if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN:
        s += '\n' + ' ' * (indent + 3)
      # Output everything up to the first quote.
      s += ' %s="' % (a)
      value_indent_level = LastLineLength(s)
      # Output one word at a time, splitting to the next line where
      # necessary.
      column = value_indent_level
      for i, word in enumerate(words):
        # This is slightly too conservative since not every word will be
        # followed by the closing characters...
        if i > 0 and (column + len(word) + 1 + closing_chars > WRAP_COLUMN):
          s = s.rstrip()  # remove any trailing whitespace
          s += '\n' + ' ' * value_indent_level
          column = value_indent_level
        s += word + ' '
        column += len(word) + 1
      s = s.rstrip()  # remove the space that follows the last word
      s += '"'

    if not node.childNodes:
      return s + '/>' + '\n' * newlines_after_close

    # Pretty-print the child nodes.
    s += '>'
    # Calculate the new indent level for child nodes.
    new_indent = indent
    if node.tagName not in self.tags_that_dont_indent:
      new_indent += 2

    # Recursively pretty-print the child nodes, dropping those that render
    # as nothing but whitespace.
    child_nodes = [self.PrettyPrintNode(n, indent=new_indent)
                   for n in node.childNodes]
    child_nodes = [c for c in child_nodes if c.strip()]

    # Determine whether we can fit the entire node on a single line.
    close_tag = '</%s>' % node.tagName
    space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)
    if (node.tagName in self.tags_that_allow_single_line and
        len(child_nodes) == 1 and
        len(child_nodes[0].strip()) <= space_left):
      s += child_nodes[0].strip()
    else:
      s += '\n' * newlines_after_open + '\n'.join(child_nodes)
      s += '\n' * newlines_before_close + ' ' * indent
    s += close_tag
    s += '\n' * newlines_after_close
    return s
194