# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Utility file for pretty print xml file.

The function PrettyPrintNode will be used for formatting both histograms.xml
and actions.xml.
"""

import logging
import textwrap
import xml.dom.minidom

# Maximum width, in characters, of any line of pretty-printed output.
WRAP_COLUMN = 80


class Error(Exception):
  """Raised when the XML contains something the style cannot format."""
  pass


def LastLineLength(s):
  """Returns the length of the last line in s.

  Args:
    s: A multi-line string, including newlines.

  Returns:
    The length of the last line in s, in characters.
  """
  # rfind() returns -1 when there is no newline, which makes the expression
  # below collapse to len(s).
  return len(s) - (s.rfind('\n') + 1)


def XmlEscape(s):
  """XML-escapes the given string.

  Replaces the magic characters (&<>") with their escaped equivalents.

  Args:
    s: The string to escape.

  Returns:
    The escaped string.
  """
  # The ampersand must be replaced first; otherwise the ampersands introduced
  # by the other replacements would themselves be escaped.
  s = s.replace('&', '&amp;').replace('<', '&lt;')
  s = s.replace('"', '&quot;').replace('>', '&gt;')
  return s


class XmlStyle(object):
  """A class that stores all style specification for an output xml file."""

  def __init__(self, attribute_order, tags_that_have_extra_newline,
               tags_that_dont_indent, tags_that_allow_single_line):
    """Stores the style specification.

    Args:
      attribute_order: Mapping from tag name to the ordered list of attribute
          names allowed on that tag. Attributes are emitted in this order;
          any attribute not listed is rejected with an Error.
      tags_that_have_extra_newline: Mapping from tag name to a 3-tuple
          (newlines_after_open, newlines_before_close, newlines_after_close).
          Tags that are absent use (1, 1, 0).
      tags_that_dont_indent: Collection of tag names whose children are
          printed at the same indent level as the tag itself.
      tags_that_allow_single_line: Collection of tag names that may be
          printed on one line when they have a single short child.
    """
    self.attribute_order = attribute_order
    self.tags_that_have_extra_newline = tags_that_have_extra_newline
    self.tags_that_dont_indent = tags_that_dont_indent
    self.tags_that_allow_single_line = tags_that_allow_single_line

  def PrettyPrintNode(self, node, indent=0):
    """Pretty-prints the given XML node at the given indent level.

    Args:
      node: The minidom node to pretty-print.
      indent: The current indent level.

    Returns:
      The pretty-printed string (including embedded newlines).

    Raises:
      Error if the XML has unknown tags or attributes.
    """
    node_types = xml.dom.minidom.Node

    # Handle the top-level document node.
    if node.nodeType == node_types.DOCUMENT_NODE:
      return '\n'.join([self.PrettyPrintNode(n) for n in node.childNodes])

    if node.nodeType == node_types.TEXT_NODE:
      return self._PrettyPrintText(node, indent)

    if node.nodeType == node_types.ELEMENT_NODE:
      return self._PrettyPrintElement(node, indent)

    # Handle comment nodes.
    if node.nodeType == node_types.COMMENT_NODE:
      return '<!--%s-->\n' % node.data

    # Reject other node types. This could be a processing instruction
    # (<? ... ?>) or cdata section (<![CDATA[...]]>), neither of which are
    # legal in the histograms XML at present.
    logging.error('Ignoring unrecognized node data: %s', node.toxml())
    raise Error('Unrecognized node type %s' % node.nodeType)

  def _PrettyPrintText(self, node, indent):
    """Returns the text of a text node, re-wrapped at the given indent."""
    # Wrap each paragraph in the text to fit in the WRAP_COLUMN limit.
    wrapper = textwrap.TextWrapper(
        width=WRAP_COLUMN,
        initial_indent=' ' * indent,
        subsequent_indent=' ' * indent,
        break_on_hyphens=False,
        break_long_words=False)
    # Remove any common indent.
    text = textwrap.dedent(XmlEscape(node.data).strip('\n'))

    # Split the text into paragraphs at blank line boundaries. Blank lines
    # seen while the current paragraph is still empty are skipped: keeping
    # them would hand the wrapper a paragraph with leading whitespace, which
    # it preserves on the first line and so mis-indents the output.
    paragraphs = [[]]
    for line in text.split('\n'):
      if line.strip():
        paragraphs[-1].append(line)
      elif paragraphs[-1]:
        paragraphs.append([])
    # Remove trailing empty paragraph if present.
    if not paragraphs[-1]:
      paragraphs.pop()

    # Wrap each paragraph and separate with two newlines.
    return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs])

  def _PrettyPrintElement(self, node, indent):
    """Returns an element node, its attributes and its children as text."""
    newlines_after_open, newlines_before_close, newlines_after_close = (
        self.tags_that_have_extra_newline.get(node.tagName, (1, 1, 0)))

    # Open the tag.
    s = ' ' * indent + '<' + node.tagName

    # Space to allow for the closing '>' (has children) or '/>' (empty).
    closing_chars = 1 if node.childNodes else 2

    s = self._AppendAttributes(s, node, indent, closing_chars)

    if not node.childNodes:
      s += '/>'
    else:
      s += '>'
      # Calculate the new indent level for child nodes.
      new_indent = indent
      if node.tagName not in self.tags_that_dont_indent:
        new_indent += 2

      # Recursively pretty-print the child nodes, dropping the ones that
      # contain nothing but whitespace.
      children = [self.PrettyPrintNode(n, indent=new_indent)
                  for n in node.childNodes]
      children = [c for c in children if c.strip()]

      # Determine whether we can fit the entire node on a single line.
      close_tag = '</%s>' % node.tagName
      space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)
      if (node.tagName in self.tags_that_allow_single_line and
          len(children) == 1 and
          len(children[0].strip()) <= space_left):
        s += children[0].strip()
      else:
        s += '\n' * newlines_after_open + '\n'.join(children)
        s += '\n' * newlines_before_close + ' ' * indent
      s += close_tag

    s += '\n' * newlines_after_close
    return s

  def _AppendAttributes(self, s, node, indent, closing_chars):
    """Returns s with the attributes of node appended in canonical order.

    Args:
      s: The output so far, ending with the opening '<tagName'.
      node: The element node whose attributes are printed.
      indent: The indent level of the element.
      closing_chars: Number of characters to reserve for '>' or '/>'.

    Returns:
      s followed by the formatted attributes.

    Raises:
      Error if the node has an attribute that is not listed for its tag.
    """
    attributes = node.attributes.keys()
    if not attributes:
      return s

    # Reorder the attributes. A tag without an attribute_order entry may not
    # carry any attributes at all.
    if node.tagName not in self.attribute_order:
      unrecognized_attributes = list(attributes)
    else:
      allowed = self.attribute_order[node.tagName]
      unrecognized_attributes = [a for a in attributes if a not in allowed]
      attributes = [a for a in allowed if a in attributes]

    if unrecognized_attributes:
      for a in unrecognized_attributes:
        logging.error(
            'Unrecognized attribute "%s" in tag "%s"', a, node.tagName)
      raise Error('Unrecognized attribute(s) %s in tag "%s"' % (
          ', '.join(['"%s"' % a for a in unrecognized_attributes]),
          node.tagName))

    for a in attributes:
      value = XmlEscape(node.attributes[a].value)
      # Replace sequences of whitespace with single spaces.
      words = value.split()
      a_str = ' %s="%s"' % (a, ' '.join(words))
      # Start a new line if the attribute will make this line too long.
      if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN:
        s += '\n' + ' ' * (indent + 3)
      # Output everything up to the first quote.
      s += ' %s="' % a
      value_indent_level = LastLineLength(s)
      # Output one word at a time, splitting to the next line where
      # necessary.
      column = value_indent_level
      for i, word in enumerate(words):
        # This is slightly too conservative since not every word will be
        # followed by the closing characters...
        if i > 0 and column + len(word) + 1 + closing_chars > WRAP_COLUMN:
          s = s.rstrip()  # Remove the space after the previous word.
          s += '\n' + ' ' * value_indent_level
          column = value_indent_level
        s += word + ' '
        column += len(word) + 1
      s = s.rstrip()  # Remove the space after the last word.
      s += '"'
    return s