1from __future__ import absolute_import, division, unicode_literals 2from six import text_type 3 4import re 5 6from . import _base 7from .. import ihatexml 8from .. import constants 9from ..constants import namespaces 10from ..utils import moduleFactoryFactory 11 12tag_regexp = re.compile("{([^}]*)}(.*)") 13 14 15def getETreeBuilder(ElementTreeImplementation, fullTree=False): 16 ElementTree = ElementTreeImplementation 17 ElementTreeCommentType = ElementTree.Comment("asd").tag 18 19 class Element(_base.Node): 20 def __init__(self, name, namespace=None): 21 self._name = name 22 self._namespace = namespace 23 self._element = ElementTree.Element(self._getETreeTag(name, 24 namespace)) 25 if namespace is None: 26 self.nameTuple = namespaces["html"], self._name 27 else: 28 self.nameTuple = self._namespace, self._name 29 self.parent = None 30 self._childNodes = [] 31 self._flags = [] 32 33 def _getETreeTag(self, name, namespace): 34 if namespace is None: 35 etree_tag = name 36 else: 37 etree_tag = "{%s}%s" % (namespace, name) 38 return etree_tag 39 40 def _setName(self, name): 41 self._name = name 42 self._element.tag = self._getETreeTag(self._name, self._namespace) 43 44 def _getName(self): 45 return self._name 46 47 name = property(_getName, _setName) 48 49 def _setNamespace(self, namespace): 50 self._namespace = namespace 51 self._element.tag = self._getETreeTag(self._name, self._namespace) 52 53 def _getNamespace(self): 54 return self._namespace 55 56 namespace = property(_getNamespace, _setNamespace) 57 58 def _getAttributes(self): 59 return self._element.attrib 60 61 def _setAttributes(self, attributes): 62 # Delete existing attributes first 63 # XXX - there may be a better way to do this... 64 for key in list(self._element.attrib.keys()): 65 del self._element.attrib[key] 66 for key, value in attributes.items(): 67 if isinstance(key, tuple): 68 name = "{%s}%s" % (key[2], key[1]) 69 else: 70 name = key 71 self._element.set(name, value) 72 73 attributes = property(_getAttributes, _setAttributes) 74 75 def _getChildNodes(self): 76 return self._childNodes 77 78 def _setChildNodes(self, value): 79 del self._element[:] 80 self._childNodes = [] 81 for element in value: 82 self.insertChild(element) 83 84 childNodes = property(_getChildNodes, _setChildNodes) 85 86 def hasContent(self): 87 """Return true if the node has children or text""" 88 return bool(self._element.text or len(self._element)) 89 90 def appendChild(self, node): 91 self._childNodes.append(node) 92 self._element.append(node._element) 93 node.parent = self 94 95 def insertBefore(self, node, refNode): 96 index = list(self._element).index(refNode._element) 97 self._element.insert(index, node._element) 98 node.parent = self 99 100 def removeChild(self, node): 101 self._element.remove(node._element) 102 node.parent = None 103 104 def insertText(self, data, insertBefore=None): 105 if not(len(self._element)): 106 if not self._element.text: 107 self._element.text = "" 108 self._element.text += data 109 elif insertBefore is None: 110 # Insert the text as the tail of the last child element 111 if not self._element[-1].tail: 112 self._element[-1].tail = "" 113 self._element[-1].tail += data 114 else: 115 # Insert the text before the specified node 116 children = list(self._element) 117 index = children.index(insertBefore._element) 118 if index > 0: 119 if not self._element[index - 1].tail: 120 self._element[index - 1].tail = "" 121 self._element[index - 1].tail += data 122 else: 123 if not self._element.text: 124 self._element.text = "" 125 self._element.text += data 126 127 def cloneNode(self): 128 element = type(self)(self.name, self.namespace) 129 for name, value in self.attributes.items(): 130 element.attributes[name] = value 131 return element 132 133 def reparentChildren(self, newParent): 134 if newParent.childNodes: 135 newParent.childNodes[-1]._element.tail += self._element.text 136 else: 137 if not newParent._element.text: 138 newParent._element.text = "" 139 if self._element.text is not None: 140 newParent._element.text += self._element.text 141 self._element.text = "" 142 _base.Node.reparentChildren(self, newParent) 143 144 class Comment(Element): 145 def __init__(self, data): 146 # Use the superclass constructor to set all properties on the 147 # wrapper element 148 self._element = ElementTree.Comment(data) 149 self.parent = None 150 self._childNodes = [] 151 self._flags = [] 152 153 def _getData(self): 154 return self._element.text 155 156 def _setData(self, value): 157 self._element.text = value 158 159 data = property(_getData, _setData) 160 161 class DocumentType(Element): 162 def __init__(self, name, publicId, systemId): 163 Element.__init__(self, "<!DOCTYPE>") 164 self._element.text = name 165 self.publicId = publicId 166 self.systemId = systemId 167 168 def _getPublicId(self): 169 return self._element.get("publicId", "") 170 171 def _setPublicId(self, value): 172 if value is not None: 173 self._element.set("publicId", value) 174 175 publicId = property(_getPublicId, _setPublicId) 176 177 def _getSystemId(self): 178 return self._element.get("systemId", "") 179 180 def _setSystemId(self, value): 181 if value is not None: 182 self._element.set("systemId", value) 183 184 systemId = property(_getSystemId, _setSystemId) 185 186 class Document(Element): 187 def __init__(self): 188 Element.__init__(self, "DOCUMENT_ROOT") 189 190 class DocumentFragment(Element): 191 def __init__(self): 192 Element.__init__(self, "DOCUMENT_FRAGMENT") 193 194 def testSerializer(element): 195 rv = [] 196 197 def serializeElement(element, indent=0): 198 if not(hasattr(element, "tag")): 199 element = element.getroot() 200 if element.tag == "<!DOCTYPE>": 201 if element.get("publicId") or element.get("systemId"): 202 publicId = element.get("publicId") or "" 203 systemId = element.get("systemId") or "" 204 rv.append("""<!DOCTYPE %s "%s" "%s">""" % 205 (element.text, publicId, systemId)) 206 else: 207 rv.append("<!DOCTYPE %s>" % (element.text,)) 208 elif element.tag == "DOCUMENT_ROOT": 209 rv.append("#document") 210 if element.text is not None: 211 rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) 212 if element.tail is not None: 213 raise TypeError("Document node cannot have tail") 214 if hasattr(element, "attrib") and len(element.attrib): 215 raise TypeError("Document node cannot have attributes") 216 elif element.tag == ElementTreeCommentType: 217 rv.append("|%s<!-- %s -->" % (' ' * indent, element.text)) 218 else: 219 assert isinstance(element.tag, text_type), \ 220 "Expected unicode, got %s, %s" % (type(element.tag), element.tag) 221 nsmatch = tag_regexp.match(element.tag) 222 223 if nsmatch is None: 224 name = element.tag 225 else: 226 ns, name = nsmatch.groups() 227 prefix = constants.prefixes[ns] 228 name = "%s %s" % (prefix, name) 229 rv.append("|%s<%s>" % (' ' * indent, name)) 230 231 if hasattr(element, "attrib"): 232 attributes = [] 233 for name, value in element.attrib.items(): 234 nsmatch = tag_regexp.match(name) 235 if nsmatch is not None: 236 ns, name = nsmatch.groups() 237 prefix = constants.prefixes[ns] 238 attr_string = "%s %s" % (prefix, name) 239 else: 240 attr_string = name 241 attributes.append((attr_string, value)) 242 243 for name, value in sorted(attributes): 244 rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) 245 if element.text: 246 rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) 247 indent += 2 248 for child in element: 249 serializeElement(child, indent) 250 if element.tail: 251 rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) 252 serializeElement(element, 0) 253 254 return "\n".join(rv) 255 256 def tostring(element): 257 """Serialize an element and its child nodes to a string""" 258 rv = [] 259 filter = ihatexml.InfosetFilter() 260 261 def serializeElement(element): 262 if isinstance(element, ElementTree.ElementTree): 263 element = element.getroot() 264 265 if element.tag == "<!DOCTYPE>": 266 if element.get("publicId") or element.get("systemId"): 267 publicId = element.get("publicId") or "" 268 systemId = element.get("systemId") or "" 269 rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" % 270 (element.text, publicId, systemId)) 271 else: 272 rv.append("<!DOCTYPE %s>" % (element.text,)) 273 elif element.tag == "DOCUMENT_ROOT": 274 if element.text is not None: 275 rv.append(element.text) 276 if element.tail is not None: 277 raise TypeError("Document node cannot have tail") 278 if hasattr(element, "attrib") and len(element.attrib): 279 raise TypeError("Document node cannot have attributes") 280 281 for child in element: 282 serializeElement(child) 283 284 elif element.tag == ElementTreeCommentType: 285 rv.append("<!--%s-->" % (element.text,)) 286 else: 287 # This is assumed to be an ordinary element 288 if not element.attrib: 289 rv.append("<%s>" % (filter.fromXmlName(element.tag),)) 290 else: 291 attr = " ".join(["%s=\"%s\"" % ( 292 filter.fromXmlName(name), value) 293 for name, value in element.attrib.items()]) 294 rv.append("<%s %s>" % (element.tag, attr)) 295 if element.text: 296 rv.append(element.text) 297 298 for child in element: 299 serializeElement(child) 300 301 rv.append("</%s>" % (element.tag,)) 302 303 if element.tail: 304 rv.append(element.tail) 305 306 serializeElement(element) 307 308 return "".join(rv) 309 310 class TreeBuilder(_base.TreeBuilder): 311 documentClass = Document 312 doctypeClass = DocumentType 313 elementClass = Element 314 commentClass = Comment 315 fragmentClass = DocumentFragment 316 implementation = ElementTreeImplementation 317 318 def testSerializer(self, element): 319 return testSerializer(element) 320 321 def getDocument(self): 322 if fullTree: 323 return self.document._element 324 else: 325 if self.defaultNamespace is not None: 326 return self.document._element.find( 327 "{%s}html" % self.defaultNamespace) 328 else: 329 return self.document._element.find("html") 330 331 def getFragment(self): 332 return _base.TreeBuilder.getFragment(self)._element 333 334 return locals() 335 336 337getETreeModule = moduleFactoryFactory(getETreeBuilder) 338