• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1from __future__ import absolute_import, division, unicode_literals
2from six import text_type
3
4import re
5
6from . import _base
7from .. import ihatexml
8from .. import constants
9from ..constants import namespaces
10from ..utils import moduleFactoryFactory
11
# Splits a "{namespace}name" string (the form _getETreeTag produces for
# namespaced tags) into the namespace (group 1) and the remainder (group 2).
tag_regexp = re.compile("{([^}]*)}(.*)")
13
14
15def getETreeBuilder(ElementTreeImplementation, fullTree=False):
16    ElementTree = ElementTreeImplementation
17    ElementTreeCommentType = ElementTree.Comment("asd").tag
18
19    class Element(_base.Node):
20        def __init__(self, name, namespace=None):
21            self._name = name
22            self._namespace = namespace
23            self._element = ElementTree.Element(self._getETreeTag(name,
24                                                                  namespace))
25            if namespace is None:
26                self.nameTuple = namespaces["html"], self._name
27            else:
28                self.nameTuple = self._namespace, self._name
29            self.parent = None
30            self._childNodes = []
31            self._flags = []
32
33        def _getETreeTag(self, name, namespace):
34            if namespace is None:
35                etree_tag = name
36            else:
37                etree_tag = "{%s}%s" % (namespace, name)
38            return etree_tag
39
40        def _setName(self, name):
41            self._name = name
42            self._element.tag = self._getETreeTag(self._name, self._namespace)
43
44        def _getName(self):
45            return self._name
46
47        name = property(_getName, _setName)
48
49        def _setNamespace(self, namespace):
50            self._namespace = namespace
51            self._element.tag = self._getETreeTag(self._name, self._namespace)
52
53        def _getNamespace(self):
54            return self._namespace
55
56        namespace = property(_getNamespace, _setNamespace)
57
58        def _getAttributes(self):
59            return self._element.attrib
60
61        def _setAttributes(self, attributes):
62            # Delete existing attributes first
63            # XXX - there may be a better way to do this...
64            for key in list(self._element.attrib.keys()):
65                del self._element.attrib[key]
66            for key, value in attributes.items():
67                if isinstance(key, tuple):
68                    name = "{%s}%s" % (key[2], key[1])
69                else:
70                    name = key
71                self._element.set(name, value)
72
73        attributes = property(_getAttributes, _setAttributes)
74
75        def _getChildNodes(self):
76            return self._childNodes
77
78        def _setChildNodes(self, value):
79            del self._element[:]
80            self._childNodes = []
81            for element in value:
82                self.insertChild(element)
83
84        childNodes = property(_getChildNodes, _setChildNodes)
85
86        def hasContent(self):
87            """Return true if the node has children or text"""
88            return bool(self._element.text or len(self._element))
89
90        def appendChild(self, node):
91            self._childNodes.append(node)
92            self._element.append(node._element)
93            node.parent = self
94
95        def insertBefore(self, node, refNode):
96            index = list(self._element).index(refNode._element)
97            self._element.insert(index, node._element)
98            node.parent = self
99
100        def removeChild(self, node):
101            self._element.remove(node._element)
102            node.parent = None
103
104        def insertText(self, data, insertBefore=None):
105            if not(len(self._element)):
106                if not self._element.text:
107                    self._element.text = ""
108                self._element.text += data
109            elif insertBefore is None:
110                # Insert the text as the tail of the last child element
111                if not self._element[-1].tail:
112                    self._element[-1].tail = ""
113                self._element[-1].tail += data
114            else:
115                # Insert the text before the specified node
116                children = list(self._element)
117                index = children.index(insertBefore._element)
118                if index > 0:
119                    if not self._element[index - 1].tail:
120                        self._element[index - 1].tail = ""
121                    self._element[index - 1].tail += data
122                else:
123                    if not self._element.text:
124                        self._element.text = ""
125                    self._element.text += data
126
127        def cloneNode(self):
128            element = type(self)(self.name, self.namespace)
129            for name, value in self.attributes.items():
130                element.attributes[name] = value
131            return element
132
133        def reparentChildren(self, newParent):
134            if newParent.childNodes:
135                newParent.childNodes[-1]._element.tail += self._element.text
136            else:
137                if not newParent._element.text:
138                    newParent._element.text = ""
139                if self._element.text is not None:
140                    newParent._element.text += self._element.text
141            self._element.text = ""
142            _base.Node.reparentChildren(self, newParent)
143
144    class Comment(Element):
145        def __init__(self, data):
146            # Use the superclass constructor to set all properties on the
147            # wrapper element
148            self._element = ElementTree.Comment(data)
149            self.parent = None
150            self._childNodes = []
151            self._flags = []
152
153        def _getData(self):
154            return self._element.text
155
156        def _setData(self, value):
157            self._element.text = value
158
159        data = property(_getData, _setData)
160
161    class DocumentType(Element):
162        def __init__(self, name, publicId, systemId):
163            Element.__init__(self, "<!DOCTYPE>")
164            self._element.text = name
165            self.publicId = publicId
166            self.systemId = systemId
167
168        def _getPublicId(self):
169            return self._element.get("publicId", "")
170
171        def _setPublicId(self, value):
172            if value is not None:
173                self._element.set("publicId", value)
174
175        publicId = property(_getPublicId, _setPublicId)
176
177        def _getSystemId(self):
178            return self._element.get("systemId", "")
179
180        def _setSystemId(self, value):
181            if value is not None:
182                self._element.set("systemId", value)
183
184        systemId = property(_getSystemId, _setSystemId)
185
186    class Document(Element):
187        def __init__(self):
188            Element.__init__(self, "DOCUMENT_ROOT")
189
190    class DocumentFragment(Element):
191        def __init__(self):
192            Element.__init__(self, "DOCUMENT_FRAGMENT")
193
194    def testSerializer(element):
195        rv = []
196
197        def serializeElement(element, indent=0):
198            if not(hasattr(element, "tag")):
199                element = element.getroot()
200            if element.tag == "<!DOCTYPE>":
201                if element.get("publicId") or element.get("systemId"):
202                    publicId = element.get("publicId") or ""
203                    systemId = element.get("systemId") or ""
204                    rv.append("""<!DOCTYPE %s "%s" "%s">""" %
205                              (element.text, publicId, systemId))
206                else:
207                    rv.append("<!DOCTYPE %s>" % (element.text,))
208            elif element.tag == "DOCUMENT_ROOT":
209                rv.append("#document")
210                if element.text is not None:
211                    rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
212                if element.tail is not None:
213                    raise TypeError("Document node cannot have tail")
214                if hasattr(element, "attrib") and len(element.attrib):
215                    raise TypeError("Document node cannot have attributes")
216            elif element.tag == ElementTreeCommentType:
217                rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
218            else:
219                assert isinstance(element.tag, text_type), \
220                    "Expected unicode, got %s, %s" % (type(element.tag), element.tag)
221                nsmatch = tag_regexp.match(element.tag)
222
223                if nsmatch is None:
224                    name = element.tag
225                else:
226                    ns, name = nsmatch.groups()
227                    prefix = constants.prefixes[ns]
228                    name = "%s %s" % (prefix, name)
229                rv.append("|%s<%s>" % (' ' * indent, name))
230
231                if hasattr(element, "attrib"):
232                    attributes = []
233                    for name, value in element.attrib.items():
234                        nsmatch = tag_regexp.match(name)
235                        if nsmatch is not None:
236                            ns, name = nsmatch.groups()
237                            prefix = constants.prefixes[ns]
238                            attr_string = "%s %s" % (prefix, name)
239                        else:
240                            attr_string = name
241                        attributes.append((attr_string, value))
242
243                    for name, value in sorted(attributes):
244                        rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
245                if element.text:
246                    rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
247            indent += 2
248            for child in element:
249                serializeElement(child, indent)
250            if element.tail:
251                rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
252        serializeElement(element, 0)
253
254        return "\n".join(rv)
255
256    def tostring(element):
257        """Serialize an element and its child nodes to a string"""
258        rv = []
259        filter = ihatexml.InfosetFilter()
260
261        def serializeElement(element):
262            if isinstance(element, ElementTree.ElementTree):
263                element = element.getroot()
264
265            if element.tag == "<!DOCTYPE>":
266                if element.get("publicId") or element.get("systemId"):
267                    publicId = element.get("publicId") or ""
268                    systemId = element.get("systemId") or ""
269                    rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" %
270                              (element.text, publicId, systemId))
271                else:
272                    rv.append("<!DOCTYPE %s>" % (element.text,))
273            elif element.tag == "DOCUMENT_ROOT":
274                if element.text is not None:
275                    rv.append(element.text)
276                if element.tail is not None:
277                    raise TypeError("Document node cannot have tail")
278                if hasattr(element, "attrib") and len(element.attrib):
279                    raise TypeError("Document node cannot have attributes")
280
281                for child in element:
282                    serializeElement(child)
283
284            elif element.tag == ElementTreeCommentType:
285                rv.append("<!--%s-->" % (element.text,))
286            else:
287                # This is assumed to be an ordinary element
288                if not element.attrib:
289                    rv.append("<%s>" % (filter.fromXmlName(element.tag),))
290                else:
291                    attr = " ".join(["%s=\"%s\"" % (
292                        filter.fromXmlName(name), value)
293                        for name, value in element.attrib.items()])
294                    rv.append("<%s %s>" % (element.tag, attr))
295                if element.text:
296                    rv.append(element.text)
297
298                for child in element:
299                    serializeElement(child)
300
301                rv.append("</%s>" % (element.tag,))
302
303            if element.tail:
304                rv.append(element.tail)
305
306        serializeElement(element)
307
308        return "".join(rv)
309
310    class TreeBuilder(_base.TreeBuilder):
311        documentClass = Document
312        doctypeClass = DocumentType
313        elementClass = Element
314        commentClass = Comment
315        fragmentClass = DocumentFragment
316        implementation = ElementTreeImplementation
317
318        def testSerializer(self, element):
319            return testSerializer(element)
320
321        def getDocument(self):
322            if fullTree:
323                return self.document._element
324            else:
325                if self.defaultNamespace is not None:
326                    return self.document._element.find(
327                        "{%s}html" % self.defaultNamespace)
328                else:
329                    return self.document._element.find("html")
330
331        def getFragment(self):
332            return _base.TreeBuilder.getFragment(self)._element
333
334    return locals()
335
336
# Module-level entry point built by wrapping getETreeBuilder with
# moduleFactoryFactory (imported from ..utils).
# NOTE(review): presumably returns a module-like object exposing the names
# getETreeBuilder returns via locals() - confirm against ..utils.
getETreeModule = moduleFactoryFactory(getETreeBuilder)
338