• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1from __future__ import absolute_import, division, unicode_literals
2
3try:
4    from collections import OrderedDict
5except ImportError:
6    try:
7        from ordereddict import OrderedDict
8    except ImportError:
9        OrderedDict = dict
10
11import re
12
13from six import string_types
14
15from . import _base
16from ..utils import moduleFactoryFactory
17
# Splits a namespace-qualified name written as "{namespace}local" into the
# two groups (namespace, local).  Names without a leading "{...}" part do not
# match at all, which callers use to detect "no namespace".
tag_regexp = re.compile(r"{([^}]*)}(.*)")
19
20
def getETreeBuilder(ElementTreeImplementation):
    """Build the tree-walker namespace for one ElementTree implementation.

    ``ElementTreeImplementation`` must provide a ``Comment`` factory; the
    ``tag`` of a throwaway comment created with it is remembered so that
    comment nodes can be recognised later.

    Returns ``locals()``, i.e. a mapping holding ``ElementTreeImplementation``,
    ``ElementTree``, ``ElementTreeCommentType`` and ``TreeWalker``.  Because of
    that, the names bound directly in this function body are part of the
    returned mapping and must not be renamed or added to casually.
    """
    ElementTree = ElementTreeImplementation
    # Whatever this implementation uses as the ``tag`` of a comment node.
    ElementTreeCommentType = ElementTree.Comment("asd").tag

    class TreeWalker(_base.NonRecursiveTreeWalker):
        """Non-recursive walker over an ElementTree-style tree.

        Apart from the root, which may be handed over bare, every "node" is a
        4-tuple so that no recursion is required:

        1. the current element;

        2. the index of that element within its parent;

        3. a list used as a stack of ancestor elements (shared between the
           tuples and mutated in place while walking);

        4. a flag: "text" or "tail" when the node stands for the text or the
           tail of the element in (1), otherwise None for the element itself.
        """

        def getNodeDetails(self, node):
            """Return the details tuple describing ``node``.

            The first item is one of the ``_base`` constants TEXT, DOCUMENT,
            DOCTYPE, COMMENT or ELEMENT; the remaining items depend on it.
            """
            if isinstance(node, tuple):  # otherwise it may be the bare root
                element, _index, _ancestors, kind = node
                if kind in ("text", "tail"):
                    # Text nodes read the matching attribute of the element.
                    return _base.TEXT, getattr(element, kind)
                node = element

            # Objects without a ``tag`` are unwrapped through getroot().
            if not hasattr(node, "tag"):
                node = node.getroot()

            node_tag = node.tag
            if node_tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
                return (_base.DOCUMENT,)
            if node_tag == "<!DOCTYPE>":
                return (_base.DOCTYPE, node.text,
                        node.get("publicId"), node.get("systemId"))
            if node_tag == ElementTreeCommentType:
                return _base.COMMENT, node.text

            # Anything left is treated as an ordinary element.
            assert isinstance(node_tag, string_types), type(node_tag)
            qualified = tag_regexp.match(node_tag)
            if qualified:
                namespace, tag = qualified.groups()
            else:
                namespace, tag = None, node_tag

            # Attributes are keyed by (namespace, local name) pairs.
            attrs = OrderedDict()
            for name, value in node.attrib.items():
                qualified = tag_regexp.match(name)
                if qualified:
                    attr_key = (qualified.group(1), qualified.group(2))
                else:
                    attr_key = (None, name)
                attrs[attr_key] = value

            # Last item: the child count, or failing that the element's text.
            return (_base.ELEMENT, namespace, tag,
                    attrs, len(node) or node.text)

        def getFirstChild(self, node):
            """Return the first child node of ``node``, or None.

            The element's text (when non-empty) comes before its first child
            element.  Text and tail nodes have no children.
            """
            if isinstance(node, tuple):
                element, index, ancestors, kind = node
            else:
                # Bare root: start with a fresh, empty ancestor stack.
                element, index, ancestors, kind = node, None, [], None

            if kind in ("text", "tail"):
                return None
            if element.text:
                return element, index, ancestors, "text"
            if len(element):
                ancestors.append(element)
                return element[0], 0, ancestors, None
            return None

        def getNextSibling(self, node):
            """Return the node following ``node`` at the same level, or None.

            A bare (non-tuple) node has no sibling.
            """
            if not isinstance(node, tuple):
                return None
            element, index, ancestors, kind = node

            if kind == "text":
                # After an element's text comes its first child element.
                if not len(element):
                    return None
                ancestors.append(element)
                return element[0], 0, ancestors, None

            # An element is followed by its own tail text, if there is any.
            if element.tail and kind != "tail":
                return element, index, ancestors, "tail"

            siblings = ancestors[-1]
            if index < len(siblings) - 1:
                return siblings[index + 1], index + 1, ancestors, None
            return None

        def getParentNode(self, node):
            """Return the parent node of ``node``, or None for a bare node.

            The parent is returned bare when no ancestors remain on the stack
            and as a 4-tuple otherwise.
            """
            if not isinstance(node, tuple):
                return None
            element, index, ancestors, kind = node

            if kind == "text":
                # The parent of a text node is the element that owns it.
                if not ancestors:
                    return element
                return element, index, ancestors, None

            parent = ancestors.pop()
            if not ancestors:
                return parent
            # Recover the parent's own index by searching its parent's children.
            return parent, list(ancestors[-1]).index(parent), ancestors, None

    return locals()
135
# Module-level factory: getETreeBuilder wrapped by moduleFactoryFactory
# (imported from ..utils).
# NOTE(review): presumably calling this with an ElementTree implementation
# yields a module-like object exposing the names getETreeBuilder returns --
# confirm against ..utils.moduleFactoryFactory.
getETreeModule = moduleFactoryFactory(getETreeBuilder)
137