• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1from __future__ import absolute_import, division, unicode_literals
2
3from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \
4    COMMENT, IGNORABLE_WHITESPACE, CHARACTERS
5
6from . import _base
7
8from ..constants import voidElements
9
10
class TreeWalker(_base.TreeWalker):
    """Tree walker that converts a stream of pulldom events into tokens.

    ``self.tree`` is iterated as a sequence of ``(event_type, node)`` pairs,
    where ``event_type`` is compared against the ``xml.dom.pulldom`` event
    constants.
    """

    def __iter__(self):
        """Yield the tokens produced for each event in ``self.tree``.

        Events are handled one step behind the iteration so that every event
        can be passed to :meth:`tokens` together with the event that follows
        it. Once a token of type ``"EmptyTag"`` has been emitted for a node,
        subsequent events are skipped until one carrying that same node is
        reached.

        An empty event stream yields no tokens.

        Raises:
            ValueError: if the stream ends while events are still being
                skipped and the final event does not carry the awaited node.
        """
        ignore_until = None
        previous = None
        for event in self.tree:
            if previous is not None and \
                    (ignore_until is None or previous[1] is ignore_until):
                if previous[1] is ignore_until:
                    # Reached the node we were waiting for; stop skipping.
                    ignore_until = None
                for token in self.tokens(previous, event):
                    yield token
                    if token["type"] == "EmptyTag":
                        # Skip everything up to the next event for this node.
                        ignore_until = previous[1]
            previous = event

        if previous is None:
            # No events at all: there is no trailing event to flush, and
            # passing None to tokens() would fail while unpacking it.
            return

        # Flush the last event, which has no successor.
        if ignore_until is None or previous[1] is ignore_until:
            for token in self.tokens(previous, None):
                yield token
        else:
            raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")

    def tokens(self, event, next):
        """Yield the token(s) corresponding to a single pulldom event.

        Args:
            event: an ``(event_type, node)`` pair.
            next: the event that follows ``event`` in the stream, or ``None``
                when ``event`` is the last one.
        """
        event_type, node = event
        if event_type == START_ELEMENT:
            name = node.nodeName
            namespace = node.namespaceURI
            # Attributes are keyed by (namespace URI, local name).
            attrs = {}
            for attr_name in list(node.attributes.keys()):
                attr = node.getAttributeNode(attr_name)
                attrs[(attr.namespaceURI, attr.localName)] = attr.value
            if name in voidElements:
                # The last argument is true when there is no following event
                # or the following event belongs to a different node.
                # NOTE(review): presumably this is emptyTag's "has children"
                # flag -- confirm against _base.TreeWalker.emptyTag.
                for token in self.emptyTag(namespace,
                                           name,
                                           attrs,
                                           not next or next[1] is not node):
                    yield token
            else:
                yield self.startTag(namespace, name, attrs)

        elif event_type == END_ELEMENT:
            name = node.nodeName
            namespace = node.namespaceURI
            # Void elements get no end tag token.
            if name not in voidElements:
                yield self.endTag(namespace, name)

        elif event_type == COMMENT:
            yield self.comment(node.nodeValue)

        elif event_type in (IGNORABLE_WHITESPACE, CHARACTERS):
            for token in self.text(node.nodeValue):
                yield token

        else:
            yield self.unknown(event_type)
64