from __future__ import absolute_import, division, unicode_literals

from genshi.core import QName
from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT

from . import _base

from ..constants import voidElements, namespaces


class TreeWalker(_base.TreeWalker):
    """Tree walker that turns a Genshi event stream into walker tokens.

    ``self.tree`` is iterated as a sequence of ``(kind, data, pos)`` events;
    each event is translated into zero or more tokens using the token
    factory methods inherited from ``_base.TreeWalker``.
    """

    def __iter__(self):
        """Yield the tokens for every event in ``self.tree``.

        Events are buffered by one so that each event can be translated
        together with the event that follows it (``None`` for the last one).
        """
        # Buffer the events so we can pass in the following one
        previous = None
        for event in self.tree:
            if previous is not None:
                for token in self.tokens(previous, event):
                    yield token
            previous = event

        # Don't forget the final event!
        if previous is not None:
            for token in self.tokens(previous, None):
                yield token

    def tokens(self, event, next):
        """Translate a single Genshi event into tokens.

        :arg event: the ``(kind, data, pos)`` event to translate
        :arg next: the event following ``event`` in the stream, or ``None``
            when ``event`` is the last one

        :returns: a generator of tokens (possibly empty)
        """
        kind, data, pos = event
        if kind == START:
            tag, attribs = data
            name = tag.localname
            namespace = tag.namespace
            # Attributes are keyed by (namespace, localname); plain
            # (non-QName) attribute names get a namespace of None.
            converted_attribs = {}
            for k, v in attribs:
                if isinstance(k, QName):
                    converted_attribs[(k.namespace, k.localname)] = v
                else:
                    converted_attribs[(None, k)] = v

            if namespace == namespaces["html"] and name in voidElements:
                # The last argument is true when the following event is not
                # the END event for this same tag (presumably signalling
                # unexpected content inside a void element -- confirm
                # against _base.TreeWalker.emptyTag).
                for token in self.emptyTag(namespace, name, converted_attribs,
                                           not next or next[0] != END or
                                           next[1] != tag):
                    yield token
            else:
                yield self.startTag(namespace, name, converted_attribs)

        elif kind == END:
            name = data.localname
            namespace = data.namespace
            # Mirror the START branch: only HTML-namespace void elements were
            # emitted via emptyTag(). Any other element -- including a
            # foreign-namespace element that merely shares a void element's
            # local name -- received a startTag and needs its endTag.
            if namespace != namespaces["html"] or name not in voidElements:
                yield self.endTag(namespace, name)

        elif kind == COMMENT:
            yield self.comment(data)

        elif kind == TEXT:
            for token in self.text(data):
                yield token

        elif kind == DOCTYPE:
            yield self.doctype(*data)

        elif kind in (XML_NAMESPACE, START_NS, END_NS,
                      START_CDATA, END_CDATA, PI):
            # These event kinds are deliberately dropped (no tokens emitted).
            pass

        else:
            yield self.unknown(kind)