from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT from genshi.output import NamespaceFlattener import _base from html5lib.constants import voidElements class TreeWalker(_base.TreeWalker): def __iter__(self): depth = 0 ignore_until = None previous = None for event in self.tree: if previous is not None: if previous[0] == START: depth += 1 if ignore_until <= depth: ignore_until = None if ignore_until is None: for token in self.tokens(previous, event): yield token if token["type"] == "EmptyTag": ignore_until = depth if previous[0] == END: depth -= 1 previous = event if previous is not None: if ignore_until is None or ignore_until <= depth: for token in self.tokens(previous, None): yield token elif ignore_until is not None: raise ValueError("Illformed DOM event stream: void element without END_ELEMENT") def tokens(self, event, next): kind, data, pos = event if kind == START: tag, attrib = data name = tag.localname namespace = tag.namespace if tag in voidElements: for token in self.emptyTag(namespace, name, list(attrib), not next or next[0] != END or next[1] != tag): yield token else: yield self.startTag(namespace, name, list(attrib)) elif kind == END: name = data.localname namespace = data.namespace if name not in voidElements: yield self.endTag(namespace, name) elif kind == COMMENT: yield self.comment(data) elif kind == TEXT: for token in self.text(data): yield token elif kind == DOCTYPE: yield self.doctype(*data) elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS, \ START_CDATA, END_CDATA, PI): pass else: yield self.unknown(kind)