mirror of
https://github.com/moparisthebest/SickRage
synced 2025-01-07 03:48:02 -05:00
71 lines
2.4 KiB
Python
71 lines
2.4 KiB
Python
|
from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
|
||
|
from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
|
||
|
from genshi.output import NamespaceFlattener
|
||
|
|
||
|
import _base
|
||
|
|
||
|
from html5lib.constants import voidElements
|
||
|
|
||
|
class TreeWalker(_base.TreeWalker):
    """Tree walker that converts the event stream in ``self.tree`` to tokens.

    Every event is a ``(kind, data, pos)`` triple; ``kind`` is compared
    against the event-kind constants imported from ``genshi.core``.
    """

    def __iter__(self):
        """Yield the tokens for every event in ``self.tree``.

        Each event is passed to :meth:`tokens` together with the event that
        follows it, so the loop runs one event behind the stream and the last
        event is flushed afterwards with ``None`` as its successor.

        Raises:
            ValueError: if the stream ends while ``ignore_until`` still
                records a depth greater than the current one.
        """
        depth = 0
        ignore_until = None
        previous = None
        for event in self.tree:
            if previous is not None:
                if previous[0] == START:
                    depth += 1
                # Only compare once a depth has been recorded: ordering None
                # against an int raises TypeError on Python 3.  On Python 2
                # None sorted below every int, so the outcome is unchanged.
                if ignore_until is not None and ignore_until <= depth:
                    ignore_until = None
                if ignore_until is None:
                    for token in self.tokens(previous, event):
                        yield token
                        if token["type"] == "EmptyTag":
                            # Remember the depth at which the void element
                            # was opened.
                            ignore_until = depth
                if previous[0] == END:
                    depth -= 1
            previous = event

        # Flush the final buffered event; it has no successor.
        if previous is not None:
            if ignore_until is None or ignore_until <= depth:
                for token in self.tokens(previous, None):
                    yield token
            else:
                raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")

    def tokens(self, event, next):
        """Yield the token(s) corresponding to a single stream event.

        Args:
            event: the ``(kind, data, pos)`` triple to convert.
            next: the event following ``event`` in the stream, or ``None``
                when ``event`` is the last one.

        START events become a start tag, or an empty tag when the element's
        local name is in ``voidElements``.  END events become an end tag
        unless the local name is in ``voidElements``.  COMMENT, TEXT and
        DOCTYPE events are forwarded to the matching helper.  Namespace,
        CDATA and processing-instruction events produce nothing.  Any other
        kind is reported through ``self.unknown``.
        """
        kind, data, _ = event
        if kind == START:
            tag, attrib = data
            name = tag.localname
            namespace = tag.namespace
            # Test the local name, exactly as the END branch does.  Testing
            # the qualified ``tag`` misses namespaced void elements, which
            # then get a start tag whose end tag is suppressed below.
            if name in voidElements:
                # True unless the very next event is this element's own END.
                # NOTE(review): presumably emptyTag treats this as a
                # "has children" flag -- confirm against _base.TreeWalker.
                not_closed_immediately = (not next or next[0] != END
                                          or next[1] != tag)
                for token in self.emptyTag(namespace, name, list(attrib),
                                           not_closed_immediately):
                    yield token
            else:
                yield self.startTag(namespace, name, list(attrib))

        elif kind == END:
            name = data.localname
            namespace = data.namespace
            if name not in voidElements:
                yield self.endTag(namespace, name)

        elif kind == COMMENT:
            yield self.comment(data)

        elif kind == TEXT:
            for token in self.text(data):
                yield token

        elif kind == DOCTYPE:
            yield self.doctype(*data)

        elif kind in (XML_NAMESPACE, START_NS, END_NS,
                      START_CDATA, END_CDATA, PI):
            # These event kinds have no token representation.
            pass

        else:
            yield self.unknown(kind)
|