1
0
mirror of https://github.com/moparisthebest/SickRage synced 2024-11-14 05:15:12 -05:00
SickRage/lib/html5lib/treewalkers/_base.py
echel0n 0d9fbc1ad7 Welcome to our SickBeard-TVRage Edition ...
This version of SickBeard uses both TVDB and TVRage to search and gather it's series data from allowing you to now have access to and download shows that you couldn't before because of being locked into only what TheTVDB had to offer.

Also this edition is based off the code we used in our XEM editon so it does come with scene numbering support as well as all the other features our XEM edition has to offer.

Please before using this with your existing database (sickbeard.db) please make a backup copy of it and delete any other database files such as cache.db and failed.db if present, we HIGHLY recommend starting out with no database files at all to make this a fresh start but the choice is at your own risk!

Enjoy!
2014-03-09 22:39:12 -07:00

177 lines
5.8 KiB
Python

import gettext
_ = gettext.gettext
from html5lib.constants import voidElements, spaceCharacters
spaceCharacters = u"".join(spaceCharacters)
class TreeWalker(object):
def __init__(self, tree):
self.tree = tree
def __iter__(self):
raise NotImplementedError
def error(self, msg):
return {"type": "SerializeError", "data": msg}
def normalizeAttrs(self, attrs):
newattrs = {}
if attrs:
#TODO: treewalkers should always have attrs
for (namespace,name),value in attrs.iteritems():
namespace = unicode(namespace) if namespace else None
name = unicode(name)
value = unicode(value)
newattrs[(namespace,name)] = value
return newattrs
def emptyTag(self, namespace, name, attrs, hasChildren=False):
yield {"type": "EmptyTag", "name": unicode(name),
"namespace":unicode(namespace),
"data": self.normalizeAttrs(attrs)}
if hasChildren:
yield self.error(_("Void element has children"))
def startTag(self, namespace, name, attrs):
return {"type": "StartTag",
"name": unicode(name),
"namespace":unicode(namespace),
"data": self.normalizeAttrs(attrs)}
def endTag(self, namespace, name):
return {"type": "EndTag",
"name": unicode(name),
"namespace":unicode(namespace),
"data": {}}
def text(self, data):
data = unicode(data)
middle = data.lstrip(spaceCharacters)
left = data[:len(data)-len(middle)]
if left:
yield {"type": "SpaceCharacters", "data": left}
data = middle
middle = data.rstrip(spaceCharacters)
right = data[len(middle):]
if middle:
yield {"type": "Characters", "data": middle}
if right:
yield {"type": "SpaceCharacters", "data": right}
def comment(self, data):
return {"type": "Comment", "data": unicode(data)}
def doctype(self, name, publicId=None, systemId=None, correct=True):
return {"type": "Doctype",
"name": name is not None and unicode(name) or u"",
"publicId": publicId,
"systemId": systemId,
"correct": correct}
def entity(self, name):
return {"type": "Entity", "name": unicode(name)}
def unknown(self, nodeType):
return self.error(_("Unknown node type: ") + nodeType)
class RecursiveTreeWalker(TreeWalker):
def walkChildren(self, node):
raise NodeImplementedError
def element(self, node, namespace, name, attrs, hasChildren):
if name in voidElements:
for token in self.emptyTag(namespace, name, attrs, hasChildren):
yield token
else:
yield self.startTag(name, attrs)
if hasChildren:
for token in self.walkChildren(node):
yield token
yield self.endTag(name)
from xml.dom import Node
DOCUMENT = Node.DOCUMENT_NODE
DOCTYPE = Node.DOCUMENT_TYPE_NODE
TEXT = Node.TEXT_NODE
ELEMENT = Node.ELEMENT_NODE
COMMENT = Node.COMMENT_NODE
ENTITY = Node.ENTITY_NODE
UNKNOWN = "<#UNKNOWN#>"
class NonRecursiveTreeWalker(TreeWalker):
def getNodeDetails(self, node):
raise NotImplementedError
def getFirstChild(self, node):
raise NotImplementedError
def getNextSibling(self, node):
raise NotImplementedError
def getParentNode(self, node):
raise NotImplementedError
def __iter__(self):
currentNode = self.tree
while currentNode is not None:
details = self.getNodeDetails(currentNode)
type, details = details[0], details[1:]
hasChildren = False
endTag = None
if type == DOCTYPE:
yield self.doctype(*details)
elif type == TEXT:
for token in self.text(*details):
yield token
elif type == ELEMENT:
namespace, name, attributes, hasChildren = details
if name in voidElements:
for token in self.emptyTag(namespace, name, attributes,
hasChildren):
yield token
hasChildren = False
else:
endTag = name
yield self.startTag(namespace, name, attributes)
elif type == COMMENT:
yield self.comment(details[0])
elif type == ENTITY:
yield self.entity(details[0])
elif type == DOCUMENT:
hasChildren = True
else:
yield self.unknown(details[0])
if hasChildren:
firstChild = self.getFirstChild(currentNode)
else:
firstChild = None
if firstChild is not None:
currentNode = firstChild
else:
while currentNode is not None:
details = self.getNodeDetails(currentNode)
type, details = details[0], details[1:]
if type == ELEMENT:
namespace, name, attributes, hasChildren = details
if name not in voidElements:
yield self.endTag(namespace, name)
if self.tree is currentNode:
currentNode = None
break
nextSibling = self.getNextSibling(currentNode)
if nextSibling is not None:
currentNode = nextSibling
break
else:
currentNode = self.getParentNode(currentNode)